Added possibility to skip token stream until the end
[svjatoslav_commons.git] / src / main / java / eu / svjatoslav / commons / string / tokenizer / Tokenizer.java
index ba1590d..c80aeb1 100755 (executable)
@@ -12,25 +12,35 @@ package eu.svjatoslav.commons.string.tokenizer;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Stack;
+import java.util.stream.Stream;
 
 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
+import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
 
 public class Tokenizer {
 
    // Start positions of already-returned tokens, enabling unreadToken().
    private final Stack<Integer> tokenIndexes = new Stack<>();
    // Registered terminators; checked in registration order, first match wins.
    private final List<Terminator> terminators = new ArrayList<>();
    // Input being tokenized.
    private String source;
    // Cursor into source; index of the next unread character.
    private int currentIndex = 0;

    // Cache for the terminator lookup at currentIndex, so repeated lookups at
    // the same position do not re-scan the terminator list.
    private int cachedTerminatorIndex = -1;
    private Terminator cachedTerminator;

    /**
     * Creates a tokenizer over the given input.
     *
     * @param source text to tokenize
     */
    public Tokenizer(final String source) {
        this.source = source;
    }

    /** Creates a tokenizer with no input; call {@link #setSource(String)} before use. */
    public Tokenizer() {
    }

    /**
     * Replaces the input and fully resets all tokenizer state, including the
     * unread-token stack and the terminator cache.
     *
     * @param source new text to tokenize
     * @return this tokenizer, for call chaining
     */
    public Tokenizer setSource(String source) {
        this.source = source;
        currentIndex = 0;
        tokenIndexes.clear();

        cachedTerminatorIndex = -1;
        cachedTerminator = null;
        return this;
    }
 
@@ -46,7 +56,7 @@ public class Tokenizer {
         return this;
     }
 
-    public void expectNextToken(final String value)
+    public void expectAndConsumeNextToken(final String value)
             throws InvalidSyntaxException {
         final TokenizerMatch match = getNextToken();
         if (!value.equals(match.token))
@@ -54,102 +64,143 @@ public class Tokenizer {
                     + "\" but got \"" + match.token + "\" instead.");
     }
 
-    public TokenizerMatch getNextToken() {
+
+    public TokenizerMatch getNextToken() throws InvalidSyntaxException {
         tokenIndexes.push(currentIndex);
-        final StringBuilder result = new StringBuilder();
+
+        StringBuilder tokenAccumulator = new StringBuilder();
 
         while (true) {
-            if (currentIndex >= source.length())
-                return null;
-
-            boolean accumulateCurrentChar = true;
-
-            for (final Terminator terminator : terminators)
-                if (sequenceMatches(terminator.startSequence))
-
-                    if (terminator.termination == DROP) {
-                        currentIndex += terminator.startSequence.length();
-
-                        if (terminator.endSequence != null)
-                            skipUntilSequence(terminator.endSequence);
-
-                        if (result.length() > 0)
-                            return new TokenizerMatch(result.toString(),
-                                    terminator);
-                        else {
-                            accumulateCurrentChar = false;
-                            break;
-                        }
-                    } else if (result.length() > 0)
-                        return new TokenizerMatch(result.toString(), terminator);
-                    else {
-                        currentIndex += terminator.startSequence.length();
-                        return new TokenizerMatch(terminator.startSequence,
-                                terminator);
-                    }
-
-            if (accumulateCurrentChar) {
-                result.append(source.charAt(currentIndex));
+
+            if (currentIndex >= source.length()) { // reached end of input
+                if (hasAccumulatedToken(tokenAccumulator))
+                    return new TokenizerMatch(tokenAccumulator.toString(), null, null);
+                else
+                    return null;
+            }
+
+            if (isOngoingToken()) {
+                tokenAccumulator.append(source.charAt(currentIndex));
                 currentIndex++;
+                continue;
+            }
+
+            Terminator terminator = getOrFindTokenTerminator();
+
+            if (terminator.termination == PRESERVE)
+                return buildPreservedToken(tokenAccumulator, terminator);
+            else if (terminator.termination == DROP) {
+                skipUntilTerminatorEnd(terminator);
+
+                if (hasAccumulatedToken(tokenAccumulator))
+                    return new TokenizerMatch(tokenAccumulator.toString(), null, terminator);
             }
         }
 
     }
 
-    public boolean consumeIfNextToken(final String token) {
-        if (token.equals(getNextToken().token))
-            return true;
+    private void skipUntilTerminatorEnd(Terminator terminator) throws InvalidSyntaxException {
+        if (terminator.hasEndSequence())
+            currentIndex = getEndSequenceIndex(terminator) + terminator.endSequence.length();
+        else
+            currentIndex += terminator.startSequence.length();
+    }
 
-        unreadToken();
-        return false;
+    private TokenizerMatch buildPreservedToken(StringBuilder token, Terminator terminator) throws InvalidSyntaxException {
+        if (hasAccumulatedToken(token))
+            return new TokenizerMatch(token.toString(), null, terminator);
+
+        if (terminator.hasEndSequence())
+            return buildComplexPreservedToken(terminator);
+        else
+            return buildSimplePreservedToken(terminator);
     }
 
-    public TokenizerMatch peekNextToken(){
-        TokenizerMatch result = getNextToken();
-        unreadToken();
-        return result;
+    private TokenizerMatch buildSimplePreservedToken(Terminator terminator) {
+        currentIndex += terminator.startSequence.length();
+        return new TokenizerMatch(terminator.startSequence, null, terminator);
     }
 
-    public boolean peekIsOneOf(String [] ... possibilities){
-        TokenizerMatch nextToken = peekNextToken();
+    private TokenizerMatch buildComplexPreservedToken(Terminator terminator) throws InvalidSyntaxException {
+        int endSequenceIndex = getEndSequenceIndex(terminator);
+        String reminder = source.substring(currentIndex + terminator.startSequence.length(), endSequenceIndex);
+        currentIndex = endSequenceIndex + terminator.endSequence.length();
 
-        for (String[] possibility : possibilities)
-            if (possibility.equals(nextToken))
-                return true;
+        return new TokenizerMatch(terminator.startSequence, reminder, terminator);
+    }
 
-        return false;
+    private int getEndSequenceIndex(Terminator terminator) throws InvalidSyntaxException {
+        int endSequenceIndex = source.indexOf(terminator.endSequence,
+                currentIndex + terminator.startSequence.length());
+
+        if (endSequenceIndex < 0)
+            throw new InvalidSyntaxException("Expected \"" + terminator.endSequence + "\" but not found.");
+
+        return endSequenceIndex;
+    }
+
+    private boolean hasAccumulatedToken(StringBuilder token) {
+        return token.length() > 0;
     }
 
+    private boolean isOngoingToken() {
+        return getOrFindTokenTerminator() == null;
+    }
 
-    public boolean sequenceMatches(final String sequence) {
-        if ((currentIndex + sequence.length()) > source.length())
-            return false;
+    public boolean hasMoreTokens() {
+        return currentIndex < source.length();
+    }
 
-        for (int i = 0; i < sequence.length(); i++)
-            if (sequence.charAt(i) != source.charAt(i + currentIndex))
-                return false;
+    /**
+     * Attempts to cache terminator search result.
+     */
+    public Terminator getOrFindTokenTerminator() {
+        if (currentIndex == cachedTerminatorIndex)
+            return cachedTerminator;
 
-        return true;
+        cachedTerminatorIndex = currentIndex;
+        cachedTerminator = findTokenTerminator();
+        return cachedTerminator;
     }
 
-    public void skipUntilDataEnd() {
-        tokenIndexes.push(currentIndex);
-        currentIndex = source.length();
+    private Terminator findTokenTerminator() {
+        for (Terminator terminator : terminators)
+            if (terminator.matches(source, currentIndex))
+                return terminator;
+        return null;
     }
 
-    public void skipUntilSequence(final String sequence) {
-        while (currentIndex < source.length()) {
-            if (sequenceMatches(sequence)) {
-                currentIndex += sequence.length();
-                return;
-            }
+    public boolean consumeIfNextToken(final String token) throws InvalidSyntaxException {
+        if (token.equals(getNextToken().token))
+            return true;
 
-            currentIndex++;
-        }
+        unreadToken();
+        return false;
+    }
+
+    public TokenizerMatch peekNextToken() throws InvalidSyntaxException {
+        TokenizerMatch result = getNextToken();
+        unreadToken();
+        return result;
+    }
+
+    public boolean peekIsOneOf(String... possibilities) throws InvalidSyntaxException {
+        String nextToken = peekNextToken().token;
+        return Stream.of(possibilities).anyMatch(possibility -> possibility.equals(nextToken));
+    }
+
+    public void peekExpectNoneOf(String... possibilities) throws InvalidSyntaxException {
+        if (peekIsOneOf(possibilities))
+            throw new InvalidSyntaxException("Not expected \"" + peekNextToken().token + "\" here.");
     }
 
    /** Rewinds the cursor to the start of the most recently read token. */
    public void unreadToken() {
        currentIndex = tokenIndexes.pop();
    }

    /** Skips all remaining input; the skip can be undone with {@link #unreadToken()}. */
    public void skipUntilDataEnd() {
        tokenIndexes.push(currentIndex);
        currentIndex = source.length();
    }
+
 }