Reimplemented getNextToken
[svjatoslav_commons.git] / src / main / java / eu / svjatoslav / commons / string / tokenizer / Tokenizer.java
index 899f8c4..e92ccd7 100755 (executable)
@@ -12,8 +12,10 @@ package eu.svjatoslav.commons.string.tokenizer;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Stack;
+import java.util.stream.Stream;
 
 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
+import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
 
 public class Tokenizer {
 
@@ -31,6 +33,7 @@ public class Tokenizer {
     public Tokenizer setSource(String source){
         this.source = source;
         currentIndex = 0;
+        tokenIndexes.clear(); // drop the positions getNextToken() pushed while reading the previous source
         return this;
     }
 
@@ -46,7 +49,7 @@ public class Tokenizer {
         return this;
     }
 
-    public void expectNextToken(final String value)
+    public void expectAndConsumeNextToken(final String value)
             throws InvalidSyntaxException {
         final TokenizerMatch match = getNextToken();
         if (!value.equals(match.token))
@@ -56,46 +59,52 @@ public class Tokenizer {
 
+    /**
+     * Reads the next token starting at the current position.
+     *
+     * The current position is first pushed onto {@code tokenIndexes}. Characters
+     * are then accumulated until a terminator matches at the current position:
+     * a PRESERVE terminator is returned as a token of its own (after any text
+     * accumulated before it has been returned first), while a DROP terminator is
+     * consumed and only ends the accumulated text, if there is any.
+     *
+     * @return the next match, or {@code null} when the end of the source is
+     *         reached before a token could be completed
+     */
     public TokenizerMatch getNextToken() {
         tokenIndexes.push(currentIndex);
-        final StringBuilder result = new StringBuilder();
-
-        while (true) {
-            if (currentIndex >= source.length())
-                return null;
-
-            boolean accumulateCurrentChar = true;
-
-            for (final Terminator terminator : terminators)
-                if (sequenceMatches(terminator.startSequence))
-
-                    if (terminator.termination == DROP) {
-                        currentIndex += terminator.startSequence.length();
-
-                        if (terminator.endSequence != null)
-                            skipUntilSequence(terminator.endSequence);
-
-                        if (result.length() > 0)
-                            return new TokenizerMatch(result.toString(),
-                                    terminator);
-                        else {
-                            accumulateCurrentChar = false;
-                            break;
-                        }
-                    } else if (result.length() > 0)
-                        return new TokenizerMatch(result.toString(), terminator);
-                    else {
-                        currentIndex += terminator.startSequence.length();
-                        return new TokenizerMatch(terminator.startSequence,
-                                terminator);
-                    }
 
-            if (accumulateCurrentChar) {
-                result.append(source.charAt(currentIndex));
+        StringBuilder token = new StringBuilder();
+
+        while (true){
+            // End of input: keep the contract of the previous implementation and
+            // return null instead of letting charAt() run past the end of source.
+            // NOTE(review): as before, text accumulated without a closing
+            // terminator is discarded here - confirm that this is intended.
+            if (currentIndex >= source.length())
+                return null;
+
+            // Look the terminator up once per position and reuse the result.
+            Terminator tokenTerminator = findTokenTerminator();
+
+            if (tokenTerminator != null){
+                if (tokenTerminator.termination == PRESERVE){
+                    if (hasAccumulatedToken(token)){
+                        // Return the text collected so far; the terminator is left
+                        // unconsumed so that the next call returns it as a token.
+                        return new TokenizerMatch(token.toString(), "", tokenTerminator);
+                    } else {
+                        // The whole start sequence is returned as the token, so the
+                        // whole sequence has to be consumed, not just one character.
+                        currentIndex += tokenTerminator.startSequence.length();
+                        return new TokenizerMatch(tokenTerminator.startSequence, "", tokenTerminator);
+                    }
+                } else if (tokenTerminator.termination == DROP){
+                    // A dropped terminator is always consumed in full.
+                    // NOTE(review): the removed implementation additionally skipped up
+                    // to terminator.endSequence; that is not done here - confirm.
+                    currentIndex += tokenTerminator.startSequence.length();
+
+                    if (hasAccumulatedToken(token))
+                        return new TokenizerMatch(token.toString(), "", tokenTerminator);
+                }
+            } else {
+                token.append(source.charAt(currentIndex));
                 currentIndex++;
             }
         }
 
     }
 
+    /**
+     * Tells whether any characters have been collected into the given buffer.
+     *
+     * @param token buffer holding the token text gathered so far
+     * @return {@code true} when the buffer is not empty
+     */
+    private boolean hasAccumulatedToken(StringBuilder token) {
+        final int accumulatedLength = token.length();
+        return accumulatedLength != 0;
+    }
+
+    /**
+     * Tells whether some terminator matches at the current position.
+     *
+     * @return {@code true} when {@link #findTokenTerminator()} finds a terminator
+     */
+    private boolean isTokenTermination() {
+        final Terminator terminatorHere = findTokenTerminator();
+        return terminatorHere != null;
+    }
+
+    /**
+     * Searches the registered terminators, in iteration order, for the first one
+     * that matches the source at the current position.
+     *
+     * @return the first matching terminator, or {@code null} when none matches
+     */
+    public Terminator findTokenTerminator() {
+        Terminator found = null;
+        for (Terminator candidate : terminators) {
+            if (candidate.matches(source, currentIndex)) {
+                found = candidate;
+                break;
+            }
+        }
+        return found;
+    }
+
     public boolean consumeIfNextToken(final String token) {
         if (token.equals(getNextToken().token))
             return true;
@@ -104,6 +113,23 @@ public class Tokenizer {
         return false;
     }
 
+    /**
+     * Reads the next token and then calls {@code unreadToken()}, presumably so
+     * that the same token is read again by the following call.
+     *
+     * @return whatever {@link #getNextToken()} returned
+     */
+    public TokenizerMatch peekNextToken(){
+        final TokenizerMatch upcoming = getNextToken();
+        unreadToken();
+        return upcoming;
+    }
+
+    /**
+     * Peeks at the next token and checks it against the given candidates.
+     *
+     * @param possibilities candidate token texts
+     * @return {@code true} when the peeked token equals one of the candidates
+     */
+    public boolean peekIsOneOf(String ... possibilities){
+        final String upcoming = peekNextToken().token;
+        for (final String candidate : possibilities) {
+            if (candidate.equals(upcoming)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Peeks at the next token and rejects it when it equals one of the given
+     * candidates.
+     *
+     * The token is peeked only once and reused for both the comparison and the
+     * error message, so the input is not tokenized a second time.
+     *
+     * @param possibilities token texts that are not allowed at this position
+     * @throws InvalidSyntaxException when the peeked token equals one of the candidates
+     */
+    public void peekExpectNoneOf(String ... possibilities) throws InvalidSyntaxException {
+        final String nextToken = peekNextToken().token;
+        for (final String possibility : possibilities)
+            if (possibility.equals(nextToken))
+                throw new InvalidSyntaxException("Not expected \"" + nextToken + "\" here.");
+    }
+
+
     public boolean sequenceMatches(final String sequence) {
         if ((currentIndex + sequence.length()) > source.length())
             return false;