Added the ability to skip the token stream until the end
[svjatoslav_commons.git] / src / main / java / eu / svjatoslav / commons / string / tokenizer / Tokenizer.java
index e92ccd7..c80aeb1 100755 (executable)
@@ -19,21 +19,28 @@ import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrat
 
 public class Tokenizer {
 
-    final Stack<Integer> tokenIndexes = new Stack<>();
+    private final Stack<Integer> tokenIndexes = new Stack<>();
     private final List<Terminator> terminators = new ArrayList<>();
     private String source;
     private int currentIndex = 0;
 
+    private int cachedTerminatorIndex = -1;
+    private Terminator cachedTerminator;
+
     public Tokenizer(final String source) {
         this.source = source;
     }
 
-    public Tokenizer(){}
+    public Tokenizer() {
+    }
 
-    public Tokenizer setSource(String source){
+    public Tokenizer setSource(String source) {
         this.source = source;
         currentIndex = 0;
         tokenIndexes.clear();
+
+        cachedTerminatorIndex = -1;
+        cachedTerminator = null;
         return this;
     }
 
@@ -57,55 +64,113 @@ public class Tokenizer {
                     + "\" but got \"" + match.token + "\" instead.");
     }
 
-    public TokenizerMatch getNextToken() {
+
+    public TokenizerMatch getNextToken() throws InvalidSyntaxException {
         tokenIndexes.push(currentIndex);
 
-        StringBuilder token = new StringBuilder();
-
-        while (true){
-            if (isTokenTermination()){
-                Terminator tokenTerminator = findTokenTerminator();
-
-                if (tokenTerminator.termination == PRESERVE){
-                    if (hasAccumulatedToken(token)){
-                        // already assembled some token
-                        return new TokenizerMatch(token.toString(), "", tokenTerminator);
-                    } else {
-                        currentIndex++;
-                        return new TokenizerMatch(tokenTerminator.startSequence, "", tokenTerminator);
-                    }
-                } else if (tokenTerminator.termination == DROP){
-                    if (hasAccumulatedToken(token)){
-                        currentIndex++;
-                        return new TokenizerMatch(token.toString(), "", tokenTerminator);
-                    } else {
-                        currentIndex++;
-                    }
-                }
-            } else {
-                token.append(source.charAt(currentIndex));
+        StringBuilder tokenAccumulator = new StringBuilder();
+
+        while (true) {
+
+            if (currentIndex >= source.length()) { // reached end of input
+                if (hasAccumulatedToken(tokenAccumulator))
+                    return new TokenizerMatch(tokenAccumulator.toString(), null, null);
+                else
+                    return null;
+            }
+
+            if (isOngoingToken()) {
+                tokenAccumulator.append(source.charAt(currentIndex));
                 currentIndex++;
+                continue;
+            }
+
+            Terminator terminator = getOrFindTokenTerminator();
+
+            if (terminator.termination == PRESERVE)
+                return buildPreservedToken(tokenAccumulator, terminator);
+            else if (terminator.termination == DROP) {
+                skipUntilTerminatorEnd(terminator);
+
+                if (hasAccumulatedToken(tokenAccumulator))
+                    return new TokenizerMatch(tokenAccumulator.toString(), null, terminator);
             }
         }
 
     }
 
+    private void skipUntilTerminatorEnd(Terminator terminator) throws InvalidSyntaxException {
+        if (terminator.hasEndSequence())
+            currentIndex = getEndSequenceIndex(terminator) + terminator.endSequence.length();
+        else
+            currentIndex += terminator.startSequence.length();
+    }
+
+    private TokenizerMatch buildPreservedToken(StringBuilder token, Terminator terminator) throws InvalidSyntaxException {
+        if (hasAccumulatedToken(token))
+            return new TokenizerMatch(token.toString(), null, terminator);
+
+        if (terminator.hasEndSequence())
+            return buildComplexPreservedToken(terminator);
+        else
+            return buildSimplePreservedToken(terminator);
+    }
+
+    private TokenizerMatch buildSimplePreservedToken(Terminator terminator) {
+        currentIndex += terminator.startSequence.length();
+        return new TokenizerMatch(terminator.startSequence, null, terminator);
+    }
+
+    private TokenizerMatch buildComplexPreservedToken(Terminator terminator) throws InvalidSyntaxException {
+        int endSequenceIndex = getEndSequenceIndex(terminator);
+        String reminder = source.substring(currentIndex + terminator.startSequence.length(), endSequenceIndex);
+        currentIndex = endSequenceIndex + terminator.endSequence.length();
+
+        return new TokenizerMatch(terminator.startSequence, reminder, terminator);
+    }
+
+    private int getEndSequenceIndex(Terminator terminator) throws InvalidSyntaxException {
+        int endSequenceIndex = source.indexOf(terminator.endSequence,
+                currentIndex + terminator.startSequence.length());
+
+        if (endSequenceIndex < 0)
+            throw new InvalidSyntaxException("Expected \"" + terminator.endSequence + "\" but not found.");
+
+        return endSequenceIndex;
+    }
+
     private boolean hasAccumulatedToken(StringBuilder token) {
         return token.length() > 0;
     }
 
-    private boolean isTokenTermination() {
-        return findTokenTerminator() != null;
+    private boolean isOngoingToken() {
+        return getOrFindTokenTerminator() == null;
     }
 
-    public Terminator findTokenTerminator() {
+    public boolean hasMoreTokens() {
+        return currentIndex < source.length();
+    }
+
+    /**
+     * Returns the terminator matching at the current index (or null if none), reusing the cached result while the index is unchanged.
+     */
+    public Terminator getOrFindTokenTerminator() {
+        if (currentIndex == cachedTerminatorIndex)
+            return cachedTerminator;
+
+        cachedTerminatorIndex = currentIndex;
+        cachedTerminator = findTokenTerminator();
+        return cachedTerminator;
+    }
+
+    private Terminator findTokenTerminator() {
         for (Terminator terminator : terminators)
             if (terminator.matches(source, currentIndex))
                 return terminator;
         return null;
     }
 
-    public boolean consumeIfNextToken(final String token) {
+    public boolean consumeIfNextToken(final String token) throws InvalidSyntaxException {
         if (token.equals(getNextToken().token))
             return true;
 
@@ -113,32 +178,24 @@ public class Tokenizer {
         return false;
     }
 
-    public TokenizerMatch peekNextToken(){
+    public TokenizerMatch peekNextToken() throws InvalidSyntaxException {
         TokenizerMatch result = getNextToken();
         unreadToken();
         return result;
     }
 
-    public boolean peekIsOneOf(String ... possibilities){
+    public boolean peekIsOneOf(String... possibilities) throws InvalidSyntaxException {
         String nextToken = peekNextToken().token;
         return Stream.of(possibilities).anyMatch(possibility -> possibility.equals(nextToken));
     }
 
-    public void peekExpectNoneOf(String ... possibilities) throws InvalidSyntaxException {
+    public void peekExpectNoneOf(String... possibilities) throws InvalidSyntaxException {
         if (peekIsOneOf(possibilities))
             throw new InvalidSyntaxException("Not expected \"" + peekNextToken().token + "\" here.");
     }
 
-
-    public boolean sequenceMatches(final String sequence) {
-        if ((currentIndex + sequence.length()) > source.length())
-            return false;
-
-        for (int i = 0; i < sequence.length(); i++)
-            if (sequence.charAt(i) != source.charAt(i + currentIndex))
-                return false;
-
-        return true;
+    public void unreadToken() {
+        currentIndex = tokenIndexes.pop();
     }
 
     public void skipUntilDataEnd() {
@@ -146,19 +203,4 @@ public class Tokenizer {
         currentIndex = source.length();
     }
 
-    public void skipUntilSequence(final String sequence) {
-        while (currentIndex < source.length()) {
-            if (sequenceMatches(sequence)) {
-                currentIndex += sequence.length();
-                return;
-            }
-
-            currentIndex++;
-        }
-    }
-
-    public void unreadToken() {
-        currentIndex = tokenIndexes.pop();
-    }
-
 }