Handle end of input. Speed improvements. Deleted legacy code.
[svjatoslav_commons.git] / src / main / java / eu / svjatoslav / commons / string / tokenizer / Tokenizer.java
index 939ede9..722e17a 100755 (executable)
@@ -24,6 +24,9 @@ public class Tokenizer {
     private String source;
     private int currentIndex = 0;
 
+    int cachedTerminatorIndex = -1;
+    Terminator cachedTerminator;
+
     public Tokenizer(final String source) {
         this.source = source;
     }
@@ -34,6 +37,9 @@ public class Tokenizer {
         this.source = source;
         currentIndex = 0;
         tokenIndexes.clear();
+
+        cachedTerminatorIndex = -1;
+        cachedTerminator = null;
         return this;
     }
 
@@ -57,53 +63,80 @@ public class Tokenizer {
                     + "\" but got \"" + match.token + "\" instead.");
     }
 
+
+
     public TokenizerMatch getNextToken() throws InvalidSyntaxException {
         tokenIndexes.push(currentIndex);
 
-        StringBuilder token = new StringBuilder();
+        StringBuilder tokenAccumulator = new StringBuilder();
 
         while (true){
+
+            if (currentIndex >= source.length()){ // reached end of input
+                if (hasAccumulatedToken(tokenAccumulator))
+                    return new TokenizerMatch(tokenAccumulator.toString(), null, null);
+                else
+                    return null;
+            }
+
             if (isOngoingToken()) {
-                token.append(source.charAt(currentIndex));
+                tokenAccumulator.append(source.charAt(currentIndex));
                 currentIndex++;
                 continue;
             }
 
-            Terminator tokenTerminator = findTokenTerminator();
-
-            if (tokenTerminator.termination == PRESERVE){
-                return buildPreservedToken(token, tokenTerminator);
-            } else if (tokenTerminator.termination == DROP){
-                if (hasAccumulatedToken(token)){
-                    currentIndex++;
-                    return new TokenizerMatch(token.toString(), "", tokenTerminator);
-                } else {
-                    currentIndex++;
-                }
+            Terminator terminator = getOrFindTokenTerminator();
+
+            if (terminator.termination == PRESERVE)
+                return buildPreservedToken(tokenAccumulator, terminator);
+            else if (terminator.termination == DROP){
+                skipUntilTerminatorEnd(terminator);
+
+                if (hasAccumulatedToken(tokenAccumulator))
+                    return new TokenizerMatch(tokenAccumulator.toString(), null, terminator);
             }
         }
 
     }
 
+    private void skipUntilTerminatorEnd(Terminator terminator) throws InvalidSyntaxException {
+        if (terminator.hasEndSequence())
+            currentIndex = getEndSequenceIndex(terminator) + terminator.endSequence.length();
+        else
+            currentIndex += terminator.startSequence.length();
+    }
+
     private TokenizerMatch buildPreservedToken(StringBuilder token, Terminator terminator) throws InvalidSyntaxException {
         if (hasAccumulatedToken(token))
-            return new TokenizerMatch(token.toString(), "", terminator);
+            return new TokenizerMatch(token.toString(), null, terminator);
 
-        if (terminator.hasEndSequence()){
-            int endSequenceIndex = source.indexOf(terminator.endSequence,
-                    currentIndex + terminator.startSequence.length());
+        if (terminator.hasEndSequence())
+            return buildComplexPreservedToken(terminator);
+        else
+            return buildSimplePreservedToken(terminator);
+    }
 
-            if (endSequenceIndex < 0)
-                throw new InvalidSyntaxException("Expected \"" + terminator.endSequence + "\" but not found.");
+    private TokenizerMatch buildSimplePreservedToken(Terminator terminator) {
+        currentIndex += terminator.startSequence.length();
+        return new TokenizerMatch(terminator.startSequence, null, terminator);
+    }
 
-            String reminder = source.substring(currentIndex + terminator.startSequence.length(), endSequenceIndex);
-            currentIndex = endSequenceIndex + terminator.endSequence.length();
+    private TokenizerMatch buildComplexPreservedToken(Terminator terminator) throws InvalidSyntaxException {
+        int endSequenceIndex = getEndSequenceIndex(terminator);
+        String reminder = source.substring(currentIndex + terminator.startSequence.length(), endSequenceIndex);
+        currentIndex = endSequenceIndex + terminator.endSequence.length();
 
-            return new TokenizerMatch(terminator.startSequence, reminder, terminator);
-        } else {
-            currentIndex += terminator.startSequence.length();
-            return new TokenizerMatch(terminator.startSequence, "", terminator);
-        }
+        return new TokenizerMatch(terminator.startSequence, reminder, terminator);
+    }
+
+    private int getEndSequenceIndex(Terminator terminator) throws InvalidSyntaxException {
+        int endSequenceIndex = source.indexOf(terminator.endSequence,
+                currentIndex + terminator.startSequence.length());
+
+        if (endSequenceIndex < 0)
+            throw new InvalidSyntaxException("Expected \"" + terminator.endSequence + "\" but not found.");
+
+        return endSequenceIndex;
     }
 
     private boolean hasAccumulatedToken(StringBuilder token) {
@@ -111,10 +144,26 @@ public class Tokenizer {
     }
 
     private boolean isOngoingToken() {
-        return findTokenTerminator() == null;
+        return getOrFindTokenTerminator() == null;
     }
 
-    public Terminator findTokenTerminator() {
+    public boolean hasMoreTokens(){
+        return currentIndex < source.length();
+    }
+
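+    /**
+     * Returns the terminator matching at the current index (or null if none matches), reusing the cached result when the index has not changed since the last lookup.
+     */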
+    public Terminator getOrFindTokenTerminator() {
+        if (currentIndex == cachedTerminatorIndex)
+            return cachedTerminator;
+
+        cachedTerminatorIndex = currentIndex;
+        cachedTerminator = findTokenTerminator();
+        return cachedTerminator;
+    }
+
+    private Terminator findTokenTerminator() {
         for (Terminator terminator : terminators)
             if (terminator.matches(source, currentIndex))
                 return terminator;
@@ -144,35 +193,7 @@ public class Tokenizer {
         if (peekIsOneOf(possibilities))
             throw new InvalidSyntaxException("Not expected \"" + peekNextToken().token + "\" here.");
     }
-
-
-    public boolean sequenceMatches(final String sequence) {
-        if ((currentIndex + sequence.length()) > source.length())
-            return false;
-
-        for (int i = 0; i < sequence.length(); i++)
-            if (sequence.charAt(i) != source.charAt(i + currentIndex))
-                return false;
-
-        return true;
-    }
-
-    public void skipUntilDataEnd() {
-        tokenIndexes.push(currentIndex);
-        currentIndex = source.length();
-    }
-
-    public void skipUntilSequence(final String sequence) {
-        while (currentIndex < source.length()) {
-            if (sequenceMatches(sequence)) {
-                currentIndex += sequence.length();
-                return;
-            }
-
-            currentIndex++;
-        }
-    }
-
+    
     public void unreadToken() {
         currentIndex = tokenIndexes.pop();
     }