Better tokenizer usability
author Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>
Sat, 1 Aug 2020 09:51:41 +0000 (12:51 +0300)
committer Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>
Sat, 1 Aug 2020 09:51:41 +0000 (12:51 +0300)
src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java
src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerTest.java

index b716989..cc20369 100755 (executable)
@@ -11,12 +11,22 @@ import java.util.stream.Stream;
 
 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
+import static java.lang.System.out;
 
 public class Tokenizer {
 
+    /**
+     * Stack of token indexes. This allows walking back in history and un-consuming a token.
+     */
     private final Stack<Integer> tokenIndexes = new Stack<>();
+
+    /**
+     * Terminators that this tokenizer will search for within the source string.
+     */
     private final List<Terminator> terminators = new ArrayList<>();
-    private String source;
+
+    private String source; // string to be tokenized
+
     private int currentIndex = 0;
 
     private int cachedTerminatorIndex = -1;
@@ -45,6 +55,11 @@ public class Tokenizer {
         return this;
     }
 
+    public Tokenizer addTerminator(Terminator terminator) { // registers an already-constructed terminator
+        terminators.add(terminator);
+        return this; // returning this tokenizer allows calls to be chained
+    }
+
     public Tokenizer addTerminator(final String startSequence,
                                    final String endSequence, final Terminator.TerminationStrategy terminationStrategy) {
         terminators.add(new Terminator(startSequence, endSequence, terminationStrategy));
@@ -59,9 +74,7 @@ public class Tokenizer {
                     + "\" but got \"" + match.token + "\" instead.");
     }
 
-
     /**
-     *
      * @return next {@link TokenizerMatch} or <code>null</code> if end of input is reached.
      * @throws InvalidSyntaxException
      */
@@ -154,7 +167,7 @@ public class Tokenizer {
         return getOrFindTokenTerminator() == null;
     }
 
-    public boolean hasMoreTokens() {
+    public boolean hasMoreContent() {
         return currentIndex < source.length(); // true while currentIndex is before the end of source
     }
 
@@ -205,6 +218,26 @@ public class Tokenizer {
         currentIndex = tokenIndexes.pop();
     }
 
+    /**
+     * For debugging: prints every remaining token to standard output, one per
+     * line, and then calls {@link #unreadToken()} once for each token that was
+     * printed, with the intent of restoring the read pointer to where it was
+     * before this method was called.
+     * <p>
+     * If an {@link InvalidSyntaxException} is thrown while reading, reading
+     * stops and a fixed notice is printed instead; tokens that were printed
+     * before the failure are still un-read afterwards.
+     */
+    public void enlistRemainingTokens(){
+        int redTokenCount = 0; // number of tokens read and printed so far
+
+        try {
+            while (hasMoreContent()) {
+                out.println(getNextToken().toString()); // NOTE(review): tests show getNextToken() may return null at end of input; confirm that cannot happen while hasMoreContent() is true
+                redTokenCount++;
+            }
+        } catch (InvalidSyntaxException e){ // NOTE(review): exception details are discarded; only a fixed message is printed
+            out.println("There is syntax exception");
+        }
+
+        // restore pointer to original location
+        for (int i = 0; i< redTokenCount; i++ ) unreadToken();
+    }
+
+
     public void skipUntilDataEnd() {
         tokenIndexes.push(currentIndex);
         currentIndex = source.length();
index ae68386..9f35367 100644 (file)
@@ -41,7 +41,7 @@ public class TokenizerTest {
         assertTokenEquals("test", null, tokenizer);
 
         assertNull(tokenizer.getNextToken());
-        assertFalse(tokenizer.hasMoreTokens());
+        assertFalse(tokenizer.hasMoreContent());
     }
 
     private void assertTokenEquals(String token, String reminder, Tokenizer tokenizer) throws InvalidSyntaxException {