X-Git-Url: http://www2.svjatoslav.eu/gitweb/?a=blobdiff_plain;f=src%2Fmain%2Fjava%2Feu%2Fsvjatoslav%2Fcommons%2Fstring%2Ftokenizer%2FTokenizer.java;h=cc2036949557778171a48380e1b991226518bb73;hb=67f7af91a79bc2ff50071389b6333a28755a4bff;hp=140773218e9fdb448c091b54ab8fd562662f8bdf;hpb=b8bd1e820265fc15c39c1ee8c06289ea8b8e2c1c;p=svjatoslav_commons.git

diff --git a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java
index 1407732..cc20369 100755
--- a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java
+++ b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java
@@ -1,12 +1,7 @@
 /*
- * Svjatoslav Commons - shared library of common functionality.
- * Copyright ©2012-2017, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 3 of the GNU Lesser General Public License
- * or later as published by the Free Software Foundation.
+ * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko.
+ * This project is released under Creative Commons Zero (CC0) license.
  */
-
 package eu.svjatoslav.commons.string.tokenizer;
 
 import java.util.ArrayList;
@@ -16,12 +11,22 @@ import java.util.stream.Stream;
 
 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
+import static java.lang.System.out;
 
 public class Tokenizer {
 
+    /**
+     * Stack of token indexes. This allows walking back in history and un-consuming tokens.
+     */
     private final Stack<Integer> tokenIndexes = new Stack<>();
+
+    /**
+     * Terminators that this tokenizer will search for within the source string.
+     */
     private final List<Terminator> terminators = new ArrayList<>();
-    private String source;
+
+    private String source; // string to be tokenized
+
     private int currentIndex = 0;
 
     private int cachedTerminatorIndex = -1;
@@ -50,6 +55,11 @@ public class Tokenizer {
         return this;
     }
 
+    public Tokenizer addTerminator(Terminator terminator) { // registers an already constructed terminator
+        terminators.add(terminator);
+        return this; // returns this tokenizer so that calls can be chained
+    }
+
     public Tokenizer addTerminator(final String startSequence,
                                    final String endSequence, final Terminator.TerminationStrategy terminationStrategy) {
         terminators.add(new Terminator(startSequence, endSequence, terminationStrategy));
@@ -64,7 +74,10 @@ public class Tokenizer {
                     + "\" but got \"" + match.token + "\" instead.");
     }
 
-
+    /**
+     * @return next {@link TokenizerMatch} or <code>null</code> if end of input is reached.
+     * @throws InvalidSyntaxException
+     */
     public TokenizerMatch getNextToken() throws InvalidSyntaxException {
         tokenIndexes.push(currentIndex);
 
@@ -106,22 +119,26 @@ public class Tokenizer {
             currentIndex += terminator.startSequence.length();
     }
 
-    private TokenizerMatch buildPreservedToken(StringBuilder token, Terminator terminator) throws InvalidSyntaxException {
+    /**
+     * @throws InvalidSyntaxException if the terminator has an end sequence and that end sequence is not found.
+     */
+    private TokenizerMatch buildPreservedToken(StringBuilder token, Terminator terminator)
+            throws InvalidSyntaxException {
         if (hasAccumulatedToken(token))
             return new TokenizerMatch(token.toString(), null, terminator);
 
         if (terminator.hasEndSequence())
-            return buildComplexPreservedToken(terminator);
+            return buildTokenWithExpectedENdSequence(terminator); // match spans from start sequence to end sequence
         else
-            return buildSimplePreservedToken(terminator);
+            return buildTokenWithoutEndSequence(terminator); // the start sequence alone forms the match
     }
 
-    private TokenizerMatch buildSimplePreservedToken(Terminator terminator) {
+    private TokenizerMatch buildTokenWithoutEndSequence(Terminator terminator) { // consumes the start sequence and returns it as the token
         currentIndex += terminator.startSequence.length();
         return new TokenizerMatch(terminator.startSequence, null, terminator);
     }
 
-    private TokenizerMatch buildComplexPreservedToken(Terminator terminator) throws InvalidSyntaxException {
+    private TokenizerMatch buildTokenWithExpectedENdSequence(Terminator terminator) throws InvalidSyntaxException {
         int endSequenceIndex = getEndSequenceIndex(terminator);
         String reminder = source.substring(currentIndex + terminator.startSequence.length(), endSequenceIndex);
         currentIndex = endSequenceIndex + terminator.endSequence.length();
@@ -129,6 +146,9 @@ public class Tokenizer {
         return new TokenizerMatch(terminator.startSequence, reminder, terminator);
     }
 
+    /**
+     * @throws InvalidSyntaxException if end of input is reached without finding expected end sequence.
+     */
     private int getEndSequenceIndex(Terminator terminator) throws InvalidSyntaxException {
         int endSequenceIndex = source.indexOf(terminator.endSequence,
                 currentIndex + terminator.startSequence.length());
@@ -147,7 +167,7 @@ public class Tokenizer {
         return getOrFindTokenTerminator() == null;
     }
 
-    public boolean hasMoreTokens() {
+    public boolean hasMoreContent() { // true while currentIndex is still before the end of source
         return currentIndex < source.length();
     }
 
@@ -198,6 +218,26 @@ public class Tokenizer {
         currentIndex = tokenIndexes.pop();
     }
 
+    /**
+     * For debugging: prints every remaining token to standard output, then un-reads each token that was printed.
+     */
+    public void enlistRemainingTokens(){
+        int redTokenCount = 0;
+
+        try {
+            while (hasMoreContent()) {
+                out.println(getNextToken().toString()); // NOTE(review): getNextToken() is documented to possibly return null - confirm this cannot happen here
+                redTokenCount++;
+            }
+        } catch (InvalidSyntaxException e){
+            out.println("There is syntax exception");
+        }
+
+        // rewind by un-reading each printed token. NOTE(review): a token that threw is not counted - confirm the pointer is fully restored in that case
+        for (int i = 0; i< redTokenCount; i++ ) unreadToken();
+    }
+
+
     public void skipUntilDataEnd() {
         tokenIndexes.push(currentIndex);
         currentIndex = source.length();