Handle complex content preserving terminators.
[svjatoslav_commons.git] / src / main / java / eu / svjatoslav / commons / string / tokenizer / Tokenizer.java
index e92ccd7..939ede9 100755 (executable)
@@ -57,45 +57,61 @@ public class Tokenizer {
                     + "\" but got \"" + match.token + "\" instead.");
     }
 
-    public TokenizerMatch getNextToken() {
+    public TokenizerMatch getNextToken() throws InvalidSyntaxException {
         tokenIndexes.push(currentIndex);
 
         StringBuilder token = new StringBuilder();
 
         while (true){
-            if (isTokenTermination()){
-                Terminator tokenTerminator = findTokenTerminator();
-
-                if (tokenTerminator.termination == PRESERVE){
-                    if (hasAccumulatedToken(token)){
-                        // already assembled some token
-                        return new TokenizerMatch(token.toString(), "", tokenTerminator);
-                    } else {
-                        currentIndex++;
-                        return new TokenizerMatch(tokenTerminator.startSequence, "", tokenTerminator);
-                    }
-                } else if (tokenTerminator.termination == DROP){
-                    if (hasAccumulatedToken(token)){
-                        currentIndex++;
-                        return new TokenizerMatch(token.toString(), "", tokenTerminator);
-                    } else {
-                        currentIndex++;
-                    }
-                }
-            } else {
+            if (isOngoingToken()) { // no terminator reported: keep collecting characters into the token
                 token.append(source.charAt(currentIndex));
                 currentIndex++;
+                continue;
+            }
+
+            Terminator tokenTerminator = findTokenTerminator();
+
+            if (tokenTerminator.termination == PRESERVE){
+                return buildPreservedToken(token, tokenTerminator); // preserved terminators are returned as tokens themselves
+            } else if (tokenTerminator.termination == DROP){
+                if (hasAccumulatedToken(token)){
+                    currentIndex += tokenTerminator.startSequence.length(); // step over the whole dropped terminator, not just its first character
+                    return new TokenizerMatch(token.toString(), "", tokenTerminator);
+                } else {
+                    currentIndex += tokenTerminator.startSequence.length(); // nothing collected yet: skip the terminator and keep scanning
+                }
             }
         }
 
     }
 
+    private TokenizerMatch buildPreservedToken(StringBuilder token, Terminator terminator) throws InvalidSyntaxException {
+        if (hasAccumulatedToken(token)) // text collected before the terminator is returned first; currentIndex is not advanced here
+            return new TokenizerMatch(token.toString(), "", terminator);
+
+        if (terminator.hasEndSequence()){ // terminator has an end sequence: capture everything up to it
+            int endSequenceIndex = source.indexOf(terminator.endSequence,
+                    currentIndex + terminator.startSequence.length()); // search begins right after the start sequence
+
+            if (endSequenceIndex < 0) // indexOf yields -1 when the end sequence never occurs
+                throw new InvalidSyntaxException("Expected \"" + terminator.endSequence + "\" but not found.");
+
+            String reminder = source.substring(currentIndex + terminator.startSequence.length(), endSequenceIndex); // text between start and end sequence
+            currentIndex = endSequenceIndex + terminator.endSequence.length(); // resume scanning after the end sequence
+
+            return new TokenizerMatch(terminator.startSequence, reminder, terminator); // enclosed text goes in as the second constructor argument
+        } else {
+            currentIndex += terminator.startSequence.length(); // no end sequence: step over the start sequence only
+            return new TokenizerMatch(terminator.startSequence, "", terminator);
+        }
+    }
+
     private boolean hasAccumulatedToken(StringBuilder token) {
         return token.length() > 0;
     }
 
-    private boolean isTokenTermination() {
-        return findTokenTerminator() != null;
+    private boolean isOngoingToken() {
+        return findTokenTerminator() == null; // true while findTokenTerminator() reports no terminator
     }
 
     public Terminator findTokenTerminator() {
@@ -105,7 +121,7 @@ public class Tokenizer {
         return null;
     }
 
-    public boolean consumeIfNextToken(final String token) {
+    public boolean consumeIfNextToken(final String token) throws InvalidSyntaxException {
         if (token.equals(getNextToken().token))
             return true;
 
@@ -113,13 +129,13 @@ public class Tokenizer {
         return false;
     }
 
-    public TokenizerMatch peekNextToken(){
+    public TokenizerMatch peekNextToken() throws InvalidSyntaxException { // reads the next token, then calls unreadToken() before returning it
         TokenizerMatch result = getNextToken();
         unreadToken();
         return result;
     }
 
-    public boolean peekIsOneOf(String ... possibilities){
+    public boolean peekIsOneOf(String ... possibilities) throws InvalidSyntaxException { // true if the peeked token equals any of the given strings
         String nextToken = peekNextToken().token;
         return Stream.of(possibilities).anyMatch(possibility -> possibility.equals(nextToken));
     }