Reimplemented getNextToken
author Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>
Thu, 12 Oct 2017 07:24:50 +0000 (10:24 +0300)
committer Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>
Thu, 12 Oct 2017 07:24:50 +0000 (10:24 +0300)
src/main/java/eu/svjatoslav/commons/string/tokenizer/Terminator.java
src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java
src/main/java/eu/svjatoslav/commons/string/tokenizer/TokenizerMatch.java
src/test/java/eu/svjatoslav/commons/string/tokenizer/TerminatorTest.java [new file with mode: 0644]
src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerTest.java

index fa07a5b..a298538 100755 (executable)
@@ -27,6 +27,19 @@ public class Terminator {
         this.termination = termination;
     }
 
+    public boolean matches(String source, int index) {
+        // boundary check
+        if (source.length() < (index + startSequence.length()))
+            return false;
+
+        // match check
+        for (int i = 0; i < startSequence.length(); i++)
+            if (startSequence.charAt(i) != source.charAt(index + i))
+                return false;
+
+        return true;
+    }
+
     public enum TerminationStrategy {
         PRESERVE,
         DROP
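
The new Terminator.matches() helper simply tests whether startSequence occurs in source at the given index, with a bounds check so it never reads past the end. For illustration only (not part of the commit), the same check can be expressed with the JDK's bounded prefix test:

    // equivalent one-liner: String.startsWith(prefix, offset) also returns false
    // when the prefix would run past the end of the source string
    boolean found = source.startsWith(startSequence, index);
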
index c4ca4d2..e92ccd7 100755 (executable)
@@ -15,6 +15,7 @@ import java.util.Stack;
 import java.util.stream.Stream;
 
 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
+import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
 
 public class Tokenizer {
 
@@ -32,6 +33,7 @@ public class Tokenizer {
     public Tokenizer setSource(String source){
         this.source = source;
         currentIndex = 0;
+        tokenIndexes.clear();
         return this;
     }
 
@@ -57,46 +59,52 @@ public class Tokenizer {
 
     public TokenizerMatch getNextToken() {
         tokenIndexes.push(currentIndex);
-        final StringBuilder result = new StringBuilder();
-
-        while (true) {
-            if (currentIndex >= source.length())
-                return null;
-
-            boolean accumulateCurrentChar = true;
-
-            for (final Terminator terminator : terminators)
-                if (sequenceMatches(terminator.startSequence))
-
-                    if (terminator.termination == DROP) {
-                        currentIndex += terminator.startSequence.length();
-
-                        if (terminator.endSequence != null)
-                            skipUntilSequence(terminator.endSequence);
-
-                        if (result.length() > 0)
-                            return new TokenizerMatch(result.toString(),
-                                    terminator);
-                        else {
-                            accumulateCurrentChar = false;
-                            break;
-                        }
-                    } else if (result.length() > 0)
-                        return new TokenizerMatch(result.toString(), terminator);
-                    else {
-                        currentIndex += terminator.startSequence.length();
-                        return new TokenizerMatch(terminator.startSequence,
-                                terminator);
-                    }
 
-            if (accumulateCurrentChar) {
-                result.append(source.charAt(currentIndex));
+        StringBuilder token = new StringBuilder();
+
+        while (true) {
+            // end of input: return whatever token has accumulated, or null when nothing is left
+            // (a null terminator marks that no terminator follows the final token)
+            if (currentIndex >= source.length())
+                return hasAccumulatedToken(token) ? new TokenizerMatch(token.toString(), "", null) : null;
+
+            if (isTokenTermination()) {
+                Terminator tokenTerminator = findTokenTerminator();
+
+                if (tokenTerminator.termination == PRESERVE) {
+                    if (hasAccumulatedToken(token)) {
+                        // a token is already assembled; leave the terminator for the next call
+                        return new TokenizerMatch(token.toString(), "", tokenTerminator);
+                    } else {
+                        // return the terminator itself as a token, stepping over its start sequence
+                        currentIndex += tokenTerminator.startSequence.length();
+                        return new TokenizerMatch(tokenTerminator.startSequence, "", tokenTerminator);
+                    }
+                } else if (tokenTerminator.termination == DROP) {
+                    // consume the start sequence, and skip to the end sequence when one is defined
+                    currentIndex += tokenTerminator.startSequence.length();
+                    if (tokenTerminator.endSequence != null)
+                        skipUntilSequence(tokenTerminator.endSequence);
+
+                    if (hasAccumulatedToken(token))
+                        return new TokenizerMatch(token.toString(), "", tokenTerminator);
+                }
+            } else {
+                token.append(source.charAt(currentIndex));
                 currentIndex++;
             }
         }
 
     }
 
+    private boolean hasAccumulatedToken(StringBuilder token) {
+        return token.length() > 0;
+    }
+
+    private boolean isTokenTermination() {
+        return findTokenTerminator() != null;
+    }
+
+    public Terminator findTokenTerminator() {
+        for (Terminator terminator : terminators)
+            if (terminator.matches(source, currentIndex))
+                return terminator;
+        return null;
+    }
+
     public boolean consumeIfNextToken(final String token) {
         if (token.equals(getNextToken().token))
             return true;
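
With these changes in place, getNextToken() is still driven the same way as before: configure terminators, then pull tokens one at a time. A minimal usage sketch, illustrative only and mirroring the existing tests:

    Tokenizer tokenizer = new Tokenizer("this is a test")
            .addTerminator(" ", Terminator.TerminationStrategy.DROP);

    TokenizerMatch first = tokenizer.getNextToken();   // first.token is "this"
    TokenizerMatch second = tokenizer.getNextToken();  // second.token is "is"
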
index 86d7a1b..f005bc1 100755 (executable)
@@ -12,10 +12,12 @@ package eu.svjatoslav.commons.string.tokenizer;
 public class TokenizerMatch {
 
     public final String token;
+    public final String reminder;
     public final Terminator terminator;
 
-    public TokenizerMatch(final String token, final Terminator terminator) {
+    public TokenizerMatch(final String token, final String reminder, final Terminator terminator) {
         this.token = token;
+        this.reminder = reminder;
         this.terminator = terminator;
     }
 
@@ -23,6 +25,7 @@ public class TokenizerMatch {
     public String toString() {
         return "TokenizerMatch{" +
                 "token='" + token + '\'' +
+                ", reminder='" + reminder + '\'' +
                 ", terminator=" + terminator +
                 '}';
     }
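
TokenizerMatch now carries the extra reminder field through its constructor and toString(); every call site added in this commit passes an empty string for it. A small construction sketch, purely illustrative:

    Terminator comment = new Terminator("/*", "*/", Terminator.TerminationStrategy.PRESERVE);
    TokenizerMatch match = new TokenizerMatch("/*", "", comment);
    System.out.println(match);  // TokenizerMatch{token='/*', reminder='', terminator=...}
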
diff --git a/src/test/java/eu/svjatoslav/commons/string/tokenizer/TerminatorTest.java b/src/test/java/eu/svjatoslav/commons/string/tokenizer/TerminatorTest.java
new file mode 100644 (file)
index 0000000..f782949
--- /dev/null
@@ -0,0 +1,25 @@
+package eu.svjatoslav.commons.string.tokenizer;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+public class TerminatorTest {
+
+    @Test
+    public void testMatches(){
+        Terminator terminator = new Terminator(
+                "/*", "*/", Terminator.TerminationStrategy.PRESERVE);
+
+        // must find
+        assertTrue(terminator.matches("/* bla bla bla */", 0));
+
+        // must not find
+        assertFalse(terminator.matches("/* bla bla bla */", 1));
+
+        // must not overflow
+        assertFalse(terminator.matches("/", 0));
+    }
+
+}
\ No newline at end of file
index e40b401..e72b936 100644 (file)
@@ -2,15 +2,26 @@ package eu.svjatoslav.commons.string.tokenizer;
 
 import org.junit.Test;
 
+import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
 
 public class TokenizerTest {
+    @Test
+    public void findTokenTerminator() throws Exception {
+        Tokenizer tokenizer = new Tokenizer("this /* comment */ a test")
+                .addTerminator("/*", "*/", PRESERVE);
+
+        // nothing matches at the very start of the input
+        assertNull(tokenizer.findTokenTerminator());
+
+        // after the leading token is consumed, the comment start lies at the current index
+        tokenizer.getNextToken();
+        assertNotNull(tokenizer.findTokenTerminator());
+    }
 
 
     @Test
-    public void peekNextToken() throws Exception {
-        Tokenizer tokenizer = new Tokenizer("this is a test")
-                .addTerminator(" ", Terminator.TerminationStrategy.DROP);
+    public void you_can_peek() throws Exception {
+        Tokenizer tokenizer = new Tokenizer("this is a N'2015-03-18 09:48:54.360' test")
+                .addTerminator(" ", Terminator.TerminationStrategy.DROP)
+                .addTerminator("N'", "'", PRESERVE);
 
         tokenizer.expectAndConsumeNextToken("this");
 
@@ -21,4 +32,56 @@ public class TokenizerTest {
         assertEquals(true, tokenizer.peekIsOneOf("maybe", "is", "that"));
     }
 
+    @Test
+    public void complexTerminator() throws Exception {
+        Tokenizer tokenizer = new Tokenizer("   this((\"hello\"  /* comment */   ((  is a N'2015-03-18 09:48:54.360' test")
+                .addTerminator(" ", Terminator.TerminationStrategy.DROP)
+                .addTerminator("(", Terminator.TerminationStrategy.PRESERVE)
+                .addTerminator("\"", "\"", Terminator.TerminationStrategy.PRESERVE)
+                .addTerminator("/*", "*/", Terminator.TerminationStrategy.DROP);
+
+        System.out.println(tokenizer.getNextToken().token);
+        System.out.println(tokenizer.getNextToken().token);
+        System.out.println(tokenizer.getNextToken().token);
+        System.out.println(tokenizer.getNextToken().token);
+        System.out.println(tokenizer.getNextToken().token);
+        System.out.println(tokenizer.getNextToken().token);
+        System.out.println(tokenizer.getNextToken().token);
+        System.out.println(tokenizer.getNextToken().token);
+        System.out.println(tokenizer.getNextToken().token);
+
+
+//        tokenizer.expectAndConsumeNextToken("this");
+//
+//        assertEquals("is", tokenizer.peekNextToken().token);
+//
+//        assertEquals("is", tokenizer.peekNextToken().token);
+//
+//        assertEquals(true, tokenizer.peekIsOneOf("maybe", "is", "that"));
+    }
+
+
+    @Test
+    public void testComplexTerminator() throws Exception {
+        Tokenizer tokenizer = new Tokenizer("this N'2015-03-18 09:48:54.360'  /* thoe unto u */ test")
+                .addTerminator(" ", Terminator.TerminationStrategy.DROP)
+                .addTerminator("/*", "*/", PRESERVE);
+
+//        tokenizer.expectAndConsumeNextToken("this");
+
+//        assertEquals("2015-03-18 09:48:54.360", tokenizer.getNextToken().token);
+
+        System.out.println("1st: " + tokenizer.getNextToken().token);
+
+        System.out.println("2nd: " + tokenizer.getNextToken().token);
+
+        System.out.println("3rd: " + tokenizer.getNextToken().token);
+
+        System.out.println("4th: " + tokenizer.getNextToken().token);
+
+        System.out.println("5th: " + tokenizer.getNextToken().token);
+
+    }
+
 }
\ No newline at end of file