From: Svjatoslav Agejenko Date: Thu, 12 Oct 2017 07:24:50 +0000 (+0300) Subject: Reimplemented getNextToken X-Git-Tag: svjatoslavcommons-1.8~50 X-Git-Url: http://www2.svjatoslav.eu/gitweb/?p=svjatoslav_commons.git;a=commitdiff_plain;h=798a165906826270924f0d5d58cfa04cf6bdc113 Reimplemented getNextToken --- diff --git a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Terminator.java b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Terminator.java index fa07a5b..a298538 100755 --- a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Terminator.java +++ b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Terminator.java @@ -27,6 +27,19 @@ public class Terminator { this.termination = termination; } + public boolean matches(String source, int index) { + // boundary check + if (source.length() < (index + startSequence.length())) + return false; + + // match check + for (int i = 0; i < startSequence.length(); i++) + if (startSequence.charAt(i) != source.charAt(index + i)) + return false; + + return true; + } + public enum TerminationStrategy { PRESERVE, DROP diff --git a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java index c4ca4d2..e92ccd7 100755 --- a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java +++ b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java @@ -15,6 +15,7 @@ import java.util.Stack; import java.util.stream.Stream; import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP; +import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE; public class Tokenizer { @@ -32,6 +33,7 @@ public class Tokenizer { public Tokenizer setSource(String source){ this.source = source; currentIndex = 0; + tokenIndexes.clear(); return this; } @@ -57,46 +59,52 @@ public class Tokenizer { public TokenizerMatch getNextToken() { tokenIndexes.push(currentIndex); - final StringBuilder result = new StringBuilder(); - - while (true) { - if (currentIndex >= source.length()) - return null; - - boolean accumulateCurrentChar = true; - - for (final Terminator terminator : terminators) - if (sequenceMatches(terminator.startSequence)) - - if (terminator.termination == DROP) { - currentIndex += terminator.startSequence.length(); - - if (terminator.endSequence != null) - skipUntilSequence(terminator.endSequence); - - if (result.length() > 0) - return new TokenizerMatch(result.toString(), - terminator); - else { - accumulateCurrentChar = false; - break; - } - } else if (result.length() > 0) - return new TokenizerMatch(result.toString(), terminator); - else { - currentIndex += terminator.startSequence.length(); - return new TokenizerMatch(terminator.startSequence, - terminator); - } - if (accumulateCurrentChar) { - result.append(source.charAt(currentIndex)); + StringBuilder token = new StringBuilder(); + + while (true){ + if (isTokenTermination()){ + Terminator tokenTerminator = findTokenTerminator(); + + if (tokenTerminator.termination == PRESERVE){ + if (hasAccumulatedToken(token)){ + // already assembled some token + return new TokenizerMatch(token.toString(), "", tokenTerminator); + } else { + currentIndex++; + return new TokenizerMatch(tokenTerminator.startSequence, "", tokenTerminator); + } + } else if (tokenTerminator.termination == DROP){ + if (hasAccumulatedToken(token)){ + currentIndex++; + return new TokenizerMatch(token.toString(), "", tokenTerminator); + } else { + currentIndex++; + } + } + } else { + token.append(source.charAt(currentIndex)); currentIndex++; } } } + private boolean hasAccumulatedToken(StringBuilder token) { + return token.length() > 0; + } + + private boolean isTokenTermination() { + return findTokenTerminator() != null; + } + + public Terminator findTokenTerminator() { + for (Terminator terminator : terminators) + if (terminator.matches(source, currentIndex)) + return terminator; + return null; + } + public boolean consumeIfNextToken(final String token) { if (token.equals(getNextToken().token)) return true; diff --git a/src/main/java/eu/svjatoslav/commons/string/tokenizer/TokenizerMatch.java b/src/main/java/eu/svjatoslav/commons/string/tokenizer/TokenizerMatch.java index 86d7a1b..f005bc1 100755 --- a/src/main/java/eu/svjatoslav/commons/string/tokenizer/TokenizerMatch.java +++ b/src/main/java/eu/svjatoslav/commons/string/tokenizer/TokenizerMatch.java @@ -12,10 +12,12 @@ package eu.svjatoslav.commons.string.tokenizer; public class TokenizerMatch { public final String token; + public final String reminder; public final Terminator terminator; - public TokenizerMatch(final String token, final Terminator terminator) { + public TokenizerMatch(final String token, final String reminder, final Terminator terminator) { this.token = token; + this.reminder = reminder; this.terminator = terminator; } @@ -23,6 +25,7 @@ public class TokenizerMatch { public String toString() { return "TokenizerMatch{" + "token='" + token + '\'' + + ", reminder='" + reminder + '\'' + ", terminator=" + terminator + '}'; } diff --git a/src/test/java/eu/svjatoslav/commons/string/tokenizer/TerminatorTest.java b/src/test/java/eu/svjatoslav/commons/string/tokenizer/TerminatorTest.java new file mode 100644 index 0000000..f782949 --- /dev/null +++ b/src/test/java/eu/svjatoslav/commons/string/tokenizer/TerminatorTest.java @@ -0,0 +1,25 @@ +package eu.svjatoslav.commons.string.tokenizer; + +import org.junit.Test; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +public class TerminatorTest { + + @Test + public void testMatches(){ + Terminator terminator = new Terminator( + "/*", "*/", Terminator.TerminationStrategy.PRESERVE); + + // must find + assertTrue(terminator.matches("/* bla bla bla */", 0)); + + // must not find + assertFalse(terminator.matches("/* bla bla bla */", 1)); + + // must not overflow + assertFalse(terminator.matches("/", 0)); + } + +} \ No newline at end of file diff --git a/src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerTest.java b/src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerTest.java index e40b401..e72b936 100644 --- a/src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerTest.java +++ b/src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerTest.java @@ -2,15 +2,26 @@ package eu.svjatoslav.commons.string.tokenizer; import org.junit.Test; +import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE; import static org.junit.Assert.assertEquals; public class TokenizerTest { + @Test + public void findTokenTerminator() throws Exception { + + Tokenizer tokenizer = new Tokenizer("this /* comment */ a test") + .addTerminator("/*", "*/", PRESERVE); + + + + } @Test - public void peekNextToken() throws Exception { - Tokenizer tokenizer = new Tokenizer("this is a test") - .addTerminator(" ", Terminator.TerminationStrategy.DROP); + public void you_can_peek() throws Exception { + Tokenizer tokenizer = new Tokenizer("this is a N'2015-03-18 09:48:54.360' test") + .addTerminator(" ", Terminator.TerminationStrategy.DROP) + .addTerminator("N'", "'", PRESERVE); tokenizer.expectAndConsumeNextToken("this"); @@ -21,4 +32,56 @@ public class TokenizerTest { assertEquals(true, tokenizer.peekIsOneOf("maybe", "is", "that")); } + @Test + public void complexTerminator() throws Exception { + Tokenizer tokenizer = new Tokenizer(" this((\"hello\" /* comment */ (( is a N'2015-03-18 09:48:54.360' test") + .addTerminator(" ", Terminator.TerminationStrategy.DROP) + .addTerminator("(", Terminator.TerminationStrategy.PRESERVE) + .addTerminator("\"", "\"" ,Terminator.TerminationStrategy.PRESERVE) + .addTerminator("/*", "*/" ,Terminator.TerminationStrategy.DROP) + ; + + System.out.println(tokenizer.getNextToken().token); + System.out.println(tokenizer.getNextToken().token); + System.out.println(tokenizer.getNextToken().token); + System.out.println(tokenizer.getNextToken().token); + System.out.println(tokenizer.getNextToken().token); + System.out.println(tokenizer.getNextToken().token); + System.out.println(tokenizer.getNextToken().token); + System.out.println(tokenizer.getNextToken().token); + System.out.println(tokenizer.getNextToken().token); + + +// tokenizer.expectAndConsumeNextToken("this"); +// +// assertEquals("is", tokenizer.peekNextToken().token); +// +// assertEquals("is", tokenizer.peekNextToken().token); +// +// assertEquals(true, tokenizer.peekIsOneOf("maybe", "is", "that")); + } + + + @Test + public void testComplexTerminator() throws Exception { + Tokenizer tokenizer = new Tokenizer("this N'2015-03-18 09:48:54.360' /* thoe unto u */ test") + .addTerminator(" ", Terminator.TerminationStrategy.DROP) + .addTerminator("/*", "*/", PRESERVE); + +// tokenizer.expectAndConsumeNextToken("this"); + +// assertEquals("2015-03-18 09:48:54.360", tokenizer.getNextToken().token); + + System.out.println("1st: " + tokenizer.getNextToken().token); + + System.out.println("2nd: " + tokenizer.getNextToken().token); + + System.out.println("2nd: " + tokenizer.getNextToken().token); + + System.out.println("2nd: " + tokenizer.getNextToken().token); + + System.out.println("2nd: " + tokenizer.getNextToken().token); + + } + } \ No newline at end of file