From: Svjatoslav Agejenko Date: Thu, 12 Oct 2017 09:29:33 +0000 (+0300) Subject: Handle complex content preserving terminators. X-Git-Tag: svjatoslavcommons-1.8~49 X-Git-Url: http://www2.svjatoslav.eu/gitweb/?p=svjatoslav_commons.git;a=commitdiff_plain;h=3bc3db3ceb288b82e48c349bf27dfafda2bcd444 Handle complex content preserving terminators. --- diff --git a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Terminator.java b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Terminator.java index a298538..c1d1983 100755 --- a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Terminator.java +++ b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Terminator.java @@ -45,6 +45,10 @@ public class Terminator { DROP } + public boolean hasEndSequence(){ + return endSequence != null; + } + @Override public String toString() { return "Terminator{" + diff --git a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java index e92ccd7..939ede9 100755 --- a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java +++ b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java @@ -57,45 +57,61 @@ public class Tokenizer { + "\" but got \"" + match.token + "\" instead."); } - public TokenizerMatch getNextToken() { + public TokenizerMatch getNextToken() throws InvalidSyntaxException { tokenIndexes.push(currentIndex); StringBuilder token = new StringBuilder(); while (true){ - if (isTokenTermination()){ - Terminator tokenTerminator = findTokenTerminator(); - - if (tokenTerminator.termination == PRESERVE){ - if (hasAccumulatedToken(token)){ - // already assembled some token - return new TokenizerMatch(token.toString(), "", tokenTerminator); - } else { - currentIndex++; - return new TokenizerMatch(tokenTerminator.startSequence, "", tokenTerminator); - } - } else if (tokenTerminator.termination == DROP){ - if (hasAccumulatedToken(token)){ - currentIndex++; - return new TokenizerMatch(token.toString(), "", tokenTerminator); - } else { - currentIndex++; - } - } - } else { + if (isOngoingToken()) { token.append(source.charAt(currentIndex)); currentIndex++; + continue; + } + + Terminator tokenTerminator = findTokenTerminator(); + + if (tokenTerminator.termination == PRESERVE){ + return buildPreservedToken(token, tokenTerminator); + } else if (tokenTerminator.termination == DROP){ + if (hasAccumulatedToken(token)){ + currentIndex++; + return new TokenizerMatch(token.toString(), "", tokenTerminator); + } else { + currentIndex++; + } } } } + private TokenizerMatch buildPreservedToken(StringBuilder token, Terminator terminator) throws InvalidSyntaxException { + if (hasAccumulatedToken(token)) + return new TokenizerMatch(token.toString(), "", terminator); + + if (terminator.hasEndSequence()){ + int endSequenceIndex = source.indexOf(terminator.endSequence, + currentIndex + terminator.startSequence.length()); + + if (endSequenceIndex < 0) + throw new InvalidSyntaxException("Expected \"" + terminator.endSequence + "\" but not found."); + + String reminder = source.substring(currentIndex + terminator.startSequence.length(), endSequenceIndex); + currentIndex = endSequenceIndex + terminator.endSequence.length(); + + return new TokenizerMatch(terminator.startSequence, reminder, terminator); + } else { + currentIndex += terminator.startSequence.length(); + return new TokenizerMatch(terminator.startSequence, "", terminator); + } + } + private boolean hasAccumulatedToken(StringBuilder token) { return token.length() > 0; } - private boolean isTokenTermination() { - return findTokenTerminator() != null; + private boolean isOngoingToken() { + return findTokenTerminator() == null; } public Terminator findTokenTerminator() { @@ -105,7 +121,7 @@ public class Tokenizer { return null; } - public boolean consumeIfNextToken(final String token) { + public boolean consumeIfNextToken(final String token) throws InvalidSyntaxException { if (token.equals(getNextToken().token)) return true; @@ -113,13 +129,13 @@ public class Tokenizer { return false; } - public TokenizerMatch peekNextToken(){ + public TokenizerMatch peekNextToken() throws InvalidSyntaxException { TokenizerMatch result = getNextToken(); unreadToken(); return result; } - public boolean peekIsOneOf(String ... possibilities){ + public boolean peekIsOneOf(String ... possibilities) throws InvalidSyntaxException { String nextToken = peekNextToken().token; return Stream.of(possibilities).anyMatch(possibility -> possibility.equals(nextToken)); } diff --git a/src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerTest.java b/src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerTest.java index e72b936..ddb2662 100644 --- a/src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerTest.java +++ b/src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerTest.java @@ -2,6 +2,7 @@ package eu.svjatoslav.commons.string.tokenizer; import org.junit.Test; +import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP; import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE; import static org.junit.Assert.assertEquals; @@ -20,7 +21,7 @@ public class TokenizerTest { @Test public void you_can_peek() throws Exception { Tokenizer tokenizer = new Tokenizer("this is a N'2015-03-18 09:48:54.360' test") - .addTerminator(" ", Terminator.TerminationStrategy.DROP) + .addTerminator(" ", DROP) .addTerminator("N'", "'", PRESERVE); tokenizer.expectAndConsumeNextToken("this"); @@ -34,14 +35,15 @@ public class TokenizerTest { @Test public void complexTerminator() throws Exception { - Tokenizer tokenizer = new Tokenizer(" this((\"hello\" /* comment */ (( is a N'2015-03-18 09:48:54.360' test") - .addTerminator(" ", Terminator.TerminationStrategy.DROP) - .addTerminator("(", Terminator.TerminationStrategy.PRESERVE) - .addTerminator("\"", "\"" ,Terminator.TerminationStrategy.PRESERVE) - .addTerminator("/*", "*/" ,Terminator.TerminationStrategy.DROP) + Tokenizer tokenizer = new Tokenizer("/* hello */ /** comment **/ (( is a N'2015-03-18 09:48:54.360' test") + .addTerminator(" ", DROP) + .addTerminator("(", PRESERVE) + .addTerminator("\"", "\"" , PRESERVE) + .addTerminator("/*", "*/" , PRESERVE) ; - System.out.println(tokenizer.getNextToken().token); + TokenizerMatch nextToken = tokenizer.getNextToken(); + System.out.println("T: \"" + nextToken.token + "\", R: \"" + nextToken.reminder + "\""); System.out.println(tokenizer.getNextToken().token); System.out.println(tokenizer.getNextToken().token); System.out.println(tokenizer.getNextToken().token); @@ -65,7 +67,7 @@ public class TokenizerTest { @Test public void testComplexTerminator() throws Exception { Tokenizer tokenizer = new Tokenizer("this N'2015-03-18 09:48:54.360' /* thoe unto u */ test") - .addTerminator(" ", Terminator.TerminationStrategy.DROP) + .addTerminator(" ", DROP) .addTerminator("/*", "*/", PRESERVE); // tokenizer.expectAndConsumeNextToken("this");