From 67f7af91a79bc2ff50071389b6333a28755a4bff Mon Sep 17 00:00:00 2001 From: Svjatoslav Agejenko Date: Sat, 1 Aug 2020 12:51:41 +0300 Subject: [PATCH] Better tokenizer usability --- .../commons/string/tokenizer/Tokenizer.java | 41 +++++++++++++++++-- .../string/tokenizer/TokenizerTest.java | 2 +- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java index b716989..cc20369 100755 --- a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java +++ b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java @@ -11,12 +11,22 @@ import java.util.stream.Stream; import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP; import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE; +import static java.lang.System.out; public class Tokenizer { + /** + * Stack of token indexes. This allows to walk back in history and un-consume the token. + */ private final Stack tokenIndexes = new Stack<>(); + + /** + * Terminators that will be searched for by given tokenizer within given source string. + */ private final List terminators = new ArrayList<>(); - private String source; + + private String source; // string to be tokenized + private int currentIndex = 0; private int cachedTerminatorIndex = -1; @@ -45,6 +55,11 @@ public class Tokenizer { return this; } + public Tokenizer addTerminator(Terminator terminator) { + terminators.add(terminator); + return this; + } + public Tokenizer addTerminator(final String startSequence, final String endSequence, final Terminator.TerminationStrategy terminationStrategy) { terminators.add(new Terminator(startSequence, endSequence, terminationStrategy)); @@ -59,9 +74,7 @@ public class Tokenizer { + "\" but got \"" + match.token + "\" instead."); } - /** - * * @return next @TokenizerMatch or null if end of input is reached. * @throws InvalidSyntaxException */ @@ -154,7 +167,7 @@ public class Tokenizer { return getOrFindTokenTerminator() == null; } - public boolean hasMoreTokens() { + public boolean hasMoreContent() { return currentIndex < source.length(); } @@ -205,6 +218,26 @@ public class Tokenizer { currentIndex = tokenIndexes.pop(); } + /** + * For debugging + */ + public void enlistRemainingTokens(){ + int redTokenCount = 0; + + try { + while (hasMoreContent()) { + out.println(getNextToken().toString()); + redTokenCount++; + } + } catch (InvalidSyntaxException e){ + out.println("There is syntax exception"); + } + + // restore pointer to original location + for (int i = 0; i< redTokenCount; i++ ) unreadToken(); + } + + public void skipUntilDataEnd() { tokenIndexes.push(currentIndex); currentIndex = source.length(); diff --git a/src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerTest.java b/src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerTest.java index ae68386..9f35367 100644 --- a/src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerTest.java +++ b/src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerTest.java @@ -41,7 +41,7 @@ public class TokenizerTest { assertTokenEquals("test", null, tokenizer); assertNull(tokenizer.getNextToken()); - assertFalse(tokenizer.hasMoreTokens()); + assertFalse(tokenizer.hasMoreContent()); } private void assertTokenEquals(String token, String reminder, Tokenizer tokenizer) throws InvalidSyntaxException { -- 2.20.1