X-Git-Url: http://www2.svjatoslav.eu/gitweb/?a=blobdiff_plain;f=src%2Fmain%2Fjava%2Feu%2Fsvjatoslav%2Fcommons%2Fstring%2Ftokenizer%2FTokenizer.java;h=cc2036949557778171a48380e1b991226518bb73;hb=67f7af91a79bc2ff50071389b6333a28755a4bff;hp=140773218e9fdb448c091b54ab8fd562662f8bdf;hpb=b8bd1e820265fc15c39c1ee8c06289ea8b8e2c1c;p=svjatoslav_commons.git diff --git a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java index 1407732..cc20369 100755 --- a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java +++ b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java @@ -1,12 +1,7 @@ /* - * Svjatoslav Commons - shared library of common functionality. - * Copyright ©2012-2017, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 3 of the GNU Lesser General Public License - * or later as published by the Free Software Foundation. + * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko. + * This project is released under Creative Commons Zero (CC0) license. */ - package eu.svjatoslav.commons.string.tokenizer; import java.util.ArrayList; @@ -16,12 +11,22 @@ import java.util.stream.Stream; import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP; import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE; +import static java.lang.System.out; public class Tokenizer { + /** + * Stack of token indexes. This allows to walk back in history and un-consume the token. + */ private final Stack tokenIndexes = new Stack<>(); + + /** + * Terminators that will be searched for by given tokenizer within given source string. + */ private final List terminators = new ArrayList<>(); - private String source; + + private String source; // string to be tokenized + private int currentIndex = 0; private int cachedTerminatorIndex = -1; @@ -50,6 +55,11 @@ public class Tokenizer { return this; } + public Tokenizer addTerminator(Terminator terminator) { + terminators.add(terminator); + return this; + } + public Tokenizer addTerminator(final String startSequence, final String endSequence, final Terminator.TerminationStrategy terminationStrategy) { terminators.add(new Terminator(startSequence, endSequence, terminationStrategy)); @@ -64,7 +74,10 @@ public class Tokenizer { + "\" but got \"" + match.token + "\" instead."); } - + /** + * @return next @TokenizerMatch or null if end of input is reached. + * @throws InvalidSyntaxException + */ public TokenizerMatch getNextToken() throws InvalidSyntaxException { tokenIndexes.push(currentIndex); @@ -106,22 +119,26 @@ public class Tokenizer { currentIndex += terminator.startSequence.length(); } - private TokenizerMatch buildPreservedToken(StringBuilder token, Terminator terminator) throws InvalidSyntaxException { + /** + * @throws InvalidSyntaxException if end sequence is not found as is expected by given token. + */ + private TokenizerMatch buildPreservedToken(StringBuilder token, Terminator terminator) + throws InvalidSyntaxException { if (hasAccumulatedToken(token)) return new TokenizerMatch(token.toString(), null, terminator); if (terminator.hasEndSequence()) - return buildComplexPreservedToken(terminator); + return buildTokenWithExpectedENdSequence(terminator); else - return buildSimplePreservedToken(terminator); + return buildTokenWithoutEndSequence(terminator); } - private TokenizerMatch buildSimplePreservedToken(Terminator terminator) { + private TokenizerMatch buildTokenWithoutEndSequence(Terminator terminator) { currentIndex += terminator.startSequence.length(); return new TokenizerMatch(terminator.startSequence, null, terminator); } - private TokenizerMatch buildComplexPreservedToken(Terminator terminator) throws InvalidSyntaxException { + private TokenizerMatch buildTokenWithExpectedENdSequence(Terminator terminator) throws InvalidSyntaxException { int endSequenceIndex = getEndSequenceIndex(terminator); String reminder = source.substring(currentIndex + terminator.startSequence.length(), endSequenceIndex); currentIndex = endSequenceIndex + terminator.endSequence.length(); @@ -129,6 +146,9 @@ public class Tokenizer { return new TokenizerMatch(terminator.startSequence, reminder, terminator); } + /** + * @throws InvalidSyntaxException if end of input is reached without finding expected end sequence. + */ private int getEndSequenceIndex(Terminator terminator) throws InvalidSyntaxException { int endSequenceIndex = source.indexOf(terminator.endSequence, currentIndex + terminator.startSequence.length()); @@ -147,7 +167,7 @@ public class Tokenizer { return getOrFindTokenTerminator() == null; } - public boolean hasMoreTokens() { + public boolean hasMoreContent() { return currentIndex < source.length(); } @@ -198,6 +218,26 @@ public class Tokenizer { currentIndex = tokenIndexes.pop(); } + /** + * For debugging + */ + public void enlistRemainingTokens(){ + int redTokenCount = 0; + + try { + while (hasMoreContent()) { + out.println(getNextToken().toString()); + redTokenCount++; + } + } catch (InvalidSyntaxException e){ + out.println("There is syntax exception"); + } + + // restore pointer to original location + for (int i = 0; i< redTokenCount; i++ ) unreadToken(); + } + + public void skipUntilDataEnd() { tokenIndexes.push(currentIndex); currentIndex = source.length();