X-Git-Url: http://www2.svjatoslav.eu/gitweb/?a=blobdiff_plain;f=src%2Fmain%2Fjava%2Feu%2Fsvjatoslav%2Fcommons%2Fstring%2Ftokenizer%2FTokenizer.java;h=722e17a0a723fb2ed1de79626298600bb83f39bb;hb=ba10494483d7eaf1e9d58ddb1402806c8fc58178;hp=e92ccd791ce88893ea49610dd4e21391731c8e71;hpb=798a165906826270924f0d5d58cfa04cf6bdc113;p=svjatoslav_commons.git diff --git a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java index e92ccd7..722e17a 100755 --- a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java +++ b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java @@ -24,6 +24,9 @@ public class Tokenizer { private String source; private int currentIndex = 0; + int cachedTerminatorIndex = -1; + Terminator cachedTerminator; + public Tokenizer(final String source) { this.source = source; } @@ -34,6 +37,9 @@ public class Tokenizer { this.source = source; currentIndex = 0; tokenIndexes.clear(); + + cachedTerminatorIndex = -1; + cachedTerminator = null; return this; } @@ -57,55 +63,114 @@ public class Tokenizer { + "\" but got \"" + match.token + "\" instead."); } - public TokenizerMatch getNextToken() { + + + public TokenizerMatch getNextToken() throws InvalidSyntaxException { tokenIndexes.push(currentIndex); - StringBuilder token = new StringBuilder(); + StringBuilder tokenAccumulator = new StringBuilder(); while (true){ - if (isTokenTermination()){ - Terminator tokenTerminator = findTokenTerminator(); - - if (tokenTerminator.termination == PRESERVE){ - if (hasAccumulatedToken(token)){ - // already assembled some token - return new TokenizerMatch(token.toString(), "", tokenTerminator); - } else { - currentIndex++; - return new TokenizerMatch(tokenTerminator.startSequence, "", tokenTerminator); - } - } else if (tokenTerminator.termination == DROP){ - if (hasAccumulatedToken(token)){ - currentIndex++; - return new TokenizerMatch(token.toString(), "", tokenTerminator); - } else { - currentIndex++; - } - } - } else { - token.append(source.charAt(currentIndex)); + + if (currentIndex >= source.length()){ // reached end of input + if (hasAccumulatedToken(tokenAccumulator)) + return new TokenizerMatch(tokenAccumulator.toString(), null, null); + else + return null; + } + + if (isOngoingToken()) { + tokenAccumulator.append(source.charAt(currentIndex)); currentIndex++; + continue; + } + + Terminator terminator = getOrFindTokenTerminator(); + + if (terminator.termination == PRESERVE) + return buildPreservedToken(tokenAccumulator, terminator); + else if (terminator.termination == DROP){ + skipUntilTerminatorEnd(terminator); + + if (hasAccumulatedToken(tokenAccumulator)) + return new TokenizerMatch(tokenAccumulator.toString(), null, terminator); } } } + private void skipUntilTerminatorEnd(Terminator terminator) throws InvalidSyntaxException { + if (terminator.hasEndSequence()) + currentIndex = getEndSequenceIndex(terminator) + terminator.endSequence.length(); + else + currentIndex += terminator.startSequence.length(); + } + + private TokenizerMatch buildPreservedToken(StringBuilder token, Terminator terminator) throws InvalidSyntaxException { + if (hasAccumulatedToken(token)) + return new TokenizerMatch(token.toString(), null, terminator); + + if (terminator.hasEndSequence()) + return buildComplexPreservedToken(terminator); + else + return buildSimplePreservedToken(terminator); + } + + private TokenizerMatch buildSimplePreservedToken(Terminator terminator) { + currentIndex += terminator.startSequence.length(); + return new TokenizerMatch(terminator.startSequence, null, terminator); + } + + private TokenizerMatch buildComplexPreservedToken(Terminator terminator) throws InvalidSyntaxException { + int endSequenceIndex = getEndSequenceIndex(terminator); + String reminder = source.substring(currentIndex + terminator.startSequence.length(), endSequenceIndex); + currentIndex = endSequenceIndex + terminator.endSequence.length(); + + return new TokenizerMatch(terminator.startSequence, reminder, terminator); + } + + private int getEndSequenceIndex(Terminator terminator) throws InvalidSyntaxException { + int endSequenceIndex = source.indexOf(terminator.endSequence, + currentIndex + terminator.startSequence.length()); + + if (endSequenceIndex < 0) + throw new InvalidSyntaxException("Expected \"" + terminator.endSequence + "\" but not found."); + + return endSequenceIndex; + } + private boolean hasAccumulatedToken(StringBuilder token) { return token.length() > 0; } - private boolean isTokenTermination() { - return findTokenTerminator() != null; + private boolean isOngoingToken() { + return getOrFindTokenTerminator() == null; } - public Terminator findTokenTerminator() { + public boolean hasMoreTokens(){ + return currentIndex < source.length(); + } + + /** + * Attempts to cache terminator search result. + */ + public Terminator getOrFindTokenTerminator() { + if (currentIndex == cachedTerminatorIndex) + return cachedTerminator; + + cachedTerminatorIndex = currentIndex; + cachedTerminator = findTokenTerminator(); + return cachedTerminator; + } + + private Terminator findTokenTerminator() { for (Terminator terminator : terminators) if (terminator.matches(source, currentIndex)) return terminator; return null; } - public boolean consumeIfNextToken(final String token) { + public boolean consumeIfNextToken(final String token) throws InvalidSyntaxException { if (token.equals(getNextToken().token)) return true; @@ -113,13 +178,13 @@ public class Tokenizer { return false; } - public TokenizerMatch peekNextToken(){ + public TokenizerMatch peekNextToken() throws InvalidSyntaxException { TokenizerMatch result = getNextToken(); unreadToken(); return result; } - public boolean peekIsOneOf(String ... possibilities){ + public boolean peekIsOneOf(String ... possibilities) throws InvalidSyntaxException { String nextToken = peekNextToken().token; return Stream.of(possibilities).anyMatch(possibility -> possibility.equals(nextToken)); } @@ -128,35 +193,7 @@ public class Tokenizer { if (peekIsOneOf(possibilities)) throw new InvalidSyntaxException("Not expected \"" + peekNextToken().token + "\" here."); } - - - public boolean sequenceMatches(final String sequence) { - if ((currentIndex + sequence.length()) > source.length()) - return false; - - for (int i = 0; i < sequence.length(); i++) - if (sequence.charAt(i) != source.charAt(i + currentIndex)) - return false; - - return true; - } - - public void skipUntilDataEnd() { - tokenIndexes.push(currentIndex); - currentIndex = source.length(); - } - - public void skipUntilSequence(final String sequence) { - while (currentIndex < source.length()) { - if (sequenceMatches(sequence)) { - currentIndex += sequence.length(); - return; - } - - currentIndex++; - } - } - + public void unreadToken() { currentIndex = tokenIndexes.pop(); }