X-Git-Url: http://www2.svjatoslav.eu/gitweb/?p=svjatoslav_commons.git;a=blobdiff_plain;f=src%2Fmain%2Fjava%2Feu%2Fsvjatoslav%2Fcommons%2Fstring%2Ftokenizer%2FTokenizer.java;h=c80aeb1c533f66e47ca1047b95aabd0cc8d2a642;hp=cd5e5c028d77128b2439efca3478795c01d07ed5;hb=7139ae895dd50d9dd1cbd2b3110c93f546cef798;hpb=7c46b3418fe1061183aa204a17e38227762c6979 diff --git a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java index cd5e5c0..c80aeb1 100755 --- a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java +++ b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java @@ -12,25 +12,35 @@ package eu.svjatoslav.commons.string.tokenizer; import java.util.ArrayList; import java.util.List; import java.util.Stack; +import java.util.stream.Stream; import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP; +import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE; public class Tokenizer { - final Stack tokenIndexes = new Stack<>(); + private final Stack tokenIndexes = new Stack<>(); private final List terminators = new ArrayList<>(); private String source; private int currentIndex = 0; + private int cachedTerminatorIndex = -1; + private Terminator cachedTerminator; + public Tokenizer(final String source) { this.source = source; } - public Tokenizer(){} + public Tokenizer() { + } - public Tokenizer setSource(String source){ + public Tokenizer setSource(String source) { this.source = source; currentIndex = 0; + tokenIndexes.clear(); + + cachedTerminatorIndex = -1; + cachedTerminator = null; return this; } @@ -46,7 +56,7 @@ public class Tokenizer { return this; } - public void expectNextToken(final String value) + public void expectAndConsumeNextToken(final String value) throws InvalidSyntaxException { final TokenizerMatch match = getNextToken(); if (!value.equals(match.token)) @@ -54,110 +64,143 @@ public class Tokenizer { + "\" but got \"" + match.token + "\" instead."); } - public TokenizerMatch getNextToken() { + + public TokenizerMatch getNextToken() throws InvalidSyntaxException { tokenIndexes.push(currentIndex); - final StringBuilder result = new StringBuilder(); + + StringBuilder tokenAccumulator = new StringBuilder(); while (true) { - if (currentIndex >= source.length()) - return null; - - boolean accumulateCurrentChar = true; - - for (final Terminator terminator : terminators) - if (sequenceMatches(terminator.startSequence)) - - if (terminator.termination == DROP) { - currentIndex += terminator.startSequence.length(); - - if (terminator.endSequence != null) - skipUntilSequence(terminator.endSequence); - - if (result.length() > 0) - return new TokenizerMatch(result.toString(), - terminator); - else { - accumulateCurrentChar = false; - break; - } - } else if (result.length() > 0) - return new TokenizerMatch(result.toString(), terminator); - else { - currentIndex += terminator.startSequence.length(); - return new TokenizerMatch(terminator.startSequence, - terminator); - } - - if (accumulateCurrentChar) { - result.append(source.charAt(currentIndex)); + + if (currentIndex >= source.length()) { // reached end of input + if (hasAccumulatedToken(tokenAccumulator)) + return new TokenizerMatch(tokenAccumulator.toString(), null, null); + else + return null; + } + + if (isOngoingToken()) { + tokenAccumulator.append(source.charAt(currentIndex)); currentIndex++; + continue; + } + + Terminator terminator = getOrFindTokenTerminator(); + + if (terminator.termination == PRESERVE) + return buildPreservedToken(tokenAccumulator, terminator); + else if (terminator.termination == DROP) { + skipUntilTerminatorEnd(terminator); + + if (hasAccumulatedToken(tokenAccumulator)) + return new TokenizerMatch(tokenAccumulator.toString(), null, terminator); } } } - public boolean consumeIfNextToken(final String token) { - if (token.equals(getNextToken().token)) - return true; + private void skipUntilTerminatorEnd(Terminator terminator) throws InvalidSyntaxException { + if (terminator.hasEndSequence()) + currentIndex = getEndSequenceIndex(terminator) + terminator.endSequence.length(); + else + currentIndex += terminator.startSequence.length(); + } - unreadToken(); - return false; + private TokenizerMatch buildPreservedToken(StringBuilder token, Terminator terminator) throws InvalidSyntaxException { + if (hasAccumulatedToken(token)) + return new TokenizerMatch(token.toString(), null, terminator); + + if (terminator.hasEndSequence()) + return buildComplexPreservedToken(terminator); + else + return buildSimplePreservedToken(terminator); } - public TokenizerMatch peekNextToken(){ - TokenizerMatch result = getNextToken(); - unreadToken(); - return result; + private TokenizerMatch buildSimplePreservedToken(Terminator terminator) { + currentIndex += terminator.startSequence.length(); + return new TokenizerMatch(terminator.startSequence, null, terminator); + } + + private TokenizerMatch buildComplexPreservedToken(Terminator terminator) throws InvalidSyntaxException { + int endSequenceIndex = getEndSequenceIndex(terminator); + String reminder = source.substring(currentIndex + terminator.startSequence.length(), endSequenceIndex); + currentIndex = endSequenceIndex + terminator.endSequence.length(); + + return new TokenizerMatch(terminator.startSequence, reminder, terminator); } - public boolean peekIsOneOf(String [] ... possibilities){ - TokenizerMatch nextToken = peekNextToken(); + private int getEndSequenceIndex(Terminator terminator) throws InvalidSyntaxException { + int endSequenceIndex = source.indexOf(terminator.endSequence, + currentIndex + terminator.startSequence.length()); - for (String[] possibility : possibilities) - if (possibility.equals(nextToken)) - return true; + if (endSequenceIndex < 0) + throw new InvalidSyntaxException("Expected \"" + terminator.endSequence + "\" but not found."); - return false; + return endSequenceIndex; } - public void peekExpectNoneOf(String [] ... possibilities) throws InvalidSyntaxException { - TokenizerMatch nextToken = peekNextToken(); + private boolean hasAccumulatedToken(StringBuilder token) { + return token.length() > 0; + } - for (String[] possibility : possibilities) - if (possibility.equals(nextToken)) - throw new InvalidSyntaxException("Not expected \"" + nextToken + "\" here."); + private boolean isOngoingToken() { + return getOrFindTokenTerminator() == null; } + public boolean hasMoreTokens() { + return currentIndex < source.length(); + } - public boolean sequenceMatches(final String sequence) { - if ((currentIndex + sequence.length()) > source.length()) - return false; + /** + * Attempts to cache terminator search result. + */ + public Terminator getOrFindTokenTerminator() { + if (currentIndex == cachedTerminatorIndex) + return cachedTerminator; - for (int i = 0; i < sequence.length(); i++) - if (sequence.charAt(i) != source.charAt(i + currentIndex)) - return false; + cachedTerminatorIndex = currentIndex; + cachedTerminator = findTokenTerminator(); + return cachedTerminator; + } - return true; + private Terminator findTokenTerminator() { + for (Terminator terminator : terminators) + if (terminator.matches(source, currentIndex)) + return terminator; + return null; } - public void skipUntilDataEnd() { - tokenIndexes.push(currentIndex); - currentIndex = source.length(); + public boolean consumeIfNextToken(final String token) throws InvalidSyntaxException { + if (token.equals(getNextToken().token)) + return true; + + unreadToken(); + return false; } - public void skipUntilSequence(final String sequence) { - while (currentIndex < source.length()) { - if (sequenceMatches(sequence)) { - currentIndex += sequence.length(); - return; - } + public TokenizerMatch peekNextToken() throws InvalidSyntaxException { + TokenizerMatch result = getNextToken(); + unreadToken(); + return result; + } - currentIndex++; - } + public boolean peekIsOneOf(String... possibilities) throws InvalidSyntaxException { + String nextToken = peekNextToken().token; + return Stream.of(possibilities).anyMatch(possibility -> possibility.equals(nextToken)); + } + + public void peekExpectNoneOf(String... possibilities) throws InvalidSyntaxException { + if (peekIsOneOf(possibilities)) + throw new InvalidSyntaxException("Not expected \"" + peekNextToken().token + "\" here."); } public void unreadToken() { currentIndex = tokenIndexes.pop(); } + public void skipUntilDataEnd() { + tokenIndexes.push(currentIndex); + currentIndex = source.length(); + } + }