import java.util.ArrayList;
import java.util.List;
import java.util.Stack;
+import java.util.stream.Stream;
import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
+import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
public class Tokenizer {
- final Stack<Integer> tokenIndexes = new Stack<>();
+ // Read positions recorded by getNextToken() and skipUntilDataEnd(); unreadToken() pops the latest one to rewind.
+ private final Stack<Integer> tokenIndexes = new Stack<>();
private final List<Terminator> terminators = new ArrayList<>();
private String source;
private int currentIndex = 0;
+ // Read position for which cachedTerminator was looked up; -1 means nothing is cached.
+ private int cachedTerminatorIndex = -1;
+ private Terminator cachedTerminator;
+
+ /**
+ * Creates a tokenizer that reads from the given source string.
+ */
public Tokenizer(final String source) {
this.source = source;
}
- public Tokenizer(){}
+ /**
+ * Creates a tokenizer without a source string; one can be supplied later
+ * through setSource(String).
+ */
+ public Tokenizer() {
+ }
- public Tokenizer setSource(String source){
+ /**
+ * Replaces the source string and resets the read position, the stack of
+ * recorded token start indexes and the cached terminator lookup.
+ *
+ * @param source new string to tokenize
+ * @return this tokenizer
+ */
+ public Tokenizer setSource(String source) {
this.source = source;
currentIndex = 0;
+ tokenIndexes.clear();
+
+ cachedTerminatorIndex = -1;
+ cachedTerminator = null;
return this;
}
return this;
}
- public void expectNextToken(final String value)
+ public void expectAndConsumeNextToken(final String value)
throws InvalidSyntaxException {
final TokenizerMatch match = getNextToken();
if (!value.equals(match.token))
+ "\" but got \"" + match.token + "\" instead.");
}
- public TokenizerMatch getNextToken() {
+
+ /**
+ * Reads the next token starting at the current read position.
+ * <p>
+ * The position at entry is pushed onto the index stack first, so that
+ * unreadToken() can rewind to it. Characters are then accumulated until a
+ * terminator matches at the current position or the input ends.
+ *
+ * @return the next match, or null when the end of input is reached and no
+ * characters were accumulated
+ * @throws InvalidSyntaxException when a terminator declares an end sequence
+ * that cannot be found in the remaining input
+ */
+ public TokenizerMatch getNextToken() throws InvalidSyntaxException {
tokenIndexes.push(currentIndex);
- final StringBuilder result = new StringBuilder();
+
+ StringBuilder tokenAccumulator = new StringBuilder();
while (true) {
- if (currentIndex >= source.length())
- return null;
-
- boolean accumulateCurrentChar = true;
-
- for (final Terminator terminator : terminators)
- if (sequenceMatches(terminator.startSequence))
-
- if (terminator.termination == DROP) {
- currentIndex += terminator.startSequence.length();
-
- if (terminator.endSequence != null)
- skipUntilSequence(terminator.endSequence);
-
- if (result.length() > 0)
- return new TokenizerMatch(result.toString(),
- terminator);
- else {
- accumulateCurrentChar = false;
- break;
- }
- } else if (result.length() > 0)
- return new TokenizerMatch(result.toString(), terminator);
- else {
- currentIndex += terminator.startSequence.length();
- return new TokenizerMatch(terminator.startSequence,
- terminator);
- }
-
- if (accumulateCurrentChar) {
- result.append(source.charAt(currentIndex));
+
+ if (currentIndex >= source.length()) { // reached end of input
+ if (hasAccumulatedToken(tokenAccumulator))
+ return new TokenizerMatch(tokenAccumulator.toString(), null, null);
+ else
+ return null;
+ }
+
+ // No terminator matches here: the character belongs to the token being accumulated.
+ if (isOngoingToken()) {
+ tokenAccumulator.append(source.charAt(currentIndex));
currentIndex++;
+ continue;
+ }
+
+ // Same position as the isOngoingToken() check above, so this is answered from the cached lookup.
+ Terminator terminator = getOrFindTokenTerminator();
+
+ if (terminator.termination == PRESERVE)
+ return buildPreservedToken(tokenAccumulator, terminator);
+ else if (terminator.termination == DROP) {
+ skipUntilTerminatorEnd(terminator);
+
+ // With nothing accumulated the dropped terminator is simply skipped and scanning continues.
+ if (hasAccumulatedToken(tokenAccumulator))
+ return new TokenizerMatch(tokenAccumulator.toString(), null, terminator);
}
}
}
- public boolean consumeIfNextToken(final String token) {
- if (token.equals(getNextToken().token))
- return true;
+ /**
+  * Moves the read position past the given terminator. When the terminator
+  * declares an end sequence the position lands right after that end
+  * sequence; otherwise it lands right after the start sequence.
+  *
+  * @throws InvalidSyntaxException when the declared end sequence is not found
+  */
+ private void skipUntilTerminatorEnd(Terminator terminator) throws InvalidSyntaxException {
+     currentIndex = terminator.hasEndSequence()
+             ? getEndSequenceIndex(terminator) + terminator.endSequence.length()
+             : currentIndex + terminator.startSequence.length();
+ }
- unreadToken();
- return false;
+ /**
+  * Builds the match returned when a PRESERVE terminator is found at the
+  * current read position.
+  *
+  * @param token      characters accumulated before the terminator
+  * @param terminator terminator matching at the current position
+  * @throws InvalidSyntaxException when the terminator's end sequence is not found
+  */
+ private TokenizerMatch buildPreservedToken(StringBuilder token, Terminator terminator) throws InvalidSyntaxException {
+     // Accumulated text is handed out first; the terminator is not consumed
+     // here, so the read position still points at it afterwards.
+     if (hasAccumulatedToken(token)) {
+         return new TokenizerMatch(token.toString(), null, terminator);
+     }
+
+     return terminator.hasEndSequence()
+             ? buildComplexPreservedToken(terminator)
+             : buildSimplePreservedToken(terminator);
}
- public TokenizerMatch peekNextToken(){
- TokenizerMatch result = getNextToken();
- unreadToken();
- return result;
+ /**
+  * Consumes the terminator's start sequence and returns it as the token.
+  */
+ private TokenizerMatch buildSimplePreservedToken(Terminator terminator) {
+     final String startSequence = terminator.startSequence;
+     currentIndex += startSequence.length();
+     return new TokenizerMatch(startSequence, null, terminator);
+ }
+
+ /**
+  * Consumes a terminator that has both a start and an end sequence. The
+  * start sequence becomes the token, and the text between the two sequences
+  * is passed along as the second argument of the match.
+  *
+  * @throws InvalidSyntaxException when the end sequence is not found
+  */
+ private TokenizerMatch buildComplexPreservedToken(Terminator terminator) throws InvalidSyntaxException {
+     final int contentStart = currentIndex + terminator.startSequence.length();
+     final int contentEnd = getEndSequenceIndex(terminator);
+     final String remainder = source.substring(contentStart, contentEnd);
+
+     // Continue reading right after the end sequence.
+     currentIndex = contentEnd + terminator.endSequence.length();
+     return new TokenizerMatch(terminator.startSequence, remainder, terminator);
}
- public boolean peekIsOneOf(String ... possibilities){
- TokenizerMatch nextToken = peekNextToken();
+ /**
+ * Locates the terminator's end sequence in the source, searching from the
+ * position right after the terminator's start sequence.
+ *
+ * @return index in the source at which the end sequence begins
+ * @throws InvalidSyntaxException when the end sequence does not occur
+ */
+ private int getEndSequenceIndex(Terminator terminator) throws InvalidSyntaxException {
+ int endSequenceIndex = source.indexOf(terminator.endSequence,
+ currentIndex + terminator.startSequence.length());
- for (String possibility : possibilities)
- if (possibility.equals(nextToken))
- return true;
+ if (endSequenceIndex < 0)
+ throw new InvalidSyntaxException("Expected \"" + terminator.endSequence + "\" but not found.");
- return false;
+ return endSequenceIndex;
}
- public void peekExpectNoneOf(String ... possibilities) throws InvalidSyntaxException {
- TokenizerMatch nextToken = peekNextToken();
+ /**
+  * Tells whether any characters have been collected into the given buffer.
+  */
+ private boolean hasAccumulatedToken(StringBuilder token) {
+     final int collected = token.length();
+     return collected != 0;
+ }
- for (String possibility : possibilities)
- if (possibility.equals(nextToken))
- throw new InvalidSyntaxException("Not expected \"" + nextToken + "\" here.");
+ /**
+  * Tells whether the character at the current read position is part of an
+  * ordinary token, i.e. no terminator matches there.
+  */
+ private boolean isOngoingToken() {
+     final Terminator terminatorHere = getOrFindTokenTerminator();
+     return terminatorHere == null;
}
+ /**
+  * Tells whether unread characters remain in the source.
+  * <p>
+  * This only compares the read position with the source length. If the
+  * remaining characters are all covered by dropped terminators,
+  * getNextToken() still returns null even though this method returned true.
+  */
+ public boolean hasMoreTokens() {
+     final int unreadCharacters = source.length() - currentIndex;
+     return unreadCharacters > 0;
+ }
- public boolean sequenceMatches(final String sequence) {
- if ((currentIndex + sequence.length()) > source.length())
- return false;
+ /**
+ * Returns the terminator that matches at the current read position, or
+ * null when none does.
+ * <p>
+ * The result is remembered together with the read position it was computed
+ * for, so repeated calls at the same position do not search again.
+ * NOTE(review): the cache is keyed on the position only; presumably the
+ * terminator list is not modified while a source is being read - confirm.
+ */
+ public Terminator getOrFindTokenTerminator() {
+ if (currentIndex == cachedTerminatorIndex)
+ return cachedTerminator;
- for (int i = 0; i < sequence.length(); i++)
- if (sequence.charAt(i) != source.charAt(i + currentIndex))
- return false;
+ cachedTerminatorIndex = currentIndex;
+ cachedTerminator = findTokenTerminator();
+ return cachedTerminator;
+ }
- return true;
+ /**
+  * Searches the registered terminators, in registration order, for the
+  * first one that matches the source at the current read position.
+  *
+  * @return the first matching terminator, or null when none matches
+  */
+ private Terminator findTokenTerminator() {
+     for (int i = 0; i < terminators.size(); i++) {
+         final Terminator candidate = terminators.get(i);
+         if (candidate.matches(source, currentIndex)) {
+             return candidate;
+         }
+     }
+     return null;
}
- public void skipUntilDataEnd() {
- tokenIndexes.push(currentIndex);
- currentIndex = source.length();
+ /**
+  * Reads the next token and keeps it consumed only when it equals the given
+  * value. Otherwise the read is undone, leaving the read position where it
+  * was before the call.
+  *
+  * @param token expected token text
+  * @return true when the next token equals the given value; false when it
+  *         differs or when the end of input has been reached
+  * @throws InvalidSyntaxException when reading the next token fails
+  */
+ public boolean consumeIfNextToken(final String token) throws InvalidSyntaxException {
+     final TokenizerMatch match = getNextToken();
+
+     // getNextToken() returns null at end of input; treat that as "no match"
+     // instead of failing with a NullPointerException.
+     if (match != null && token.equals(match.token))
+         return true;
+
+     unreadToken();
+     return false;
}
- public void skipUntilSequence(final String sequence) {
- while (currentIndex < source.length()) {
- if (sequenceMatches(sequence)) {
- currentIndex += sequence.length();
- return;
- }
+ /**
+  * Returns the next token without consuming it.
+  *
+  * @return the next match, or null when the end of input has been reached
+  * @throws InvalidSyntaxException when reading the next token fails; the
+  *                                read position is restored in that case too
+  */
+ public TokenizerMatch peekNextToken() throws InvalidSyntaxException {
+     try {
+         return getNextToken();
+     } finally {
+         // getNextToken() records the start position before it can fail, so
+         // rewinding here keeps a peek free of side effects even when it throws.
+         unreadToken();
+     }
+ }
- currentIndex++;
- }
+ /**
+  * Tells whether the next token, without consuming it, equals one of the
+  * given values.
+  *
+  * @param possibilities token texts to compare against
+  * @return true when the next token equals one of the values; false when it
+  *         equals none of them or when the end of input has been reached
+  * @throws InvalidSyntaxException when reading the next token fails
+  */
+ public boolean peekIsOneOf(String... possibilities) throws InvalidSyntaxException {
+     final TokenizerMatch next = peekNextToken();
+
+     // peekNextToken() returns null at end of input; nothing can match then.
+     if (next == null)
+         return false;
+
+     final String nextToken = next.token;
+     return Stream.of(possibilities).anyMatch(possibility -> possibility.equals(nextToken));
+ }
+
+ /**
+  * Verifies that the next token, without consuming it, is none of the given
+  * values.
+  *
+  * @param possibilities token texts that are not allowed at this position
+  * @throws InvalidSyntaxException when the next token equals one of the
+  *                                values, or when reading the next token fails
+  */
+ public void peekExpectNoneOf(String... possibilities) throws InvalidSyntaxException {
+     // Peek once and reuse the result for both the check and the message.
+     final TokenizerMatch next = peekNextToken();
+
+     // At end of input there is no token, so nothing forbidden can follow.
+     if (next == null)
+         return;
+
+     for (final String possibility : possibilities)
+         if (possibility.equals(next.token))
+             throw new InvalidSyntaxException("Not expected \"" + next.token + "\" here.");
}
+ /**
+ * Moves the read position back to the most recently recorded index and
+ * removes that index from the stack.
+ * NOTE(review): java.util.Stack.pop() throws EmptyStackException when no
+ * index has been recorded - callers presumably pair this with a prior read.
+ */
public void unreadToken() {
currentIndex = tokenIndexes.pop();
}
+ /**
+  * Jumps to the end of the source. The position held before the jump is
+  * recorded first, so unreadToken() can undo it.
+  */
+ public void skipUntilDataEnd() {
+     tokenIndexes.push(currentIndex);
+
+     final int endOfData = source.length();
+     currentIndex = endOfData;
+ }
+
}