import java.util.ArrayList;
import java.util.List;
import java.util.Stack;
+import java.util.stream.Stream;
+
+import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
+import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
public class Tokenizer {
/**
 * Replaces the text to be tokenized and rewinds this tokenizer to its start.
 *
 * @param source text that subsequent token reads operate on
 * @return this tokenizer, so that calls can be chained
 */
public Tokenizer setSource(String source){
this.source = source;
// Reading restarts at the first character of the new source.
currentIndex = 0;
// Discard the positions that getNextToken() pushed while reading the previous source.
+ tokenIndexes.clear();
return this;
}
/**
 * Registers a terminator that is described by a start sequence only.
 * <p>
 * This change replaces the former boolean {@code ignoreTerminator} flag with a
 * {@code Terminator.TerminationStrategy} value.
 *
 * @param startSequence       sequence passed to the new {@code Terminator}
 * @param terminationStrategy strategy passed to the new {@code Terminator};
 *                            getNextToken() branches on PRESERVE versus DROP
 * @return this tokenizer, so that calls can be chained
 */
public Tokenizer addTerminator(final String startSequence,
- final boolean ignoreTerminator) {
- terminators.add(new Terminator(startSequence, ignoreTerminator));
+ final Terminator.TerminationStrategy terminationStrategy) {
+ terminators.add(new Terminator(startSequence, terminationStrategy));
return this;
}
/**
 * Registers a terminator that is described by a start and an end sequence.
 * <p>
 * This change replaces the former boolean {@code ignoreTerminator} flag with a
 * {@code Terminator.TerminationStrategy} value.
 *
 * @param startSequence       start sequence passed to the new {@code Terminator}
 * @param endSequence         end sequence passed to the new {@code Terminator}
 * @param terminationStrategy strategy passed to the new {@code Terminator};
 *                            getNextToken() branches on PRESERVE versus DROP
 * @return this tokenizer, so that calls can be chained
 */
public Tokenizer addTerminator(final String startSequence,
- final String endSequence, final boolean ignoreTerminator) {
- terminators.add(new Terminator(startSequence, endSequence,
- ignoreTerminator));
+ final String endSequence, final Terminator.TerminationStrategy terminationStrategy) {
+ terminators.add(new Terminator(startSequence, endSequence, terminationStrategy));
return this;
}
- public void expectNextToken(final String value)
+ public void expectAndConsumeNextToken(final String value)
throws InvalidSyntaxException {
final TokenizerMatch match = getNextToken();
if (!value.equals(match.token))
/**
 * Reads the next token from the source, starting at the current position.
 * <p>
 * Characters are accumulated one at a time until a registered terminator
 * matches at the current position. What happens then depends on the
 * terminator's strategy:
 * <ul>
 * <li>PRESERVE with accumulated text: the text is returned and the position is
 * left on the terminator, so the terminator is seen again by the next read.</li>
 * <li>PRESERVE without accumulated text: the position advances by one and the
 * terminator's {@code startSequence} is returned as the token.</li>
 * <li>DROP with accumulated text: the position advances by one and the text is
 * returned.</li>
 * <li>DROP without accumulated text: the position advances by one and scanning
 * continues.</li>
 * </ul>
 *
 * @return match carrying the token text and the terminator that ended it
 */
public TokenizerMatch getNextToken() {
// Remember where this read started; peekNextToken() relies on unreadToken() after a read.
tokenIndexes.push(currentIndex);
- final StringBuilder result = new StringBuilder();
-
- while (true) {
- if (currentIndex >= source.length())
- return null;
-
- boolean accumulateCurrentChar = true;
-
- for (final Terminator terminator : terminators)
- if (sequenceMatches(terminator.startSequence))
-
- if (terminator.ignoreTerminator) {
- currentIndex += terminator.startSequence.length();
-
- if (terminator.endSequence != null)
- skipUntilSequence(terminator.endSequence);
-
- if (result.length() > 0)
- return new TokenizerMatch(result.toString(),
- terminator);
- else {
- accumulateCurrentChar = false;
- break;
- }
- } else if (result.length() > 0)
- return new TokenizerMatch(result.toString(), terminator);
- else {
- currentIndex += terminator.startSequence.length();
- return new TokenizerMatch(terminator.startSequence,
- terminator);
- }
- if (accumulateCurrentChar) {
- result.append(source.charAt(currentIndex));
+ StringBuilder token = new StringBuilder();
+
// NOTE(review): the removed loop returned null once currentIndex reached source.length().
// The added loop has no such guard; whether charAt() below can be reached at the end of
// the source depends on Terminator.matches(), which is not visible here. TODO confirm.
+ while (true){
+ if (isTokenTermination()){
// isTokenTermination() already ran findTokenTerminator(); the lookup is repeated here
// to obtain the matching terminator itself.
+ Terminator tokenTerminator = findTokenTerminator();
+
+ if (tokenTerminator.termination == PRESERVE){
+ if (hasAccumulatedToken(token)){
+ // already assembled some token
+ return new TokenizerMatch(token.toString(), "", tokenTerminator);
+ } else {
// NOTE(review): the position moves by one character, whereas the removed code advanced
// by terminator.startSequence.length(). Confirm terminators longer than one character
// are handled as intended.
+ currentIndex++;
+ return new TokenizerMatch(tokenTerminator.startSequence, "", tokenTerminator);
+ }
+ } else if (tokenTerminator.termination == DROP){
// NOTE(review): the removed code also skipped ahead to terminator.endSequence for ignored
// terminators; the added code never reads endSequence in this method. TODO confirm.
+ if (hasAccumulatedToken(token)){
+ currentIndex++;
+ return new TokenizerMatch(token.toString(), "", tokenTerminator);
+ } else {
// Nothing accumulated yet: step past the dropped terminator and keep scanning.
+ currentIndex++;
+ }
+ }
+ } else {
// No terminator at this position: the character belongs to the current token.
+ token.append(source.charAt(currentIndex));
currentIndex++;
}
}
}
- public boolean isNextToken(final String token) {
/**
 * Tells whether any characters have been collected for the token being read.
 *
 * @param token buffer holding the characters collected so far
 * @return {@code true} when the buffer holds at least one character
 */
+ private boolean hasAccumulatedToken(StringBuilder token) {
+ return token.length() > 0;
+ }
+
/**
 * Tells whether a registered terminator matches at the current position.
 *
 * @return {@code true} when findTokenTerminator() finds a terminator
 */
+ private boolean isTokenTermination() {
+ return findTokenTerminator() != null;
+ }
+
/**
 * Looks for a registered terminator that matches the source at the current position.
 * <p>
 * Terminators are tried in the iteration order of {@code terminators}; the
 * first one whose {@code matches(source, currentIndex)} call succeeds wins.
 *
 * @return the first matching terminator, or {@code null} when none matches
 */
+ public Terminator findTokenTerminator() {
+ for (Terminator terminator : terminators)
+ if (terminator.matches(source, currentIndex))
+ return terminator;
+ return null;
+ }
+
/**
 * Reads the next token with getNextToken() and reports whether its text equals
 * the given value.
 *
 * @param token expected token text
 * @return {@code true} when the token that was read equals {@code token}
 */
// NOTE(review): in the lines visible here the token is read in both outcomes and nothing
// pushes it back when it differs, unlike peekNextToken(), which calls unreadToken().
// Lines may be elided from this view; confirm against the full file.
+ public boolean consumeIfNextToken(final String token) {
if (token.equals(getNextToken().token))
return true;
return false;
}
/**
 * Reads the next token and then calls unreadToken(), so the token can be
 * inspected ahead of a later read.
 *
 * @return the match produced by getNextToken()
 */
+ public TokenizerMatch peekNextToken(){
+ TokenizerMatch result = getNextToken();
// unreadToken() is defined outside this view; presumably it restores the position that
// getNextToken() pushed onto tokenIndexes. TODO confirm.
+ unreadToken();
+ return result;
+ }
+
/**
 * Tells whether the text of the token returned by peekNextToken() equals any
 * of the given candidates.
 *
 * @param possibilities candidate token texts, compared with {@code equals}
 * @return {@code true} when at least one candidate equals the peeked token text
 */
+ public boolean peekIsOneOf(String ... possibilities){
+ String nextToken = peekNextToken().token;
+ return Stream.of(possibilities).anyMatch(possibility -> possibility.equals(nextToken));
+ }
+
/**
 * Rejects the upcoming token when its text is one of the given candidates.
 *
 * @param possibilities token texts that are not allowed at this point
 * @throws InvalidSyntaxException when peekIsOneOf() reports a match; the
 *                                message quotes the offending token text
 */
+ public void peekExpectNoneOf(String ... possibilities) throws InvalidSyntaxException {
+ if (peekIsOneOf(possibilities))
// The token is peeked a second time here, only to build the error message.
+ throw new InvalidSyntaxException("Not expected \"" + peekNextToken().token + "\" here.");
+ }
+
+
public boolean sequenceMatches(final String sequence) {
if ((currentIndex + sequence.length()) > source.length())
return false;