import java.util.ArrayList;
import java.util.List;
+import java.util.Stack;
public class Tokenizer {
private final List<Terminator> terminators = new ArrayList<Terminator>();
private final String source;
+ Stack<Integer> tokenIndexes = new Stack<Integer>();
+
private int currentIndex = 0;
public Tokenizer(final String source) {
this.source = source;
}
- public void addTerminator(final String terminator, final boolean empty) {
+ /**
+  * Registers a terminator that is described by a single character
+  * sequence.
+  *
+  * @param startSequence
+  *            text whose occurrence in the source marks the terminator
+  * @param ignoreTerminator
+  *            forwarded to the {@code Terminator} constructor; when
+  *            {@code true}, {@code getNextToken} steps over a matched
+  *            terminator instead of handing it out as a token
+  */
+ public void addTerminator(final String startSequence,
+         final boolean ignoreTerminator) {
+     final Terminator terminator = new Terminator(startSequence,
+             ignoreTerminator);
+     terminators.add(terminator);
+ }
- terminators.add(new Terminator(terminator, empty));
+ /**
+  * Registers a terminator that is described by a start sequence and an
+  * end sequence.
+  *
+  * @param startSequence
+  *            text whose occurrence in the source marks the beginning of
+  *            the terminator
+  * @param endSequence
+  *            text that closes the terminator; for an ignored terminator
+  *            {@code getNextToken} skips forward past this sequence
+  *            after stepping over the start sequence
+  * @param ignoreTerminator
+  *            forwarded to the {@code Terminator} constructor; when
+  *            {@code true}, {@code getNextToken} steps over a matched
+  *            terminator instead of handing it out as a token
+  */
+ public void addTerminator(final String startSequence,
+         final String endSequence, final boolean ignoreTerminator) {
+     final Terminator terminator = new Terminator(startSequence,
+             endSequence, ignoreTerminator);
+     terminators.add(terminator);
}
- public TokenizerMatch getToken() {
+ /**
+  * Reads the next token and checks that its text equals the expected
+  * value. The token stays consumed in both outcomes; this method never
+  * calls {@code unreadToken}.
+  *
+  * @param value
+  *            text the next token is required to have
+  * @throws InvalidSyntaxException
+  *             if the text of the next token differs from {@code value}
+  */
+ public void expectNextToken(final String value)
+         throws InvalidSyntaxException {
+     final TokenizerMatch actual = getNextToken();
+
+     if (value.equals(actual.token)) {
+         return;
+     }
+
+     throw new InvalidSyntaxException("Expected \"" + value
+             + "\" but got \"" + actual.token + "\" instead.");
+ }
+
+ public TokenizerMatch getNextToken() {
+ tokenIndexes.push(currentIndex);
final StringBuffer result = new StringBuffer();
while (true) {
boolean accumulateCurrentChar = true;
findTerminator: for (final Terminator terminator : terminators)
- if (terminatorMatches(terminator))
- // empty space detected
- if (terminator.empty) {
- currentIndex += terminator.value.length();
+ if (sequenceMatches(terminator.startSequence))
+
+ if (terminator.ignoreTerminator) {
+ currentIndex += terminator.startSequence.length();
+
+ if (terminator.endSequence != null)
+ skipUntilSequence(terminator.endSequence);
+
if (result.length() > 0)
return new TokenizerMatch(result.toString(),
terminator);
} else if (result.length() > 0)
return new TokenizerMatch(result.toString(), terminator);
else {
- currentIndex += terminator.value.length();
- return new TokenizerMatch(terminator.value, terminator);
+ currentIndex += terminator.startSequence.length();
+ return new TokenizerMatch(terminator.startSequence,
+ terminator);
}
if (accumulateCurrentChar) {
}
- public boolean terminatorMatches(final Terminator terminator) {
- if ((currentIndex + terminator.value.length()) > source.length())
+ /**
+  * Reads the next token and reports whether its text equals the given
+  * one. On a match the token stays consumed; otherwise the read is
+  * undone through {@code unreadToken}.
+  *
+  * @param token
+  *            text to compare the next token against
+  * @return {@code true} if the next token has exactly this text,
+  *         {@code false} otherwise
+  */
+ public boolean probeNextToken(final String token) {
+     final boolean matched = token.equals(getNextToken().token);
+
+     if (!matched) {
+         // Roll back so the caller can read the same token again.
+         unreadToken();
+     }
+
+     return matched;
+ }
+
+ /**
+  * Tells whether the source contains the given sequence starting exactly
+  * at the current read position. The read position is not moved.
+  *
+  * @param sequence
+  *            text to look for at the current position
+  * @return {@code true} if every character of {@code sequence} equals
+  *         the corresponding source character, {@code false} if any
+  *         differs or the sequence would run past the end of the source
+  */
+ public boolean sequenceMatches(final String sequence) {
+     // Not enough characters left in the source to hold the sequence.
+     if (currentIndex + sequence.length() > source.length())
return false;
- for (int i = 0; i < terminator.value.length(); i++)
- if (terminator.value.charAt(i) != source.charAt(i + currentIndex))
+     for (int offset = 0; offset < sequence.length(); offset++)
+         if (sequence.charAt(offset) != source.charAt(currentIndex + offset))
return false;
return true;
}
+ /**
+  * Moves the read position to the end of the source. The position held
+  * before the jump is pushed onto {@code tokenIndexes} first, so a
+  * following {@code unreadToken} call restores it.
+  */
+ public void skipUntilDataEnd() {
+     final int positionBeforeSkip = currentIndex;
+     tokenIndexes.push(positionBeforeSkip);
+
+     currentIndex = source.length();
+ }
+
+ /**
+  * Advances the read position one character at a time until the given
+  * sequence is found at the current position, then moves just past that
+  * sequence. If the sequence never occurs, the read position ends up at
+  * the end of the source. Nothing is pushed onto {@code tokenIndexes}.
+  *
+  * @param sequence
+  *            text marking the end of the region to skip
+  */
+ public void skipUntilSequence(final String sequence) {
+     for (; currentIndex < source.length(); currentIndex++) {
+         if (sequenceMatches(sequence)) {
+             // Consume the closing sequence itself as well.
+             currentIndex += sequence.length();
+             return;
+         }
+     }
+ }
+
+ /**
+  * Restores the read position that was most recently saved on
+  * {@code tokenIndexes}, removing that entry from the stack.
+  */
+ public void unreadToken() {
+     final int restoredPosition = tokenIndexes.pop();
+     currentIndex = restoredPosition;
+ }
+
}