1 package eu.svjatoslav.inspector.tokenizer;
3 import java.util.ArrayList;
5 import java.util.Stack;
7 public class Tokenizer {
// Terminators registered through addTerminator(); getToken() checks them in insertion order.
9 private final List<Terminator> terminators = new ArrayList<Terminator>();
// Text being tokenized; read character by character in getToken().
10 private final String source;
// Values of currentIndex saved by getToken() and skipUtilEnd(); rollbackToken() pops the most recent one.
12 Stack<Integer> tokenIndexes = new Stack<Integer>();
// Offset into source of the next character to examine.
14 private int currentIndex = 0;
/**
 * Creates a tokenizer that reads from the given text.
 *
 * NOTE(review): only the constructor header survived in this copy of the file;
 * the body below restores the one assignment the final field {@code source}
 * requires. Confirm against the original that nothing else was done here.
 *
 * @param source text to split into tokens
 */
public Tokenizer(final String source) {
    this.source = source;
}
20 public void addTerminator(final String terminator, final boolean empty) {
22 terminators.add(new Terminator(terminator, empty));
25 public void expectToken(final String value) throws InvalidSyntaxException {
26 final TokenizerMatch match = getToken();
27 if (!value.equals(match.token))
28 throw new InvalidSyntaxException("Expected \"" + value
29 + "\" but got \"" + match.token + "\" instead.");
// Reads the next token starting at currentIndex: characters are collected into
// a buffer until one of the registered terminators matches at the read position.
// NOTE(review): this copy of the method is missing statements (for example the
// consequent of the end-of-input check and the tail of one return expression),
// so the comments below describe only the statements that are visible.
32 public TokenizerMatch getToken() {
// Save the starting position so that rollbackToken() can restore it.
33 tokenIndexes.push(currentIndex);
// Text collected so far for the token being built.
34 final StringBuffer result = new StringBuffer();
// End of source reached.
// NOTE(review): the statement executed in this case is not visible here.
37 if (currentIndex >= source.length())
// Starts out true; set to false on the "empty" terminator path below, and
// checked before the current character is appended to the buffer.
40 boolean accumulateCurrentChar = true;
// Test each registered terminator against the text at the read position.
42 findTerminator: for (final Terminator terminator : terminators)
43 if (terminatorMatches(terminator))
44 // empty space detected
45 if (terminator.empty) {
// Step over the terminator text.
46 currentIndex += terminator.value.length();
// Text was already collected: hand it back as the token.
// NOTE(review): the remainder of this return expression is not visible.
47 if (result.length() > 0)
48 return new TokenizerMatch(result.toString(),
51 accumulateCurrentChar = false;
// Non-empty terminator reached after some text was collected: return the text
// first. currentIndex is not advanced on this path, so the terminator is still
// at the read position afterwards.
54 } else if (result.length() > 0)
55 return new TokenizerMatch(result.toString(), terminator);
// Advance past the terminator and return the terminator's own text as the token.
57 currentIndex += terminator.value.length();
58 return new TokenizerMatch(terminator.value, terminator);
// Append the character at the read position to the pending token text.
61 if (accumulateCurrentChar) {
62 result.append(source.charAt(currentIndex));
// Reads the next token via getToken() and compares its text with the given string.
// NOTE(review): the statements that follow the condition (the values returned
// and any rollback of the read position) are not visible in this copy.
// NOTE(review): getToken().token is dereferenced without a null check; confirm
// that getToken() cannot yield null here.
69 public boolean isNextToken(final String token) {
70 if (token.equals(getToken().token))
77 public void rollbackToken() {
78 currentIndex = tokenIndexes.pop();
81 public void skipUtilEnd() {
82 tokenIndexes.push(currentIndex);
83 currentIndex = source.length();
86 public boolean terminatorMatches(final Terminator terminator) {
87 if ((currentIndex + terminator.value.length()) > source.length())
90 for (int i = 0; i < terminator.value.length(); i++)
91 if (terminator.value.charAt(i) != source.charAt(i + currentIndex))