ec13b1a048160503c5f550264eab58b89daa6ab4
[javainspect.git] / src / main / java / eu / svjatoslav / inspector / tokenizer / Tokenizer.java
1 package eu.svjatoslav.inspector.tokenizer;
2
3 import java.util.ArrayList;
4 import java.util.List;
5 import java.util.Stack;
6
7 public class Tokenizer {
8
9         private final List<Terminator> terminators = new ArrayList<Terminator>();
10         private final String source;
11
12         Stack<Integer> tokenIndexes = new Stack<Integer>();
13
14         private int currentIndex = 0;
15
16         public Tokenizer(final String source) {
17                 this.source = source;
18         }
19
20         public void addTerminator(final String startSequence,
21                         final boolean ignoreTerminator) {
22                 terminators.add(new Terminator(startSequence, ignoreTerminator));
23         }
24
25         public void addTerminator(final String startSequence,
26                         final String endSequence, final boolean ignoreTerminator) {
27                 terminators.add(new Terminator(startSequence, endSequence,
28                                 ignoreTerminator));
29         }
30
31         public void expectNextToken(final String value)
32                         throws InvalidSyntaxException {
33                 final TokenizerMatch match = getNextToken();
34                 if (!value.equals(match.token))
35                         throw new InvalidSyntaxException("Expected \"" + value
36                                         + "\" but got \"" + match.token + "\" instead.");
37         }
38
39         public TokenizerMatch getNextToken() {
40                 tokenIndexes.push(currentIndex);
41                 final StringBuffer result = new StringBuffer();
42
43                 while (true) {
44                         if (currentIndex >= source.length())
45                                 return null;
46
47                         boolean accumulateCurrentChar = true;
48
49                         findTerminator: for (final Terminator terminator : terminators)
50                                 if (sequenceMatches(terminator.startSequence))
51
52                                         if (terminator.ignoreTerminator) {
53                                                 currentIndex += terminator.startSequence.length();
54
55                                                 if (terminator.endSequence != null)
56                                                         skipUntilSequence(terminator.endSequence);
57
58                                                 if (result.length() > 0)
59                                                         return new TokenizerMatch(result.toString(),
60                                                                         terminator);
61                                                 else {
62                                                         accumulateCurrentChar = false;
63                                                         break findTerminator;
64                                                 }
65                                         } else if (result.length() > 0)
66                                                 return new TokenizerMatch(result.toString(), terminator);
67                                         else {
68                                                 currentIndex += terminator.startSequence.length();
69                                                 return new TokenizerMatch(terminator.startSequence,
70                                                                 terminator);
71                                         }
72
73                         if (accumulateCurrentChar) {
74                                 result.append(source.charAt(currentIndex));
75                                 currentIndex++;
76                         }
77                 }
78
79         }
80
81         public boolean probeNextToken(final String token) {
82                 if (token.equals(getNextToken().token))
83                         return true;
84
85                 unreadToken();
86                 return false;
87         }
88
89         public boolean sequenceMatches(final String sequence) {
90                 if ((currentIndex + sequence.length()) > source.length())
91                         return false;
92
93                 for (int i = 0; i < sequence.length(); i++)
94                         if (sequence.charAt(i) != source.charAt(i + currentIndex))
95                                 return false;
96
97                 return true;
98         }
99
100         public void skipUntilDataEnd() {
101                 tokenIndexes.push(currentIndex);
102                 currentIndex = source.length();
103         }
104
105         public void skipUntilSequence(final String sequence) {
106                 while (currentIndex < source.length()) {
107                         if (sequenceMatches(sequence)) {
108                                 currentIndex += sequence.length();
109                                 return;
110                         }
111
112                         currentIndex++;
113                 }
114         }
115
116         public void unreadToken() {
117                 currentIndex = tokenIndexes.pop();
118         }
119
120 }