java syntax parser
[javainspect.git] / src / main / java / eu / svjatoslav / inspector / tokenizer / Tokenizer.java
1 package eu.svjatoslav.inspector.tokenizer;
2
3 import java.util.ArrayList;
4 import java.util.List;
5 import java.util.Stack;
6
7 public class Tokenizer {
8
9         private final List<Terminator> terminators = new ArrayList<Terminator>();
10         private final String source;
11
12         Stack<Integer> tokenIndexes = new Stack<Integer>();
13
14         private int currentIndex = 0;
15
16         public Tokenizer(final String source) {
17                 this.source = source;
18         }
19
20         public void addTerminator(final String terminator, final boolean empty) {
21
22                 terminators.add(new Terminator(terminator, empty));
23         }
24
25         public void expectToken(final String value) throws InvalidSyntaxException {
26                 final TokenizerMatch match = getToken();
27                 if (!value.equals(match.token))
28                         throw new InvalidSyntaxException("Expected \"" + value
29                                         + "\" but got \"" + match.token + "\" instead.");
30         }
31
32         public TokenizerMatch getToken() {
33                 tokenIndexes.push(currentIndex);
34                 final StringBuffer result = new StringBuffer();
35
36                 while (true) {
37                         if (currentIndex >= source.length())
38                                 return null;
39
40                         boolean accumulateCurrentChar = true;
41
42                         findTerminator: for (final Terminator terminator : terminators)
43                                 if (terminatorMatches(terminator))
44                                         // empty space detected
45                                         if (terminator.empty) {
46                                                 currentIndex += terminator.value.length();
47                                                 if (result.length() > 0)
48                                                         return new TokenizerMatch(result.toString(),
49                                                                         terminator);
50                                                 else {
51                                                         accumulateCurrentChar = false;
52                                                         break findTerminator;
53                                                 }
54                                         } else if (result.length() > 0)
55                                                 return new TokenizerMatch(result.toString(), terminator);
56                                         else {
57                                                 currentIndex += terminator.value.length();
58                                                 return new TokenizerMatch(terminator.value, terminator);
59                                         }
60
61                         if (accumulateCurrentChar) {
62                                 result.append(source.charAt(currentIndex));
63                                 currentIndex++;
64                         }
65                 }
66
67         }
68
69         public boolean isNextToken(final String token) {
70                 if (token.equals(getToken().token))
71                         return true;
72
73                 rollbackToken();
74                 return false;
75         }
76
77         public void rollbackToken() {
78                 currentIndex = tokenIndexes.pop();
79         }
80
81         public void skipUtilEnd() {
82                 tokenIndexes.push(currentIndex);
83                 currentIndex = source.length();
84         }
85
86         public boolean terminatorMatches(final Terminator terminator) {
87                 if ((currentIndex + terminator.value.length()) > source.length())
88                         return false;
89
90                 for (int i = 0; i < terminator.value.length(); i++)
91                         if (terminator.value.charAt(i) != source.charAt(i + currentIndex))
92                                 return false;
93
94                 return true;
95         }
96
97 }