Changed license to LGPLv3 or later.
[svjatoslav_commons.git] / src / main / java / eu / svjatoslav / commons / string / tokenizer / Tokenizer.java
1 /*
2  * Svjatoslav Commons - shared library of common functionality.
3  * Copyright ©2012-2014, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu
4  * 
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of version 3 of the GNU Lesser General Public License
7  * or later as published by the Free Software Foundation.
8  */
9
10 package eu.svjatoslav.commons.string.tokenizer;
11
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.Stack;
15
16 public class Tokenizer {
17
18         private final List<Terminator> terminators = new ArrayList<Terminator>();
19         private final String source;
20
21         Stack<Integer> tokenIndexes = new Stack<Integer>();
22
23         private int currentIndex = 0;
24
25         public Tokenizer(final String source) {
26                 this.source = source;
27         }
28
29         public void addTerminator(final String startSequence,
30                         final boolean ignoreTerminator) {
31                 terminators.add(new Terminator(startSequence, ignoreTerminator));
32         }
33
34         public void addTerminator(final String startSequence,
35                         final String endSequence, final boolean ignoreTerminator) {
36                 terminators.add(new Terminator(startSequence, endSequence,
37                                 ignoreTerminator));
38         }
39
40         public void expectNextToken(final String value)
41                         throws InvalidSyntaxException {
42                 final TokenizerMatch match = getNextToken();
43                 if (!value.equals(match.token))
44                         throw new InvalidSyntaxException("Expected \"" + value
45                                         + "\" but got \"" + match.token + "\" instead.");
46         }
47
48         public TokenizerMatch getNextToken() {
49                 tokenIndexes.push(currentIndex);
50                 final StringBuffer result = new StringBuffer();
51
52                 while (true) {
53                         if (currentIndex >= source.length())
54                                 return null;
55
56                         boolean accumulateCurrentChar = true;
57
58                         findTerminator: for (final Terminator terminator : terminators)
59                                 if (sequenceMatches(terminator.startSequence))
60
61                                         if (terminator.ignoreTerminator) {
62                                                 currentIndex += terminator.startSequence.length();
63
64                                                 if (terminator.endSequence != null)
65                                                         skipUntilSequence(terminator.endSequence);
66
67                                                 if (result.length() > 0)
68                                                         return new TokenizerMatch(result.toString(),
69                                                                         terminator);
70                                                 else {
71                                                         accumulateCurrentChar = false;
72                                                         break findTerminator;
73                                                 }
74                                         } else if (result.length() > 0)
75                                                 return new TokenizerMatch(result.toString(), terminator);
76                                         else {
77                                                 currentIndex += terminator.startSequence.length();
78                                                 return new TokenizerMatch(terminator.startSequence,
79                                                                 terminator);
80                                         }
81
82                         if (accumulateCurrentChar) {
83                                 result.append(source.charAt(currentIndex));
84                                 currentIndex++;
85                         }
86                 }
87
88         }
89
90         public boolean probeNextToken(final String token) {
91                 if (token.equals(getNextToken().token))
92                         return true;
93
94                 unreadToken();
95                 return false;
96         }
97
98         public boolean sequenceMatches(final String sequence) {
99                 if ((currentIndex + sequence.length()) > source.length())
100                         return false;
101
102                 for (int i = 0; i < sequence.length(); i++)
103                         if (sequence.charAt(i) != source.charAt(i + currentIndex))
104                                 return false;
105
106                 return true;
107         }
108
109         public void skipUntilDataEnd() {
110                 tokenIndexes.push(currentIndex);
111                 currentIndex = source.length();
112         }
113
114         public void skipUntilSequence(final String sequence) {
115                 while (currentIndex < source.length()) {
116                         if (sequenceMatches(sequence)) {
117                                 currentIndex += sequence.length();
118                                 return;
119                         }
120
121                         currentIndex++;
122                 }
123         }
124
125         public void unreadToken() {
126                 currentIndex = tokenIndexes.pop();
127         }
128
129 }