Changed license to CC0
[svjatoslav_commons.git] / src / main / java / eu / svjatoslav / commons / string / tokenizer / Tokenizer.java
1 /*
2  * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko.
3  * This project is released under Creative Commons Zero (CC0) license.
4  */
5 package eu.svjatoslav.commons.string.tokenizer;
6
7 import java.util.ArrayList;
8 import java.util.List;
9 import java.util.Stack;
10 import java.util.stream.Stream;
11
12 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
13 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
14
15 public class Tokenizer {
16
17     private final Stack<Integer> tokenIndexes = new Stack<>();
18     private final List<Terminator> terminators = new ArrayList<>();
19     private String source;
20     private int currentIndex = 0;
21
22     private int cachedTerminatorIndex = -1;
23     private Terminator cachedTerminator;
24
25     public Tokenizer(final String source) {
26         this.source = source;
27     }
28
29     public Tokenizer() {
30     }
31
32     public Tokenizer setSource(String source) {
33         this.source = source;
34         currentIndex = 0;
35         tokenIndexes.clear();
36
37         cachedTerminatorIndex = -1;
38         cachedTerminator = null;
39         return this;
40     }
41
42     public Tokenizer addTerminator(final String startSequence,
43                                    final Terminator.TerminationStrategy terminationStrategy) {
44         terminators.add(new Terminator(startSequence, terminationStrategy));
45         return this;
46     }
47
48     public Tokenizer addTerminator(final String startSequence,
49                                    final String endSequence, final Terminator.TerminationStrategy terminationStrategy) {
50         terminators.add(new Terminator(startSequence, endSequence, terminationStrategy));
51         return this;
52     }
53
54     public void expectAndConsumeNextToken(final String value)
55             throws InvalidSyntaxException {
56         final TokenizerMatch match = getNextToken();
57         if (!value.equals(match.token))
58             throw new InvalidSyntaxException("Expected \"" + value
59                     + "\" but got \"" + match.token + "\" instead.");
60     }
61
62
63     public TokenizerMatch getNextToken() throws InvalidSyntaxException {
64         tokenIndexes.push(currentIndex);
65
66         StringBuilder tokenAccumulator = new StringBuilder();
67
68         while (true) {
69
70             if (currentIndex >= source.length()) { // reached end of input
71                 if (hasAccumulatedToken(tokenAccumulator))
72                     return new TokenizerMatch(tokenAccumulator.toString(), null, null);
73                 else
74                     return null;
75             }
76
77             if (isOngoingToken()) {
78                 tokenAccumulator.append(source.charAt(currentIndex));
79                 currentIndex++;
80                 continue;
81             }
82
83             Terminator terminator = getOrFindTokenTerminator();
84
85             if (terminator.termination == PRESERVE)
86                 return buildPreservedToken(tokenAccumulator, terminator);
87             else if (terminator.termination == DROP) {
88                 skipUntilTerminatorEnd(terminator);
89
90                 if (hasAccumulatedToken(tokenAccumulator))
91                     return new TokenizerMatch(tokenAccumulator.toString(), null, terminator);
92             }
93         }
94
95     }
96
97     private void skipUntilTerminatorEnd(Terminator terminator) throws InvalidSyntaxException {
98         if (terminator.hasEndSequence())
99             currentIndex = getEndSequenceIndex(terminator) + terminator.endSequence.length();
100         else
101             currentIndex += terminator.startSequence.length();
102     }
103
104     private TokenizerMatch buildPreservedToken(StringBuilder token, Terminator terminator) throws InvalidSyntaxException {
105         if (hasAccumulatedToken(token))
106             return new TokenizerMatch(token.toString(), null, terminator);
107
108         if (terminator.hasEndSequence())
109             return buildComplexPreservedToken(terminator);
110         else
111             return buildSimplePreservedToken(terminator);
112     }
113
114     private TokenizerMatch buildSimplePreservedToken(Terminator terminator) {
115         currentIndex += terminator.startSequence.length();
116         return new TokenizerMatch(terminator.startSequence, null, terminator);
117     }
118
119     private TokenizerMatch buildComplexPreservedToken(Terminator terminator) throws InvalidSyntaxException {
120         int endSequenceIndex = getEndSequenceIndex(terminator);
121         String reminder = source.substring(currentIndex + terminator.startSequence.length(), endSequenceIndex);
122         currentIndex = endSequenceIndex + terminator.endSequence.length();
123
124         return new TokenizerMatch(terminator.startSequence, reminder, terminator);
125     }
126
127     private int getEndSequenceIndex(Terminator terminator) throws InvalidSyntaxException {
128         int endSequenceIndex = source.indexOf(terminator.endSequence,
129                 currentIndex + terminator.startSequence.length());
130
131         if (endSequenceIndex < 0)
132             throw new InvalidSyntaxException("Expected \"" + terminator.endSequence + "\" but not found.");
133
134         return endSequenceIndex;
135     }
136
137     private boolean hasAccumulatedToken(StringBuilder token) {
138         return token.length() > 0;
139     }
140
141     private boolean isOngoingToken() {
142         return getOrFindTokenTerminator() == null;
143     }
144
145     public boolean hasMoreTokens() {
146         return currentIndex < source.length();
147     }
148
149     /**
150      * Attempts to cache terminator search result.
151      */
152     public Terminator getOrFindTokenTerminator() {
153         if (currentIndex == cachedTerminatorIndex)
154             return cachedTerminator;
155
156         cachedTerminatorIndex = currentIndex;
157         cachedTerminator = findTokenTerminator();
158         return cachedTerminator;
159     }
160
161     private Terminator findTokenTerminator() {
162         for (Terminator terminator : terminators)
163             if (terminator.matches(source, currentIndex))
164                 return terminator;
165         return null;
166     }
167
168     public boolean consumeIfNextToken(final String token) throws InvalidSyntaxException {
169         if (token.equals(getNextToken().token))
170             return true;
171
172         unreadToken();
173         return false;
174     }
175
176     public TokenizerMatch peekNextToken() throws InvalidSyntaxException {
177         TokenizerMatch result = getNextToken();
178         unreadToken();
179         return result;
180     }
181
182     public boolean peekIsOneOf(String... possibilities) throws InvalidSyntaxException {
183         String nextToken = peekNextToken().token;
184         return Stream.of(possibilities).anyMatch(possibility -> possibility.equals(nextToken));
185     }
186
187     public void peekExpectNoneOf(String... possibilities) throws InvalidSyntaxException {
188         if (peekIsOneOf(possibilities))
189             throw new InvalidSyntaxException("Not expected \"" + peekNextToken().token + "\" here.");
190     }
191
192     public void unreadToken() {
193         currentIndex = tokenIndexes.pop();
194     }
195
196     public void skipUntilDataEnd() {
197         tokenIndexes.push(currentIndex);
198         currentIndex = source.length();
199     }
200
201 }