140773218e9fdb448c091b54ab8fd562662f8bdf
[svjatoslav_commons.git] / src / main / java / eu / svjatoslav / commons / string / tokenizer / Tokenizer.java
1 /*
2  * Svjatoslav Commons - shared library of common functionality.
3  * Copyright ©2012-2017, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of version 3 of the GNU Lesser General Public License
7  * or later as published by the Free Software Foundation.
8  */
9
10 package eu.svjatoslav.commons.string.tokenizer;
11
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.Stack;
15 import java.util.stream.Stream;
16
17 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
18 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
19
20 public class Tokenizer {
21
22     private final Stack<Integer> tokenIndexes = new Stack<>();
23     private final List<Terminator> terminators = new ArrayList<>();
24     private String source;
25     private int currentIndex = 0;
26
27     private int cachedTerminatorIndex = -1;
28     private Terminator cachedTerminator;
29
30     public Tokenizer(final String source) {
31         this.source = source;
32     }
33
34     public Tokenizer() {
35     }
36
37     public Tokenizer setSource(String source) {
38         this.source = source;
39         currentIndex = 0;
40         tokenIndexes.clear();
41
42         cachedTerminatorIndex = -1;
43         cachedTerminator = null;
44         return this;
45     }
46
47     public Tokenizer addTerminator(final String startSequence,
48                                    final Terminator.TerminationStrategy terminationStrategy) {
49         terminators.add(new Terminator(startSequence, terminationStrategy));
50         return this;
51     }
52
53     public Tokenizer addTerminator(final String startSequence,
54                                    final String endSequence, final Terminator.TerminationStrategy terminationStrategy) {
55         terminators.add(new Terminator(startSequence, endSequence, terminationStrategy));
56         return this;
57     }
58
59     public void expectAndConsumeNextToken(final String value)
60             throws InvalidSyntaxException {
61         final TokenizerMatch match = getNextToken();
62         if (!value.equals(match.token))
63             throw new InvalidSyntaxException("Expected \"" + value
64                     + "\" but got \"" + match.token + "\" instead.");
65     }
66
67
68     public TokenizerMatch getNextToken() throws InvalidSyntaxException {
69         tokenIndexes.push(currentIndex);
70
71         StringBuilder tokenAccumulator = new StringBuilder();
72
73         while (true) {
74
75             if (currentIndex >= source.length()) { // reached end of input
76                 if (hasAccumulatedToken(tokenAccumulator))
77                     return new TokenizerMatch(tokenAccumulator.toString(), null, null);
78                 else
79                     return null;
80             }
81
82             if (isOngoingToken()) {
83                 tokenAccumulator.append(source.charAt(currentIndex));
84                 currentIndex++;
85                 continue;
86             }
87
88             Terminator terminator = getOrFindTokenTerminator();
89
90             if (terminator.termination == PRESERVE)
91                 return buildPreservedToken(tokenAccumulator, terminator);
92             else if (terminator.termination == DROP) {
93                 skipUntilTerminatorEnd(terminator);
94
95                 if (hasAccumulatedToken(tokenAccumulator))
96                     return new TokenizerMatch(tokenAccumulator.toString(), null, terminator);
97             }
98         }
99
100     }
101
102     private void skipUntilTerminatorEnd(Terminator terminator) throws InvalidSyntaxException {
103         if (terminator.hasEndSequence())
104             currentIndex = getEndSequenceIndex(terminator) + terminator.endSequence.length();
105         else
106             currentIndex += terminator.startSequence.length();
107     }
108
109     private TokenizerMatch buildPreservedToken(StringBuilder token, Terminator terminator) throws InvalidSyntaxException {
110         if (hasAccumulatedToken(token))
111             return new TokenizerMatch(token.toString(), null, terminator);
112
113         if (terminator.hasEndSequence())
114             return buildComplexPreservedToken(terminator);
115         else
116             return buildSimplePreservedToken(terminator);
117     }
118
119     private TokenizerMatch buildSimplePreservedToken(Terminator terminator) {
120         currentIndex += terminator.startSequence.length();
121         return new TokenizerMatch(terminator.startSequence, null, terminator);
122     }
123
124     private TokenizerMatch buildComplexPreservedToken(Terminator terminator) throws InvalidSyntaxException {
125         int endSequenceIndex = getEndSequenceIndex(terminator);
126         String reminder = source.substring(currentIndex + terminator.startSequence.length(), endSequenceIndex);
127         currentIndex = endSequenceIndex + terminator.endSequence.length();
128
129         return new TokenizerMatch(terminator.startSequence, reminder, terminator);
130     }
131
132     private int getEndSequenceIndex(Terminator terminator) throws InvalidSyntaxException {
133         int endSequenceIndex = source.indexOf(terminator.endSequence,
134                 currentIndex + terminator.startSequence.length());
135
136         if (endSequenceIndex < 0)
137             throw new InvalidSyntaxException("Expected \"" + terminator.endSequence + "\" but not found.");
138
139         return endSequenceIndex;
140     }
141
142     private boolean hasAccumulatedToken(StringBuilder token) {
143         return token.length() > 0;
144     }
145
146     private boolean isOngoingToken() {
147         return getOrFindTokenTerminator() == null;
148     }
149
150     public boolean hasMoreTokens() {
151         return currentIndex < source.length();
152     }
153
154     /**
155      * Attempts to cache terminator search result.
156      */
157     public Terminator getOrFindTokenTerminator() {
158         if (currentIndex == cachedTerminatorIndex)
159             return cachedTerminator;
160
161         cachedTerminatorIndex = currentIndex;
162         cachedTerminator = findTokenTerminator();
163         return cachedTerminator;
164     }
165
166     private Terminator findTokenTerminator() {
167         for (Terminator terminator : terminators)
168             if (terminator.matches(source, currentIndex))
169                 return terminator;
170         return null;
171     }
172
173     public boolean consumeIfNextToken(final String token) throws InvalidSyntaxException {
174         if (token.equals(getNextToken().token))
175             return true;
176
177         unreadToken();
178         return false;
179     }
180
181     public TokenizerMatch peekNextToken() throws InvalidSyntaxException {
182         TokenizerMatch result = getNextToken();
183         unreadToken();
184         return result;
185     }
186
187     public boolean peekIsOneOf(String... possibilities) throws InvalidSyntaxException {
188         String nextToken = peekNextToken().token;
189         return Stream.of(possibilities).anyMatch(possibility -> possibility.equals(nextToken));
190     }
191
192     public void peekExpectNoneOf(String... possibilities) throws InvalidSyntaxException {
193         if (peekIsOneOf(possibilities))
194             throw new InvalidSyntaxException("Not expected \"" + peekNextToken().token + "\" here.");
195     }
196
197     public void unreadToken() {
198         currentIndex = tokenIndexes.pop();
199     }
200
201     public void skipUntilDataEnd() {
202         tokenIndexes.push(currentIndex);
203         currentIndex = source.length();
204     }
205
206 }