2 * Svjatoslav Commons - shared library of common functionality.
3 * Copyright ©2012-2019, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 3 of the GNU Lesser General Public License
7 * or later as published by the Free Software Foundation.
10 package eu.svjatoslav.commons.string.tokenizer;
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.Stack;
15 import java.util.stream.Stream;
17 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
18 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
/**
 * Splits a source string into tokens. Registered {@link Terminator}s decide
 * where a token ends; text between terminators is accumulated character by
 * character.
 */
20 public class Tokenizer {
// Positions saved before each read (getNextToken, skipUntilDataEnd) so that unreadToken() can restore them.
22 private final Stack<Integer> tokenIndexes = new Stack<>();
// Terminators registered through addTerminator(...); searched in insertion order.
23 private final List<Terminator> terminators = new ArrayList<>();
// Text being tokenized.
24 private String source;
// Position in source of the next character to examine.
25 private int currentIndex = 0;
// Position for which cachedTerminator was computed; -1 means nothing is cached.
27 private int cachedTerminatorIndex = -1;
// Result of the terminator lookup at cachedTerminatorIndex (null when no terminator matched there).
28 private Terminator cachedTerminator;
/**
 * Creates a tokenizer for the given text.
 *
 * NOTE(review): the constructor body is not visible here; presumably it stores
 * {@code source} in the field of the same name. Confirm against the full file.
 *
 * @param source text to tokenize
 */
30 public Tokenizer(final String source) {
/**
 * Switches the tokenizer to a new source text.
 *
 * NOTE(review): only part of the body is visible here. The visible part
 * discards the cached terminator lookup; the remaining statements (presumably
 * storing {@code source}, resetting the read position and returning this
 * tokenizer) need to be confirmed against the full file.
 *
 * @param source new text to tokenize
 */
37 public Tokenizer setSource(String source) {
// Forget the memoized terminator lookup: it belongs to the previous source.
// -1 is the same "nothing cached" value the field is initialized with.
42 cachedTerminatorIndex = -1;
43 cachedTerminator = null;
47 public Tokenizer addTerminator(final String startSequence,
48 final Terminator.TerminationStrategy terminationStrategy) {
49 terminators.add(new Terminator(startSequence, terminationStrategy));
53 public Tokenizer addTerminator(final String startSequence,
54 final String endSequence, final Terminator.TerminationStrategy terminationStrategy) {
55 terminators.add(new Terminator(startSequence, endSequence, terminationStrategy));
59 public void expectAndConsumeNextToken(final String value)
60 throws InvalidSyntaxException {
61 final TokenizerMatch match = getNextToken();
62 if (!value.equals(match.token))
63 throw new InvalidSyntaxException("Expected \"" + value
64 + "\" but got \"" + match.token + "\" instead.");
/**
 * Reads the next token starting at the current position.
 *
 * Visible behaviour: characters are accumulated while no terminator matches at
 * the current position. A PRESERVE terminator is delegated to
 * buildPreservedToken; a DROP terminator is skipped, and the text accumulated
 * before it (if any) is returned.
 *
 * NOTE(review): several lines of this method are not visible here (the
 * enclosing loop, the position advance after appending a character, and what
 * happens at end of input when nothing was accumulated). Confirm against the
 * full file before relying on this description.
 *
 * @return the match describing the token that was read
 * @throws InvalidSyntaxException propagated from the terminator handling helpers
 */
68 public TokenizerMatch getNextToken() throws InvalidSyntaxException {
// Remember where this read started so that unreadToken() can rewind to it.
69 tokenIndexes.push(currentIndex);
// Collects the characters of a token that is not itself a terminator.
71 StringBuilder tokenAccumulator = new StringBuilder();
75 if (currentIndex >= source.length()) { // reached end of input
// Text collected up to the end of input is returned without any terminator.
76 if (hasAccumulatedToken(tokenAccumulator))
77 return new TokenizerMatch(tokenAccumulator.toString(), null, null);
// No terminator matches here: the current character belongs to the token.
82 if (isOngoingToken()) {
83 tokenAccumulator.append(source.charAt(currentIndex));
// A terminator matches at the current position (the lookup result is cached).
88 Terminator terminator = getOrFindTokenTerminator();
90 if (terminator.termination == PRESERVE)
91 return buildPreservedToken(tokenAccumulator, terminator);
92 else if (terminator.termination == DROP) {
// Dropped terminators are consumed without producing a token of their own.
93 skipUntilTerminatorEnd(terminator);
// Text collected before the dropped terminator is the token.
95 if (hasAccumulatedToken(tokenAccumulator))
96 return new TokenizerMatch(tokenAccumulator.toString(), null, terminator);
102 private void skipUntilTerminatorEnd(Terminator terminator) throws InvalidSyntaxException {
103 if (terminator.hasEndSequence())
104 currentIndex = getEndSequenceIndex(terminator) + terminator.endSequence.length();
106 currentIndex += terminator.startSequence.length();
109 private TokenizerMatch buildPreservedToken(StringBuilder token, Terminator terminator) throws InvalidSyntaxException {
110 if (hasAccumulatedToken(token))
111 return new TokenizerMatch(token.toString(), null, terminator);
113 if (terminator.hasEndSequence())
114 return buildComplexPreservedToken(terminator);
116 return buildSimplePreservedToken(terminator);
119 private TokenizerMatch buildSimplePreservedToken(Terminator terminator) {
120 currentIndex += terminator.startSequence.length();
121 return new TokenizerMatch(terminator.startSequence, null, terminator);
124 private TokenizerMatch buildComplexPreservedToken(Terminator terminator) throws InvalidSyntaxException {
125 int endSequenceIndex = getEndSequenceIndex(terminator);
126 String reminder = source.substring(currentIndex + terminator.startSequence.length(), endSequenceIndex);
127 currentIndex = endSequenceIndex + terminator.endSequence.length();
129 return new TokenizerMatch(terminator.startSequence, reminder, terminator);
132 private int getEndSequenceIndex(Terminator terminator) throws InvalidSyntaxException {
133 int endSequenceIndex = source.indexOf(terminator.endSequence,
134 currentIndex + terminator.startSequence.length());
136 if (endSequenceIndex < 0)
137 throw new InvalidSyntaxException("Expected \"" + terminator.endSequence + "\" but not found.");
139 return endSequenceIndex;
142 private boolean hasAccumulatedToken(StringBuilder token) {
143 return token.length() > 0;
146 private boolean isOngoingToken() {
147 return getOrFindTokenTerminator() == null;
150 public boolean hasMoreTokens() {
151 return currentIndex < source.length();
155 * Attempts to cache terminator search result.
157 public Terminator getOrFindTokenTerminator() {
158 if (currentIndex == cachedTerminatorIndex)
159 return cachedTerminator;
161 cachedTerminatorIndex = currentIndex;
162 cachedTerminator = findTokenTerminator();
163 return cachedTerminator;
166 private Terminator findTokenTerminator() {
167 for (Terminator terminator : terminators)
168 if (terminator.matches(source, currentIndex))
/**
 * Reads the next token and compares its text with the given value.
 *
 * NOTE(review): the statements that follow the comparison are not visible
 * here; presumably the method returns true on a match and otherwise rewinds
 * with unreadToken() and returns false. Confirm against the full file.
 *
 * @param token text to compare the next token with
 * @throws InvalidSyntaxException propagated from getNextToken()
 */
173 public boolean consumeIfNextToken(final String token) throws InvalidSyntaxException {
// The token is read (and thereby consumed) before the comparison is made.
174 if (token.equals(getNextToken().token))
/**
 * Looks at the next token.
 *
 * NOTE(review): only the read itself is visible here; presumably the read is
 * undone with unreadToken() before the result is returned. Confirm against the
 * full file.
 *
 * @throws InvalidSyntaxException propagated from getNextToken()
 */
181 public TokenizerMatch peekNextToken() throws InvalidSyntaxException {
// Read the token through the normal path; this also records the start position.
182 TokenizerMatch result = getNextToken();
187 public boolean peekIsOneOf(String... possibilities) throws InvalidSyntaxException {
188 String nextToken = peekNextToken().token;
189 return Stream.of(possibilities).anyMatch(possibility -> possibility.equals(nextToken));
192 public void peekExpectNoneOf(String... possibilities) throws InvalidSyntaxException {
193 if (peekIsOneOf(possibilities))
194 throw new InvalidSyntaxException("Not expected \"" + peekNextToken().token + "\" here.");
197 public void unreadToken() {
198 currentIndex = tokenIndexes.pop();
201 public void skipUntilDataEnd() {
202 tokenIndexes.push(currentIndex);
203 currentIndex = source.length();