2 * Svjatoslav Commons - shared library of common functionality.
3 * Copyright ©2012-2017, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 3 of the GNU Lesser General Public License
7 * or later as published by the Free Software Foundation.
10 package eu.svjatoslav.commons.string.tokenizer;
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.Stack;
15 import java.util.stream.Stream;
17 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
18 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
/**
 * Splits a source string into tokens. Token boundaries are decided by the
 * {@link Terminator}s registered through {@code addTerminator}.
 */
20 public class Tokenizer {
// Positions saved before reads/skips; unreadToken() pops the latest one to restore currentIndex.
22 final Stack<Integer> tokenIndexes = new Stack<>();
// Registered terminators; findTokenTerminator() iterates them in insertion order.
23 private final List<Terminator> terminators = new ArrayList<>();
// The text being tokenized.
24 private String source;
// Position within source of the next character to examine.
25 private int currentIndex = 0;
/**
 * Creates a tokenizer for the given text.
 * NOTE(review): the constructor body is not visible in this excerpt;
 * presumably it stores {@code source} in the field of the same name - confirm.
 *
 * @param source text to tokenize
 */
27 public Tokenizer(final String source) {
/**
 * Supplies a new text to tokenize.
 * NOTE(review): the method body is not visible in this excerpt; whether it also
 * resets the read position or the saved-index stack must be confirmed. The
 * {@code Tokenizer} return type suggests it returns this instance for chaining.
 *
 * @param source text to tokenize
 */
33 public Tokenizer setSource(String source){
/**
 * Registers a terminator that is identified by a start sequence only.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably this instance is returned for chaining - confirm.
 *
 * @param startSequence       character sequence that marks the terminator
 * @param terminationStrategy how a match is handled (getNextToken distinguishes PRESERVE and DROP)
 */
40 public Tokenizer addTerminator(final String startSequence,
41 final Terminator.TerminationStrategy terminationStrategy) {
42 terminators.add(new Terminator(startSequence, terminationStrategy));
/**
 * Registers a terminator that is delimited by a start and an end sequence.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably this instance is returned for chaining - confirm.
 *
 * @param startSequence       character sequence that opens the terminator
 * @param endSequence         character sequence that closes the terminator
 * @param terminationStrategy how a match is handled (getNextToken distinguishes PRESERVE and DROP)
 */
46 public Tokenizer addTerminator(final String startSequence,
47 final String endSequence, final Terminator.TerminationStrategy terminationStrategy) {
48 terminators.add(new Terminator(startSequence, endSequence, terminationStrategy));
/**
 * Reads the next token and verifies that it equals the expected value.
 *
 * @param value the token text that must come next
 * @throws InvalidSyntaxException if the next token differs from {@code value},
 *                                or if reading the token itself fails
 */
52 public void expectAndConsumeNextToken(final String value)
53 throws InvalidSyntaxException {
// The token is consumed by getNextToken() before it is compared.
54 final TokenizerMatch match = getNextToken();
55 if (!value.equals(match.token))
56 throw new InvalidSyntaxException("Expected \"" + value
57 + "\" but got \"" + match.token + "\" instead.");
/**
 * Reads the next token from the source, starting at the current position.
 * NOTE(review): several lines of this method (the enclosing loop, the index
 * advance and the remaining DROP handling) are not visible in this excerpt;
 * the comments below describe only the visible statements.
 *
 * @return match describing the token that was read
 * @throws InvalidSyntaxException declared here; the visible source of it is buildPreservedToken
 */
60 public TokenizerMatch getNextToken() throws InvalidSyntaxException {
// Remember where this read started so that unreadToken() can undo it.
61 tokenIndexes.push(currentIndex);
63 StringBuilder token = new StringBuilder();
// No terminator matches at the current position: the character belongs to the token.
66 if (isOngoingToken()) {
67 token.append(source.charAt(currentIndex));
72 Terminator tokenTerminator = findTokenTerminator();
// PRESERVE: the terminator is itself reported as a token, see buildPreservedToken.
74 if (tokenTerminator.termination == PRESERVE){
75 return buildPreservedToken(token, tokenTerminator);
// DROP: visible here is only the case where text was accumulated before the terminator.
76 } else if (tokenTerminator.termination == DROP){
77 if (hasAccumulatedToken(token)){
79 return new TokenizerMatch(token.toString(), "", tokenTerminator);
/**
 * Builds the match returned when a PRESERVE terminator is found at the current position.
 * NOTE(review): the lines delimiting the branches are not visible in this excerpt.
 *
 * @param token      text accumulated before the terminator was found
 * @param terminator the terminator matched at the current position
 * @return the accumulated text if there is any, otherwise a match for the terminator itself
 * @throws InvalidSyntaxException if the terminator has an end sequence that
 *                                does not occur after the start sequence
 */
88 private TokenizerMatch buildPreservedToken(StringBuilder token, Terminator terminator) throws InvalidSyntaxException {
// Text collected before the terminator is returned first; currentIndex is left
// untouched on this path, so the terminator is still ahead for the following read.
89 if (hasAccumulatedToken(token))
90 return new TokenizerMatch(token.toString(), "", terminator);
92 if (terminator.hasEndSequence()){
// Search for the end sequence only after the start sequence.
93 int endSequenceIndex = source.indexOf(terminator.endSequence,
94 currentIndex + terminator.startSequence.length());
96 if (endSequenceIndex < 0)
97 throw new InvalidSyntaxException("Expected \"" + terminator.endSequence + "\" but not found.");
// The text between the start and end sequences, excluding both.
99 String reminder = source.substring(currentIndex + terminator.startSequence.length(), endSequenceIndex);
// Continue reading right after the end sequence.
100 currentIndex = endSequenceIndex + terminator.endSequence.length();
102 return new TokenizerMatch(terminator.startSequence, reminder, terminator);
// Start sequence only: step over it and report it as the token.
104 currentIndex += terminator.startSequence.length();
105 return new TokenizerMatch(terminator.startSequence, "", terminator);
/**
 * Tells whether any characters have been collected into the given buffer.
 *
 * @param token buffer holding the characters collected so far
 * @return {@code true} if the buffer is not empty
 */
109 private boolean hasAccumulatedToken(StringBuilder token) {
110 return token.length() > 0;
/**
 * Tells whether the current position is still inside a token.
 *
 * @return {@code true} if findTokenTerminator() reports no terminator
 */
113 private boolean isOngoingToken() {
114 return findTokenTerminator() == null;
/**
 * Looks for a registered terminator that matches the source at the current position.
 * Terminators are tested in the order in which they were registered.
 * NOTE(review): the return statements are not visible in this excerpt; presumably
 * the first matching terminator is returned, and {@code null} when none matches
 * (isOngoingToken() compares the result against {@code null}) - confirm.
 *
 * @return the terminator found at the current position
 */
117 public Terminator findTokenTerminator() {
118 for (Terminator terminator : terminators)
119 if (terminator.matches(source, currentIndex))
/**
 * Reads the next token and compares it with the given text.
 * NOTE(review): the statements that handle both outcomes are not visible in this
 * excerpt; the method name suggests that the token stays consumed on a match and
 * is pushed back otherwise - confirm.
 *
 * @param token the token text to look for
 * @return presumably {@code true} when the next token equals {@code token} - confirm
 * @throws InvalidSyntaxException if reading the next token fails
 */
124 public boolean consumeIfNextToken(final String token) throws InvalidSyntaxException {
125 if (token.equals(getNextToken().token))
/**
 * Obtains the next token by calling getNextToken().
 * NOTE(review): the rest of the method is not visible in this excerpt; the name
 * suggests that the read position is restored before the match is returned - confirm.
 *
 * @return the next token
 * @throws InvalidSyntaxException if reading the next token fails
 */
132 public TokenizerMatch peekNextToken() throws InvalidSyntaxException {
133 TokenizerMatch result = getNextToken();
/**
 * Tells whether the token obtained from peekNextToken() equals any of the given strings.
 *
 * @param possibilities candidate token texts
 * @return {@code true} if at least one candidate equals the peeked token
 * @throws InvalidSyntaxException if peeking the next token fails
 */
138 public boolean peekIsOneOf(String ... possibilities) throws InvalidSyntaxException {
139 String nextToken = peekNextToken().token;
140 return Stream.of(possibilities).anyMatch(possibility -> possibility.equals(nextToken));
/**
 * Verifies that the upcoming token is none of the given strings.
 *
 * @param possibilities token texts that are not allowed at this point
 * @throws InvalidSyntaxException if the peeked token equals one of the given
 *                                strings, or if peeking the token fails
 */
143 public void peekExpectNoneOf(String ... possibilities) throws InvalidSyntaxException {
144 if (peekIsOneOf(possibilities))
// The token is peeked a second time only to include it in the error message.
145 throw new InvalidSyntaxException("Not expected \"" + peekNextToken().token + "\" here.");
/**
 * Compares the given sequence with the source text at the current position,
 * character by character.
 * NOTE(review): the return statements are not visible in this excerpt; presumably
 * {@code false} is returned when the sequence would run past the end of the source
 * or a character differs, and {@code true} otherwise - confirm.
 *
 * @param sequence character sequence to compare against the source
 * @return whether the source contains the sequence at the current position
 */
149 public boolean sequenceMatches(final String sequence) {
// Guard: the sequence would extend beyond the end of the source.
150 if ((currentIndex + sequence.length()) > source.length())
153 for (int i = 0; i < sequence.length(); i++)
154 if (sequence.charAt(i) != source.charAt(i + currentIndex))
/**
 * Moves the read position to the end of the source. The previous position is
 * saved first, so the skip can be undone with unreadToken().
 */
160 public void skipUntilDataEnd() {
161 tokenIndexes.push(currentIndex);
162 currentIndex = source.length();
/**
 * Scans the source from the current position for the given sequence and, when it
 * is found, moves the read position just past it.
 * NOTE(review): the remainder of the loop is not visible in this excerpt (how the
 * position advances on a mismatch and how the loop exits after a match) - confirm.
 *
 * @param sequence character sequence to skip to
 */
165 public void skipUntilSequence(final String sequence) {
166 while (currentIndex < source.length()) {
167 if (sequenceMatches(sequence)) {
// Step over the matched sequence itself.
168 currentIndex += sequence.length();
/**
 * Restores the read position to the most recently saved index, undoing the last
 * operation that pushed onto tokenIndexes (for example getNextToken() or
 * skipUntilDataEnd()).
 * {@link Stack#pop()} throws {@link java.util.EmptyStackException} if no index has been saved.
 */
176 public void unreadToken() {
177 currentIndex = tokenIndexes.pop();