2 * Svjatoslav Commons - shared library of common functionality.
3 * Copyright ©2012-2017, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 3 of the GNU Lesser General Public License
7 * or later as published by the Free Software Foundation.
10 package eu.svjatoslav.commons.string.tokenizer;
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.Stack;
16 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
18 public class Tokenizer {
// History of read positions: getNextToken() and skipUntilDataEnd() push the current
// position here, and unreadToken() pops the most recent one to rewind.
20 final Stack<Integer> tokenIndexes = new Stack<>();
// Terminators registered through addTerminator(...); getNextToken() checks them in list order.
21 private final List<Terminator> terminators = new ArrayList<>();
// Text being tokenized.
22 private String source;
// Offset into source of the next character to be examined.
23 private int currentIndex = 0;
/**
 * Creates a tokenizer for the given text.
 *
 * NOTE(review): the constructor body is not visible in this view; presumably it stores
 * the argument in the {@code source} field - confirm.
 *
 * @param source text to tokenize
 */
25 public Tokenizer(final String source) {
/**
 * Supplies the text to tokenize.
 *
 * NOTE(review): the method body is not visible in this view. Judging by the signature it
 * presumably replaces the {@code source} field and returns this tokenizer for chaining;
 * whether it also resets the read position cannot be told from here - confirm.
 *
 * @param source text to tokenize
 * @return a Tokenizer (presumably this instance - confirm)
 */
31 public Tokenizer setSource(String source){
/**
 * Registers a terminator built from a start sequence and a termination strategy.
 *
 * @param startSequence       character sequence that getNextToken() compares against the
 *                            input at the current position
 * @param terminationStrategy strategy stored with the terminator; getNextToken() treats
 *                            DROP differently from other values
 * @return a Tokenizer. NOTE(review): the return statement is not visible in this view;
 *         presumably this instance, for chaining - confirm.
 */
37 public Tokenizer addTerminator(final String startSequence,
38 final Terminator.TerminationStrategy terminationStrategy) {
// Appended to the end of the list, so earlier registrations are checked first.
39 terminators.add(new Terminator(startSequence, terminationStrategy));
/**
 * Registers a terminator built from a start sequence, an end sequence and a termination strategy.
 *
 * @param startSequence       character sequence that getNextToken() compares against the
 *                            input at the current position
 * @param endSequence         for a DROP terminator with a non-null end sequence,
 *                            getNextToken() calls skipUntilSequence(endSequence) after
 *                            consuming the start sequence
 * @param terminationStrategy strategy stored with the terminator; getNextToken() treats
 *                            DROP differently from other values
 * @return a Tokenizer. NOTE(review): the return statement is not visible in this view;
 *         presumably this instance, for chaining - confirm.
 */
43 public Tokenizer addTerminator(final String startSequence,
44 final String endSequence, final Terminator.TerminationStrategy terminationStrategy) {
// Appended to the end of the list, so earlier registrations are checked first.
45 terminators.add(new Terminator(startSequence, endSequence, terminationStrategy));
49 public void expectAndConsumeNextToken(final String value)
50 throws InvalidSyntaxException {
51 final TokenizerMatch match = getNextToken();
52 if (!value.equals(match.token))
53 throw new InvalidSyntaxException("Expected \"" + value
54 + "\" but got \"" + match.token + "\" instead.");
/**
 * Reads the next token from the source, starting at the current position.
 *
 * The current position is first pushed onto tokenIndexes so that unreadToken() can
 * restore it. Characters are appended to a buffer until the start sequence of a
 * registered terminator matches the input at the current position.
 *
 * NOTE(review): several statements of this method are not visible in this view (the
 * enclosing loop, the outcome of the end-of-input check, some else-branches and the
 * position increment). The comments below describe the visible lines only.
 *
 * @return a TokenizerMatch built from the token text and the terminator involved
 */
57 public TokenizerMatch getNextToken() {
// Remember where this read started so that unreadToken() can rewind to it.
58 tokenIndexes.push(currentIndex);
// Text accumulated for the token currently being built.
59 final StringBuilder result = new StringBuilder();
// End of input reached. NOTE(review): the statement executed in this case is not visible here.
62 if (currentIndex >= source.length())
// Set to false below when a DROP terminator consumed input while no text was pending.
65 boolean accumulateCurrentChar = true;
// Check each registered terminator, in registration order, against the current position.
67 for (final Terminator terminator : terminators)
68 if (sequenceMatches(terminator.startSequence))
// DROP terminator: its start sequence is consumed.
70 if (terminator.termination == DROP) {
71 currentIndex += terminator.startSequence.length();
// A terminator with an end sequence also skips ahead through skipUntilSequence().
73 if (terminator.endSequence != null)
74 skipUntilSequence(terminator.endSequence);
// Text gathered before the dropped terminator is returned as a token.
76 if (result.length() > 0)
77 return new TokenizerMatch(result.toString(),
// Suppress the append at the bottom for this pass.
80 accumulateCurrentChar = false;
// Other strategies with pending text: return the text; the start sequence is not consumed on this path.
83 } else if (result.length() > 0)
84 return new TokenizerMatch(result.toString(), terminator);
// Consume the start sequence and return it as the token itself.
86 currentIndex += terminator.startSequence.length();
87 return new TokenizerMatch(terminator.startSequence,
// Append the character at the current position to the token being built.
91 if (accumulateCurrentChar) {
92 result.append(source.charAt(currentIndex));
/**
 * Reads the next token and compares its text with the expected value.
 *
 * NOTE(review): the statements that follow the comparison, including both return
 * values, are not visible in this view. The name suggests the token stays consumed
 * only on a match and is unread otherwise - confirm.
 * NOTE(review): the result of getNextToken() is dereferenced without a null check -
 * confirm that it cannot be null here.
 *
 * @param token expected token text
 * @return presumably true when the token matched - confirm
 */
99 public boolean consumeIfNextToken(final String token) {
// Reading the token advances the current position and records the old one in tokenIndexes.
100 if (token.equals(getNextToken().token))
/**
 * Obtains the upcoming token by reading it with getNextToken().
 *
 * NOTE(review): the remaining statements are not visible in this view. The name
 * suggests the read position is restored afterwards (for example through
 * unreadToken()) and the match is returned - confirm.
 *
 * @return the match for the upcoming token
 */
107 public TokenizerMatch peekNextToken(){
108 TokenizerMatch result = getNextToken();
/**
 * Compares the text of the token returned by peekNextToken() with each of the given
 * candidates, using String equality.
 *
 * NOTE(review): the return statements are not visible in this view; presumably true
 * is returned on the first equal candidate and false otherwise - confirm.
 * NOTE(review): the result of peekNextToken() is dereferenced without a null check -
 * confirm that it cannot be null here.
 *
 * @param possibilities candidate token texts
 * @return presumably whether any candidate equals the upcoming token text - confirm
 */
113 public boolean peekIsOneOf(String ... possibilities){
114 String nextToken = peekNextToken().token;
// Candidates are checked in the order given.
116 for (String possibility : possibilities)
117 if (possibility.equals(nextToken))
123 public void peekExpectNoneOf(String ... possibilities) throws InvalidSyntaxException {
124 TokenizerMatch nextToken = peekNextToken();
126 for (String possibility : possibilities)
127 if (possibility.equals(nextToken))
128 throw new InvalidSyntaxException("Not expected \"" + nextToken + "\" here.");
/**
 * Compares the given sequence with the source text at the current position.
 *
 * NOTE(review): the return statements are not visible in this view; presumably false
 * is returned when the sequence does not fit or a character differs, and true
 * otherwise - confirm.
 *
 * @param sequence character sequence to look for at the current position
 * @return presumably whether the source contains the sequence at the current position - confirm
 */
132 public boolean sequenceMatches(final String sequence) {
// The sequence would extend past the end of the source.
133 if ((currentIndex + sequence.length()) > source.length())
// Character-by-character comparison, offset by the current position.
136 for (int i = 0; i < sequence.length(); i++)
137 if (sequence.charAt(i) != source.charAt(i + currentIndex))
143 public void skipUntilDataEnd() {
144 tokenIndexes.push(currentIndex);
145 currentIndex = source.length();
/**
 * Scans the source from the current position looking for the given sequence and, when
 * it is found, moves the current position just past it.
 *
 * NOTE(review): the statements that follow the match and those for the non-matching
 * case are not visible in this view; presumably the method returns after a match and
 * otherwise advances by one character - confirm.
 *
 * @param sequence character sequence that ends the region to skip
 */
148 public void skipUntilSequence(final String sequence) {
// Stop scanning once the end of the source is reached.
149 while (currentIndex < source.length()) {
150 if (sequenceMatches(sequence)) {
// Consume the sequence itself as well.
151 currentIndex += sequence.length();
159 public void unreadToken() {
160 currentIndex = tokenIndexes.pop();