2 * Svjatoslav Commons - shared library of common functionality.
3 * Copyright ©2012-2017, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 3 of the GNU Lesser General Public License
7 * or later as published by the Free Software Foundation.
10 package eu.svjatoslav.commons.string.tokenizer;
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.Stack;
15 import java.util.stream.Stream;
17 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
/**
 * Reads tokens one at a time from a source string, using a list of
 * registered terminators to decide where each token ends.
 */
19 public class Tokenizer {
// Read positions saved for rewinding: getNextToken and skipUntilDataEnd push
// the current index here, unreadToken pops it back.
21 final Stack<Integer> tokenIndexes = new Stack<>();
// Terminators registered through addTerminator; getNextToken iterates over
// them in the order they were added.
22 private final List<Terminator> terminators = new ArrayList<>();
// The text being tokenized.
23 private String source;
// Offset within source of the next character to examine.
24 private int currentIndex = 0;
/**
 * Creates a tokenizer for the given text.
 *
 * NOTE(review): the constructor body is not visible in this excerpt;
 * presumably it stores {@code source} in the field of the same name — confirm.
 *
 * @param source text to tokenize
 */
26 public Tokenizer(final String source) {
/**
 * Supplies the text to tokenize.
 *
 * NOTE(review): the method body is not visible in this excerpt. The
 * {@code Tokenizer} return type suggests a fluent setter; confirm that, and
 * confirm whether the read position is reset here.
 *
 * @param source text to tokenize
 */
32 public Tokenizer setSource(String source){
/**
 * Registers a terminator that is described by a start sequence only.
 * The new terminator is appended to the end of the terminator list.
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably the method returns {@code this} for call chaining — confirm.
 *
 * @param startSequence       character sequence that marks the terminator
 * @param terminationStrategy how the terminator is treated when encountered
 *                            (getNextToken handles DROP differently from the rest)
 */
38 public Tokenizer addTerminator(final String startSequence,
39 final Terminator.TerminationStrategy terminationStrategy) {
40 terminators.add(new Terminator(startSequence, terminationStrategy));
/**
 * Registers a terminator that is described by a start and an end sequence.
 * The new terminator is appended to the end of the terminator list.
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably the method returns {@code this} for call chaining — confirm.
 *
 * @param startSequence       character sequence that marks the terminator
 * @param endSequence         closing sequence; for a DROP terminator,
 *                            getNextToken skips input up to and including it
 * @param terminationStrategy how the terminator is treated when encountered
 */
44 public Tokenizer addTerminator(final String startSequence,
45 final String endSequence, final Terminator.TerminationStrategy terminationStrategy) {
46 terminators.add(new Terminator(startSequence, endSequence, terminationStrategy));
50 public void expectAndConsumeNextToken(final String value)
51 throws InvalidSyntaxException {
52 final TokenizerMatch match = getNextToken();
53 if (!value.equals(match.token))
54 throw new InvalidSyntaxException("Expected \"" + value
55 + "\" but got \"" + match.token + "\" instead.");
/**
 * Reads the next token, starting at the current index.
 *
 * Characters are accumulated until one of the registered terminators matches
 * at the current position. What happens then depends on the terminator's
 * strategy, see the comments below.
 *
 * NOTE(review): several lines of this method are missing from this excerpt
 * (the enclosing loop header, the statement guarded by the end-of-input
 * check, continuation lines of two return statements and the index
 * increment after accumulating a character). The comments describe only
 * what the visible lines do.
 *
 * @return the match describing the token that was read
 */
58 public TokenizerMatch getNextToken() {
// Remember where this read started so that unreadToken can rewind to it.
59 tokenIndexes.push(currentIndex);
60 final StringBuilder result = new StringBuilder();
// End of input reached. NOTE(review): the guarded statement is not visible here.
63 if (currentIndex >= source.length())
66 boolean accumulateCurrentChar = true;
// Look for a terminator that begins at the current position.
68 for (final Terminator terminator : terminators)
69 if (sequenceMatches(terminator.startSequence))
71 if (terminator.termination == DROP) {
// Dropped terminator: step over its start sequence ...
72 currentIndex += terminator.startSequence.length();
// ... and, when it has an end sequence, over everything up to and including it.
74 if (terminator.endSequence != null)
75 skipUntilSequence(terminator.endSequence);
// Text collected before the dropped terminator is returned as the token.
77 if (result.length() > 0)
78 return new TokenizerMatch(result.toString(),
// Nothing collected yet: the current character must not be appended below.
81 accumulateCurrentChar = false;
// Non-dropped terminator with pending text: return the text first and leave
// currentIndex on the terminator.
84 } else if (result.length() > 0)
85 return new TokenizerMatch(result.toString(), terminator);
// Non-dropped terminator with no pending text: consume its start sequence
// and return a match built from that sequence.
87 currentIndex += terminator.startSequence.length();
88 return new TokenizerMatch(terminator.startSequence,
// Ordinary character: add it to the pending token.
92 if (accumulateCurrentChar) {
93 result.append(source.charAt(currentIndex));
/**
 * Reads the next token and compares it with the given text.
 *
 * NOTE(review): the statements for both outcomes are not visible in this
 * excerpt. The method name suggests that a non-matching token is pushed back
 * with unreadToken and {@code false} is returned — confirm.
 *
 * @param token text the next token is compared against
 */
100 public boolean consumeIfNextToken(final String token) {
101 if (token.equals(getNextToken().token))
/**
 * Obtains the next token via getNextToken.
 *
 * NOTE(review): the rest of the method is not visible in this excerpt. The
 * name suggests the read position is restored (unreadToken) before the
 * match is returned — confirm.
 */
108 public TokenizerMatch peekNextToken(){
109 TokenizerMatch result = getNextToken();
114 public boolean peekIsOneOf(String ... possibilities){
115 String nextToken = peekNextToken().token;
116 return Stream.of(possibilities).anyMatch(possibility -> possibility.equals(nextToken));
119 public void peekExpectNoneOf(String ... possibilities) throws InvalidSyntaxException {
120 if (peekIsOneOf(possibilities))
121 throw new InvalidSyntaxException("Not expected \"" + peekNextToken().token + "\" here.");
/**
 * Checks whether the given sequence occurs in the source at the current index.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably {@code false} is returned from both checks below and
 * {@code true} after the loop completes — confirm.
 *
 * @param sequence character sequence to compare against the source
 */
125 public boolean sequenceMatches(final String sequence) {
// The sequence would extend past the end of the source.
126 if ((currentIndex + sequence.length()) > source.length())
// Compare character by character, starting at the current index.
129 for (int i = 0; i < sequence.length(); i++)
130 if (sequence.charAt(i) != source.charAt(i + currentIndex))
136 public void skipUntilDataEnd() {
137 tokenIndexes.push(currentIndex);
138 currentIndex = source.length();
/**
 * Scans forward through the source looking for the given sequence. When the
 * sequence is found at the current index, the index is moved just past it.
 *
 * NOTE(review): the rest of the loop body is not visible in this excerpt
 * (how the index advances when there is no match, and how the loop exits
 * after a match) — confirm against the full source.
 *
 * @param sequence character sequence to search for
 */
141 public void skipUntilSequence(final String sequence) {
142 while (currentIndex < source.length()) {
143 if (sequenceMatches(sequence)) {
// Found: consume the sequence itself as well.
144 currentIndex += sequence.length();
152 public void unreadToken() {
153 currentIndex = tokenIndexes.pop();