2 * Svjatoslav Commons - shared library of common functionality.
3 * Copyright ©2012-2017, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 3 of the GNU Lesser General Public License
7 * or later as published by the Free Software Foundation.
10 package eu.svjatoslav.commons.string.tokenizer;
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.Stack;
15 import java.util.stream.Stream;
17 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
18 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
/**
 * Cuts a source string into tokens. Token boundaries are defined by the
 * {@link Terminator} instances registered through the addTerminator methods.
 * Positions held before reads are remembered on a stack so that a read can
 * be undone with unreadToken().
 */
20 public class Tokenizer {
// Positions saved before a read/skip (pushed by getNextToken and
// skipUntilDataEnd, popped by unreadToken). Package-private: no access modifier.
22 final Stack<Integer> tokenIndexes = new Stack<>();
// Terminators registered via addTerminator; scanned in insertion order by
// findTokenTerminator.
23 private final List<Terminator> terminators = new ArrayList<>();
// Text being tokenized.
24 private String source;
// Index into source of the next character to examine.
25 private int currentIndex = 0;
27 public Tokenizer(final String source) {
33 public Tokenizer setSource(String source){
/**
 * Registers a terminator that is identified by a start sequence only.
 *
 * @param startSequence       character sequence that marks the terminator
 * @param terminationStrategy how the tokenizer treats the terminator
 *                            (getNextToken distinguishes PRESERVE and DROP)
 */
// NOTE(review): the return statement is not visible in this excerpt;
// presumably returns this tokenizer for call chaining - confirm.
40 public Tokenizer addTerminator(final String startSequence,
41 final Terminator.TerminationStrategy terminationStrategy) {
42 terminators.add(new Terminator(startSequence, terminationStrategy));
/**
 * Registers a terminator that is described by a start and an end sequence.
 *
 * @param startSequence       character sequence that opens the terminator
 * @param endSequence         second sequence handed to the Terminator
 *                            constructor; presumably closes the terminated
 *                            region - verify against Terminator
 * @param terminationStrategy how the tokenizer treats the terminator
 *                            (getNextToken distinguishes PRESERVE and DROP)
 */
// NOTE(review): the return statement is not visible in this excerpt;
// presumably returns this tokenizer for call chaining - confirm.
46 public Tokenizer addTerminator(final String startSequence,
47 final String endSequence, final Terminator.TerminationStrategy terminationStrategy) {
48 terminators.add(new Terminator(startSequence, endSequence, terminationStrategy));
/**
 * Reads the next token and verifies that its text equals the expected value.
 * The token is read through getNextToken() regardless of the outcome.
 *
 * @param value expected token text; must not be null because
 *              {@code value.equals(...)} is invoked on it
 * @throws InvalidSyntaxException if the token that was read differs from
 *                                {@code value}; the message quotes both
 */
52 public void expectAndConsumeNextToken(final String value)
53 throws InvalidSyntaxException {
54 final TokenizerMatch match = getNextToken();
55 if (!value.equals(match.token))
56 throw new InvalidSyntaxException("Expected \"" + value
57 + "\" but got \"" + match.token + "\" instead.");
/**
 * Reads the next token from the source, starting at the current position.
 * <p>
 * The position held before the read is pushed onto tokenIndexes first, so
 * that unreadToken() can restore it. Characters are then accumulated until
 * a registered terminator matches at the current position:
 * <ul>
 * <li>PRESERVE terminator: the accumulated text is returned when there is
 * any; otherwise the terminator's start sequence itself is returned as the
 * token.</li>
 * <li>DROP terminator: the accumulated text is returned when there is
 * any.</li>
 * </ul>
 * NOTE(review): the enclosing loop, the statements that advance
 * currentIndex and the DROP branch for an empty accumulator are not visible
 * in this excerpt - confirm against the complete file.
 *
 * @return match built from the token text, an empty string as second
 *         constructor argument and the terminator that ended the token
 */
60 public TokenizerMatch getNextToken() {
// remember where this read started
61 tokenIndexes.push(currentIndex);
// characters collected for the token currently being assembled
63 StringBuilder token = new StringBuilder();
// does any registered terminator match at the current position?
66 if (isTokenTermination()){
67 Terminator tokenTerminator = findTokenTerminator();
// PRESERVE: the terminator is itself handed out as a token
69 if (tokenTerminator.termination == PRESERVE){
70 if (hasAccumulatedToken(token)){
71 // already assembled some token
72 return new TokenizerMatch(token.toString(), "", tokenTerminator);
// nothing accumulated: the terminator's start sequence is the token
75 return new TokenizerMatch(tokenTerminator.startSequence, "", tokenTerminator);
// DROP: the terminator only ends the accumulated token
77 } else if (tokenTerminator.termination == DROP){
78 if (hasAccumulatedToken(token)){
80 return new TokenizerMatch(token.toString(), "", tokenTerminator);
// no terminator at this position: the character belongs to the token
86 token.append(source.charAt(currentIndex));
/**
 * Tells whether any characters have been collected for the current token.
 *
 * @param token buffer holding the characters collected so far
 * @return true if the buffer contains at least one character
 */
93 private boolean hasAccumulatedToken(StringBuilder token) {
94 return token.length() > 0;
/**
 * Tells whether a terminator is found at the current position.
 *
 * @return true if findTokenTerminator() yields a non-null terminator
 */
97 private boolean isTokenTermination() {
98 return findTokenTerminator() != null;
/**
 * Walks the registered terminators in insertion order and tests each one
 * against the source at the current position.
 */
// NOTE(review): the return statements are not visible in this excerpt.
// Presumably the first matching terminator is returned, and null when none
// matches (isTokenTermination compares the result against null) - confirm.
101 public Terminator findTokenTerminator() {
102 for (Terminator terminator : terminators)
103 if (terminator.matches(source, currentIndex))
/**
 * Reads the next token through getNextToken() and compares its text with
 * the expected one.
 *
 * @param token expected token text; must not be null because
 *              {@code token.equals(...)} is invoked on it
 */
// NOTE(review): the statements for both outcomes are not visible in this
// excerpt. Presumably returns true on a match and otherwise undoes the read
// and returns false - confirm against the complete file.
108 public boolean consumeIfNextToken(final String token) {
109 if (token.equals(getNextToken().token))
/**
 * Obtains the upcoming token by reading it with getNextToken().
 */
// NOTE(review): the remainder of the method is not visible in this excerpt.
// Presumably the read is undone (unreadToken) before the match is returned,
// as the name "peek" suggests - confirm.
116 public TokenizerMatch peekNextToken(){
117 TokenizerMatch result = getNextToken();
/**
 * Tells whether the text of the token obtained from peekNextToken() equals
 * any of the given candidates.
 *
 * @param possibilities candidate token texts; each is compared with
 *                      {@code equals}, so none of them may be null
 * @return true if at least one candidate equals the peeked token text;
 *         false otherwise, including when no candidates are given
 */
122 public boolean peekIsOneOf(String ... possibilities){
123 String nextToken = peekNextToken().token;
124 return Stream.of(possibilities).anyMatch(possibility -> possibility.equals(nextToken));
/**
 * Verifies that the upcoming token is none of the given candidates.
 *
 * @param possibilities token texts that are not allowed at this point
 * @throws InvalidSyntaxException if peekIsOneOf(possibilities) reports a
 *                                match; the message quotes the offending token
 */
127 public void peekExpectNoneOf(String ... possibilities) throws InvalidSyntaxException {
128 if (peekIsOneOf(possibilities))
// the token is peeked a second time here, only to build the error message
129 throw new InvalidSyntaxException("Not expected \"" + peekNextToken().token + "\" here.");
/**
 * Compares the given sequence with the source text at the current position.
 *
 * @param sequence characters expected to appear at the current position
 */
// NOTE(review): the return statements are not visible in this excerpt.
// Presumably false is returned when the sequence does not fit or a
// character differs, and true otherwise - confirm.
133 public boolean sequenceMatches(final String sequence) {
// guard: the sequence would reach past the end of the source
134 if ((currentIndex + sequence.length()) > source.length())
// character-by-character comparison, offset by the current position
137 for (int i = 0; i < sequence.length(); i++)
138 if (sequence.charAt(i) != source.charAt(i + currentIndex))
/**
 * Moves the current position to the end of the source. The position held
 * before the skip is pushed onto tokenIndexes, so unreadToken() can
 * restore it.
 */
144 public void skipUntilDataEnd() {
145 tokenIndexes.push(currentIndex);
146 currentIndex = source.length();
/**
 * Scans the source from the current position looking for the given
 * sequence; when it is found, the position is advanced past it.
 *
 * @param sequence character sequence to look for
 */
// NOTE(review): the rest of the loop is not visible in this excerpt
// (presumably the method returns after the match and otherwise advances one
// character per iteration) - confirm against the complete file.
149 public void skipUntilSequence(final String sequence) {
150 while (currentIndex < source.length()) {
151 if (sequenceMatches(sequence)) {
// step over the matched sequence itself
152 currentIndex += sequence.length();
/**
 * Restores the position that was most recently saved on tokenIndexes
 * (positions are saved by getNextToken and skipUntilDataEnd), removing it
 * from the stack.
 *
 * @throws java.util.EmptyStackException if no position has been saved
 *                                       (thrown by {@code Stack.pop()})
 */
160 public void unreadToken() {
161 currentIndex = tokenIndexes.pop();