2 * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko.
3 * This project is released under Creative Commons Zero (CC0) license.
5 package eu.svjatoslav.commons.string.tokenizer;
import java.util.ArrayList;
import java.util.List;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.stream.Stream;

import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
import static java.lang.System.out;
16 public class Tokenizer {
19 * Stack of token indexes. This allows walking back in history and un-consuming a token.
21 private final Stack<Integer> tokenIndexes = new Stack<>();
24 * Terminators that will be searched for by given tokenizer within given source string.
26 private final List<Terminator> terminators = new ArrayList<>();
28 private String source; // string to be tokenized
// Position within source at which the next token scan starts. getNextToken() advances it
// and unreadToken() restores a previously recorded value.
30 private int currentIndex = 0;
// Constructor that takes the string to be tokenized.
// NOTE(review): the constructor body is not visible in this extract; presumably it stores
// the argument in the source field - confirm against the complete file.
32 public Tokenizer(final String source) {
// Setter for the string to be tokenized; declared to return a Tokenizer.
// NOTE(review): the method body is not visible in this extract. Presumably it replaces the
// source and returns this tokenizer for chaining; whether it also resets currentIndex or
// the token index history cannot be determined from here - confirm against the complete file.
39 public Tokenizer setSource(String source) {
46 public Terminator addTerminator(final Terminator.TerminationStrategy terminationStrategy, String regexp) {
47 Terminator terminator = new Terminator(terminationStrategy, regexp,null);
48 terminators.add(terminator);
52 public Terminator addTerminator(final Terminator.TerminationStrategy terminationStrategy,
53 String regexp, String group) {
54 Terminator terminator = new Terminator(terminationStrategy, regexp,group);
55 terminators.add(terminator);
60 public Terminator addTerminator(Terminator terminator) {
61 terminators.add(terminator);
65 public void expectAndConsumeNextStringToken(final String value)
66 throws InvalidSyntaxException {
67 final TokenizerMatch match = getNextToken();
68 if (!value.equals(match.token))
69 throw new InvalidSyntaxException("Expected \"" + value
70 + "\" but got \"" + match.token + "\" instead.");
73 public TokenizerMatch expectAndConsumeNextTerminatorToken(Terminator terminator)
74 throws InvalidSyntaxException {
75 final TokenizerMatch match = getNextToken();
77 if (match.terminator != terminator)
78 throw new InvalidSyntaxException("Expected terminator \"" + terminator
79 + "\" but got \"" + match.terminator + "\" instead.");
86 * @return next {@link TokenizerMatch} or <code>null</code> if end of input is reached.
// Reads the next token, starting at currentIndex. Characters that do not start a terminator
// match are collected into an accumulator and returned as a plain token (constructed with
// null terminator and null matcher).
// NOTE(review): several lines of this method (the loop header, the index increment, some
// return statements and the closing braces) are not visible in this extract, so the comments
// below describe only what is shown.
88 public TokenizerMatch getNextToken() {
// Record where this read started so that unreadToken() can rewind to it.
89 tokenIndexes.push(currentIndex);
// Collects characters that are not part of any terminator match.
91 StringBuilder tokenAccumulator = new StringBuilder();
95 if (currentIndex >= source.length()) { // reached end of input
// Hand out whatever was accumulated before the input ran out.
96 if (hasAccumulatedToken(tokenAccumulator))
97 return new TokenizerMatch(tokenAccumulator.toString(), null, null, this);
// Ask the active terminators whether one of them matches at the current position.
102 TokenizerMatch matchResult = findTerminatorMatch();
103 if (matchResult == null) {
// No terminator matched here: the current character belongs to the plain token being built.
104 tokenAccumulator.append(source.charAt(currentIndex));
109 if (matchResult.terminator.termination == PRESERVE) {
// Text accumulated before the terminator is returned first; currentIndex is left untouched
// on this path.
110 if (hasAccumulatedToken(tokenAccumulator))
111 return new TokenizerMatch(tokenAccumulator.toString(), null, null, this);
// Nothing was accumulated: move past the matched terminator text.
113 currentIndex = matchResult.matcher.end();
// Terminator with a strategy other than PRESERVE (presumably the else branch - the line
// introducing it is not visible): move past the matched text, then return any accumulated token.
116 currentIndex = matchResult.matcher.end();
118 if (hasAccumulatedToken(tokenAccumulator))
119 return new TokenizerMatch(tokenAccumulator.toString(), null, null, this);
// Iterates over the registered terminators in list order and, for each active one, asks it to
// match the source at currentIndex. A TokenizerMatch is built from the text between the
// matcher's start and end.
// NOTE(review): the line between the match(...) call and the substring extraction (presumably
// the condition testing whether the matcher actually matched) and the fall-through return are
// not visible in this extract - confirm against the complete file.
124 public TokenizerMatch findTerminatorMatch(){
125 for (Terminator terminator : terminators)
// Terminators whose active flag is false are skipped.
126 if (terminator.active) {
127 Matcher match = terminator.match(source, currentIndex);
// The token text is exactly the region of source covered by the match.
129 String token = source.substring(match.start(), match.end());
130 return new TokenizerMatch(token, terminator, match, this);
136 private boolean hasAccumulatedToken(StringBuilder tokenAccumulator) {
137 return tokenAccumulator.length() > 0;
140 public boolean hasMoreContent() {
141 if (source == null) return false;
142 return currentIndex < source.length();
145 public boolean consumeIfNextToken(final String token) throws InvalidSyntaxException {
146 if (token.equals(getNextToken().token))
153 public TokenizerMatch peekNextToken() throws InvalidSyntaxException {
154 TokenizerMatch result = getNextToken();
159 public boolean peekIsOneOf(String... possibilities) throws InvalidSyntaxException {
160 String nextToken = peekNextToken().token;
161 return Stream.of(possibilities).anyMatch(possibility -> possibility.equals(nextToken));
164 public void peekExpectNoneOf(String... possibilities) throws InvalidSyntaxException {
165 if (peekIsOneOf(possibilities))
166 throw new InvalidSyntaxException("Not expected \"" + peekNextToken().token + "\" here.");
169 public void unreadToken() {
170 currentIndex = tokenIndexes.pop();
176 public void enlistRemainingTokens(){
177 int redTokenCount = 0;
179 while (hasMoreContent()) {
180 out.println(getNextToken().toString());
184 // restore pointer to original location
185 for (int i = 0; i< redTokenCount; i++ ) unreadToken();
189 public void skipUntilDataEnd() {
190 tokenIndexes.push(currentIndex);
191 currentIndex = source.length();