2 * Svjatoslav Commons - shared library of common functionality.
3 * Copyright ©2012-2017, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 3 of the GNU Lesser General Public License
7 * or later as published by the Free Software Foundation.
10 package eu.svjatoslav.commons.string.tokenizer;
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.Stack;
/**
 * Tokenizer over a source string.
 *
 * Terminators registered through addTerminator are compared against the
 * source at the current index by getNextToken(); characters that do not
 * begin a terminator are accumulated into the token being built.
 */
16 public class Tokenizer {
// Saved values of currentIndex: pushed by getNextToken() and
// skipUntilDataEnd(), popped by unreadToken() to rewind the position.
// Declared without an access modifier, i.e. package-private.
18 final Stack<Integer> tokenIndexes = new Stack<>();
// Registered terminators in insertion order; getNextToken() iterates them
// in this order.
19 private final List<Terminator> terminators = new ArrayList<>();
// Text being tokenized (read via source.charAt / source.length).
20 private String source;
// Index into source of the next character to examine.
21 private int currentIndex = 0;
/**
 * Constructs a tokenizer from a source string.
 *
 * NOTE(review): the constructor body is not visible in this excerpt;
 * presumably it stores {@code source} in the field of the same name —
 * confirm against the full file.
 *
 * @param source text to tokenize (assumed, see note above)
 */
23 public Tokenizer(final String source) {
/**
 * Sets the source string of this tokenizer.
 *
 * NOTE(review): the method body is not visible in this excerpt. Presumably
 * it replaces the {@code source} field; whether it also resets
 * {@code currentIndex} or {@code tokenIndexes}, and which instance it
 * returns, must be confirmed against the full file.
 *
 * @param source new text to tokenize (assumed, see note above)
 * @return a Tokenizer (declared return type; returned value not visible here)
 */
29 public Tokenizer setSource(String source){
/**
 * Registers a terminator that is identified by a start sequence only.
 *
 * A new {@code Terminator(startSequence, ignoreTerminator)} is appended to
 * the terminator list, so terminators are tried by getNextToken() in the
 * order they were added.
 *
 * @param startSequence    character sequence that marks the terminator
 * @param ignoreTerminator when true, getNextToken() consumes the terminator
 *                         without returning it as a token of its own
 * @return a Tokenizer (declared return type; the return statement is not
 *         visible in this excerpt — presumably {@code this}, TODO confirm)
 */
35 public Tokenizer addTerminator(final String startSequence,
36 final boolean ignoreTerminator) {
37 terminators.add(new Terminator(startSequence, ignoreTerminator));
/**
 * Registers a terminator that has both a start and an end sequence.
 *
 * For an ignored terminator with a non-null end sequence, getNextToken()
 * calls skipUntilSequence(endSequence) after consuming the start sequence,
 * so the text between the two sequences is skipped.
 *
 * NOTE(review): the Terminator constructor call below is cut off in this
 * excerpt after {@code endSequence}; the remaining argument(s) and the
 * return statement are not visible — confirm against the full file.
 *
 * @param startSequence    character sequence that opens the terminator
 * @param endSequence      character sequence that closes the terminator
 * @param ignoreTerminator when true, getNextToken() consumes the terminator
 *                         without returning it as a token of its own
 * @return a Tokenizer (declared return type; returned value not visible here)
 */
41 public Tokenizer addTerminator(final String startSequence,
42 final String endSequence, final boolean ignoreTerminator) {
43 terminators.add(new Terminator(startSequence, endSequence,
/**
 * Reads the next token and verifies that it equals the expected value.
 *
 * The token is consumed through getNextToken() in either case; nothing in
 * the visible code rewinds the position when the check fails.
 *
 * NOTE(review): {@code match} is dereferenced without a null check. The
 * value getNextToken() returns at end of input is not visible in this
 * excerpt; if it can be null, this method throws NullPointerException
 * instead of InvalidSyntaxException — confirm.
 *
 * @param value expected token text
 * @throws InvalidSyntaxException if the next token differs from {@code value}
 */
48 public void expectNextToken(final String value)
49 throws InvalidSyntaxException {
50 final TokenizerMatch match = getNextToken();
51 if (!value.equals(match.token))
52 throw new InvalidSyntaxException("Expected \"" + value
53 + "\" but got \"" + match.token + "\" instead.");
/**
 * Extracts the next token from the source, starting at the current index.
 *
 * Characters are accumulated until a registered terminator's start sequence
 * matches at the current index. The result is a TokenizerMatch carrying
 * either the accumulated text or, for a non-ignored terminator with no
 * pending text, the terminator's own start sequence.
 *
 * NOTE(review): several lines of this method are missing from this excerpt
 * (the enclosing loop header, the statement executed at end of input, some
 * else/break lines and the index increment after appending a character).
 * The comments below describe only the visible statements.
 *
 * @return the match for the next token (end-of-input result not visible here)
 */
56 public TokenizerMatch getNextToken() {
// Remember where this token started so unreadToken() can rewind to it.
57 tokenIndexes.push(currentIndex);
58 final StringBuilder result = new StringBuilder();
// End of source reached; the statement executed in this case is not
// visible in this excerpt.
61 if (currentIndex >= source.length())
// Cleared below when an ignored terminator was consumed at this position,
// in which case the current character must not be appended.
64 boolean accumulateCurrentChar = true;
// Try every registered terminator, in registration order, at the current
// position.
66 for (final Terminator terminator : terminators)
67 if (sequenceMatches(terminator.startSequence))
69 if (terminator.ignoreTerminator) {
// Ignored terminator: consume its start sequence...
70 currentIndex += terminator.startSequence.length();
// ...and, when it has an end sequence, everything up to and including it.
72 if (terminator.endSequence != null)
73 skipUntilSequence(terminator.endSequence);
// Text gathered before the ignored terminator is returned as the token.
75 if (result.length() > 0)
76 return new TokenizerMatch(result.toString(),
// Nothing gathered yet: suppress appending a character for this round.
79 accumulateCurrentChar = false;
// Non-ignored terminator with pending text: return the text first.
// currentIndex is not advanced on this path, so the terminator is still
// ahead of the current position.
82 } else if (result.length() > 0)
83 return new TokenizerMatch(result.toString(), terminator);
// Non-ignored terminator with no pending text: consume it and return its
// start sequence as the token.
85 currentIndex += terminator.startSequence.length();
86 return new TokenizerMatch(terminator.startSequence,
// No terminator handled at this position: the current character becomes
// part of the token.
90 if (accumulateCurrentChar) {
91 result.append(source.charAt(currentIndex));
/**
 * Compares the next token, read via getNextToken(), with the given text.
 *
 * NOTE(review): only the comparison is visible in this excerpt. The
 * returned values, and whether the token is pushed back on either outcome,
 * must be confirmed against the full file. The result of getNextToken() is
 * dereferenced without a null check — confirm it cannot be null here.
 *
 * @param token expected token text
 * @return boolean result of the check (return statements not visible here)
 */
98 public boolean isNextToken(final String token) {
99 if (token.equals(getNextToken().token))
/**
 * Checks the given sequence against the source at the current index,
 * without changing the current index in any visible statement.
 *
 * NOTE(review): the return statements are missing from this excerpt.
 * Presumably the method returns false when the sequence would run past the
 * end of the source or a character differs, and true otherwise — confirm.
 *
 * @param sequence character sequence to look for at the current index
 * @return boolean result of the comparison (see note above)
 */
106 public boolean sequenceMatches(final String sequence) {
// The sequence would extend beyond the end of the source.
107 if ((currentIndex + sequence.length()) > source.length())
// Compare character by character, offset by the current index.
110 for (int i = 0; i < sequence.length(); i++)
111 if (sequence.charAt(i) != source.charAt(i + currentIndex))
/**
 * Moves the current index to the end of the source.
 *
 * The previous index is pushed onto tokenIndexes first, so a following
 * unreadToken() call restores the position held before this call.
 */
117 public void skipUntilDataEnd() {
118 tokenIndexes.push(currentIndex);
119 currentIndex = source.length();
/**
 * Scans forward from the current index looking for the given sequence and,
 * when it matches, moves the current index just past it.
 *
 * Unlike getNextToken() and skipUntilDataEnd(), no visible statement here
 * pushes the starting index onto tokenIndexes.
 *
 * NOTE(review): the rest of the loop body is missing from this excerpt.
 * Presumably the method returns after a match and otherwise advances by one
 * character per iteration — confirm against the full file.
 *
 * @param sequence character sequence to skip up to and including
 */
122 public void skipUntilSequence(final String sequence) {
123 while (currentIndex < source.length()) {
124 if (sequenceMatches(sequence)) {
// Found: position the index immediately after the sequence.
125 currentIndex += sequence.length();
/**
 * Rewinds the current index to the most recently saved position.
 *
 * Positions are saved by getNextToken() and skipUntilDataEnd(). Because
 * java.util.Stack.pop() is used, calling this with no saved position
 * throws java.util.EmptyStackException.
 */
133 public void unreadToken() {
134 currentIndex = tokenIndexes.pop();