Reimplemented getNextToken
[svjatoslav_commons.git] / src / main / java / eu / svjatoslav / commons / string / tokenizer / Tokenizer.java
1 /*
2  * Svjatoslav Commons - shared library of common functionality.
3  * Copyright ©2012-2017, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu
4  * 
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of version 3 of the GNU Lesser General Public License
7  * or later as published by the Free Software Foundation.
8  */
9
10 package eu.svjatoslav.commons.string.tokenizer;
11
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.Stack;
15 import java.util.stream.Stream;
16
17 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
18 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
19
20 public class Tokenizer {
21
22     final Stack<Integer> tokenIndexes = new Stack<>();
23     private final List<Terminator> terminators = new ArrayList<>();
24     private String source;
25     private int currentIndex = 0;
26
27     public Tokenizer(final String source) {
28         this.source = source;
29     }
30
31     public Tokenizer(){}
32
33     public Tokenizer setSource(String source){
34         this.source = source;
35         currentIndex = 0;
36         tokenIndexes.clear();
37         return this;
38     }
39
40     public Tokenizer addTerminator(final String startSequence,
41                                    final Terminator.TerminationStrategy terminationStrategy) {
42         terminators.add(new Terminator(startSequence, terminationStrategy));
43         return this;
44     }
45
46     public Tokenizer addTerminator(final String startSequence,
47                                    final String endSequence, final Terminator.TerminationStrategy terminationStrategy) {
48         terminators.add(new Terminator(startSequence, endSequence, terminationStrategy));
49         return this;
50     }
51
52     public void expectAndConsumeNextToken(final String value)
53             throws InvalidSyntaxException {
54         final TokenizerMatch match = getNextToken();
55         if (!value.equals(match.token))
56             throw new InvalidSyntaxException("Expected \"" + value
57                     + "\" but got \"" + match.token + "\" instead.");
58     }
59
60     public TokenizerMatch getNextToken() {
61         tokenIndexes.push(currentIndex);
62
63         StringBuilder token = new StringBuilder();
64
65         while (true){
66             if (isTokenTermination()){
67                 Terminator tokenTerminator = findTokenTerminator();
68
69                 if (tokenTerminator.termination == PRESERVE){
70                     if (hasAccumulatedToken(token)){
71                         // already assembled some token
72                         return new TokenizerMatch(token.toString(), "", tokenTerminator);
73                     } else {
74                         currentIndex++;
75                         return new TokenizerMatch(tokenTerminator.startSequence, "", tokenTerminator);
76                     }
77                 } else if (tokenTerminator.termination == DROP){
78                     if (hasAccumulatedToken(token)){
79                         currentIndex++;
80                         return new TokenizerMatch(token.toString(), "", tokenTerminator);
81                     } else {
82                         currentIndex++;
83                     }
84                 }
85             } else {
86                 token.append(source.charAt(currentIndex));
87                 currentIndex++;
88             }
89         }
90
91     }
92
93     private boolean hasAccumulatedToken(StringBuilder token) {
94         return token.length() > 0;
95     }
96
97     private boolean isTokenTermination() {
98         return findTokenTerminator() != null;
99     }
100
101     public Terminator findTokenTerminator() {
102         for (Terminator terminator : terminators)
103             if (terminator.matches(source, currentIndex))
104                 return terminator;
105         return null;
106     }
107
108     public boolean consumeIfNextToken(final String token) {
109         if (token.equals(getNextToken().token))
110             return true;
111
112         unreadToken();
113         return false;
114     }
115
116     public TokenizerMatch peekNextToken(){
117         TokenizerMatch result = getNextToken();
118         unreadToken();
119         return result;
120     }
121
122     public boolean peekIsOneOf(String ... possibilities){
123         String nextToken = peekNextToken().token;
124         return Stream.of(possibilities).anyMatch(possibility -> possibility.equals(nextToken));
125     }
126
127     public void peekExpectNoneOf(String ... possibilities) throws InvalidSyntaxException {
128         if (peekIsOneOf(possibilities))
129             throw new InvalidSyntaxException("Not expected \"" + peekNextToken().token + "\" here.");
130     }
131
132
133     public boolean sequenceMatches(final String sequence) {
134         if ((currentIndex + sequence.length()) > source.length())
135             return false;
136
137         for (int i = 0; i < sequence.length(); i++)
138             if (sequence.charAt(i) != source.charAt(i + currentIndex))
139                 return false;
140
141         return true;
142     }
143
144     public void skipUntilDataEnd() {
145         tokenIndexes.push(currentIndex);
146         currentIndex = source.length();
147     }
148
149     public void skipUntilSequence(final String sequence) {
150         while (currentIndex < source.length()) {
151             if (sequenceMatches(sequence)) {
152                 currentIndex += sequence.length();
153                 return;
154             }
155
156             currentIndex++;
157         }
158     }
159
160     public void unreadToken() {
161         currentIndex = tokenIndexes.pop();
162     }
163
164 }