4855c188e57a48142a255db1deb0ac57f694125c
[svjatoslav_commons.git] / src / main / java / eu / svjatoslav / commons / string / tokenizer / Tokenizer.java
1 /*
2  * Svjatoslav Commons - shared library of common functionality.
3  * Copyright ©2012-2017, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu
4  * 
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of version 3 of the GNU Lesser General Public License
7  * or later as published by the Free Software Foundation.
8  */
9
10 package eu.svjatoslav.commons.string.tokenizer;
11
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.Stack;
15
16 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
17
18 public class Tokenizer {
19
20     final Stack<Integer> tokenIndexes = new Stack<>();
21     private final List<Terminator> terminators = new ArrayList<>();
22     private String source;
23     private int currentIndex = 0;
24
25     public Tokenizer(final String source) {
26         this.source = source;
27     }
28
29     public Tokenizer(){}
30
31     public Tokenizer setSource(String source){
32         this.source = source;
33         currentIndex = 0;
34         return this;
35     }
36
37     public Tokenizer addTerminator(final String startSequence,
38                                    final Terminator.TerminationStrategy terminationStrategy) {
39         terminators.add(new Terminator(startSequence, terminationStrategy));
40         return this;
41     }
42
43     public Tokenizer addTerminator(final String startSequence,
44                                    final String endSequence, final Terminator.TerminationStrategy terminationStrategy) {
45         terminators.add(new Terminator(startSequence, endSequence, terminationStrategy));
46         return this;
47     }
48
49     public void expectNextToken(final String value)
50             throws InvalidSyntaxException {
51         final TokenizerMatch match = getNextToken();
52         if (!value.equals(match.token))
53             throw new InvalidSyntaxException("Expected \"" + value
54                     + "\" but got \"" + match.token + "\" instead.");
55     }
56
57     public TokenizerMatch getNextToken() {
58         tokenIndexes.push(currentIndex);
59         final StringBuilder result = new StringBuilder();
60
61         while (true) {
62             if (currentIndex >= source.length())
63                 return null;
64
65             boolean accumulateCurrentChar = true;
66
67             for (final Terminator terminator : terminators)
68                 if (sequenceMatches(terminator.startSequence))
69
70                     if (terminator.termination == DROP) {
71                         currentIndex += terminator.startSequence.length();
72
73                         if (terminator.endSequence != null)
74                             skipUntilSequence(terminator.endSequence);
75
76                         if (result.length() > 0)
77                             return new TokenizerMatch(result.toString(),
78                                     terminator);
79                         else {
80                             accumulateCurrentChar = false;
81                             break;
82                         }
83                     } else if (result.length() > 0)
84                         return new TokenizerMatch(result.toString(), terminator);
85                     else {
86                         currentIndex += terminator.startSequence.length();
87                         return new TokenizerMatch(terminator.startSequence,
88                                 terminator);
89                     }
90
91             if (accumulateCurrentChar) {
92                 result.append(source.charAt(currentIndex));
93                 currentIndex++;
94             }
95         }
96
97     }
98
99     public boolean consumeIfNextToken(final String token) {
100         if (token.equals(getNextToken().token))
101             return true;
102
103         unreadToken();
104         return false;
105     }
106
107     public TokenizerMatch peekNextToken(){
108         TokenizerMatch result = getNextToken();
109         unreadToken();
110         return result;
111     }
112
113     public boolean peekIsOneOf(String ... possibilities){
114         TokenizerMatch nextToken = peekNextToken();
115
116         for (String possibility : possibilities)
117             if (possibility.equals(nextToken))
118                 return true;
119
120         return false;
121     }
122
123     public void peekExpectNoneOf(String ... possibilities) throws InvalidSyntaxException {
124         TokenizerMatch nextToken = peekNextToken();
125
126         for (String possibility : possibilities)
127             if (possibility.equals(nextToken))
128                 throw new InvalidSyntaxException("Not expected \"" + nextToken + "\" here.");
129     }
130
131
132     public boolean sequenceMatches(final String sequence) {
133         if ((currentIndex + sequence.length()) > source.length())
134             return false;
135
136         for (int i = 0; i < sequence.length(); i++)
137             if (sequence.charAt(i) != source.charAt(i + currentIndex))
138                 return false;
139
140         return true;
141     }
142
143     public void skipUntilDataEnd() {
144         tokenIndexes.push(currentIndex);
145         currentIndex = source.length();
146     }
147
148     public void skipUntilSequence(final String sequence) {
149         while (currentIndex < source.length()) {
150             if (sequenceMatches(sequence)) {
151                 currentIndex += sequence.length();
152                 return;
153             }
154
155             currentIndex++;
156         }
157     }
158
159     public void unreadToken() {
160         currentIndex = tokenIndexes.pop();
161     }
162
163 }