c4ca4d2e8dcd01e2b3a3218ea23224b1aa47ce47
[svjatoslav_commons.git] / src / main / java / eu / svjatoslav / commons / string / tokenizer / Tokenizer.java
1 /*
2  * Svjatoslav Commons - shared library of common functionality.
3  * Copyright ©2012-2017, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu
4  * 
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of version 3 of the GNU Lesser General Public License
7  * or later as published by the Free Software Foundation.
8  */
9
10 package eu.svjatoslav.commons.string.tokenizer;
11
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.Stack;
15 import java.util.stream.Stream;
16
17 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
18
19 public class Tokenizer {
20
21     final Stack<Integer> tokenIndexes = new Stack<>();
22     private final List<Terminator> terminators = new ArrayList<>();
23     private String source;
24     private int currentIndex = 0;
25
26     public Tokenizer(final String source) {
27         this.source = source;
28     }
29
30     public Tokenizer(){}
31
32     public Tokenizer setSource(String source){
33         this.source = source;
34         currentIndex = 0;
35         return this;
36     }
37
38     public Tokenizer addTerminator(final String startSequence,
39                                    final Terminator.TerminationStrategy terminationStrategy) {
40         terminators.add(new Terminator(startSequence, terminationStrategy));
41         return this;
42     }
43
44     public Tokenizer addTerminator(final String startSequence,
45                                    final String endSequence, final Terminator.TerminationStrategy terminationStrategy) {
46         terminators.add(new Terminator(startSequence, endSequence, terminationStrategy));
47         return this;
48     }
49
50     public void expectAndConsumeNextToken(final String value)
51             throws InvalidSyntaxException {
52         final TokenizerMatch match = getNextToken();
53         if (!value.equals(match.token))
54             throw new InvalidSyntaxException("Expected \"" + value
55                     + "\" but got \"" + match.token + "\" instead.");
56     }
57
58     public TokenizerMatch getNextToken() {
59         tokenIndexes.push(currentIndex);
60         final StringBuilder result = new StringBuilder();
61
62         while (true) {
63             if (currentIndex >= source.length())
64                 return null;
65
66             boolean accumulateCurrentChar = true;
67
68             for (final Terminator terminator : terminators)
69                 if (sequenceMatches(terminator.startSequence))
70
71                     if (terminator.termination == DROP) {
72                         currentIndex += terminator.startSequence.length();
73
74                         if (terminator.endSequence != null)
75                             skipUntilSequence(terminator.endSequence);
76
77                         if (result.length() > 0)
78                             return new TokenizerMatch(result.toString(),
79                                     terminator);
80                         else {
81                             accumulateCurrentChar = false;
82                             break;
83                         }
84                     } else if (result.length() > 0)
85                         return new TokenizerMatch(result.toString(), terminator);
86                     else {
87                         currentIndex += terminator.startSequence.length();
88                         return new TokenizerMatch(terminator.startSequence,
89                                 terminator);
90                     }
91
92             if (accumulateCurrentChar) {
93                 result.append(source.charAt(currentIndex));
94                 currentIndex++;
95             }
96         }
97
98     }
99
100     public boolean consumeIfNextToken(final String token) {
101         if (token.equals(getNextToken().token))
102             return true;
103
104         unreadToken();
105         return false;
106     }
107
108     public TokenizerMatch peekNextToken(){
109         TokenizerMatch result = getNextToken();
110         unreadToken();
111         return result;
112     }
113
114     public boolean peekIsOneOf(String ... possibilities){
115         String nextToken = peekNextToken().token;
116         return Stream.of(possibilities).anyMatch(possibility -> possibility.equals(nextToken));
117     }
118
119     public void peekExpectNoneOf(String ... possibilities) throws InvalidSyntaxException {
120         if (peekIsOneOf(possibilities))
121             throw new InvalidSyntaxException("Not expected \"" + peekNextToken().token + "\" here.");
122     }
123
124
125     public boolean sequenceMatches(final String sequence) {
126         if ((currentIndex + sequence.length()) > source.length())
127             return false;
128
129         for (int i = 0; i < sequence.length(); i++)
130             if (sequence.charAt(i) != source.charAt(i + currentIndex))
131                 return false;
132
133         return true;
134     }
135
136     public void skipUntilDataEnd() {
137         tokenIndexes.push(currentIndex);
138         currentIndex = source.length();
139     }
140
141     public void skipUntilSequence(final String sequence) {
142         while (currentIndex < source.length()) {
143             if (sequenceMatches(sequence)) {
144                 currentIndex += sequence.length();
145                 return;
146             }
147
148             currentIndex++;
149         }
150     }
151
152     public void unreadToken() {
153         currentIndex = tokenIndexes.pop();
154     }
155
156 }