899f8c4d7ed8fe0da58a944a38f52d910957cc1f
[svjatoslav_commons.git] / src / main / java / eu / svjatoslav / commons / string / tokenizer / Tokenizer.java
1 /*
2  * Svjatoslav Commons - shared library of common functionality.
3  * Copyright ©2012-2017, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu
4  * 
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of version 3 of the GNU Lesser General Public License
7  * or later as published by the Free Software Foundation.
8  */
9
10 package eu.svjatoslav.commons.string.tokenizer;
11
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.Stack;
15
16 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
17
18 public class Tokenizer {
19
20     final Stack<Integer> tokenIndexes = new Stack<>();
21     private final List<Terminator> terminators = new ArrayList<>();
22     private String source;
23     private int currentIndex = 0;
24
25     public Tokenizer(final String source) {
26         this.source = source;
27     }
28
29     public Tokenizer(){}
30
31     public Tokenizer setSource(String source){
32         this.source = source;
33         currentIndex = 0;
34         return this;
35     }
36
37     public Tokenizer addTerminator(final String startSequence,
38                                    final Terminator.TerminationStrategy terminationStrategy) {
39         terminators.add(new Terminator(startSequence, terminationStrategy));
40         return this;
41     }
42
43     public Tokenizer addTerminator(final String startSequence,
44                                    final String endSequence, final Terminator.TerminationStrategy terminationStrategy) {
45         terminators.add(new Terminator(startSequence, endSequence, terminationStrategy));
46         return this;
47     }
48
49     public void expectNextToken(final String value)
50             throws InvalidSyntaxException {
51         final TokenizerMatch match = getNextToken();
52         if (!value.equals(match.token))
53             throw new InvalidSyntaxException("Expected \"" + value
54                     + "\" but got \"" + match.token + "\" instead.");
55     }
56
57     public TokenizerMatch getNextToken() {
58         tokenIndexes.push(currentIndex);
59         final StringBuilder result = new StringBuilder();
60
61         while (true) {
62             if (currentIndex >= source.length())
63                 return null;
64
65             boolean accumulateCurrentChar = true;
66
67             for (final Terminator terminator : terminators)
68                 if (sequenceMatches(terminator.startSequence))
69
70                     if (terminator.termination == DROP) {
71                         currentIndex += terminator.startSequence.length();
72
73                         if (terminator.endSequence != null)
74                             skipUntilSequence(terminator.endSequence);
75
76                         if (result.length() > 0)
77                             return new TokenizerMatch(result.toString(),
78                                     terminator);
79                         else {
80                             accumulateCurrentChar = false;
81                             break;
82                         }
83                     } else if (result.length() > 0)
84                         return new TokenizerMatch(result.toString(), terminator);
85                     else {
86                         currentIndex += terminator.startSequence.length();
87                         return new TokenizerMatch(terminator.startSequence,
88                                 terminator);
89                     }
90
91             if (accumulateCurrentChar) {
92                 result.append(source.charAt(currentIndex));
93                 currentIndex++;
94             }
95         }
96
97     }
98
99     public boolean consumeIfNextToken(final String token) {
100         if (token.equals(getNextToken().token))
101             return true;
102
103         unreadToken();
104         return false;
105     }
106
107     public boolean sequenceMatches(final String sequence) {
108         if ((currentIndex + sequence.length()) > source.length())
109             return false;
110
111         for (int i = 0; i < sequence.length(); i++)
112             if (sequence.charAt(i) != source.charAt(i + currentIndex))
113                 return false;
114
115         return true;
116     }
117
118     public void skipUntilDataEnd() {
119         tokenIndexes.push(currentIndex);
120         currentIndex = source.length();
121     }
122
123     public void skipUntilSequence(final String sequence) {
124         while (currentIndex < source.length()) {
125             if (sequenceMatches(sequence)) {
126                 currentIndex += sequence.length();
127                 return;
128             }
129
130             currentIndex++;
131         }
132     }
133
134     public void unreadToken() {
135         currentIndex = tokenIndexes.pop();
136     }
137
138 }