a89677b0df47366c37aa4053b4295dda1a21c5d7
[svjatoslav_commons.git] / src / main / java / eu / svjatoslav / commons / string / tokenizer / Tokenizer.java
1 /*
2  * Svjatoslav Commons - shared library of common functionality.
3  * Copyright ©2012-2017, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu
4  * 
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of version 3 of the GNU Lesser General Public License
7  * or later as published by the Free Software Foundation.
8  */
9
10 package eu.svjatoslav.commons.string.tokenizer;
11
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.Stack;
15
16 public class Tokenizer {
17
18     final Stack<Integer> tokenIndexes = new Stack<>();
19     private final List<Terminator> terminators = new ArrayList<>();
20     private String source;
21     private int currentIndex = 0;
22
23     public Tokenizer(final String source) {
24         this.source = source;
25     }
26
27     public Tokenizer(){}
28
29     public Tokenizer setSource(String source){
30         this.source = source;
31         currentIndex = 0;
32         return this;
33     }
34
35     public Tokenizer addTerminator(final String startSequence,
36                                    final boolean ignoreTerminator) {
37         terminators.add(new Terminator(startSequence, ignoreTerminator));
38         return this;
39     }
40
41     public Tokenizer addTerminator(final String startSequence,
42                                    final String endSequence, final boolean ignoreTerminator) {
43         terminators.add(new Terminator(startSequence, endSequence,
44                 ignoreTerminator));
45         return this;
46     }
47
48     public void expectNextToken(final String value)
49             throws InvalidSyntaxException {
50         final TokenizerMatch match = getNextToken();
51         if (!value.equals(match.token))
52             throw new InvalidSyntaxException("Expected \"" + value
53                     + "\" but got \"" + match.token + "\" instead.");
54     }
55
56     public TokenizerMatch getNextToken() {
57         tokenIndexes.push(currentIndex);
58         final StringBuilder result = new StringBuilder();
59
60         while (true) {
61             if (currentIndex >= source.length())
62                 return null;
63
64             boolean accumulateCurrentChar = true;
65
66             for (final Terminator terminator : terminators)
67                 if (sequenceMatches(terminator.startSequence))
68
69                     if (terminator.ignoreTerminator) {
70                         currentIndex += terminator.startSequence.length();
71
72                         if (terminator.endSequence != null)
73                             skipUntilSequence(terminator.endSequence);
74
75                         if (result.length() > 0)
76                             return new TokenizerMatch(result.toString(),
77                                     terminator);
78                         else {
79                             accumulateCurrentChar = false;
80                             break;
81                         }
82                     } else if (result.length() > 0)
83                         return new TokenizerMatch(result.toString(), terminator);
84                     else {
85                         currentIndex += terminator.startSequence.length();
86                         return new TokenizerMatch(terminator.startSequence,
87                                 terminator);
88                     }
89
90             if (accumulateCurrentChar) {
91                 result.append(source.charAt(currentIndex));
92                 currentIndex++;
93             }
94         }
95
96     }
97
98     public boolean isNextToken(final String token) {
99         if (token.equals(getNextToken().token))
100             return true;
101
102         unreadToken();
103         return false;
104     }
105
106     public boolean sequenceMatches(final String sequence) {
107         if ((currentIndex + sequence.length()) > source.length())
108             return false;
109
110         for (int i = 0; i < sequence.length(); i++)
111             if (sequence.charAt(i) != source.charAt(i + currentIndex))
112                 return false;
113
114         return true;
115     }
116
117     public void skipUntilDataEnd() {
118         tokenIndexes.push(currentIndex);
119         currentIndex = source.length();
120     }
121
122     public void skipUntilSequence(final String sequence) {
123         while (currentIndex < source.length()) {
124             if (sequenceMatches(sequence)) {
125                 currentIndex += sequence.length();
126                 return;
127             }
128
129             currentIndex++;
130         }
131     }
132
133     public void unreadToken() {
134         currentIndex = tokenIndexes.pop();
135     }
136
137 }