Code cleanup and formatting. Migrated to Java 1.8.
[svjatoslav_commons.git] / src / main / java / eu / svjatoslav / commons / string / tokenizer / Tokenizer.java
1 /*
2  * Svjatoslav Commons - shared library of common functionality.
3  * Copyright ©2012-2014, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu
4  * 
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of version 3 of the GNU Lesser General Public License
7  * or later as published by the Free Software Foundation.
8  */
9
10 package eu.svjatoslav.commons.string.tokenizer;
11
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.Stack;
15
16 public class Tokenizer {
17
18     final Stack<Integer> tokenIndexes = new Stack<>();
19     private final List<Terminator> terminators = new ArrayList<>();
20     private final String source;
21     private int currentIndex = 0;
22
23     public Tokenizer(final String source) {
24         this.source = source;
25     }
26
27     public void addTerminator(final String startSequence,
28                               final boolean ignoreTerminator) {
29         terminators.add(new Terminator(startSequence, ignoreTerminator));
30     }
31
32     public void addTerminator(final String startSequence,
33                               final String endSequence, final boolean ignoreTerminator) {
34         terminators.add(new Terminator(startSequence, endSequence,
35                 ignoreTerminator));
36     }
37
38     public void expectNextToken(final String value)
39             throws InvalidSyntaxException {
40         final TokenizerMatch match = getNextToken();
41         if (!value.equals(match.token))
42             throw new InvalidSyntaxException("Expected \"" + value
43                     + "\" but got \"" + match.token + "\" instead.");
44     }
45
46     public TokenizerMatch getNextToken() {
47         tokenIndexes.push(currentIndex);
48         final StringBuilder result = new StringBuilder();
49
50         while (true) {
51             if (currentIndex >= source.length())
52                 return null;
53
54             boolean accumulateCurrentChar = true;
55
56             for (final Terminator terminator : terminators)
57                 if (sequenceMatches(terminator.startSequence))
58
59                     if (terminator.ignoreTerminator) {
60                         currentIndex += terminator.startSequence.length();
61
62                         if (terminator.endSequence != null)
63                             skipUntilSequence(terminator.endSequence);
64
65                         if (result.length() > 0)
66                             return new TokenizerMatch(result.toString(),
67                                     terminator);
68                         else {
69                             accumulateCurrentChar = false;
70                             break;
71                         }
72                     } else if (result.length() > 0)
73                         return new TokenizerMatch(result.toString(), terminator);
74                     else {
75                         currentIndex += terminator.startSequence.length();
76                         return new TokenizerMatch(terminator.startSequence,
77                                 terminator);
78                     }
79
80             if (accumulateCurrentChar) {
81                 result.append(source.charAt(currentIndex));
82                 currentIndex++;
83             }
84         }
85
86     }
87
88     public boolean probeNextToken(final String token) {
89         if (token.equals(getNextToken().token))
90             return true;
91
92         unreadToken();
93         return false;
94     }
95
96     public boolean sequenceMatches(final String sequence) {
97         if ((currentIndex + sequence.length()) > source.length())
98             return false;
99
100         for (int i = 0; i < sequence.length(); i++)
101             if (sequence.charAt(i) != source.charAt(i + currentIndex))
102                 return false;
103
104         return true;
105     }
106
107     public void skipUntilDataEnd() {
108         tokenIndexes.push(currentIndex);
109         currentIndex = source.length();
110     }
111
112     public void skipUntilSequence(final String sequence) {
113         while (currentIndex < source.length()) {
114             if (sequenceMatches(sequence)) {
115                 currentIndex += sequence.length();
116                 return;
117             }
118
119             currentIndex++;
120         }
121     }
122
123     public void unreadToken() {
124         currentIndex = tokenIndexes.pop();
125     }
126
127 }