added quick intro
[javainspect.git] / src / main / java / eu / svjatoslav / inspector / tokenizer / Tokenizer.java
index 97f1224..ec13b1a 100644 (file)
@@ -2,24 +2,42 @@ package eu.svjatoslav.inspector.tokenizer;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Stack;
 
 public class Tokenizer {
 
        private final List<Terminator> terminators = new ArrayList<Terminator>();
        private final String source;
 
+       Stack<Integer> tokenIndexes = new Stack<Integer>();
+
        private int currentIndex = 0;
 
        /**
         * Creates a tokenizer over the given text. Reading starts at the
         * beginning of the text; no terminators are registered yet.
         *
         * @param source
         *            text that the tokenizer will read from
         */
        public Tokenizer(final String source) {
                this.source = source;
        }
 
-       public void addTerminator(final String terminator, final boolean empty) {
+       public void addTerminator(final String startSequence,
+                       final boolean ignoreTerminator) {
+               terminators.add(new Terminator(startSequence, ignoreTerminator));
+       }
 
-               terminators.add(new Terminator(terminator, empty));
+       public void addTerminator(final String startSequence,
+                       final String endSequence, final boolean ignoreTerminator) {
+               terminators.add(new Terminator(startSequence, endSequence,
+                               ignoreTerminator));
        }
 
-       public TokenizerMatch getToken() {
+       public void expectNextToken(final String value)
+                       throws InvalidSyntaxException {
+               final TokenizerMatch match = getNextToken();
+               if (!value.equals(match.token))
+                       throw new InvalidSyntaxException("Expected \"" + value
+                                       + "\" but got \"" + match.token + "\" instead.");
+       }
+
+       public TokenizerMatch getNextToken() {
+               tokenIndexes.push(currentIndex);
                final StringBuffer result = new StringBuffer();
 
                while (true) {
@@ -29,10 +47,14 @@ public class Tokenizer {
                        boolean accumulateCurrentChar = true;
 
                        findTerminator: for (final Terminator terminator : terminators)
-                               if (terminatorMatches(terminator))
-                                       // empty space detected
-                                       if (terminator.empty) {
-                                               currentIndex += terminator.value.length();
+                               if (sequenceMatches(terminator.startSequence))
+
+                                       if (terminator.ignoreTerminator) {
+                                               currentIndex += terminator.startSequence.length();
+
+                                               if (terminator.endSequence != null)
+                                                       skipUntilSequence(terminator.endSequence);
+
                                                if (result.length() > 0)
                                                        return new TokenizerMatch(result.toString(),
                                                                        terminator);
@@ -43,8 +65,9 @@ public class Tokenizer {
                                        } else if (result.length() > 0)
                                                return new TokenizerMatch(result.toString(), terminator);
                                        else {
-                                               currentIndex += terminator.value.length();
-                                               return new TokenizerMatch(terminator.value, terminator);
+                                               currentIndex += terminator.startSequence.length();
+                                               return new TokenizerMatch(terminator.startSequence,
+                                                               terminator);
                                        }
 
                        if (accumulateCurrentChar) {
@@ -55,15 +78,43 @@ public class Tokenizer {
 
        }
 
-       public boolean terminatorMatches(final Terminator terminator) {
-               if ((currentIndex + terminator.value.length()) > source.length())
+       public boolean probeNextToken(final String token) {
+               if (token.equals(getNextToken().token))
+                       return true;
+
+               unreadToken();
+               return false;
+       }
+
        /**
         * Tells whether the source text, read from the current position, starts
         * with the given character sequence. The current position is not moved.
         *
         * @param sequence
         *            character sequence to look for at the current position
         * @return <code>true</code> if every character of <code>sequence</code>
         *         matches the source at the current position,
         *         <code>false</code> if any character differs or the sequence
         *         would extend past the end of the source
         */
+       public boolean sequenceMatches(final String sequence) {
+               if ((currentIndex + sequence.length()) > source.length())
                        return false;
 
-               for (int i = 0; i < terminator.value.length(); i++)
-                       if (terminator.value.charAt(i) != source.charAt(i + currentIndex))
+               for (int i = 0; i < sequence.length(); i++)
+                       if (sequence.charAt(i) != source.charAt(i + currentIndex))
                                return false;
 
                return true;
        }
 
+       public void skipUntilDataEnd() {
+               tokenIndexes.push(currentIndex);
+               currentIndex = source.length();
+       }
+
+       public void skipUntilSequence(final String sequence) {
+               while (currentIndex < source.length()) {
+                       if (sequenceMatches(sequence)) {
+                               currentIndex += sequence.length();
+                               return;
+                       }
+
+                       currentIndex++;
+               }
+       }
+
+       public void unreadToken() {
+               currentIndex = tokenIndexes.pop();
+       }
+
 }