Use regular expressions as terminators
[svjatoslav_commons.git] / src / main / java / eu / svjatoslav / commons / string / tokenizer / Terminator.java
index 1a6c5ee..8946b32 100755 (executable)
@@ -4,52 +4,53 @@
  */
 package eu.svjatoslav.commons.string.tokenizer;
 
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
 public class Terminator {
 
-    public final String startSequence;
-    public final String endSequence;
+    String regexp;
     public final TerminationStrategy termination;
+    public final String group;
+    public boolean active = true;
+    public final Pattern pattern;
 
-    public Terminator(final String startSequence, TerminationStrategy termination) {
-        this.startSequence = startSequence;
-        this.endSequence = null;
-        this.termination = termination;
-    }
-
-    public Terminator(final String startSequence, final String endSequence, TerminationStrategy termination) {
-        this.startSequence = startSequence;
-        this.endSequence = endSequence;
+    public Terminator(TerminationStrategy termination, String regexp, String group) {
         this.termination = termination;
+        this.group = group;
+        this.regexp = regexp;
+        this.pattern = Pattern.compile("^"+regexp);
     }
 
-    public boolean matches(String source, int index) {
-        // boundary check
-        if (source.length() < (index + startSequence.length()))
-            return false;
-
-        // match check
-        for (int i = 0; i < startSequence.length(); i++)
-            if (startSequence.charAt(i) != source.charAt(index + i))
-                return false;
-
-        return true;
-    }
-
-    public boolean hasEndSequence() {
-        return endSequence != null;
+    public Matcher match(String source, int index) {
+        Matcher matcher = pattern.matcher(source);
+        matcher.region(index, source.length());
+        return matcher;
     }
 
     @Override
     public String toString() {
         return "Terminator{" +
-                "startSequence='" + startSequence + '\'' +
-                ", endSequence='" + endSequence + '\'' +
+                "regexp='" + regexp + '\'' +
                 ", termination=" + termination +
+                ", group='" + group + '\'' +
+                ", active=" + active +
                 '}';
     }
 
     public enum TerminationStrategy {
-        PRESERVE, // Identify and return such tokens for further processing.
-        DROP // Identify but ignore such tokens, do not return them. Good for handling comments in scripts.
+        /**
+         * Preserve the token that is identified by the Terminator and return it for processing. For example, when
+         * building a language parser, it could be used for statements that you want to capture.
+         */
+        PRESERVE,
+
+        /**
+         * Tokens that are marked by the Terminator are identified, but they are dropped and not returned for
+         * consumption. For example, when building a language parser, you might use this strategy for whitespace and
+         * comments. That is, such tokens act as separators between the actually useful tokens, but you do not want
+         * to consume those separators or comments in your code.
+         */
+        DROP
     }
 }