Use regular expressions as terminators
[svjatoslav_commons.git] / src / main / java / eu / svjatoslav / commons / string / tokenizer / Terminator.java
index c1d1983..8946b32 100755 (executable)
@@ -1,60 +1,56 @@
 /*
- * Svjatoslav Commons - shared library of common functionality.
- * Copyright ©2012-2017, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu
- * 
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 3 of the GNU Lesser General Public License
- * or later as published by the Free Software Foundation.
+ * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko.
+ * This project is released under Creative Commons Zero (CC0) license.
  */
-
 package eu.svjatoslav.commons.string.tokenizer;
 
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
 public class Terminator {
 
-    public final String startSequence;
-    public final String endSequence;
+    String regexp;
     public final TerminationStrategy termination;
+    public final String group;
+    public boolean active = true;
+    public final Pattern pattern;
 
-    public Terminator(final String startSequence, TerminationStrategy termination) {
-        this.startSequence = startSequence;
-        this.endSequence = null;
-        this.termination = termination;
-    }
-
-    public Terminator(final String startSequence, final String endSequence, TerminationStrategy termination) {
-        this.startSequence = startSequence;
-        this.endSequence = endSequence;
+    public Terminator(TerminationStrategy termination, String regexp, String group) {
         this.termination = termination;
+        this.group = group;
+        this.regexp = regexp;
+        this.pattern = Pattern.compile("^"+regexp);
     }
 
-    public boolean matches(String source, int index) {
-        // boundary check
-        if (source.length() < (index + startSequence.length()))
-            return false;
-
-        // match check
-        for (int i = 0; i < startSequence.length(); i++)
-            if (startSequence.charAt(i) != source.charAt(index + i))
-                return false;
-
-        return true;
-    }
-
-    public enum TerminationStrategy {
-        PRESERVE,
-        DROP
-    }
-
-    public boolean hasEndSequence(){
-        return endSequence != null;
+    public Matcher match(String source, int index) {
+        Matcher matcher = pattern.matcher(source);
+        matcher.region(index, source.length());
+        return matcher;
     }
 
     @Override
     public String toString() {
         return "Terminator{" +
-                "startSequence='" + startSequence + '\'' +
-                ", endSequence='" + endSequence + '\'' +
+                "regexp='" + regexp + '\'' +
                 ", termination=" + termination +
+                ", group='" + group + '\'' +
+                ", active=" + active +
                 '}';
     }
+
+    public enum TerminationStrategy {
+        /**
+         * Preserve the token identified by the Terminator and return it for processing. For example, when
+         * building a language parser, this could be used for statements that you want to capture.
+         */
+        PRESERVE,
+
+        /**
+         * Tokens matched by the Terminator are identified but then dropped, and are not returned for consumption.
+         * For example, when building a language parser, you might use this strategy for whitespace and comments.
+         * That is, those tokens act as separators between the actually useful tokens, but you don't want to consume
+         * such separators or comments in your code.
+         */
+        DROP
+    }
 }