Use regular expressions as terminators
[svjatoslav_commons.git] / src / main / java / eu / svjatoslav / commons / string / tokenizer / Terminator.java
index 2d0fd45..8946b32 100755 (executable)
@@ -1,33 +1,56 @@
 /*
- * Svjatoslav Commons - shared library of common functionality.
- * Copyright ©2012-2017, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu
- * 
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 3 of the GNU Lesser General Public License
- * or later as published by the Free Software Foundation.
+ * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko.
+ * This project is released under Creative Commons Zero (CC0) license.
  */
-
 package eu.svjatoslav.commons.string.tokenizer;
 
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
 public class Terminator {
 
-    final String startSequence;
-    String endSequence;
-    TerminationStrategy termination;
+    String regexp;
+    public final TerminationStrategy termination;
+    public final String group;
+    public boolean active = true;
+    public final Pattern pattern;
 
-    public Terminator(final String startPattern, TerminationStrategy termination) {
-        startSequence = startPattern;
+    public Terminator(TerminationStrategy termination, String regexp, String group) {
         this.termination = termination;
+        this.group = group;
+        this.regexp = regexp;
+        this.pattern = Pattern.compile("^"+regexp);
     }
 
-    public Terminator(final String startSequence, final String endSequence, TerminationStrategy termination) {
-        this.startSequence = startSequence;
-        this.endSequence = endSequence;
-        this.termination = termination;
+    public Matcher match(String source, int index) {
+        Matcher matcher = pattern.matcher(source);
+        matcher.region(index, source.length());
+        return matcher;
     }
 
-    enum TerminationStrategy {
+    @Override
+    public String toString() {
+        return "Terminator{" +
+                "regexp='" + regexp + '\'' +
+                ", termination=" + termination +
+                ", group='" + group + '\'' +
+                ", active=" + active +
+                '}';
+    }
+
+    public enum TerminationStrategy {
+        /**
+         * Preserve the token identified by this Terminator and return it for processing. For example, when
+         * building a language parser, this could be used for statements that you want to capture.
+         */
         PRESERVE,
-        DROP;
+
+        /**
+         * Tokens matched by this Terminator are recognized but dropped, and are not returned for consumption.
+         * For example, when building a language parser, you might use this strategy for whitespace and comments.
+         * That is, such tokens act as separators between the actually useful tokens, but you don't want to consume
+         * the separators or comments in your code.
+         */
+        DROP
     }
 }