/*
- * Svjatoslav Commons - shared library of common functionality.
- * Copyright ©2012-2017, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 3 of the GNU Lesser General Public License
- * or later as published by the Free Software Foundation.
+ * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko.
+ * This project is released under Creative Commons Zero (CC0) license.
*/
-
package eu.svjatoslav.commons.string.tokenizer;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
public class Terminator {
- public final String startSequence;
- public final String endSequence;
+ String regexp;
public final TerminationStrategy termination;
+ public final String group;
+ public boolean active = true;
+ public final Pattern pattern;
- public Terminator(final String startSequence, TerminationStrategy termination) {
- this.startSequence = startSequence;
- this.endSequence = null;
- this.termination = termination;
- }
-
- public Terminator(final String startSequence, final String endSequence, TerminationStrategy termination) {
- this.startSequence = startSequence;
- this.endSequence = endSequence;
+ public Terminator(TerminationStrategy termination, String regexp, String group) {
this.termination = termination;
+ this.group = group;
+ this.regexp = regexp;
+ this.pattern = Pattern.compile("^"+regexp);
}
- public boolean matches(String source, int index) {
- // boundary check
- if (source.length() < (index + startSequence.length()))
- return false;
-
- // match check
- for (int i = 0; i < startSequence.length(); i++)
- if (startSequence.charAt(i) != source.charAt(index + i))
- return false;
-
- return true;
- }
-
- public boolean hasEndSequence() {
- return endSequence != null;
+ public Matcher match(String source, int index) {
+ Matcher matcher = pattern.matcher(source);
+ matcher.region(index, source.length());
+ return matcher;
}
@Override
public String toString() {
return "Terminator{" +
- "startSequence='" + startSequence + '\'' +
- ", endSequence='" + endSequence + '\'' +
+ "regexp='" + regexp + '\'' +
", termination=" + termination +
+ ", group='" + group + '\'' +
+ ", active=" + active +
'}';
}
public enum TerminationStrategy {
+ /**
+ * Preserve the token matched by the Terminator and return it for processing. For example, when
+ * building a language parser, this could be used for statements that you want to capture.
+ */
PRESERVE,
+
+ /**
+ * Tokens matched by the Terminator are still identified, but they are dropped and not returned for consumption.
+ * For example, when building a language parser, you might use this strategy for whitespace and comments.
+ * That is, those tokens act as separators between the actually useful tokens, but you don't want to consume
+ * such separators or comments in your code.
+ */
DROP
}
}