X-Git-Url: http://www2.svjatoslav.eu/gitweb/?p=svjatoslav_commons.git;a=blobdiff_plain;f=src%2Fmain%2Fjava%2Feu%2Fsvjatoslav%2Fcommons%2Fstring%2Ftokenizer%2FTerminator.java;h=8946b32a2474ae6d4c2e91758d5f6b5c3fe10296;hp=1a6c5eeb5650d0ff87a4427a14337729e13c29e8;hb=9bb7c6bf73ebbcbc66f4abd0fabf2f698c42d42c;hpb=67f7af91a79bc2ff50071389b6333a28755a4bff diff --git a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Terminator.java b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Terminator.java index 1a6c5ee..8946b32 100755 --- a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Terminator.java +++ b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Terminator.java @@ -4,52 +4,53 @@ */ package eu.svjatoslav.commons.string.tokenizer; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + public class Terminator { - public final String startSequence; - public final String endSequence; + String regexp; public final TerminationStrategy termination; + public final String group; + public boolean active = true; + public final Pattern pattern; - public Terminator(final String startSequence, TerminationStrategy termination) { - this.startSequence = startSequence; - this.endSequence = null; - this.termination = termination; - } - - public Terminator(final String startSequence, final String endSequence, TerminationStrategy termination) { - this.startSequence = startSequence; - this.endSequence = endSequence; + public Terminator(TerminationStrategy termination, String regexp, String group) { this.termination = termination; + this.group = group; + this.regexp = regexp; + this.pattern = Pattern.compile("^"+regexp); } - public boolean matches(String source, int index) { - // boundary check - if (source.length() < (index + startSequence.length())) - return false; - - // match check - for (int i = 0; i < startSequence.length(); i++) - if (startSequence.charAt(i) != source.charAt(index + i)) - return false; - - return true; - } - - public boolean hasEndSequence() { - return endSequence != null; + public Matcher match(String source, int index) { + Matcher matcher = pattern.matcher(source); + matcher.region(index, source.length()); + return matcher; } @Override public String toString() { return "Terminator{" + - "startSequence='" + startSequence + '\'' + - ", endSequence='" + endSequence + '\'' + + "regexp='" + regexp + '\'' + ", termination=" + termination + + ", group='" + group + '\'' + + ", active=" + active + '}'; } public enum TerminationStrategy { - PRESERVE, // Identify and return such tokens for further processing. - DROP // Identify but ignore such tokens, do not return them. Good for handling comments in scripts. + /** + * Preserve token that is identified within Terminator and return it for processing. For example when + * building language parser, it could be used for statements that you want to capture. + */ + PRESERVE, + + /** + * While tokens that are marked by Terminator are identified, they are dropped and not returned for consumption. + * For example, when building language parser, you might use such strategy for whitespace and comments. + * That is, those tokens act as separators between actually useful tokens, but you don't want to consume such + * separators or comments in your code. + */ + DROP } }