From: Svjatoslav Agejenko Date: Sun, 12 Apr 2026 04:20:21 +0000 (+0300) Subject: Initial commit X-Git-Url: http://www2.svjatoslav.eu/gitweb/?a=commitdiff_plain;h=refs%2Fheads%2Fmaster;p=svjatoslav_commons.git Initial commit --- 934cf98965ab967d905d26e5903ea6ad47c2b4c2 diff --git a/.gitignore b/.gitignore new file mode 100755 index 0000000..80a1eb3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +/.idea/ +/.settings/ +/target/ +/*.iml +/*.log +/test/ + +/doc/apidocs/ +/doc/graphs/ + +/.classpath +/.factorypath +/.project +*.html diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..5ccf1fd --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,80 @@ +# Svjatoslav Commons + +Shared Java utility library used as a dependency across other +projects. Contains commonly useful functionality for string +manipulation, file I/O, binary data handling, and simple GUI dialogs. + +## Build Commands + +```bash +mvn compile # Compile source code +mvn test # Run unit tests +mvn package # Build JAR +mvn clean install # Clean build and install to local Maven repo +``` + +## Project Info + +- **License:** Creative Commons Zero (CC0) +- **Java Version:** 8 +- **Package:** `eu.svjatoslav.commons` + +## Dependencies + +| Dependency | Scope | Purpose | +|-----------------|----------|----------------------------| +| junit 4.8.1 | test | Unit testing | +| servlet-api 2.5 | provided | Servlet support (optional) | + +## Package Structure + +### `eu.svjatoslav.commons.data` +- **BitInputStream** - Read individual bits from an InputStream +- **BitOutputStream** - Write individual bits to an OutputStream +- **HexConverter** - Convert byte arrays to hexadecimal strings + +### `eu.svjatoslav.commons.file` +- **IOHelper** - File I/O utilities (recursive delete, read/write files, smart overwrite) +- **FilePathParser** - Extract file extensions, names without extensions, format file sizes +- **CommonPathResolver** - Resolve common system paths (home directory) + +### 
`eu.svjatoslav.commons.gui.dialog` +- **ExceptionDialog** - Swing dialog for displaying exceptions with stack traces + +### `eu.svjatoslav.commons.string` +- **String2** - Mutable string builder optimized for prefix/suffix operations (trim, prepend, append, repeat, enforce length) +- **GlobMatcher** - Match strings against wildcard patterns (`*` and `?`) + +### `eu.svjatoslav.commons.string.tokenizer` +Regex-based tokenizer for parsing structured text. + +- **Tokenizer** - Main tokenizer class; add terminators with regex patterns +- **Terminator** - Defines how tokens are identified (PRESERVE or DROP) +- **TokenizerMatch** - Result object containing matched token and metadata +- **InvalidSyntaxException** - Thrown on parsing errors + +Example usage: +```java +Tokenizer tokenizer = new Tokenizer("\"hello\" world"); +tokenizer.addTerminator(DROP, "\\s"); // Drop whitespace +tokenizer.addTerminator(PRESERVE, "\".*\""); // Preserve quoted strings +TokenizerMatch match = tokenizer.getNextToken(); // Returns "\"hello\"" +``` + +## Code Style + +- Standard file header with license comment on all source files +- Fluent API pattern (method chaining returns `this`) +- Javadoc on public classes, methods, and fields +- Static utility methods in helper classes (e.g., `IOHelper`, `GlobMatcher`, `HexConverter`) +- Mutable builder pattern (e.g., `String2`) + +## Testing + +Tests are located in `src/test/java/` mirroring the main package structure. +Uses JUnit 4 with standard `@Test` annotations. + +```bash +mvn test # Run all tests +mvn test -Dtest=String2Test # Run specific test class +``` diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..0e259d4 --- /dev/null +++ b/COPYING @@ -0,0 +1,121 @@ +Creative Commons Legal Code + +CC0 1.0 Universal + + CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE + LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN + ATTORNEY-CLIENT RELATIONSHIP. 
CREATIVE COMMONS PROVIDES THIS + INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES + REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS + PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM + THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED + HEREUNDER. + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator +and subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for +the purpose of contributing to a commons of creative, cultural and +scientific works ("Commons") that the public can reliably and without fear +of later claims of infringement build upon, modify, incorporate in other +works, reuse and redistribute as freely as possible in any form whatsoever +and for any purposes, including without limitation commercial purposes. +These owners may contribute to the Commons to promote the ideal of a free +culture and the further production of creative, cultural and scientific +works, or to gain reputation or greater distribution for their Work in +part through the use and efforts of others. + +For these and/or other purposes and motivations, and without any +expectation of additional consideration or compensation, the person +associating CC0 with a Work (the "Affirmer"), to the extent that he or she +is an owner of Copyright and Related Rights in the Work, voluntarily +elects to apply CC0 to the Work and publicly distribute the Work under its +terms, with knowledge of his or her Copyright and Related Rights in the +Work and the meaning and intended legal effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). 
Copyright and Related Rights include, but are not +limited to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, + communicate, and translate a Work; + ii. moral rights retained by the original author(s) and/or performer(s); +iii. publicity and privacy rights pertaining to a person's image or + likeness depicted in a Work; + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + v. rights protecting the extraction, dissemination, use and reuse of data + in a Work; + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation + thereof, including any amended or successor version of such + directive); and +vii. other similar, equivalent or corresponding rights throughout the + world based on applicable law or treaty, and any national + implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention +of, applicable law, Affirmer hereby overtly, fully, permanently, +irrevocably and unconditionally waives, abandons, and surrenders all of +Affirmer's Copyright and Related Rights and associated claims and causes +of action, whether now known or unknown (including existing as well as +future claims and causes of action), in the Work (i) in all territories +worldwide, (ii) for the maximum duration provided by applicable law or +treaty (including future time extensions), (iii) in any current or future +medium and for any number of copies, and (iv) for any purpose whatsoever, +including without limitation commercial, advertising or promotional +purposes (the "Waiver"). 
Affirmer makes the Waiver for the benefit of each +member of the public at large and to the detriment of Affirmer's heirs and +successors, fully intending that such Waiver shall not be subject to +revocation, rescission, cancellation, termination, or any other legal or +equitable action to disrupt the quiet enjoyment of the Work by the public +as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason +be judged legally invalid or ineffective under applicable law, then the +Waiver shall be preserved to the maximum extent permitted taking into +account Affirmer's express Statement of Purpose. In addition, to the +extent the Waiver is so judged Affirmer hereby grants to each affected +person a royalty-free, non transferable, non sublicensable, non exclusive, +irrevocable and unconditional license to exercise Affirmer's Copyright and +Related Rights in the Work (i) in all territories worldwide, (ii) for the +maximum duration provided by applicable law or treaty (including future +time extensions), (iii) in any current or future medium and for any number +of copies, and (iv) for any purpose whatsoever, including without +limitation commercial, advertising or promotional purposes (the +"License"). The License shall be deemed effective as of the date CC0 was +applied by Affirmer to the Work. Should any part of the License for any +reason be judged legally invalid or ineffective under applicable law, such +partial invalidity or ineffectiveness shall not invalidate the remainder +of the License, and in such case Affirmer hereby affirms that he or she +will not (i) exercise any of his or her remaining Copyright and Related +Rights in the Work or (ii) assert any associated claims and causes of +action with respect to the Work, in either case contrary to Affirmer's +express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. 
No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + b. Affirmer offers the Work as-is and makes no representations or + warranties of any kind concerning the Work, express, implied, + statutory or otherwise, including without limitation warranties of + title, merchantability, fitness for a particular purpose, non + infringement, or the absence of latent or other defects, accuracy, or + the present or absence of errors, whether or not discoverable, all to + the greatest extent permissible under applicable law. + c. Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without + limitation any person's Copyright and Related Rights in the Work. + Further, Affirmer disclaims responsibility for obtaining any necessary + consents, permissions or other rights required for any use of the + Work. + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to + this CC0 or use of the Work. diff --git a/TODO.org b/TODO.org new file mode 100644 index 0000000..765652c --- /dev/null +++ b/TODO.org @@ -0,0 +1,2 @@ ++ publish auto generated JavaDoc on web site + + link to JavaDoc from documentation diff --git a/Tools/Open with IntelliJ IDEA b/Tools/Open with IntelliJ IDEA new file mode 100755 index 0000000..304bf94 --- /dev/null +++ b/Tools/Open with IntelliJ IDEA @@ -0,0 +1,54 @@ +#!/bin/bash + +# This script launches IntelliJ IDEA with the current project +# directory. The script is designed to be run by double-clicking it in +# the GNOME Nautilus file manager. + +# First, we change the current working directory to the directory of +# the script. + +# "${0%/*}" gives us the path of the script itself, without the +# script's filename. 
+ +# This command basically tells the system "change the current +# directory to the directory containing this script". + +cd "${0%/*}" + +# Then, we move up one directory level. +# The ".." tells the system to go to the parent directory of the current directory. +# This is done because we assume that the project directory is one level up from the script. +cd .. + +# Now, we use the 'setsid' command to start a new session and run +# IntelliJ IDEA in the background. 'setsid' is a UNIX command that +# runs a program in a new session. + +# The command 'idea .' opens IntelliJ IDEA with the current directory +# as the project directory. The '&' at the end is a UNIX command that +# runs the process in the background. The '> /dev/null' part tells +# the system to redirect all output (both stdout and stderr, denoted +# by '&') that would normally go to the terminal to go to /dev/null +# instead, which is a special file that discards all data written to +# it. + +setsid idea . &>/dev/null & + +# The 'disown' command is a shell built-in that removes a shell job +# from the shell's active list. Therefore, the shell will not send a +# SIGHUP to this particular job when the shell session is terminated. + +# '-h' option specifies that if the shell receives a SIGHUP, it also +# doesn't send a SIGHUP to the job. + +# '$!' is a shell special parameter that expands to the process ID of +# the most recent background job. +disown -h $! + + +sleep 2 + +# Finally, we use the 'exit' command to terminate the shell script. +# This command tells the system to close the terminal window after +# IntelliJ IDEA has been opened. +exit diff --git a/Tools/Update web site b/Tools/Update web site new file mode 100755 index 0000000..13594a3 --- /dev/null +++ b/Tools/Update web site @@ -0,0 +1,86 @@ +#!/bin/bash +cd "${0%/*}"; if [ "$1" != "T" ]; then gnome-terminal -e "'$0' T"; exit; fi; + +cd .. 
+ +# Function to export org to html using emacs in batch mode +export_org_to_html() { + local org_file=$1 + local dir=$(dirname "$org_file") + local base=$(basename "$org_file" .org) + ( + cd "$dir" || return 1 + local html_file="${base}.html" + if [ -f "$html_file" ]; then + rm -f "$html_file" + fi + echo "Exporting: $org_file → $dir/$html_file" + emacs --batch -l ~/.emacs --visit="${base}.org" --funcall=org-html-export-to-html --kill + if [ $? -eq 0 ]; then + echo "✓ Successfully exported $org_file" + else + echo "✗ Failed to export $org_file" + return 1 + fi + ) +} + +export_org_files_to_html() { + echo "🔍 Searching for .org files in doc/ ..." + echo "=======================================" + + mapfile -t ORG_FILES < <(find doc -type f -name "*.org" | sort) + + if [ ${#ORG_FILES[@]} -eq 0 ]; then + echo "❌ No .org files found!" + return 1 + fi + + echo "Found ${#ORG_FILES[@]} .org file(s):" + printf '%s\n' "${ORG_FILES[@]}" + echo "=======================================" + + SUCCESS_COUNT=0 + FAILED_COUNT=0 + + for org_file in "${ORG_FILES[@]}"; do + export_org_to_html "$org_file" + if [ $? -eq 0 ]; then + ((SUCCESS_COUNT++)) + else + ((FAILED_COUNT++)) + fi + done + + echo "=======================================" + echo "📊 SUMMARY:" + echo " ✓ Successful: $SUCCESS_COUNT" + echo " ✗ Failed: $FAILED_COUNT" + echo " Total: $((SUCCESS_COUNT + FAILED_COUNT))" + echo "" +} + +# Build project jar file and JavaDocs +mvn clean package + +# Put generated JavaDoc HTML files to documentation directory +rm -rf doc/apidocs/ +cp -r target/apidocs/ doc/ + +# Publish Emacs org-mode files into HTML format +export_org_files_to_html + +## Upload assembled documentation to server +echo "📤 Uploading to server..." +rsync -avz --delete -e 'ssh -p 10006' doc/ \ + n0@www3.svjatoslav.eu:/mnt/big/projects/svjatoslav_commons/ + +if [ $? -eq 0 ]; then + echo "✓ Upload completed successfully!" +else + echo "✗ Upload failed!" +fi + +echo "" +echo "Press ENTER to close this window." 
+read diff --git a/doc/index.org b/doc/index.org new file mode 100644 index 0000000..733ab35 --- /dev/null +++ b/doc/index.org @@ -0,0 +1,224 @@ +#+SETUPFILE: ~/.emacs.d/org-styles/html/darksun.theme +#+TITLE: Svjatoslav Commons - Java library of commonly used functions +#+LANGUAGE: en +#+LATEX_HEADER: \usepackage[margin=1.0in]{geometry} +#+LATEX_HEADER: \usepackage{parskip} +#+LATEX_HEADER: \usepackage[none]{hyphenat} + +#+OPTIONS: H:20 num:20 +#+OPTIONS: author:nil + +#+begin_export html + +#+end_export + + +* Introduction + +Svjatoslav Commons is a shared Java utility library that consolidates +commonly needed functionality: reading and writing files, manipulating +strings, handling binary data, and displaying errors to users. + +The library requires Java 8 and has minimal dependencies: only JUnit +for testing and servlet-api (optional, provided scope). See [[#usage][How to +take library into use]] for Maven dependency configuration. The complete +API documentation is available in the [[https://www3.svjatoslav.eu/projects/svjatoslav_commons/apidocs/][JavaDoc]]. + +* Library contents +:PROPERTIES: +:CUSTOM_ID: library-contents +:END: +- See [[https://www3.svjatoslav.eu/projects/svjatoslav_commons/apidocs/][JavaDoc]]. + +** Binary data utilities ([[https://www3.svjatoslav.eu/projects/svjatoslav_commons/apidocs/eu/svjatoslav/commons/data/package-summary.html][eu.svjatoslav.commons.data]]) + +The data package provides classes for working with binary data at the +bit level and converting it to human-readable formats. + +[[https://www3.svjatoslav.eu/projects/svjatoslav_commons/apidocs/eu/svjatoslav/commons/data/BitInputStream.html][BitInputStream]] and [[https://www3.svjatoslav.eu/projects/svjatoslav_commons/apidocs/eu/svjatoslav/commons/data/BitOutputStream.html][BitOutputStream]] allow reading and writing +individual bits from streams, not just whole bytes. 
This is essential +for applications that work with bit-packed data formats where data is +not byte-aligned, such as custom compression algorithms, network +protocols, or binary file formats. Bits are processed from most +significant to least significant within each byte, and you can read or +write any number of bits (not just multiples of 8). + +[[https://www3.svjatoslav.eu/projects/svjatoslav_commons/apidocs/eu/svjatoslav/commons/data/HexConverter.html][HexConverter]] converts byte arrays to uppercase hexadecimal strings. +Each byte becomes exactly two characters (0-9, A-F), making binary data +visible in logs, debug output, or text-based protocols. Null inputs +return null; empty arrays return empty strings. + +** File system utilities ([[https://www3.svjatoslav.eu/projects/svjatoslav_commons/apidocs/eu/svjatoslav/commons/file/package-summary.html][eu.svjatoslav.commons.file]]) + +The file package provides utilities for common file operations: reading +and writing files, parsing paths, and resolving system directories. + +[[https://www3.svjatoslav.eu/projects/svjatoslav_commons/apidocs/eu/svjatoslav/commons/file/IOHelper.html][IOHelper]] is the primary file I/O class. It reads and writes files as +byte arrays or UTF-8 strings, deletes files and directories +recursively (without following symbolic links, which prevents accidental +deletion of data outside the intended scope), and overwrites files only +when content actually differs, avoiding unnecessary disk writes and +preserving timestamps. + +[[https://www3.svjatoslav.eu/projects/svjatoslav_commons/apidocs/eu/svjatoslav/commons/file/FilePathParser.html][FilePathParser]] extracts metadata from file paths: the file extension +(lowercase normalized), the file name without extension, and formats +file sizes using binary units (b, KiB, MiB, GiB, TiB, PiB) with +human-readable descriptions like "15 MiB". 
+ +[[https://www3.svjatoslav.eu/projects/svjatoslav_commons/apidocs/eu/svjatoslav/commons/file/CommonPathResolver.html][CommonPathResolver]] resolves common system paths. Currently it provides +the user's home directory, useful for storing application configuration +files or user-specific data. + +** Graphical dialogs ([[https://www3.svjatoslav.eu/projects/svjatoslav_commons/apidocs/eu/svjatoslav/commons/gui/dialog/package-summary.html][eu.svjatoslav.commons.gui.dialog]]) + +The dialog package provides Swing-based components for displaying +information to users in graphical applications. + +[[https://www3.svjatoslav.eu/projects/svjatoslav_commons/apidocs/eu/svjatoslav/commons/gui/dialog/ExceptionDialog.html][ExceptionDialog]] displays exceptions with their full stack traces in a +windowed dialog. It shows the exception type, error message, cause (if +present), and complete stack trace, making it easy to debug errors in +GUI applications without requiring console output or log files. Note: +this requires a graphical environment and is not suitable for headless +or server-side applications. + +** String utilities ([[https://www3.svjatoslav.eu/projects/svjatoslav_commons/apidocs/eu/svjatoslav/commons/string/package-summary.html][eu.svjatoslav.commons.string]]) + +The string package provides a mutable string builder optimized for +prefix and suffix operations, plus simple glob-style pattern matching. + +[[https://www3.svjatoslav.eu/projects/svjatoslav_commons/apidocs/eu/svjatoslav/commons/string/String2.html][String2]] is a mutable string that stores characters in a list, enabling +efficient operations at both ends. It provides ~prepend()~ and ~append()~ +for adding text, ~trimPrefix()~ and ~trimSuffix()~ for removing characters, +~trimPrefixIfExists()~ and ~trimSuffixIfExists()~ for conditional removal, +~hasPrefix()~ and ~hasSuffix()~ for checking, ~repeat()~ for duplication, +and ~enforceLength()~ for padding or truncating to exact lengths. 
All +methods return the same instance for fluent chaining. + +[[https://www3.svjatoslav.eu/projects/svjatoslav_commons/apidocs/eu/svjatoslav/commons/string/GlobMatcher.html][GlobMatcher]] matches strings against glob-style wildcard patterns using +~*~ (matches any sequence of characters, including empty) and ~?~ (matches +exactly one character). This is simpler than regular expressions and +useful for file name filtering, simple validation, or user-friendly +pattern input. + +** Tokenizer ([[https://www3.svjatoslav.eu/projects/svjatoslav_commons/apidocs/eu/svjatoslav/commons/string/tokenizer/package-summary.html][eu.svjatoslav.commons.string.tokenizer]]) + +The tokenizer package provides a regex-based tokenizer for parsing +structured text into tokens with lookahead and backtracking support. + +[[https://www3.svjatoslav.eu/projects/svjatoslav_commons/apidocs/eu/svjatoslav/commons/string/tokenizer/Tokenizer.html][Tokenizer]] is the main class. You add [[https://www3.svjatoslav.eu/projects/svjatoslav_commons/apidocs/eu/svjatoslav/commons/string/tokenizer/Terminator.html][Terminator]] objects that define +token boundaries using regex patterns. Each terminator has a strategy: +~PRESERVE~ returns matched tokens for processing, ~DROP~ silently discards +them (useful for whitespace or comments). You can peek at the next token +without consuming it, unread tokens to backtrack, expect specific tokens +and throw exceptions on mismatch, and categorize tokens using group +names. + +[[https://www3.svjatoslav.eu/projects/svjatoslav_commons/apidocs/eu/svjatoslav/commons/string/tokenizer/TokenizerMatch.html][TokenizerMatch]] is the result object containing the matched text, the +terminator that identified it, the regex matcher (for extracting capture +groups), and methods to check group membership. 
+ +[[https://www3.svjatoslav.eu/projects/svjatoslav_commons/apidocs/eu/svjatoslav/commons/string/tokenizer/InvalidSyntaxException.html][InvalidSyntaxException]] is thrown when expectations fail during parsing, +such as when expecting a specific token but finding something else. + +* How to take library into use +:PROPERTIES: +:CUSTOM_ID: usage +:END: + +Add the *svjatoslavcommons* dependency to your Maven project: + +#+BEGIN_SRC xml + + + eu.svjatoslav + svjatoslavcommons + 1.9 + + +#+END_SRC + +Also add the repository (the library is not on Maven Central): + +#+BEGIN_SRC xml + + + svjatoslav.eu + Svjatoslav repository + https://www3.svjatoslav.eu/maven/ + + +#+END_SRC + +* Source code +:PROPERTIES: +:CUSTOM_ID: source-code +:ID: 978b7ea2-e246-45d0-be76-4d561308e9f3 +:END: + +*This program is free software: released under Creative Commons Zero +(CC0) license* + +*Program author:* +- Svjatoslav Agejenko +- Homepage: https://svjatoslav.eu +- Email: mailto://svjatoslav@svjatoslav.eu +- See also: [[https://www.svjatoslav.eu/projects/][Other software projects hosted at svjatoslav.eu]] + +*Getting the source code:* +- [[https://www2.svjatoslav.eu/gitweb/?p=svjatoslav_commons.git;a=snapshot;h=HEAD;sf=tgz][Download latest snapshot in TAR GZ format]] +- [[https://www2.svjatoslav.eu/gitweb/?p=svjatoslav_commons.git;a=summary][Browse Git repository online]] +- Clone Git repository using command: + : git clone https://www2.svjatoslav.eu/git/svjatoslav_commons.git diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..ef82e33 --- /dev/null +++ b/pom.xml @@ -0,0 +1,133 @@ + + 4.0.0 + eu.svjatoslav + svjatoslavcommons + 1.10-SNAPSHOT + jar + Svjatoslav Commons + Collection many small but commonly useful functionalities + https://www3.svjatoslav.eu/projects/svjatoslav_commons/ + + + svjatoslav.eu + http://svjatoslav.eu + + + + UTF-8 + UTF-8 + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + 8 + 8 + UTF-8 + + + + + org.apache.maven.plugins + 
maven-source-plugin + 2.2.1 + + + attach-sources + + jar + + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 2.10.4 + + + attach-javadocs + + jar + + + + + + + + foo + bar + + + + ${java.home}/bin/javadoc + + + + + org.apache.maven.plugins + maven-release-plugin + 2.5.2 + + + org.apache.maven.scm + maven-scm-provider-gitexe + 1.9.4 + + + + + + + + + + org.apache.maven.wagon + wagon-ssh-external + 2.6 + + + + + + + junit + junit + 4.8.1 + test + + + + javax.servlet + servlet-api + 2.5 + provided + + + + + + svjatoslav.eu + svjatoslav.eu + scpexe://svjatoslav.eu:10006/srv/maven + + + svjatoslav.eu + svjatoslav.eu + scpexe://svjatoslav.eu:10006/srv/maven + + + + + scm:git:ssh://n0@svjatoslav.eu:10006/home/n0/git/svjatoslav_commons.git + scm:git:ssh://n0@svjatoslav.eu:10006/home/n0/git/svjatoslav_commons.git + HEAD + + + diff --git a/src/main/java/eu/svjatoslav/commons/data/BitInputStream.java b/src/main/java/eu/svjatoslav/commons/data/BitInputStream.java new file mode 100755 index 0000000..ab4d3eb --- /dev/null +++ b/src/main/java/eu/svjatoslav/commons/data/BitInputStream.java @@ -0,0 +1,110 @@ +/* + * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko. + * This project is released under Creative Commons Zero (CC0) license. + */ +package eu.svjatoslav.commons.data; + +import java.io.IOException; +import java.io.InputStream; + +/** + * Reads individual bits from an input stream. + * + *

This class provides bit-level reading capabilities, allowing you to + * read any number of bits from an underlying input stream. Bits are read from + * most significant to least significant within each byte.

+ * + *

Example usage:

+ *
{@code
+ * InputStream input = new ByteArrayInputStream(new byte[]{(byte) 0b10101010, (byte) 0b11001100});
+ * BitInputStream bis = new BitInputStream(input);
+ *
+ * int first3Bits = bis.readBits(3);  // Returns 0b101 (5)
+ * int next5Bits = bis.readBits(5);   // Returns 0b01010 (10)
+ * int next8Bits = bis.readBits(8);   // Returns 0b11001100 (204)
+ * }
+ * + *

This class is useful for applications that work with bit-packed data + * formats, such as custom compression algorithms, network protocols, or + * binary file formats where data is not byte-aligned.

+ * + * @see BitOutputStream + */ +public class BitInputStream { + + /** + * The underlying input stream from which bytes are read. + * Bytes are then disassembled into individual bits. + */ + private final InputStream inputStream; + + /** + * The current byte being processed. Bits are extracted from this byte + * one at a time as requested by read operations. + */ + private int currentByte; + + /** + * The current position within the current byte (0-7). + * Starts at -1 to indicate that no byte has been read yet. + * Bit 7 is the most significant bit, bit 0 is the least significant. + */ + private int currentBytePointer = -1; + + /** + * Creates a new BitInputStream that reads bits from the specified + * input stream. + * + * @param inputStream the underlying input stream to read bytes from. + * Must not be null. + */ + public BitInputStream(final InputStream inputStream) { + this.inputStream = inputStream; + } + + /** + * Reads the specified number of bits from the input stream. + * + *

Bits are read from most significant to least significant. + * The returned value is assembled with the first bit read becoming + * the most significant bit of the result.

+ * + *

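 If the end of the stream is reached while reading, the -1 end-of-stream + * marker returned by the underlying stream is used as the current byte, + * so the remaining bits are read as ones, not zeros. Reading zero bits returns zero.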

+ * + *

When crossing byte boundaries, the method automatically reads + * the next byte from the underlying stream.

+ * + * @param bitCount the number of bits to read (0 to 32). + * Values greater than 32 may produce unexpected results. + * @return the assembled bits as an integer value, with the first bit + * read becoming the most significant bit of the result. + * @throws IOException if an I/O error occurs while reading from + * the underlying stream. + */ + public int readBits(final int bitCount) throws IOException { + + int readableByte = 0; + for (int i = 0; i < bitCount; i++) { + + readableByte = readableByte << 1; + + if (currentBytePointer == -1) { + currentBytePointer = 7; + currentByte = inputStream.read(); + } + + int mask = 1; + mask = mask << currentBytePointer; + + final int currentBit = currentByte & mask; + + if (currentBit != 0) + readableByte = readableByte | 1; + + currentBytePointer--; + } + return readableByte; + } + +} \ No newline at end of file diff --git a/src/main/java/eu/svjatoslav/commons/data/BitOutputStream.java b/src/main/java/eu/svjatoslav/commons/data/BitOutputStream.java new file mode 100755 index 0000000..0149a98 --- /dev/null +++ b/src/main/java/eu/svjatoslav/commons/data/BitOutputStream.java @@ -0,0 +1,144 @@ +/* + * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko. + * This project is released under Creative Commons Zero (CC0) license. + */ +package eu.svjatoslav.commons.data; + +import java.io.IOException; +import java.io.OutputStream; + +/** + * Writes individual bits to an output stream. + * + *

This class provides bit-level writing capabilities, allowing you to + * write any number of bits to an underlying output stream. Bits are written + * from most significant to least significant within each byte.

+ * + *

Writing starts from the most significant bit (bit 7) of each byte and + * proceeds to the least significant bit (bit 0). When a byte is complete + * (8 bits written), it is flushed to the underlying stream automatically.

+ * + *

Example usage:

+ *
{@code
+ * ByteArrayOutputStream output = new ByteArrayOutputStream();
+ * BitOutputStream bos = new BitOutputStream(output);
+ *
+ * bos.storeBits(0b101, 3);      // Writes 3 bits: 101
+ * bos.storeBits(0b11110000, 8); // Writes 8 bits: 11110000
+ * bos.storeBits(0b11, 2);       // Writes 2 bits: 11
+ * bos.finishByte();             // Flushes partial byte if any
+ *
+ * byte[] result = output.toByteArray();
+ * }
+ * + *

This class is useful for applications that create bit-packed data + * formats, such as custom compression algorithms, network protocols, or + * binary file formats where data is not byte-aligned.

+ * + *

Important: Always call {@link #finishByte()} when done + * writing to ensure any partially written byte is flushed to the output stream.

+ * + * @see BitInputStream + */ +public class BitOutputStream { + + /** + * The underlying output stream where assembled bytes are written. + * Bytes are accumulated from individual bits before being written. + */ + private final OutputStream outputStream; + + /** + * The current byte being assembled from individual bits. + * Bits are added from most significant to the least significant position. + */ + private int currentByte; + + /** + * The current position within the current byte (0-7). + * Counts how many bits have been written to the current byte. + * When it reaches 8, the byte is flushed and the counter resets to 0. + */ + private int currentBytePointer; + + /** + * Creates a new BitOutputStream that writes bits to the specified + * output stream. + * + *

The output stream is not modified until at least 8 bits have + * been written, or {@link #finishByte()} is called.

+ * + * @param outputStream the underlying output stream to write bytes to. + * Must not be null. + */ + public BitOutputStream(final OutputStream outputStream) { + currentByte = 0; + currentBytePointer = 0; + this.outputStream = outputStream; + } + + /** + * Finishes writing the current incomplete byte to the output stream. + * + *

If fewer than 8 bits have been written to the current byte, + * the remaining bits are filled with zeros and the byte is written. + * If no bits are pending (nothing has been written yet, or the last + * byte was completed and already flushed), this method does nothing.

+ * + *

Important: Always call this method when finished + * writing to ensure all data is flushed to the output stream. Otherwise, + * the last partial byte may be lost.

+ * + * @throws IOException if an I/O error occurs while writing to + * the underlying stream. + */ + public void finishByte() throws IOException { + if (currentBytePointer != 0) { + outputStream.write(currentByte); + currentBytePointer = 0; + } + } + + /** + * Writes the specified number of bits from the data value to the output stream. + * + *

Bits are taken from the least significant positions of the data value. + * For example, calling {@code storeBits(0b1011, 3)} will write the 3 least + * significant bits (bit 2 first, bit 0 last): 011.

+ * + *

Bits are written from most significant to least significant within + * each byte. When a byte is completed (8 bits), it is automatically + * flushed to the underlying stream.

+ * + *

Writing zero bits does nothing and does not advance the byte pointer.

+ * + * @param data the integer value containing the bits to write. + * Only the specified number of most significant bits are used. + * @param bitCount the number of bits to write (0 to 32). + * Values greater than 32 may produce unexpected results. + * @throws IOException if an I/O error occurs while writing to + * the underlying stream. + */ + public void storeBits(final int data, final int bitCount) + throws IOException { + for (int i = bitCount - 1; i >= 0; i--) { + + int mask = 1; + mask = mask << i; + + final int currentBit = data & mask; + currentByte = currentByte << 1; + + if (currentBit != 0) + currentByte = currentByte | 1; + currentBytePointer++; + + if (currentBytePointer == 8) { + currentBytePointer = 0; + outputStream.write(currentByte); + currentByte = 0; + } + } + } + +} \ No newline at end of file diff --git a/src/main/java/eu/svjatoslav/commons/data/HexConverter.java b/src/main/java/eu/svjatoslav/commons/data/HexConverter.java new file mode 100755 index 0000000..a641d26 --- /dev/null +++ b/src/main/java/eu/svjatoslav/commons/data/HexConverter.java @@ -0,0 +1,91 @@ +/* + * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko. + * This project is released under Creative Commons Zero (CC0) license. + */ +package eu.svjatoslav.commons.data; + +/** + * Converts byte arrays to hexadecimal string representation. + * + *

This utility class provides a simple method to convert binary data + * into a human-readable hexadecimal format. Each byte is converted to + * two uppercase hexadecimal characters (0-9, A-F).

+ * + *

Example usage:

+ *
{@code
+ * byte[] data = new byte[]{0x01, 0x02, (byte) 0xFF};
+ * String hex = HexConverter.byteArrayToHex(data);
+ * // Result: "0102FF"
+ *
+ * byte[] empty = new byte[]{};
+ * String emptyHex = HexConverter.byteArrayToHex(empty);
+ * // Result: ""
+ *
+ * String nullHex = HexConverter.byteArrayToHex(null);
+ * // Result: null
+ * }
+ * + *

This class is useful for:

+ * + * + */ +public class HexConverter { + + /** + * Private constructor to prevent instantiation. + * + *

This is a utility class with only static methods. It does not need + * to be instantiated.

+ */ + private HexConverter() {} + + /** + * The lookup table for hexadecimal characters (0-9, A-F). + * Each nibble (4 bits, value 0-15) maps to the corresponding + * hexadecimal character. + */ + static final String hexCharacters = "0123456789ABCDEF"; + + /** + * Converts a byte array to an uppercase hexadecimal string. + * + *

Each byte is converted to exactly two hexadecimal characters. + * The high nibble (bits 7-4) becomes the first character, and the + * low nibble (bits 3-0) becomes the second character.

+ * + *

Example conversions:

+ * + * + *

The result is always uppercase. Empty arrays return an empty string. + * Null arrays return null.

+ * + * @param bytes the byte array to convert. May be null or empty. + * @return an uppercase hexadecimal string representation, or null + * if the input was null. The string length is always twice + * the byte array length. + */ + public static String byteArrayToHex(final byte[] bytes) { + + if (bytes == null) + return null; + + final StringBuilder result = new StringBuilder(2 * bytes.length); + + for (final byte b : bytes) + result.append(hexCharacters.charAt((b & 0xF0) >> 4)).append( + hexCharacters.charAt((b & 0x0F))); + + return result.toString(); + } + +} \ No newline at end of file diff --git a/src/main/java/eu/svjatoslav/commons/data/package-info.java b/src/main/java/eu/svjatoslav/commons/data/package-info.java new file mode 100644 index 0000000..b5bb4b5 --- /dev/null +++ b/src/main/java/eu/svjatoslav/commons/data/package-info.java @@ -0,0 +1,28 @@ +/* + * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko. + * This project is released under Creative Commons Zero (CC0) license. + */ + +/** + * Provides utility classes for binary data manipulation and conversion. + * + *

This package contains classes for working with data at the bit level + * and converting binary data to human-readable formats:

+ * + * + * + *

These utilities are useful for applications that need fine-grained + * control over binary data, such as compression algorithms, custom file + * formats, or network protocols.

+ * + * @author Svjatoslav Agejenko + * @since 1.0 + * @see eu.svjatoslav.commons.data.BitInputStream + * @see eu.svjatoslav.commons.data.BitOutputStream + * @see eu.svjatoslav.commons.data.HexConverter + */ +package eu.svjatoslav.commons.data; \ No newline at end of file diff --git a/src/main/java/eu/svjatoslav/commons/file/CommonPathResolver.java b/src/main/java/eu/svjatoslav/commons/file/CommonPathResolver.java new file mode 100755 index 0000000..0695c16 --- /dev/null +++ b/src/main/java/eu/svjatoslav/commons/file/CommonPathResolver.java @@ -0,0 +1,72 @@ +/* + * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko. + * This project is released under Creative Commons Zero (CC0) license. + */ +package eu.svjatoslav.commons.file; + +import java.io.File; + +/** + * Resolves common system paths and directories. + * + *

This utility class provides static methods to obtain common file system + * paths that are frequently needed in applications, such as the user's home + * directory, configuration directories, or temporary directories.

+ * + *

Currently provides:

+ * + * + *

Example usage:

+ *
{@code
+ * File homeDir = CommonPathResolver.getHomeDirectory();
+ * File configFile = new File(homeDir, ".myapp/config.properties");
+ * }
+ * + *

This class is designed to be extended with additional common path + * resolution methods as needed.

+ * + */ +public class CommonPathResolver { + + /** + * Private constructor to prevent instantiation. + * + *

This is a utility class with only static methods. It does not need + * to be instantiated.

+ */ + private CommonPathResolver() {} + + /** + * Returns the current user's home directory. + * + *

This is typically the personal directory of the logged-in user, + * such as:

+ * + * + *

This is a convenient location for storing user-specific configuration + * files, documents, and application data.

+ * + *

Example usage:

+ *
{@code
+     * File home = CommonPathResolver.getHomeDirectory();
+     * System.out.println("Home directory: " + home.getAbsolutePath());
+     *
+     * // Create an application config directory
+     * File configDir = new File(home, ".myapp");
+     * configDir.mkdirs();
+     * }
+ * + * @return a File object representing the user's home directory. + * The directory typically exists and is readable/writable + * by the current user. + */ + public static File getHomeDirectory() { + return new File(System.getProperty("user.home")); + } + +} \ No newline at end of file diff --git a/src/main/java/eu/svjatoslav/commons/file/FilePathParser.java b/src/main/java/eu/svjatoslav/commons/file/FilePathParser.java new file mode 100755 index 0000000..136f808 --- /dev/null +++ b/src/main/java/eu/svjatoslav/commons/file/FilePathParser.java @@ -0,0 +1,219 @@ +/* + * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko. + * This project is released under Creative Commons Zero (CC0) license. + */ +package eu.svjatoslav.commons.file; + +import java.io.File; + +/** + * Utility class for parsing file paths and extracting file metadata. + * + *

This class provides static methods for extracting file extensions, + * file names without extensions, and formatting file sizes for display. + * All methods are simple utilities that operate on file names or paths.

+ * + *

Key features:

+ * + * + *

Example usage:

+ *
{@code
+ * // Get file extension
+ * String ext = FilePathParser.getFileExtension("document.PDF");
+ * // Result: "pdf" (lowercase)
+ *
+ * // Get file name without extension
+ * String name = FilePathParser.getFileNameWithoutExtension(new File("image.jpg"));
+ * // Result: "image"
+ *
+ * // Format file size
+ * String size = FilePathParser.getFileSizeDescription(15728640);
+ * // Result: "15 MiB"
+ * }
+ * + *

Note on extensions: For files with multiple dots, + * only the last dot-separated segment is considered the extension. + * For example, "archive.tar.gz" has extension "gz", not "tar.gz".

+ */ +public class FilePathParser { + + /** + * Private constructor to prevent instantiation. + * + *

This is a utility class with only static methods. It does not need + * to be instantiated.

+ */ + private FilePathParser() {} + + /** + * Returns the file extension from a File object. + * + *

The extension is extracted from the file name (not the full path) + * and returned in lowercase. Files without an extension return an + * empty string.

+ * + *

Example results:

+ * + * + * @param file the file to extract the extension from. Must not be null. + * @return the file extension in lowercase, or an empty string if the + * file has no extension (no dot in the name, or dot at the start). + */ + public static String getFileExtension(final File file) { + final String fullFileName = file.getName(); + + return getFileExtension(fullFileName); + } + + /** + * Returns the file extension from a file name string. + * + *

The extension is extracted from the last dot-separated segment + * and returned in lowercase. File names without an extension return + * an empty string.

+ * + *

Example results:

+ * + * + * @param fullFileName the file name (not full path) to parse. + * Must not be null. + * @return the file extension in lowercase, or an empty string if the + * file has no extension. + */ + public static String getFileExtension(final String fullFileName) { + final int dot = fullFileName.lastIndexOf('.'); + String fileExtension; + if (dot == -1) + fileExtension = ""; + else { + fileExtension = fullFileName.substring(dot + 1); + fileExtension = fileExtension.toLowerCase(); + } + + return fileExtension; + } + + /** + * Returns the file name without its extension from a File object. + * + *

The base name is everything before the last dot. If the file + * has no extension, the entire file name is returned.

+ * + *

Example results:

+ * + * + * @param file the file to extract the base name from. Must not be null. + * @return the file name without the extension. Returns the complete + * file name if there is no extension. + */ + public static String getFileNameWithoutExtension(final File file) { + final String fullFileName = file.getName(); + return getFileNameWithoutExtension(fullFileName); + } + + /** + * Returns the file name without its extension from a file name string. + * + *

The base name is everything before the last dot. If the file + * has no extension, the entire file name is returned.

+ * + *

Example results:

+ * + * + * @param fullFileName the file name (not full path) to parse. + * Must not be null. + * @return the file name without the extension. Returns the complete + * file name if there is no extension. + */ + public static String getFileNameWithoutExtension(final String fullFileName) { + final int dot = fullFileName.lastIndexOf('.'); + String fileName; + if (dot == -1) + fileName = fullFileName; + else + fileName = fullFileName.substring(0, dot); + + return fileName; + } + + /** + * Returns a human-readable description of a file size using binary units. + * + *

The size is formatted with binary (IEC) units. A larger unit is + * used only once the size reaches at least 5 of that unit:

+ * + * + * + * + * + * + * + * + * + *
Size unit thresholds
Unit | Threshold
bytes (b) | < 5 KiB (5120 bytes)
kibibytes (KiB) | ≥ 5 KiB, < 5 MiB
mebibytes (MiB) | ≥ 5 MiB, < 5 GiB
gibibytes (GiB) | ≥ 5 GiB, < 5 TiB
tebibytes (TiB) | ≥ 5 TiB, < 5 PiB
pebibytes (PiB) | ≥ 5 PiB
+ * + *

Example results:

+ * + * + *

Note: This method uses binary units (KiB = 1024 bytes, + * MiB = 1024 KiB, etc.) per the IEC standard, not SI units (KB = 1000 bytes).

+ * + * @param fileSize the file size in bytes. Negative values will produce + * unexpected results. + * @return a human-readable size description with binary units. + */ + public static String getFileSizeDescription(long fileSize) { + final long KIB = 1L << 10; + final long MIB = 1L << 20; + final long GIB = 1L << 30; + final long TIB = 1L << 40; + final long PIB = 1L << 50; + + if (fileSize >= 5L * PIB) { + return (fileSize / PIB) + " PiB"; + } else if (fileSize >= 5L * TIB) { + return (fileSize / TIB) + " TiB"; + } else if (fileSize >= 5L * GIB) { + return (fileSize / GIB) + " GiB"; + } else if (fileSize >= 5L * MIB) { + return (fileSize / MIB) + " MiB"; + } else if (fileSize >= 5L * KIB) { + return (fileSize / KIB) + " KiB"; + } else { + return fileSize + " b"; + } + } +} \ No newline at end of file diff --git a/src/main/java/eu/svjatoslav/commons/file/IOHelper.java b/src/main/java/eu/svjatoslav/commons/file/IOHelper.java new file mode 100755 index 0000000..af2a23e --- /dev/null +++ b/src/main/java/eu/svjatoslav/commons/file/IOHelper.java @@ -0,0 +1,292 @@ +/* + * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko. + * This project is released under Creative Commons Zero (CC0) license. + */ +package eu.svjatoslav.commons.file; + +import java.io.*; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static java.nio.file.Files.isSymbolicLink; + +/** + * Utility class for common file system operations. + * + *

This class provides static methods for file I/O operations including: + * reading and writing files, recursive directory deletion, and smart file + * overwriting that only writes when content differs.

+ * + *

Key features:

+ * + * + *

Example usage:

+ *
{@code
+ * // Write and read a text file
+ * File file = new File("example.txt");
+ * IOHelper.saveToFile(file, "Hello, World!");
+ * String content = IOHelper.getFileContentsAsString(file);
+ *
+ * // Smart overwriting - only writes if content differs
+ * IOHelper.overwriteFileIfContentDiffers(file, "New content".getBytes(StandardCharsets.UTF_8));
+ *
+ * // Delete a directory recursively
+ * File directory = new File("temp");
+ * IOHelper.deleteRecursively(directory);
+ * }
+ * + *

Note on symbolic links: The {@link #deleteRecursively(File)} + * method handles symbolic links safely. When a symlink points to a directory, + * the directory's contents are not deleted - only the symlink itself is removed. + * This prevents accidental deletion of data outside the intended scope.

+ * + */ +public class IOHelper { + + /** + * Private constructor to prevent instantiation. + * + *

This is a utility class with only static methods. It does not need + * to be instantiated.

+ */ + private IOHelper() {} + + /** + * Deletes a file or directory recursively. Does not follow symbolic links. + * + *

This method safely handles:

+ * + * + *

The non-following of symlinks is an important safety feature. If a + * symlink points to an important directory outside the scope of deletion, + * that directory's contents remain safe.

+ * + *

Example usage:

+ *
{@code
+     * // Delete a file
+     * IOHelper.deleteRecursively(new File("temp.txt"));
+     *
+     * // Delete a directory with all contents
+     * IOHelper.deleteRecursively(new File("tempDir"));
+     *
+     * // Delete a symlink (target remains intact)
+     * IOHelper.deleteRecursively(new File("linkToOtherDir"));
+     * }
+ * + * @param file the file, directory, or symbolic link to delete. + * Must not be null. + * @throws IOException if a filesystem error occurs during deletion, + * such as permission denied or file in use. + */ + public static void deleteRecursively(final File file) throws IOException { + + if (file.isDirectory()) { + deleteDirectory(file); + return; + } + + if (file.isFile()){ + if (!file.delete()) throw new IOException("Failed to delete file: " + file); + } else { + if (isSymbolicLink(file.toPath()) && !file.delete()) + throw new IOException("Failed to delete symlink: " + file); + } + } + + /** + * Deletes a directory and all its contents recursively. + * + *

If the directory is a symbolic link, only the link is deleted + * (not the target directory's contents). This prevents accidental + * deletion of data outside the intended scope.

+ * + * @param file the directory to delete. Must not be null. + * @throws IOException if a filesystem error occurs, such as + * failure to read directory contents or delete files. + */ + private static void deleteDirectory(File file) throws IOException { + // if file is symlink that points to directory, do not touch content + if (!isSymbolicLink(file.toPath())){ + File[] files = file.listFiles(); + if (files == null) + throw new IOException("Failed to read directory content for: " + + file); + + for (final File subFile : files) + deleteRecursively(subFile); + } + + if (!file.delete()) + throw new IOException("Failed to delete directory: " + file); + } + + /** + * Reads the entire contents of a file as a byte array. + * + *

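The file must exist, its contents must fit in memory, and it must be + * smaller than 2 GiB because the file length is narrowed to an {@code int} + * array size. This method is suitable for small to medium-sized files. + * For large files, consider using streaming approaches instead.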

+ * + *

The returned array has exactly the same length as the file size.

+ * + * @param file the file to read. Must exist and be readable. + * @return the complete file contents as a byte array. + * @throws IOException if an I/O error occurs, such as file not found + * or permission denied. + * @throws RuntimeException if the file could not be fully read + * (unexpected EOF). + */ + public static byte[] getFileContents(final File file) + throws IOException { + + final byte[] result = new byte[(int) file.length()]; + try (final FileInputStream fileInputStream = new FileInputStream(file)) { + if (fileInputStream.read(result) != result.length) + throw new RuntimeException("Could not read file content:" + file); + } + return result; + } + + /** + * Reads the entire contents of a file as a UTF-8 encoded string. + * + *

The file content is assumed to be UTF-8 encoded. This is the + * standard encoding for text files in most modern applications.

+ * + *

The file must exist and its size must fit in memory. This method + * is suitable for text files of reasonable size.

+ * + *

Example usage:

+ *
{@code
+     * File file = new File("config.txt");
+     * String config = IOHelper.getFileContentsAsString(file);
+     * }
+ * + * @param file the file to read. Must exist and be readable. + * @return the file contents as a UTF-8 decoded string. + * @throws IOException if an I/O error occurs during file reading. + */ + public static String getFileContentsAsString(final File file) + throws IOException { + try { + return new String(getFileContents(file), UTF_8); + } catch (final UnsupportedEncodingException exception) { + throw new RuntimeException(exception); + } + } + + /** + * Compares new content with existing file content and overwrites + * only if different. + * + *

This method performs a byte-by-byte comparison with the existing + * file content. If the content is identical, no write operation occurs, + * saving disk I/O and preserving file timestamps.

+ * + *

This is useful for:

+ * + * + *

If the file does not exist, it is created and the content is written.

+ * + *

Example usage:

+ *
{@code
+     * byte[] newConfig = generateConfig().getBytes(StandardCharsets.UTF_8);
+     * boolean changed = IOHelper.overwriteFileIfContentDiffers(
+     *     new File("config.xml"), newConfig);
+     * if (changed) {
+     *     System.out.println("Configuration updated");
+     * } else {
+     *     System.out.println("No changes needed");
+     * }
+     * }
+ * + * @param file the file to potentially overwrite. If it does not exist, + * it will be created. + * @param newContent the new content to potentially write. Must not be null. + * @return {@code true} if the file was overwritten (content differed or + * file was created), {@code false} if the content was identical + * and no write occurred. + * @throws FileNotFoundException if the file cannot be found for reading + * (when comparing existing content). + * @throws IOException if an I/O error occurs during reading or writing. + */ + public static boolean overwriteFileIfContentDiffers(final File file, + final byte[] newContent) throws IOException { + + checkForEquality: + { + if (file.length() == newContent.length) { + + final byte[] oldContent = getFileContents(file); + + for (int i = 0; i < newContent.length; i++) + if (newContent[i] != oldContent[i]) + break checkForEquality; + + // new file content is identical to old content + return false; + } + } + + // New content differs from existing. Overwrite file. + saveToFile(file, newContent); + return true; + } + + /** + * Saves byte content to a file, overwriting any existing content. + * + *

This method writes the complete byte array to the file. If the + * file exists, it is overwritten. If it does not exist, it is created.

+ * + *

This is a straightforward write operation - use + * {@link #overwriteFileIfContentDiffers(File, byte[])} if you want to + * avoid unnecessary writes when content is unchanged.

+ * + * @param file the file to write to. Parent directories must exist. + * @param content the byte content to write. Must not be null. + * @throws IOException if an I/O error occurs during writing. + */ + public static void saveToFile(final File file, final byte[] content) + throws IOException { + try (final FileOutputStream fos = new FileOutputStream(file)) { + fos.write(content); + } + } + + /** + * Saves string content to a file using UTF-8 encoding, overwriting + * any existing content. + * + *

The string is encoded as UTF-8 before writing. If the file exists, + * it is overwritten. If it does not exist, it is created.

+ * + *

This is a convenient method for writing text files:

+ *
{@code
+     * IOHelper.saveToFile(new File("output.txt"), "Hello, World!");
+     * }
+ * + * @param file the file to write to. Parent directories must exist. + * @param content the string content to write, encoded as UTF-8. + * Must not be null. + * @throws IOException if an I/O error occurs during writing. + */ + public static void saveToFile(final File file, final String content) + throws IOException { + saveToFile(file, content.getBytes(UTF_8)); + } + +} \ No newline at end of file diff --git a/src/main/java/eu/svjatoslav/commons/file/package-info.java b/src/main/java/eu/svjatoslav/commons/file/package-info.java new file mode 100644 index 0000000..d830d3a --- /dev/null +++ b/src/main/java/eu/svjatoslav/commons/file/package-info.java @@ -0,0 +1,30 @@ +/* + * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko. + * This project is released under Creative Commons Zero (CC0) license. + */ + +/** + * Provides utility classes for file system operations and path handling. + * + *

This package contains classes for common file operations:

+ * + * + * + *

These utilities simplify common file operations that would otherwise require + * repetitive boilerplate code. The {@link eu.svjatoslav.commons.file.IOHelper} class + * in particular handles edge cases like symbolic links and recursive directory deletion.

+ * + * @author Svjatoslav Agejenko + * @since 1.0 + * @see eu.svjatoslav.commons.file.IOHelper + * @see eu.svjatoslav.commons.file.FilePathParser + * @see eu.svjatoslav.commons.file.CommonPathResolver + */ +package eu.svjatoslav.commons.file; \ No newline at end of file diff --git a/src/main/java/eu/svjatoslav/commons/gui/dialog/ExceptionDialog.java b/src/main/java/eu/svjatoslav/commons/gui/dialog/ExceptionDialog.java new file mode 100755 index 0000000..9dc30a4 --- /dev/null +++ b/src/main/java/eu/svjatoslav/commons/gui/dialog/ExceptionDialog.java @@ -0,0 +1,175 @@ +/* + * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko. + * This project is released under Creative Commons Zero (CC0) license. + */ +package eu.svjatoslav.commons.gui.dialog; + +import javax.swing.*; +import java.awt.*; + +import static javax.swing.BoxLayout.Y_AXIS; + +/** + * A graphical dialog for displaying exceptions with stack traces. + * + *

This Swing-based dialog provides a user-friendly way to display + * exceptions and their stack traces in graphical applications. It shows:

+ * + * + * + *

Example usage:

+ *
{@code
+ * try {
+ *     // some operation that might fail
+ * } catch (Exception e) {
+ *     new ExceptionDialog(e);
+ * }
+ * }
+ * + *

The dialog opens a new JFrame window with the exception details. + * The window is 800x600 pixels and centered on screen.

+ * + *

Note: This class requires a graphical environment + * and Swing libraries. It is not suitable for headless or server-side + * applications. For non-graphical error reporting, use standard logging + * or console output instead.

+ * + */ +public class ExceptionDialog { + + /** + * Creates and displays an exception dialog for the given exception. + * + *

The dialog is shown immediately upon construction. This is a + * convenience for quick exception display without additional setup.

+ * + * @param exception the exception to display. Must not be null. + */ + public ExceptionDialog(final Exception exception) { + showException(exception); + } + + /** + * Main method for testing the exception dialog. + * + *

This method creates a sample exception with a cause and displays + * it in the dialog, allowing developers to test the dialog appearance + * without needing actual exceptions.

+ * + * @param args command line arguments (ignored). + */ + public static void main(final String[] args) { + + final Throwable cause = new Throwable("details....."); + + final Exception exception = new Exception("test", cause); + + new ExceptionDialog(exception); + } + + /** + * Displays the exception in a new JFrame window. + * + *

Creates a window with:

+ * + * + *

The window is sized 800x600 pixels and centered on the screen. + * It closes when the user clicks the close button (DISPOSE_ON_CLOSE).

+ * + * @param exception the exception to display. Must not be null. + */ + public void showException(final Exception exception) { + final JPanel contentPanel = new JPanel(new BorderLayout()); + contentPanel.add(getTopPanel(exception), BorderLayout.NORTH); + contentPanel.add(getStackTraceView(exception.getCause()), BorderLayout.CENTER); + + final JFrame frame = new JFrame("Exception occurred!"); + frame.getContentPane().add(contentPanel); + frame.setSize(800, 600); + frame.setVisible(true); + frame.setDefaultCloseOperation(WindowConstants.DISPOSE_ON_CLOSE); + frame.setLocationRelativeTo(null); // center frame on screen + } + + /** + * Creates a text area displaying the stack trace. + * + *

If a cause is provided, shows the cause's stack trace. Otherwise, + * shows a placeholder message and the current stack trace for context.

+ * + * @param cause the cause of the exception, or null if no cause is available. + * @return a TextArea containing the stack trace text. + */ + private TextArea getStackTraceView(Throwable cause) { + final StringBuilder buffer = new StringBuilder(); + + if (cause != null) { + // if cause is available, show original stack trace + buffer.append("Stack trace:\n"); + final StackTraceElement[] stackTrace = cause.getStackTrace(); + enlistStackTraceElements(buffer, stackTrace); + } else { + // otherwise show at least current stack trace + buffer.append("Stack trace from original cause is not available.\n" + + "Showing current stack trace instead:\n"); + enlistStackTraceElements(buffer, new Exception("Stack trace").getStackTrace()); + } + + return new TextArea(buffer.toString()); + } + + /** + * Appends stack trace elements to a StringBuilder buffer. + * + *

Each element is converted to a string and appended on a new line.

+ * + * @param buffer the StringBuilder to append to. Must not be null. + * @param stackTrace the array of stack trace elements to append. + * May be empty but must not be null. + */ + private void enlistStackTraceElements(StringBuilder buffer, StackTraceElement[] stackTrace) { + for (final StackTraceElement stackTraceElement : stackTrace) + buffer.append(stackTraceElement.toString()).append("\n"); + } + + /** + * Creates a panel showing exception summary information. + * + *

The panel contains JLabels for:

+ * + * + * @param exception the exception to display information for. Must not be null. + * @return a JPanel containing the exception summary labels. + */ + private JPanel getTopPanel(Exception exception) { + + final JPanel topPanel = new JPanel(); + topPanel.setLayout(new BoxLayout(topPanel, Y_AXIS)); + + // add an exception type + topPanel.add( + new JLabel("Exception type: " + exception.getClass().getCanonicalName())); + + // add an error message + topPanel.add(new JLabel("Error message: " + exception.getMessage())); + + // add a cause message + Throwable cause = exception.getCause(); + if (cause != null && cause.getMessage() != null) + topPanel.add(new JLabel("Cause: " + cause.getMessage())); + return topPanel; + } + +} \ No newline at end of file diff --git a/src/main/java/eu/svjatoslav/commons/gui/dialog/package-info.java b/src/main/java/eu/svjatoslav/commons/gui/dialog/package-info.java new file mode 100644 index 0000000..041e2a9 --- /dev/null +++ b/src/main/java/eu/svjatoslav/commons/gui/dialog/package-info.java @@ -0,0 +1,28 @@ +/* + * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko. + * This project is released under Creative Commons Zero (CC0) license. + */ + +/** + * Provides Swing-based dialog components for graphical user interfaces. + * + *

This package contains utility dialogs for displaying information + * to users in graphical applications:

+ * + * + * + *

These dialogs are designed to be simple to use and provide immediate + * visual feedback for application errors, making debugging and user + * communication easier in GUI applications.

+ * + *

Note: This package requires a graphical environment and Swing libraries. + * It is not suitable for headless or server-side applications.

+ * + * @author Svjatoslav Agejenko + * @since 1.0 + * @see eu.svjatoslav.commons.gui.dialog.ExceptionDialog + */ +package eu.svjatoslav.commons.gui.dialog; \ No newline at end of file diff --git a/src/main/java/eu/svjatoslav/commons/string/GlobMatcher.java b/src/main/java/eu/svjatoslav/commons/string/GlobMatcher.java new file mode 100755 index 0000000..dc62e8c --- /dev/null +++ b/src/main/java/eu/svjatoslav/commons/string/GlobMatcher.java @@ -0,0 +1,205 @@ +/* + * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko. + * This project is released under Creative Commons Zero (CC0) license. + */ +package eu.svjatoslav.commons.string; + +/** + * Matches strings against glob-style wildcard patterns. + * + *

This class provides simple glob pattern matching similar to shell + * wildcards, using two special characters:

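+ *
+ * <ul>
+ *   <li>{@code *} - matches any sequence of characters, including an empty one</li>
+ *   <li>{@code ?} - matches exactly one character</li>
+ * </ul>
+ *
+ *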

All other characters match literally (case-sensitive).

+ * + *

Example patterns and matches:

+ *
{@code
+ * "*.txt" matches: "file.txt", "document.txt", ".txt"
+ * "*.txt" does NOT match: "file.TXT" (case-sensitive)
+ *
+ * "file?.txt" matches: "file1.txt", "fileA.txt", "file_.txt"
+ * "file?.txt" does NOT match: "file.txt", "file12.txt"
+ *
+ * "test*" matches: "test", "testing", "test123"
+ * "*test" matches: "test", "mytest", "123test"
+ *
+ * "*_*" matches: "a_b", "hello_world", "_"
+ * }
+ * + *

This is simpler than regular expressions but useful for basic + * pattern matching in file name filtering, simple validation, or + * user-friendly pattern input.

+ * + *

Example usage:

+ *
{@code
+ * if (GlobMatcher.match("document.TXT", "*.txt")) {
+ *     // This will NOT match (case-sensitive)
+ * }
+ *
+ * if (GlobMatcher.match("image.jpg", "*.jpg")) {
+ *     // This will match
+ * }
+ *
+ * if (GlobMatcher.match("file123", "file?*")) {
+ *     // This will match (? for one char, * for remaining)
+ * }
+ * }
+ * + *

Null handling: Both string and pattern arguments + * are handled gracefully - if either is null, the result is {@code false}.

+ */ +public class GlobMatcher { + + /** + * Private constructor to prevent instantiation. + * + *

This is a utility class with only static methods. It does not need + * to be instantiated.

+ */ + private GlobMatcher() {} + + /** + * Checks if the remaining portion of a wildcard expression consists + * only of asterisks. + * + *

This is used during matching to determine if a partial match + * can succeed. If only asterisks remain, any remaining input string + * content will match.

+ * + * @param wildcardExpression the wildcard pattern to check. + * @param wildCardPosition the starting position within the pattern. + * @return {@code true} if all remaining characters are asterisks, + * {@code false} otherwise. + */ + private static boolean checkWildCardEnd(final String wildcardExpression, + int wildCardPosition) { + for (; wildCardPosition < wildcardExpression.length(); wildCardPosition++) { + final char wildCardChar = wildcardExpression + .charAt(wildCardPosition); + if (wildCardChar != '*') + return false; + } + + return true; + } + + /** + * Tests if an input string matches a wildcard expression. + * + *

The wildcard expression uses:

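+     *
+     * <ul>
+     *   <li>{@code *} - matches any sequence of characters, including an empty one</li>
+     *   <li>{@code ?} - matches exactly one character</li>
+     * </ul>
+     *
+     *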

All other characters must match exactly (case-sensitive).

+ * + *

Matching rules:

+ * + * + *

Example:

+ *
{@code
+     * GlobMatcher.match("file.txt", "*.txt");     // true
+     * GlobMatcher.match("file.TXT", "*.txt");     // false (case)
+     * GlobMatcher.match("file1", "file?");        // true
+     * GlobMatcher.match("file", "file?");         // false (too short)
+     * GlobMatcher.match("", "*");                 // true
+     * GlobMatcher.match("", "?");                 // false
+     * GlobMatcher.match(null, "*");               // false
+     * }
+ * + * @param inputString the string to test against the pattern. + * May be null (returns false). + * @param wildcardExpression the wildcard pattern to match against. + * May be null (returns false). + * @return {@code true} if the input string matches the wildcard pattern, + * {@code false} if it does not match or either argument is null. + */ + public static boolean match(final String inputString, + final String wildcardExpression) { + + if (inputString == null) + return false; + + if (wildcardExpression == null) + return false; + + int i; + + for (i = 0; i < inputString.length(); i++) { + if (i >= wildcardExpression.length()) + return false; + final char wildCardChar = wildcardExpression.charAt(i); + if (wildCardChar == '*') + return matchPiece(inputString, i, wildcardExpression, i + 1); + if (wildCardChar != '?') + if (inputString.charAt(i) != wildCardChar) + return false; + } + + return checkWildCardEnd(wildcardExpression, i); + } + + /** + * Handles matching after encountering an asterisk in the pattern. + * + *

This method attempts to match the remaining input against the + * remaining pattern, accounting for the asterisk's ability to match + * any number of characters. It tries multiple starting positions + * to find a valid match.

+ * + * @param inputString the input string being matched. + * @param inputStringIndex the current position in the input string. + * @param wildcardExpression the wildcard pattern. + * @param wildCardExpressionIndex the position after the asterisk in the pattern. + * @return {@code true} if a valid match is found, {@code false} otherwise. + */ + private static boolean matchPiece(final String inputString, + final int inputStringIndex, final String wildcardExpression, + final int wildCardExpressionIndex) { + + int wildCardPosition = wildCardExpressionIndex; + + for (int i = inputStringIndex; i < inputString.length(); i++) { + + wildCardPosition = wildCardExpressionIndex; + + subMatchAttempt: + { + + for (int j = i; j < inputString.length(); j++) { + if (wildCardPosition >= wildcardExpression.length()) + break subMatchAttempt; + final char wildCardChar = wildcardExpression + .charAt(wildCardPosition); + + if (wildCardChar == '*') + return matchPiece(inputString, j, wildcardExpression, + wildCardPosition + 1); + + if (wildCardChar != '?') + if (inputString.charAt(j) != wildCardChar) + break subMatchAttempt; + + wildCardPosition++; + } + + return checkWildCardEnd(wildcardExpression, wildCardPosition); + } + + } + + return checkWildCardEnd(wildcardExpression, wildCardPosition); + } + +} \ No newline at end of file diff --git a/src/main/java/eu/svjatoslav/commons/string/String2.java b/src/main/java/eu/svjatoslav/commons/string/String2.java new file mode 100755 index 0000000..d15a29c --- /dev/null +++ b/src/main/java/eu/svjatoslav/commons/string/String2.java @@ -0,0 +1,579 @@ +/* + * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko. + * This project is released under Creative Commons Zero (CC0) license. 
+ */ +package eu.svjatoslav.commons.string; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * A mutable string builder optimized for prefix and suffix operations. + * + *

String2 provides a more intuitive alternative to {@link StringBuilder} + * for operations involving prefixes and suffixes. It uses a fluent API + * that allows method chaining for concise string manipulation.

+ * + *

Key features:

+ * + * + *

Example usage:

+ *
{@code
+ * String2 s = new String2("world");
+ * s.prepend("Hello, ").append("!");
+ * // Result: "Hello, world!"
+ *
+ * s.trimSuffix(1).append(" wonderful world!");
+ * // Result: "Hello, world wonderful world!"
+ *
+ * String2 path = new String2("/path/to/file");
+ * path.trimPrefixIfExists("/").prepend("./");
+ * // Result: "./path/to/file"
+ * }
+ * + *

All methods (except getters) return the same String2 instance, enabling + * fluent method chaining:

+ *
{@code
+ * String result = new String2()
+ *     .append("hello")
+ *     .appendWithSeparator(" ", "world")
+ *     .prepend("Say: ")
+ *     .append("!")
+ *     .toString();
+ * }
+ */
+public class String2 {
+
+    /**
+     * The characters that make up the string, in order, one boxed
+     * {@link Character} per list element.
+     * The list is an {@link ArrayList}: appending and removing at the end
+     * is cheap, while inserting or removing at the beginning
+     * (prepend/trimPrefix) shifts every remaining element.
+     */
+    private final List<Character> chars = new ArrayList<>();
+
+    /**
+     * Clears all characters from this string.
+     *
+     *

After calling this method, the string becomes empty (length 0). + * The fluent return allows immediate rebuilding of the string.

+ * + *

Example:

+ *
{@code
+     * String2 s = new String2("hello");
+     * s.clear().append("world");
+     * // Result: "world"
+     * }
+ * + * @return this String2 instance for fluent method chaining. + */ + public String2 clear(){ + chars.clear(); + return this; + } + + /** + * Creates a new String2 initialized with the given value. + * + *

If the value is null, an empty String2 is created.

+ * + * @param value the initial string content. May be null. + */ + public String2(String value) { + append(value); + } + + /** + * Creates an empty String2. + * + *

The string has zero length initially. Use {@link #append(String)} + * or {@link #prepend(String)} to add content.

+ */ + public String2() { + } + + /** + * Repeats the current string content the specified number of times. + * + *

The string is duplicated and appended to itself.

+ * + * + *

Example:

+ *
{@code
+     * String2 s = new String2("ab");
+     * s.repeat(3);
+     * // Result: "ababab"
+     *
+     * s.repeat(0);
+     * // Result: "" (empty)
+     * }
+ * + * @param count the number of times to repeat the content. + * If less than 1, the string is cleared. + * @return this String2 instance for fluent method chaining. + */ + public String2 repeat(int count){ + if (count > 0) { + String value = toString(); + + for (int i = 1; i < count; i++) + append(value); + } else { + clear(); + } + return this; + } + + /** + * Inserts text at the beginning of this string. + * + *

The prefix is added before all existing characters. If the + * prefix is null, no change occurs.

+ * + *

Example:

+ *
{@code
+     * String2 s = new String2("world");
+     * s.prepend("Hello, ");
+     * // Result: "Hello, world"
+     * }
+     *
+     * @param prefix the text to prepend. May be null (no effect).
+     * @return this String2 instance for fluent method chaining.
+     */
+    public String2 prepend(final String prefix) {
+        if (prefix == null || prefix.isEmpty())
+            return this;
+
+        // Box the prefix once and insert it with a single addAll call, so the
+        // existing characters are shifted once rather than once per prefix
+        // character.
+        final List<Character> prefixChars = new ArrayList<>(prefix.length());
+        for (final char c : prefix.toCharArray())
+            prefixChars.add(c);
+        chars.addAll(0, prefixChars);
+
+        return this;
+    }
+
+    /**
+     * Appends text at the end of this string.
+     *
+     *

The suffix is added after all existing characters. If the + * suffix is null, no change occurs.

+ * + *

Example:

+ *
{@code
+     * String2 s = new String2("Hello");
+     * s.append(", world!");
+     * // Result: "Hello, world!"
+     * }
+ * + * @param suffix the text to append. May be null (no effect). + * @return this String2 instance for fluent method chaining. + */ + public String2 append(final String suffix) { + if (suffix == null) + return this; + + for (final char c : suffix.toCharArray()) + chars.add(c); + + return this; + } + + /** + * Appends text with a separator, only adding the separator if + * the string is not empty. + * + *

This is useful for building comma-separated or space-separated + * lists where the separator should not appear before the first item.

+ * + *

Example:

+ *
{@code
+     * String2 s = new String2();
+     * s.appendWithSeparator(", ", "apple");
+     * s.appendWithSeparator(", ", "banana");
+     * s.appendWithSeparator(", ", "cherry");
+     * // Result: "apple, banana, cherry"
+     * }
+ * + * @param separator the separator to insert before the suffix if + * the string is not empty. Must not be null. + * @param suffix the text to append. Must not be null. + * @return this String2 instance for fluent method chaining. + */ + public String2 appendWithSeparator(String separator, final String suffix) { + if (!isEmpty()) + append(separator); + + append(suffix); + + return this; + } + + /** + * Appends the given string multiple times. + * + *

If times is zero, no change occurs. The string is appended + * repeatedly in sequence.

+ * + *

Example:

+ *
{@code
+     * String2 s = new String2();
+     * s.append("x", 5);
+     * // Result: "xxxxx"
+     * }
+ * + * @param s the string to append repeatedly. Must not be null. + * @param times the number of times to append. If zero, no change. + * @return this String2 instance for fluent method chaining. + */ + public String2 append(String s, int times) { + for (int i = 0; i < times; i++) append(s); + return this; + } + + + /** + * Removes the specified number of characters from the beginning of this string. + * + *

If cutAmount exceeds the string length, the entire string is cleared. + * If cutAmount is zero, no change occurs.

+ * + *

Example:

+ *
{@code
+     * String2 s = new String2("hello world");
+     * s.trimPrefix(6);
+     * // Result: "world"
+     *
+     * s.trimPrefix(500);  // exceeds length
+     * // Result: "" (empty)
+     * }
+ * + * @param cutAmount the number of characters to remove from the beginning. + * @return this String2 instance for fluent method chaining. + */ + public String2 trimPrefix(final int cutAmount) { + + int actualCutAmount = cutAmount; + + if (actualCutAmount > getLength()) + actualCutAmount = getLength(); + + if (actualCutAmount > 0) chars.subList(0, actualCutAmount).clear(); + + return this; + } + + /** + * Removes the specified prefix if this string starts with it. + * + *

If the string does not have the given prefix, no change occurs. + * If the prefix is null, no change occurs.

+ * + *

Example:

+ *
{@code
+     * String2 s = new String2("prefix_content");
+     * s.trimPrefixIfExists("prefix_");
+     * // Result: "content"
+     *
+     * s.trimPrefixIfExists("nonexistent");
+     * // Result: "content" (unchanged)
+     * }
+ * + * @param prefix the prefix to remove if present. May be null (no effect). + * @return this String2 instance for fluent method chaining. + */ + public String2 trimPrefixIfExists(String prefix) { + if (prefix == null) + return this; + + if (hasPrefix(prefix)) + trimPrefix(prefix.length()); + + return this; + } + + /** + * Removes the specified suffix if this string ends with it. + * + *

If the string does not have the given suffix, no change occurs.

+ * + *

Null and length handling: a null suffix, or a suffix longer than this string, is treated as not present and leaves the string unchanged.

+ * + *

Example:

+ *
{@code
+     * String2 s = new String2("content_suffix");
+     * s.trimSuffixIfExists("_suffix");
+     * // Result: "content"
+     * }
+     *
+     * @param suffix the suffix to remove if present. May be null or longer
+     *               than the string (no effect in either case).
+     * @return this String2 instance for fluent method chaining.
+     */
+    public String2 trimSuffixIfExists(String suffix) {
+        // Check the preconditions up front so that hasSuffix is only ever
+        // called with a non-null suffix that fits inside the string.
+        if (suffix == null || suffix.length() > getLength())
+            return this;
+
+        if (hasSuffix(suffix))
+            trimSuffix(suffix.length());
+
+        return this;
+    }
+
+    /**
+     * Removes the specified number of characters from the end of this string.
+     *
+     *

If charsToTrim exceeds the string length, the entire string is cleared. + * If charsToTrim is zero, no change occurs.

+ * + *

Example:

+ *
{@code
+     * String2 s = new String2("hello world");
+     * s.trimSuffix(6);
+     * // Result: "hello"
+     *
+     * s.trimSuffix(500);  // exceeds length
+     * // Result: "" (empty)
+     * }
+ * + * @param charsToTrim the number of characters to remove from the end. + * @return this String2 instance for fluent method chaining. + */ + public String2 trimSuffix(int charsToTrim) { + + if (charsToTrim > chars.size()) { + chars.clear(); + return this; + } + + for (int i = 0; i < charsToTrim; i++) + chars.remove(chars.size() - 1); + + return this; + } + + /** + * Checks if this string ends with the specified suffix. + * + *

A suffix longer than this string can never match, so {@code false} is returned in that case.

+ * + *

Example:

+ *
{@code
+     * String2 s = new String2("filename.txt");
+     * s.hasSuffix(".txt");  // true
+     * s.hasSuffix("txt");   // true
+     * s.hasSuffix("my_filename.txt");  // false (longer than the string)
+     * }
+     *
+     * @param suffix the suffix to check for. Must not be null.
+     * @return {@code true} if this string ends with the suffix,
+     *         {@code false} otherwise, including when the suffix is longer
+     *         than the string.
+     */
+    public boolean hasSuffix(String suffix) {
+        final int suffixStart = getLength() - suffix.length();
+
+        // A negative start means the suffix is longer than this string: it
+        // cannot match, and must not be used as a list index.
+        if (suffixStart < 0)
+            return false;
+
+        return contains(suffix, suffixStart);
+    }
+
+    /**
+     * Checks if this string starts with the specified prefix.
+     *
+     *

Example:

+ *
{@code
+     * String2 s = new String2("Hello, world");
+     * s.hasPrefix("Hello");  // true
+     * s.hasPrefix("hello");  // false (case-sensitive)
+     * }
+ * + * @param prefix the prefix to check for. Must not be null. + * @return {@code true} if this string starts with the prefix, + * {@code false} otherwise. + */ + public boolean hasPrefix(String prefix) { + return contains(prefix, 0); + } + + /** + * Checks if a fragment appears at the specified position in this string. + * + *

This is a substring check starting at the given index. The fragment + * must match exactly (case-sensitive) at that position.

+ * + *

If the index is negative, or the position plus fragment length exceeds the string length, {@code false} is returned.

+ * + *

Example:

+ *
{@code
+     * String2 s = new String2("hello world");
+     * s.contains("hello", 0);   // true
+     * s.contains("world", 6);   // true
+     * s.contains("lo wo", 3);   // true
+     * s.contains("hello", 1);   // false
+     * }
+     *
+     * @param fragment the text to check for. Must not be null.
+     * @param index the starting position to check from (0-based).
+     * @return {@code true} if the fragment appears at that position,
+     *         {@code false} otherwise, including when the position lies
+     *         outside the string.
+     */
+    public boolean contains(String fragment, int index) {
+        // Written as a subtraction so that a very large index cannot overflow
+        // the bounds check; a negative index is rejected here instead of
+        // being passed on to the list.
+        if (index < 0 || index > chars.size() - fragment.length())
+            return false;
+
+        for (int i = 0; i < fragment.length(); i++)
+            if (chars.get(index + i) != fragment.charAt(i))
+                return false;
+
+        return true;
+    }
+
+    /**
+     * Adjusts this string to have exactly the specified length.
+     *
+     *

If the string is longer than the target, excess characters are + * removed from the end. If the string is shorter, spaces are added + * at the end. If the length already matches, no change occurs.

+ * + *

Example:

+ *
{@code
+     * String2 s = new String2("12345678");
+     * s.enforceLength(5);
+     * // Result: "12345"
+     *
+     * s.enforceLength(8);
+     * // Result: "12345   " (3 spaces added)
+     * }
+ * + *

This is useful for creating fixed-width columns or formatting + * text for tabular output.

+ * + * @param targetLength the exact length to enforce. + * @return this String2 instance for fluent method chaining. + */ + public String2 enforceLength(final int targetLength) { + if (getLength() > targetLength) + chars.subList(targetLength, getLength()).clear(); + else if (getLength() < targetLength) { + final int charactersToAdd = targetLength - getLength(); + for (int i = 0; i < charactersToAdd; i++) + chars.add(' '); + } + + return this; + } + + /** + * Returns the current length of this string. + * + * @return the number of characters in this string. + */ + public int getLength() { + return chars.size(); + } + + /** + * Returns a substring of this string. + * + *

The substring includes characters from startInclusive to + * endExclusive (exclusive end). This follows the standard Java + * substring convention.

+ * + *

Example:

+ *
{@code
+     * String2 s = new String2("hello world");
+     * s.getSubString(0, 5);   // "hello"
+     * s.getSubString(6, 11);  // "world"
+     * }
+ * + * @param startInclusive the starting index (inclusive, 0-based). + * @param endExclusive the ending index (exclusive). Must be greater + * than or equal to startInclusive. + * @return the substring as a new String object. + */ + public String getSubString(final int startInclusive, final int endExclusive) { + final char[] charArray = new char[endExclusive - startInclusive]; + + int j = 0; + for (int i = startInclusive; i < endExclusive; i++) { + charArray[j] = chars.get(i); + j++; + } + return new String(charArray); + } + + /** + * Checks if this string is empty. + * + * @return {@code true} if this string has zero length, + * {@code false} otherwise. + */ + public boolean isEmpty() { + return chars.size() == 0; + } + + /** + * Returns the string content as a standard Java String. + * + *

This creates a new String object containing all characters + * currently in this String2.

+ * + * @return the complete string content as a String. + */ + @Override + public String toString() { + return getSubString(0, chars.size()); + } + + + /** + * Extracts regex capture groups from a string. + * + *

This static utility method applies a regular expression to a + * string and returns all capture groups as an array.

+ * + *

Example:

+ *
{@code
+     * String[] groups = String2.getGroups(
+     *     "name: John, age: 25",
+     *     "name: (\\w+), age: (\\d+)");
+     * // groups[0] = "John"
+     * // groups[1] = "25"
+     * }
+ * + *

Note: The regex must match somewhere in the string, otherwise an {@link IllegalStateException} is thrown. The returned array contains only capture groups (group 1 onwards), not the entire match. A group that did not take part in the match yields a {@code null} element.

+     *
+     * @param s the string to search. Must not be null.
+     * @param regexp the regular expression with capture groups.
+     *               Must not be null and must match the string.
+     * @return an array of captured group strings. Length equals the
+     *         number of capture groups in the regexp.
+     * @throws IllegalStateException if the regexp does not match the string.
+     */
+    public static String[] getGroups(String s, String regexp){
+        final Matcher matcher = Pattern.compile(regexp).matcher(s);
+
+        // find() reports a failed search through its return value; check it
+        // so the failure carries a message naming the regexp and the input.
+        if (!matcher.find())
+            throw new IllegalStateException(
+                    "Regular expression '" + regexp + "' does not match: " + s);
+
+        final String[] result = new String[matcher.groupCount()];
+        for (int i = 0; i < result.length; i++)
+            result[i] = matcher.group(i + 1);
+
+        return result;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/eu/svjatoslav/commons/string/package-info.java b/src/main/java/eu/svjatoslav/commons/string/package-info.java
new file mode 100644
index 0000000..ae22a06
--- /dev/null
+++ b/src/main/java/eu/svjatoslav/commons/string/package-info.java
@@ -0,0 +1,32 @@
+/*
+ * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko.
+ * This project is released under Creative Commons Zero (CC0) license.
+ */
+
+/**
+ * Provides utility classes for string manipulation and pattern matching.
+ *
+ *

This package contains classes for advanced string operations:

+ * + * + * + *

The {@link eu.svjatoslav.commons.string.String2} class provides a more intuitive + * alternative to {@link java.lang.StringBuilder} for operations involving prefixes + * and suffixes, with methods like {@code trimPrefix()}, {@code trimSuffix()}, + * {@code prepend()}, and {@code appendWithSeparator()}.

+ * + *

The {@link eu.svjatoslav.commons.string.GlobMatcher} implements glob-style pattern + * matching similar to shell wildcards, useful for file name filtering and simple + * pattern matching without regular expressions.

+ * + * @author Svjatoslav Agejenko + * @since 1.0 + * @see eu.svjatoslav.commons.string.String2 + * @see eu.svjatoslav.commons.string.GlobMatcher + */ +package eu.svjatoslav.commons.string; \ No newline at end of file diff --git a/src/main/java/eu/svjatoslav/commons/string/tokenizer/InvalidSyntaxException.java b/src/main/java/eu/svjatoslav/commons/string/tokenizer/InvalidSyntaxException.java new file mode 100755 index 0000000..b913d8e --- /dev/null +++ b/src/main/java/eu/svjatoslav/commons/string/tokenizer/InvalidSyntaxException.java @@ -0,0 +1,57 @@ +/* + * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko. + * This project is released under Creative Commons Zero (CC0) license. + */ +package eu.svjatoslav.commons.string.tokenizer; + +/** + * Exception thrown when token parsing encounters unexpected content. + * + *

This exception is thrown by the tokenizer when expectations fail, + * such as:

+ * + * + * + *

Example handling:

+ *
{@code
+ * try {
+ *     tokenizer.expectAndConsumeNextStringToken("if");
+ *     // Parse if statement...
+ * } catch (InvalidSyntaxException e) {
+ *     System.err.println("Syntax error: " + e.getMessage());
+ *     // Error recovery or abort
+ * }
+ * }
+ * + *

The exception message describes what was expected and what was + * actually encountered, useful for error reporting to users.

+ * + * @see Tokenizer + */ +public class InvalidSyntaxException extends Exception { + + /** + * Serial version UID for serialization compatibility. + */ + private static final long serialVersionUID = 88294980027680555L; + + /** + * Creates a new InvalidSyntaxException with a descriptive message. + * + *

The message should describe what was expected and what was found, + * to help users understand the syntax error.

+ * + * @param cause a description of the syntax error. Must not be null. + */ + public InvalidSyntaxException(final String cause) { + super(cause); + } + +} \ No newline at end of file diff --git a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Terminator.java b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Terminator.java new file mode 100755 index 0000000..6480471 --- /dev/null +++ b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Terminator.java @@ -0,0 +1,192 @@ +/* + * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko. + * This project is released under Creative Commons Zero (CC0) license. + */ +package eu.svjatoslav.commons.string.tokenizer; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Defines a token boundary using a regular expression pattern. + * + *

A Terminator specifies how to identify and handle token boundaries + * in the tokenizer. Each terminator has:

+ * + * + * + *

Termination strategies:

+ * + * + *

Example:

+ *
{@code
+ * // Drop whitespace (don't return it as a token)
+ * Terminator whitespace = tokenizer.addTerminator(DROP, "\\s+");
+ *
+ * // Preserve quoted strings (return them as tokens)
+ * Terminator strings = tokenizer.addTerminator(PRESERVE, "\".*?\"", "string");
+ *
+ * // Preserve numbers with a group name
+ * Terminator numbers = tokenizer.addTerminator(PRESERVE, "\\d+", "number");
+ *
+ * // Temporarily disable a terminator
+ * whitespace.active = false;  // Now whitespace will be returned as tokens
+ * whitespace.active = true;   // Back to dropping whitespace
+ * }
+ * + *

Patterns are anchored to match only at the current position (the "^" + * anchor is prepended automatically).

+ * + * @see Tokenizer + * @see TokenizerMatch + */ +public class Terminator { + + /** + * The regular expression pattern that identifies this token boundary. + * + *

This is stored for reference (used in toString). The actual pattern + * used for matching is stored in {@link #pattern} with "^" prepended.

+ */ + String regexp; + + /** + * The strategy for handling matched tokens. + * + *

Determines whether matched text is returned as a token (PRESERVE) + * or silently discarded (DROP).

+ * + * @see TerminationStrategy + */ + public final TerminationStrategy termination; + + /** + * An optional group name for categorizing this token type. + * + *

When set, matched tokens can be identified by this group name + * using {@link TokenizerMatch#isGroup(String)}.

+ * + *

May be null for uncategorized tokens.

+ */ + public final String group; + + /** + * Flag indicating whether this terminator is active. + * + *

When false, this terminator is skipped during matching. Set this + * to temporarily disable a terminator without removing it.

+ * + *

Default value is {@code true}.

+ */ + public boolean active = true; + + /** + * The compiled regex pattern used for matching. + * + *

This is the regexp with "^" prepended to anchor it at the start. + * The pattern is compiled once for efficiency.

+ */ + public final Pattern pattern; + + /** + * Creates a new terminator with the specified configuration. + * + *

The regex pattern is anchored with "^" to match only at the + * current position in the source string.

+ * + * @param termination how to handle matched tokens (PRESERVE or DROP). + * @param regexp the regex pattern to match tokens. Must not be null. + * @param group the group name for categorizing this token type. + * May be null. + */ + public Terminator(TerminationStrategy termination, String regexp, String group) { + this.termination = termination; + this.group = group; + this.regexp = regexp; + this.pattern = Pattern.compile("^"+regexp); + } + + /** + * Creates a matcher for this terminator at the specified position. + * + *

The matcher is configured to search from the given index to the + * end of the source string, ensuring matches only occur at that + * exact position (due to the "^" anchor in the pattern).

+ * + * @param source the source string to match against. Must not be null. + * @param index the starting position for matching. + * @return a Matcher configured to search from index to the end. + */ + public Matcher match(String source, int index) { + Matcher matcher = pattern.matcher(source); + matcher.region(index, source.length()); + return matcher; + } + + /** + * Returns a string representation of this terminator. + * + *

Includes the regexp, termination strategy, group name, and active status.

+ * + * @return a descriptive string for debugging. + */ + @Override + public String toString() { + return "Terminator{" + + "regexp='" + regexp + '\'' + + ", termination=" + termination + + ", group='" + group + '\'' + + ", active=" + active + + '}'; + } + + /** + * Defines how matched tokens are handled by the tokenizer. + */ + public enum TerminationStrategy { + /** + * Preserve the matched token and return it for processing. + * + *

Use this for tokens you want to capture and handle in your + * parsing logic, such as keywords, operators, literals, or + * other syntax elements.

+ * + *

Example use cases:

+ * + */ + PRESERVE, + + /** + * Drop the matched token silently without returning it. + * + *

Use this for tokens that act as separators or noise that + * you don't need to process, such as whitespace or comments.

+ * + *

Example use cases:

+ * + * + *

Dropped tokens still separate other tokens - they're just + * not returned by getNextToken().

+ */ + DROP + } +} \ No newline at end of file diff --git a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java new file mode 100755 index 0000000..1304283 --- /dev/null +++ b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java @@ -0,0 +1,548 @@ +/* + * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko. + * This project is released under Creative Commons Zero (CC0) license. + */ +package eu.svjatoslav.commons.string.tokenizer; + +import java.util.ArrayList; +import java.util.List; +import java.util.Stack; +import java.util.regex.Matcher; +import java.util.stream.Stream; + +import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE; +import static java.lang.System.out; + +/** + * A regex-based tokenizer for parsing structured text into tokens. + * + *

The Tokenizer breaks down source text into tokens based on regular + * expression patterns called "terminators". Terminators define how to + * identify and handle token boundaries:

+ * + * + * + *

Key features:

+ * + * + *

Example usage:

+ *
{@code
+ * Tokenizer tokenizer = new Tokenizer("hello, world! 123");
+ * tokenizer.addTerminator(DROP, "\\s+");        // Drop whitespace
+ * tokenizer.addTerminator(PRESERVE, "\\w+");    // Preserve words
+ * tokenizer.addTerminator(PRESERVE, ",");       // Preserve comma
+ * tokenizer.addTerminator(PRESERVE, "!");       // Preserve exclamation
+ * tokenizer.addTerminator(PRESERVE, "\\d+");    // Preserve numbers
+ *
+ * while (tokenizer.hasMoreContent()) {
+ *     TokenizerMatch match = tokenizer.getNextToken();
+ *     System.out.println(match.token);
+ * }
+ * // Output: hello, world, !, 123
+ * }
+ * + *

The tokenizer maintains a history stack, allowing you to unread + * tokens and backtrack during parsing:

+ *
{@code
+ * TokenizerMatch first = tokenizer.getNextToken();
+ * TokenizerMatch second = tokenizer.getNextToken();
+ * tokenizer.unreadToken();  // Go back one token
+ * tokenizer.unreadToken();  // Go back another token
+ * TokenizerMatch again = tokenizer.getNextToken();  // Same as first
+ * }
+ * + *

You can also peek without consuming:

+ *
{@code
+ * TokenizerMatch peeked = tokenizer.peekNextToken();  // Look ahead
+ * TokenizerMatch actual = tokenizer.getNextToken();   // Same as peeked
+ * }
+ * + * @see Terminator + * @see TokenizerMatch + * @see InvalidSyntaxException + */ +public class Tokenizer { + + /** + * Stack of token indexes for backtracking. + * + *

Each time a token is consumed, the current index is pushed onto + * this stack. Calling {@link #unreadToken()} pops the stack and + * restores the previous position, allowing the same token to be + * read again.

+     */
+    private final Stack<Integer> tokenIndexes = new Stack<>();
+
+    /**
+     * List of terminators that define token boundaries.
+     *
+     *

Terminators are checked in order during tokenization. When a + * terminator matches at the current position, it determines how + * the match is handled (preserved or dropped).

+     */
+    private final List<Terminator> terminators = new ArrayList<>();
+
+    /**
+     * The source string being tokenized.
+     *
+     *

This is the text that will be broken down into tokens. Can be + * null initially and set later via {@link #setSource(String)}.

+ */ + private String source; + + /** + * Current reading position within the source string. + * + *

This index advances as tokens are consumed. It starts at 0 + * and moves forward through the source string.

+ */ + private int currentIndex = 0; + + /** + * Creates a new tokenizer for the specified source string. + * + *

The source string will be processed when {@link #getNextToken()} + * is called. Add terminators before calling getNextToken() to + * define how tokens should be identified.

+ * + * @param source the text to tokenize. May be null (use setSource later). + */ + public Tokenizer(final String source) { + this.source = source; + } + + /** + * Creates an empty tokenizer without a source string. + * + *

Use {@link #setSource(String)} to provide text for tokenization + * before calling {@link #getNextToken()}.

+ */ + public Tokenizer() { + } + + /** + * Sets or replaces the source string to tokenize. + * + *

This resets the tokenizer state: the reading position is set to 0, + * and the token history stack is cleared. Use this to tokenize a new + * string with the same terminator configuration.

+ * + *

Example:

+ *
{@code
+     * Tokenizer tokenizer = new Tokenizer();
+     * tokenizer.addTerminator(DROP, "\\s+");
+     *
+     * tokenizer.setSource("first string");
+     * // tokenize first string...
+     *
+     * tokenizer.setSource("second string");
+     * // tokenize second string with same rules...
+     * }
+     *
+     * @param source the new text to tokenize. May be null.
+     * @return this Tokenizer instance for fluent method chaining.
+     */
+    public Tokenizer setSource(String source) {
+        // Forget all positions recorded for the previous source ...
+        tokenIndexes.clear();
+        // ... and start reading the new one from its beginning.
+        currentIndex = 0;
+        this.source = source;
+        return this;
+    }
+
+    /**
+     * Adds a terminator with a termination strategy and regex pattern.
+     *
+     *

The terminator will match tokens based on the regex pattern. + * The termination strategy determines whether matched tokens are + * preserved (returned) or dropped (silently discarded).

+ * + *

The pattern is anchored to match only at the current position + * (prepended with "^").

+ * + * @param terminationStrategy how to handle matched tokens + * (PRESERVE or DROP). + * @param regexp the regex pattern to match tokens. + * @return the created Terminator object, which can be further configured + * (e.g., setting the active flag or group). + */ + public Terminator addTerminator(final Terminator.TerminationStrategy terminationStrategy, String regexp) { + Terminator terminator = new Terminator(terminationStrategy, regexp,null); + terminators.add(terminator); + return terminator; + } + + /** + * Adds a terminator with a termination strategy, regex pattern, and group name. + * + *

The group name allows categorizing tokens by type, which can be + * checked using {@link TokenizerMatch#isGroup(String)}.

+ * + *

Example:

+ *
{@code
+     * tokenizer.addTerminator(PRESERVE, "\\d+", "number");
+     * tokenizer.addTerminator(PRESERVE, "\\w+", "word");
+     *
+     * TokenizerMatch match = tokenizer.getNextToken();
+     * if (match.isGroup("number")) {
+     *     // Handle number token...
+     * }
+     * }
+ * + * @param terminationStrategy how to handle matched tokens + * (PRESERVE or DROP). + * @param regexp the regex pattern to match tokens. + * @param group the group name for categorizing this token type. + * May be null. + * @return the created Terminator object. + */ + public Terminator addTerminator(final Terminator.TerminationStrategy terminationStrategy, + String regexp, String group) { + Terminator terminator = new Terminator(terminationStrategy, regexp,group); + terminators.add(terminator); + return terminator; + } + + + /** + * Adds a pre-configured terminator to this tokenizer. + * + *

Use this when you need to create a Terminator with custom + * configuration before adding it.

+     *
+     * @param terminator the terminator to add. Must not be null.
+     * @return the same terminator that was added.
+     * @throws NullPointerException if {@code terminator} is null.
+     */
+    public Terminator addTerminator(Terminator terminator) {
+        // Reject null here: a null entry would otherwise only fail later, when
+        // findTerminatorMatch() dereferences every registered terminator.
+        java.util.Objects.requireNonNull(terminator, "terminator must not be null");
+        terminators.add(terminator);
+        return terminator;
+    }
+
+    /**
+     * Consumes the next token and verifies it matches the expected value.
+     *
+     *

This is a convenience method for parsing where you expect a + * specific token at a specific position. If the token doesn't match, + * an exception is thrown.

+ * + *

Example:

+ *
{@code
+     * tokenizer.expectAndConsumeNextStringToken("if");
+     * // Consumes "if" token, throws if next token is not "if"
+     * }
+ * + * @param value the expected token value. Must not be null. + * @throws InvalidSyntaxException if the next token does not match + * the expected value. + */ + public void expectAndConsumeNextStringToken(final String value) + throws InvalidSyntaxException { + final TokenizerMatch match = getNextToken(); + if (!value.equals(match.token)) + throw new InvalidSyntaxException("Expected \"" + value + + "\" but got \"" + match.token + "\" instead."); + } + + /** + * Consumes the next token and verifies it was matched by the expected terminator. + * + *

This is useful when you need to ensure a specific terminator matched + * the token, not just that the token has a certain value.

+ * + *

Example:

+ *
{@code
+     * Terminator stringTerminator = tokenizer.addTerminator(PRESERVE, "\".*\"");
+     * tokenizer.expectAndConsumeNextTerminatorToken(stringTerminator);
+     * }
+ * + * @param terminator the expected terminator that should have matched. + * @return the TokenizerMatch containing the matched token. + * @throws InvalidSyntaxException if the next token was matched by + * a different terminator. + */ + public TokenizerMatch expectAndConsumeNextTerminatorToken(Terminator terminator) + throws InvalidSyntaxException { + final TokenizerMatch match = getNextToken(); + + if (match.terminator != terminator) + throw new InvalidSyntaxException("Expected terminator \"" + terminator + + "\" but got \"" + match.terminator + "\" instead."); + + return match; + } + + + /** + * Returns the next token from the source string. + * + *

This method advances the reading position. The token is identified + * based on the configured terminators:

+ *
    + *
  • If a PRESERVE terminator matches, that matched text is returned
  • + *
  • If a DROP terminator matches, it is discarded and the next token is sought
  • + *
  • If no terminator matches, characters accumulate until a terminator matches
  • + *
+ * + *

Example:

+ *
{@code
+     * TokenizerMatch match = tokenizer.getNextToken();
+     * if (match != null) {
+     *     System.out.println(match.token);
+     * }
+     * }
+     *
+     * @return the next TokenizerMatch, or {@code null} if the end of the
+     *         source string is reached or no source has been set.
+     */
+    public TokenizerMatch getNextToken() {
+        // Record the position first so unreadToken() stays balanced even when null is returned.
+        tokenIndexes.push(currentIndex);
+
+        // A tokenizer without a source has nothing to read: behave like end of input
+        // (as hasMoreContent() does) instead of failing with a NullPointerException.
+        if (source == null)
+            return null;
+
+        StringBuilder tokenAccumulator = new StringBuilder();
+
+        while (true) {
+
+            if (currentIndex >= source.length()) { // reached end of input
+                if (hasAccumulatedToken(tokenAccumulator))
+                    return new TokenizerMatch(tokenAccumulator.toString(), null, null, this);
+                else
+                    return null;
+            }
+
+            TokenizerMatch matchResult = findTerminatorMatch();
+            if (matchResult == null) {
+                // No terminator matches here: the character belongs to a plain-text token.
+                tokenAccumulator.append(source.charAt(currentIndex));
+                currentIndex++;
+                continue;
+            }
+
+            if (matchResult.terminator.termination == PRESERVE) {
+                // Pending plain text is returned first; the terminator is not consumed yet,
+                // so the next call will match it again.
+                if (hasAccumulatedToken(tokenAccumulator))
+                    return new TokenizerMatch(tokenAccumulator.toString(), null, null, this);
+
+                currentIndex = matchResult.matcher.end();
+                return matchResult;
+            } else {
+                // Not preserved: skip the matched text; it only ends a pending plain-text token.
+                currentIndex = matchResult.matcher.end();
+
+                if (hasAccumulatedToken(tokenAccumulator))
+                    return new TokenizerMatch(tokenAccumulator.toString(), null, null, this);
+            }
+        }
+    }
+
+    /**
+     * Finds a terminator that matches at the current position.
+     *
+     *

This checks all active terminators (in order) to see if any + * matches at the current index. The first matching terminator + * is returned.

+ * + *

Terminators with {@code active = false} are skipped.

+ * + * @return a TokenizerMatch if a terminator matches, or {@code null} + * if no terminator matches at the current position. + */ + public TokenizerMatch findTerminatorMatch(){ + for (Terminator terminator : terminators) + if (terminator.active) { + Matcher match = terminator.match(source, currentIndex); + if (match.find()) { + String token = source.substring(match.start(), match.end()); + return new TokenizerMatch(token, terminator, match, this); + } + } + return null; + } + + /** + * Checks if the token accumulator has any content. + * + *

This is used internally to determine if accumulated characters + * should be returned as a token.

+ * + * @param tokenAccumulator the StringBuilder containing accumulated characters. + * @return {@code true} if there are accumulated characters, {@code false} otherwise. + */ + private boolean hasAccumulatedToken(StringBuilder tokenAccumulator) { + return tokenAccumulator.length() > 0; + } + + /** + * Checks if there is more content to read. + * + *

Returns true if the current position is before the end of the + * source string. Note that even if this returns true, getNextToken() + * might return null if remaining content is dropped by terminators.

+     *
+     * @return {@code true} if there is more content, {@code false} if at
+     *         the end of source or source is null.
+     */
+    public boolean hasMoreContent() {
+        // A missing source counts as "no content"; otherwise compare position with length.
+        return source != null && currentIndex < source.length();
+    }
+
+    /**
+     * Consumes the next token if it matches the expected value.
+     *
+     *

If the next token matches, it is consumed and {@code true} is returned. + * If it doesn't match, the token is unread and {@code false} is returned.

+ * + *

Example:

+ *
{@code
+     * if (tokenizer.consumeIfNextToken("else")) {
+     *     // Handle else clause
+     * } else {
+     *     // Token was not "else", position unchanged
+     * }
+     * }
+ * + * @param token the expected token value. Must not be null. + * @return {@code true} if the next token matched and was consumed, + * {@code false} otherwise (position unchanged). + * @throws InvalidSyntaxException if parsing fails. + */ + public boolean consumeIfNextToken(final String token) throws InvalidSyntaxException { + if (token.equals(getNextToken().token)) + return true; + + unreadToken(); + return false; + } + + /** + * Returns the next token without consuming it. + * + *

This reads the next token, immediately unreads it to restore the + * position, and returns it. Use this to examine what's coming + * without advancing.

+ * + *

Example:

+ *
{@code
+     * TokenizerMatch peeked = tokenizer.peekNextToken();
+     * System.out.println("Next will be: " + peeked.token);
+     * TokenizerMatch actual = tokenizer.getNextToken();  // Same as peeked
+     * }
+     *
+     * @return the next TokenizerMatch without advancing the position.
+     * @throws InvalidSyntaxException if parsing fails.
+     */
+    public TokenizerMatch peekNextToken() throws InvalidSyntaxException {
+        // Read ahead by one token ...
+        final TokenizerMatch upcoming = getNextToken();
+        // ... then rewind, so the same token is produced by the next read.
+        unreadToken();
+        return upcoming;
+    }
+
+    /**
+     * Checks if the next token is one of the specified possibilities.
+     *
+     *

This peeks at the next token and checks if its value equals any + * of the given strings. The position is unchanged after this call.

+ * + *

Example:

+ *
{@code
+     * if (tokenizer.peekIsOneOf("if", "else", "while")) {
+     *     // Next token is a control keyword
+     * }
+     * }
+ * + * @param possibilities the token values to check against. + * Must not be null or empty. + * @return {@code true} if the next token matches one of the possibilities, + * {@code false} otherwise. + * @throws InvalidSyntaxException if parsing fails. + */ + public boolean peekIsOneOf(String... possibilities) throws InvalidSyntaxException { + String nextToken = peekNextToken().token; + return Stream.of(possibilities).anyMatch(possibility -> possibility.equals(nextToken)); + } + + /** + * Verifies the next token is NOT one of the specified possibilities. + * + *

If the next token matches any possibility, an exception is thrown. + * Use this for negative assertions in parsing.

+ * + *

Example:

+ *
{@code
+     * tokenizer.peekExpectNoneOf("}", "end");
+     * // Throws if next token is } or end
+     * }
+ * + * @param possibilities the token values that should NOT appear next. + * @throws InvalidSyntaxException if the next token matches any possibility. + */ + public void peekExpectNoneOf(String... possibilities) throws InvalidSyntaxException { + if (peekIsOneOf(possibilities)) + throw new InvalidSyntaxException("Not expected \"" + peekNextToken().token + "\" here."); + } + + /** + * Unreads the most recently consumed token. + * + *

This restores the reading position to before the last token was + * read. The token can be read again with getNextToken().

+ * + *

You can unread multiple times to backtrack further:

+ *
{@code
+     * TokenizerMatch first = tokenizer.getNextToken();
+     * TokenizerMatch second = tokenizer.getNextToken();
+     * TokenizerMatch third = tokenizer.getNextToken();
+     *
+     * tokenizer.unreadToken();  // Back to after second
+     * tokenizer.unreadToken();  // Back to after first
+     *
+     * TokenizerMatch again = tokenizer.getNextToken();  // Same as second
+     * }
+     *
+     * @throws java.util.EmptyStackException if no saved position is left to return to.
+     */
+    public void unreadToken() {
+        // Positions are pushed by getNextToken() and skipUntilDataEnd(); pop the latest one.
+        currentIndex = tokenIndexes.pop();
+    }
+
+    /**
+     * Prints all remaining tokens for debugging purposes.
+     *
+     *

This reads and prints all remaining tokens without permanently + * consuming them. After printing, the position is restored to the + * original location.

+ * + *

Output is printed to stdout with each token on a new line.

+ */ + public void enlistRemainingTokens(){ + int redTokenCount = 0; + + while (hasMoreContent()) { + out.println(getNextToken().toString()); + redTokenCount++; + } + + // restore pointer to original location + for (int i = 0; i< redTokenCount; i++ ) unreadToken(); + } + + + /** + * Skips to the end of the source string without consuming tokens. + * + *

This advances directly to the end, skipping all remaining content. + * After calling this, {@link #hasMoreContent()} will return {@code false}.

+ * + *

The current position is saved on the stack, so you can unread + * to restore it if needed.

+ */ + public void skipUntilDataEnd() { + tokenIndexes.push(currentIndex); + currentIndex = source.length(); + } + +} \ No newline at end of file diff --git a/src/main/java/eu/svjatoslav/commons/string/tokenizer/TokenizerMatch.java b/src/main/java/eu/svjatoslav/commons/string/tokenizer/TokenizerMatch.java new file mode 100755 index 0000000..f1951c4 --- /dev/null +++ b/src/main/java/eu/svjatoslav/commons/string/tokenizer/TokenizerMatch.java @@ -0,0 +1,211 @@ +/* + * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko. + * This project is released under Creative Commons Zero (CC0) license. + */ +package eu.svjatoslav.commons.string.tokenizer; + +import java.util.regex.Matcher; + +/** + * Represents a matched token from the tokenizer. + * + *

TokenizerMatch contains all information about a token that was + * extracted from the source string:

+ * + *
    + *
  • {@link #token} - The actual text content of the token
  • + *
  • {@link #terminator} - The Terminator that identified this token
  • + *
  • {@link #matcher} - The regex Matcher used for matching
  • + *
+ * + *

Key methods:

+ *
    + *
  • {@link #isGroup(String)} - Check if this token belongs to a specific group
  • + *
  • {@link #getRegExpGroups()} - Extract regex capture groups from the match
  • + *
  • {@link #getTokenizer()} - Get the tokenizer that produced this match
  • + *
+ * + *

Example usage:

+ *
{@code
+ * TokenizerMatch match = tokenizer.getNextToken();
+ *
+ * System.out.println("Token: " + match.token);
+ *
+ * if (match.isGroup("number")) {
+ *     int value = Integer.parseInt(match.token);
+ * }
+ *
+ * if (match.isGroup("string")) {
+ *     String[] groups = match.getRegExpGroups();
+ *     // groups[0] might be the string content without quotes
+ * }
+ * }
+ * + *

For tokens that were accumulated text (not matched by a terminator), + * the terminator and matcher fields will be null.

+ * + * @see Tokenizer + * @see Terminator + */ +public class TokenizerMatch { + + /** + * The text content of the matched token. + * + *

This is the actual substring from the source that was identified + * as a token. For accumulated text (no terminator match), this contains + * all characters accumulated before a terminator was found.

+ */ + public final String token; + + /** + * The Terminator that identified this token. + * + *

May be null if this token was accumulated text rather than + * matched by a terminator. When not null, you can check the + * terminator's group to categorize the token.

+ */ + public final Terminator terminator; + + /** + * The regex Matcher used to identify this token. + * + *

May be null if this token was accumulated text. When not null, + * you can use this to extract capture groups from the match.

+ */ + public final Matcher matcher; + + /** + * The tokenizer that produced this match. + * + *

This is used internally for backtracking. Can also be useful + * to continue tokenization from within token handling code.

+ */ + private Tokenizer tokenizer; + + /** + * Creates a new TokenizerMatch with all components. + * + * @param token the matched text. May be empty but should not be null. + * @param terminator the Terminator that matched this token. May be null + * for accumulated text tokens. + * @param matcher the regex Matcher used for matching. May be null for + * accumulated text tokens. + * @param tokenizer the Tokenizer that produced this match. + */ + public TokenizerMatch(final String token, final Terminator terminator, Matcher matcher, Tokenizer tokenizer) { + this.token = token; + this.terminator = terminator; + this.matcher = matcher; + this.tokenizer = tokenizer; + } + + /** + * Checks if this token belongs to the specified group. + * + *

This compares the group name of the terminator against the + * provided group name. Useful for categorizing tokens by type.

+ * + *

Special cases:

+ *
    + *
  • If terminator is null, returns true only if group is also null
  • + *
  • If terminator.group is null, returns true only if group is null
  • + *
+ * + *

Example:

+ *
{@code
+     * tokenizer.addTerminator(PRESERVE, "\\d+", "number");
+     * tokenizer.addTerminator(PRESERVE, "\\w+", "word");
+     *
+     * TokenizerMatch match = tokenizer.getNextToken();
+     * if (match.isGroup("number")) {
+     *     // Token is a number
+     * } else if (match.isGroup("word")) {
+     *     // Token is a word
+     * }
+     * }
+ * + * @param group the group name to check against. May be null. + * @return {@code true} if this token belongs to the specified group, + * {@code false} otherwise. + */ + public boolean isGroup(String group){ + if (terminator == null){ + return group == null; + } + + if (terminator.group == null){ + return group == null; + } + + return terminator.group.equals(group); + } + + /** + * Extracts regex capture groups from this match. + * + *

Returns the captured groups from the regex pattern that matched + * this token. Group 1 and onwards are returned (not the full match).

+ * + *

Example:

+ *
{@code
+     * tokenizer.addTerminator(PRESERVE, "(\\d+):(\\d+)", "time");
+     * // Matches "12:30"
+     *
+     * TokenizerMatch match = tokenizer.getNextToken();
+     * String[] groups = match.getRegExpGroups();
+     * // groups[0] = "12" (hours)
+     * // groups[1] = "30" (minutes)
+     * }
+ * + * @return an array of captured group strings. Empty array if matcher + * is null or no capture groups exist in the pattern. + */ + public String[] getRegExpGroups(){ + if (matcher == null) return new String[]{}; + + String[] result = new String[matcher.groupCount()]; + + for (int i = 0; i< result.length; i++){ + result[i] = matcher.group(i+1); + } + + return result; + } + + /** + * Returns a detailed string representation for debugging. + * + *

Includes the token text, terminator details, and any regex groups.

+     *
+     * @return a multi-line descriptive string.
+     */
+    @Override
+    public String toString() {
+        StringBuilder result = new StringBuilder();
+        // Header: the token text and the terminator (null for accumulated text tokens).
+        result.append("TokenizerMatch\n" +
+                " token='" + token + "'\n" +
+                " terminator=" + terminator + "\n" +
+                " groups:\n");
+
+        // One line per regex capture group, numbered from 0 in getRegExpGroups() order.
+        int i = 0;
+        for (String s : getRegExpGroups()) {
+            result.append(" " + i + ": " + s + "\n");
+            i++;
+        }
+
+        return result.toString();
+    }
+
+    /**
+     * Returns the tokenizer that produced this match.
+     *
+     *

This allows continuing tokenization or accessing tokenizer + * state from within token handling code.

+ * + * @return the Tokenizer instance that created this match. + */ + public Tokenizer getTokenizer() { + return tokenizer; + } +} \ No newline at end of file diff --git a/src/main/java/eu/svjatoslav/commons/string/tokenizer/package-info.java b/src/main/java/eu/svjatoslav/commons/string/tokenizer/package-info.java new file mode 100644 index 0000000..27abdab --- /dev/null +++ b/src/main/java/eu/svjatoslav/commons/string/tokenizer/package-info.java @@ -0,0 +1,46 @@ +/* + * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko. + * This project is released under Creative Commons Zero (CC0) license. + */ + +/** + * Provides a regex-based tokenizer for parsing structured text. + * + *

This package contains a flexible tokenizer system for breaking down + * text into tokens based on regular expression patterns:

+ * + *
    + *
  • {@link eu.svjatoslav.commons.string.tokenizer.Tokenizer} - Main tokenizer class + * that processes source text and extracts tokens
  • + *
  • {@link eu.svjatoslav.commons.string.tokenizer.Terminator} - Defines token boundaries + * using regex patterns with configurable handling strategies
  • + *
  • {@link eu.svjatoslav.commons.string.tokenizer.TokenizerMatch} - Result object + * containing matched token and metadata
  • + *
  • {@link eu.svjatoslav.commons.string.tokenizer.InvalidSyntaxException} - Exception + * thrown when parsing fails
  • + *
+ * + *

The tokenizer supports two termination strategies:

+ *
    + *
  • {@code PRESERVE} - Return matched tokens for processing
  • + *
  • {@code DROP} - Silently drop matched tokens (useful for whitespace/comments)
  • + *
+ * + *

Example usage:

+ *
{@code
+ * Tokenizer tokenizer = new Tokenizer("hello, world!");
+ * tokenizer.addTerminator(DROP, "\\s+");        // Drop whitespace
+ * tokenizer.addTerminator(PRESERVE, ",");       // Preserve commas
+ * while (tokenizer.hasMoreContent()) {
+ *     TokenizerMatch match = tokenizer.getNextToken();
+ *     System.out.println(match.token);
+ * }
+ * }
+ * + * @author Svjatoslav Agejenko + * @since 1.0 + * @see eu.svjatoslav.commons.string.tokenizer.Tokenizer + * @see eu.svjatoslav.commons.string.tokenizer.Terminator + * @see eu.svjatoslav.commons.string.tokenizer.TokenizerMatch + */ +package eu.svjatoslav.commons.string.tokenizer; \ No newline at end of file diff --git a/src/test/java/eu/svjatoslav/commons/data/BitInputStreamTest.java b/src/test/java/eu/svjatoslav/commons/data/BitInputStreamTest.java new file mode 100644 index 0000000..8f5dc8b --- /dev/null +++ b/src/test/java/eu/svjatoslav/commons/data/BitInputStreamTest.java @@ -0,0 +1,101 @@ +/* + * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko. + * This project is released under Creative Commons Zero (CC0) license. + */ +package eu.svjatoslav.commons.data; + +import org.junit.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import static org.junit.Assert.assertEquals; + +public class BitInputStreamTest { + + @Test + public void testReadSingleBit() throws IOException { + byte[] data = {(byte) 0b10000000}; + BitInputStream bis = new BitInputStream(new ByteArrayInputStream(data)); + + assertEquals(1, bis.readBits(1)); + assertEquals(0, bis.readBits(1)); + assertEquals(0, bis.readBits(1)); + assertEquals(0, bis.readBits(1)); + assertEquals(0, bis.readBits(1)); + assertEquals(0, bis.readBits(1)); + assertEquals(0, bis.readBits(1)); + assertEquals(0, bis.readBits(1)); + } + + @Test + public void testReadMultipleBits() throws IOException { + byte[] data = {(byte) 0b11110000, (byte) 0b00001111}; + BitInputStream bis = new BitInputStream(new ByteArrayInputStream(data)); + + assertEquals(0b1111, bis.readBits(4)); + assertEquals(0b0000, bis.readBits(4)); + assertEquals(0b0000, bis.readBits(4)); + assertEquals(0b1111, bis.readBits(4)); + } + + @Test + public void testReadAcrossByteBoundary() throws IOException { + byte[] data = {(byte) 0b11001100, (byte) 
0b10101010}; + BitInputStream bis = new BitInputStream(new ByteArrayInputStream(data)); + + assertEquals(0b11001, bis.readBits(5)); + assertEquals(0b1001010, bis.readBits(7)); + assertEquals(0b1010, bis.readBits(4)); + } + + @Test + public void testReadFullByte() throws IOException { + byte[] data = {(byte) 0xFF, (byte) 0xAB, (byte) 0x00}; + BitInputStream bis = new BitInputStream(new ByteArrayInputStream(data)); + + assertEquals(0xFF, bis.readBits(8)); + assertEquals(0xAB, bis.readBits(8)); + assertEquals(0x00, bis.readBits(8)); + } + + @Test + public void testReadZeroBits() throws IOException { + byte[] data = {(byte) 0xFF}; + BitInputStream bis = new BitInputStream(new ByteArrayInputStream(data)); + + assertEquals(0, bis.readBits(0)); + assertEquals(0xFF, bis.readBits(8)); + } + + @Test + public void testReadVariedBitWidths() throws IOException { + byte[] data = {(byte) 0b10101010, (byte) 0b11001100}; + BitInputStream bis = new BitInputStream(new ByteArrayInputStream(data)); + + assertEquals(0b101, bis.readBits(3)); + assertEquals(0b010, bis.readBits(3)); + assertEquals(0b1011, bis.readBits(4)); + assertEquals(0b001100, bis.readBits(6)); + } + + @Test + public void testRoundTripWithBitOutputStream() throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BitOutputStream bos = new BitOutputStream(baos); + + bos.storeBits(0xFF, 8); + bos.storeBits(0xAB, 8); + bos.storeBits(0x00, 8); + bos.finishByte(); + + byte[] written = baos.toByteArray(); + BitInputStream bis = new BitInputStream(new ByteArrayInputStream(written)); + + assertEquals(0xFF, bis.readBits(8)); + assertEquals(0xAB, bis.readBits(8)); + assertEquals(0x00, bis.readBits(8)); + } + +} \ No newline at end of file diff --git a/src/test/java/eu/svjatoslav/commons/data/BitOutputStreamTest.java b/src/test/java/eu/svjatoslav/commons/data/BitOutputStreamTest.java new file mode 100644 index 0000000..41100fd --- /dev/null +++ 
b/src/test/java/eu/svjatoslav/commons/data/BitOutputStreamTest.java @@ -0,0 +1,137 @@ +/* + * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko. + * This project is released under Creative Commons Zero (CC0) license. + */ +package eu.svjatoslav.commons.data; + +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +public class BitOutputStreamTest { + + @Test + public void testWriteSingleBits() throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BitOutputStream bos = new BitOutputStream(baos); + + bos.storeBits(1, 1); + bos.storeBits(0, 1); + bos.storeBits(0, 1); + bos.storeBits(0, 1); + bos.storeBits(0, 1); + bos.storeBits(0, 1); + bos.storeBits(0, 1); + bos.storeBits(0, 1); + bos.finishByte(); + + byte[] result = baos.toByteArray(); + assertEquals(1, result.length); + assertEquals((byte) 0b10000000, result[0]); + } + + @Test + public void testWriteFullByte() throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BitOutputStream bos = new BitOutputStream(baos); + + bos.storeBits(0xFF, 8); + bos.finishByte(); + + byte[] result = baos.toByteArray(); + assertEquals(1, result.length); + assertEquals((byte) 0xFF, result[0]); + } + + @Test + public void testWriteAcrossByteBoundary() throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BitOutputStream bos = new BitOutputStream(baos); + + bos.storeBits(0b11001, 5); + bos.storeBits(0b100, 3); + bos.finishByte(); + + byte[] result = baos.toByteArray(); + assertEquals(1, result.length); + assertEquals((byte) 0b11001100, result[0]); + } + + @Test + public void testWriteMultipleBytes() throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BitOutputStream bos = new BitOutputStream(baos); + + bos.storeBits(0b11110000, 8); + 
bos.storeBits(0b00001111, 8); + bos.finishByte(); + + byte[] result = baos.toByteArray(); + assertEquals(2, result.length); + assertEquals((byte) 0b11110000, result[0]); + assertEquals((byte) 0b00001111, result[1]); + } + + @Test + public void testWriteZeroBits() throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BitOutputStream bos = new BitOutputStream(baos); + + bos.storeBits(0xFF, 8); + bos.storeBits(0, 0); + bos.storeBits(0xAB, 8); + bos.finishByte(); + + byte[] result = baos.toByteArray(); + assertEquals(2, result.length); + assertEquals((byte) 0xFF, result[0]); + assertEquals((byte) 0xAB, result[1]); + } + + @Test + public void testFinishByteWithPartialByte() throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BitOutputStream bos = new BitOutputStream(baos); + + bos.storeBits(0b101, 3); + bos.finishByte(); + + byte[] result = baos.toByteArray(); + assertEquals(1, result.length); + assertEquals(5, result[0]); + } + + @Test + public void testFinishByteTwice() throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BitOutputStream bos = new BitOutputStream(baos); + + bos.storeBits(0b101, 3); + bos.finishByte(); + bos.finishByte(); + + byte[] result = baos.toByteArray(); + assertEquals(1, result.length); + } + + @Test + public void testWriteVariedBitWidths() throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BitOutputStream bos = new BitOutputStream(baos); + + bos.storeBits(0b101, 3); + bos.storeBits(0b010, 3); + bos.storeBits(0b1011, 4); + bos.storeBits(0b001100, 6); + bos.finishByte(); + + byte[] expected = {(byte) 0b10101010, (byte) 0b11001100}; + byte[] result = baos.toByteArray(); + assertArrayEquals(expected, result); + } + +} \ No newline at end of file diff --git a/src/test/java/eu/svjatoslav/commons/data/HexConverterTest.java b/src/test/java/eu/svjatoslav/commons/data/HexConverterTest.java new file mode 100755 index 0000000..b1da44b 
--- /dev/null
+++ b/src/test/java/eu/svjatoslav/commons/data/HexConverterTest.java
@@ -0,0 +1,70 @@
+/*
+ * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko.
+ * This project is released under Creative Commons Zero (CC0) license.
+ */
+package eu.svjatoslav.commons.data;
+
+import org.junit.Test;
+
+import java.util.Arrays;
+
+import static org.junit.Assert.*;
+
+/**
+ * Unit tests for {@code HexConverter.byteArrayToHex}.
+ */
+public class HexConverterTest {
+
+    /** A three-byte array is expected to render as two hex digits per byte. */
+    @Test
+    public void testBasicConversion() {
+        final String result = HexConverter.byteArrayToHex(new byte[]{1, 2, (byte) 255});
+        assertEquals("0102FF", result);
+    }
+
+    /** An empty array is expected to produce an empty string. */
+    @Test
+    public void testEmptyArray() {
+        final String result = HexConverter.byteArrayToHex(new byte[]{});
+        assertEquals("", result);
+    }
+
+    /** A {@code null} array is expected to produce {@code null}. */
+    @Test
+    public void testNullArray() {
+        final String result = HexConverter.byteArrayToHex(null);
+        assertNull(result);
+    }
+
+    /** Single bytes are expected to be zero-padded to two digits. */
+    @Test
+    public void testSingleByte() {
+        assertEquals("00", HexConverter.byteArrayToHex(new byte[]{0}));
+        assertEquals("0F", HexConverter.byteArrayToHex(new byte[]{15}));
+        assertEquals("FF", HexConverter.byteArrayToHex(new byte[]{(byte) 255}));
+    }
+
+    /** A freshly allocated (all-zero) array of five bytes yields ten zero digits. */
+    @Test
+    public void testAllZeros() {
+        final byte[] zeros = new byte[5];
+        assertEquals("0000000000", HexConverter.byteArrayToHex(zeros));
+    }
+
+    /** Three bytes with every bit set yield {@code "FFFFFF"}. */
+    @Test
+    public void testAllOnes() {
+        final byte[] ones = new byte[3];
+        Arrays.fill(ones, (byte) 0xFF);
+        assertEquals("FFFFFF", HexConverter.byteArrayToHex(ones));
+    }
+
+    /** Bytes mixing digit and letter nibbles keep their order in the output. */
+    @Test
+    public void testMixedValues() {
+        final byte[] data = {0x0A, 0x1B, 0x2C, 0x3D, 0x4E, 0x5F};
+        assertEquals("0A1B2C3D4E5F", HexConverter.byteArrayToHex(data));
+    }
+
+    /** Negative byte values are expected to be rendered as their unsigned two-digit form. */
+    @Test
+    public void testNegativeBytes() {
+        final byte[] data = {(byte) -1, (byte) -128, (byte) -2};
+        assertEquals("FF80FE", HexConverter.byteArrayToHex(data));
+    }
+
+    /** The produced string must already be in upper case. */
+    @Test
+    public void testUpperCaseOutput() {
+        final byte[] data = {0x0a, 0x1b, 0x2c};
+        final String result = HexConverter.byteArrayToHex(data);
+        // assertEquals (rather than assertTrue(equals)) reports both strings when the check fails.
+        assertEquals(result.toUpperCase(), result);
+    }
+
+}
\ No newline at end of file
diff --git a/src/test/java/eu/svjatoslav/commons/file/FilePathParserTest.java b/src/test/java/eu/svjatoslav/commons/file/FilePathParserTest.java
new file mode 100644
index 0000000..0237955
--- /dev/null
+++ b/src/test/java/eu/svjatoslav/commons/file/FilePathParserTest.java
@@ -0,0 +1,115 @@
+/*
+ * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko.
+ * This project is released under Creative Commons Zero (CC0) license.
+ */
+package eu.svjatoslav.commons.file;
+
+import org.junit.Test;
+
+import java.io.File;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Unit tests for {@code FilePathParser}: file extension extraction, file name without
+ * extension, and human-readable file size descriptions.
+ */
+public class FilePathParserTest {
+
+    /** The extension is the lower-cased text after the last dot, or empty when there is no dot. */
+    @Test
+    public void testGetFileExtension() {
+        assertEquals("txt", FilePathParser.getFileExtension("document.txt"));
+        assertEquals("jpg", FilePathParser.getFileExtension("image.JPG"));
+        assertEquals("gz", FilePathParser.getFileExtension("archive.tar.gz"));
+        assertEquals("", FilePathParser.getFileExtension("noextension"));
+        assertEquals("", FilePathParser.getFileExtension(""));
+    }
+
+    /** Same expectations through the {@link File} overload. */
+    @Test
+    public void testGetFileExtensionWithFile() {
+        File file = new File("test/document.pdf");
+        assertEquals("pdf", FilePathParser.getFileExtension(file));
+
+        File noExtFile = new File("test/README");
+        assertEquals("", FilePathParser.getFileExtension(noExtFile));
+    }
+
+    /** Upper-case extensions are expected back in lower case. */
+    @Test
+    public void testGetFileExtensionCaseInsensitive() {
+        assertEquals("jpg", FilePathParser.getFileExtension("photo.JPG"));
+        assertEquals("png", FilePathParser.getFileExtension("image.PNG"));
+        assertEquals("pdf", FilePathParser.getFileExtension("document.PDF"));
+    }
+
+    /** Only the last extension is stripped; names without a dot are returned unchanged. */
+    @Test
+    public void testGetFileNameWithoutExtension() {
+        assertEquals("document", FilePathParser.getFileNameWithoutExtension("document.txt"));
+        assertEquals("archive.tar", FilePathParser.getFileNameWithoutExtension("archive.tar.gz"));
+        assertEquals("noextension", FilePathParser.getFileNameWithoutExtension("noextension"));
+        assertEquals("", FilePathParser.getFileNameWithoutExtension(""));
+        assertEquals("file", FilePathParser.getFileNameWithoutExtension("file."));
+    }
+
+    /** The {@link File} overload is expected to ignore the directory part of the path. */
+    @Test
+    public void testGetFileNameWithoutExtensionWithFile() {
+        File file = new File("path/to/document.pdf");
+        assertEquals("document", FilePathParser.getFileNameWithoutExtension(file));
+
+        File noExtFile = new File("path/to/README");
+        assertEquals("README", FilePathParser.getFileNameWithoutExtension(noExtFile));
+    }
+
+    /** Sizes below 5 * 1024 bytes are expected to be reported in bytes ("b"). */
+    @Test
+    public void testGetFileSizeDescriptionBytes() {
+        assertEquals("0 b", FilePathParser.getFileSizeDescription(0));
+        assertEquals("100 b", FilePathParser.getFileSizeDescription(100));
+        assertEquals("1023 b", FilePathParser.getFileSizeDescription(1023));
+        assertEquals("1024 b", FilePathParser.getFileSizeDescription(1024));
+        assertEquals("5119 b", FilePathParser.getFileSizeDescription(5119));
+    }
+
+    /** From 5 KiB up to (but excluding) 5 MiB the expected unit is KiB. */
+    @Test
+    public void testGetFileSizeDescriptionKibibytes() {
+        assertEquals("5 KiB", FilePathParser.getFileSizeDescription(5 * 1024));
+        assertEquals("10 KiB", FilePathParser.getFileSizeDescription(10 * 1024));
+        assertEquals("1024 KiB", FilePathParser.getFileSizeDescription(1024 * 1024));
+        assertEquals("5 MiB", FilePathParser.getFileSizeDescription(5L * 1024 * 1024));
+    }
+
+    /** From 5 MiB up to (but excluding) 5 GiB the expected unit is MiB; one extra byte does not change the text. */
+    @Test
+    public void testGetFileSizeDescriptionMebibytes() {
+        assertEquals("5 MiB", FilePathParser.getFileSizeDescription(5L * 1024 * 1024 + 1));
+        assertEquals("10 MiB", FilePathParser.getFileSizeDescription(10L * 1024 * 1024));
+        assertEquals("1024 MiB", FilePathParser.getFileSizeDescription(1024L * 1024 * 1024));
+        assertEquals("5 GiB", FilePathParser.getFileSizeDescription(5L * 1024 * 1024 * 1024));
+    }
+
+    /** From 5 GiB up to (but excluding) 5 TiB the expected unit is GiB. */
+    @Test
+    public void testGetFileSizeDescriptionGibibytes() {
+        assertEquals("5 GiB", FilePathParser.getFileSizeDescription(5L * 1024 * 1024 * 1024 + 1));
+        assertEquals("10 GiB", FilePathParser.getFileSizeDescription(10L * 1024 * 1024 * 1024));
+        assertEquals("1024 GiB", FilePathParser.getFileSizeDescription(1024L * 1024 * 1024 * 1024));
+        assertEquals("5 TiB", FilePathParser.getFileSizeDescription(5L * 1024 * 1024 * 1024 * 1024));
+    }
+
+    /** From 5 TiB up to (but excluding) 5 PiB the expected unit is TiB. */
+    @Test
+    public void testGetFileSizeDescriptionTebibytes() {
+        assertEquals("5 TiB", FilePathParser.getFileSizeDescription(5L * 1024 * 1024 * 1024 * 1024 + 1));
+        assertEquals("10 TiB", FilePathParser.getFileSizeDescription(10L * 1024 * 1024 * 1024 * 1024));
+        assertEquals("1024 TiB", FilePathParser.getFileSizeDescription(1024L * 1024 * 1024 * 1024 * 1024));
+        assertEquals("5 PiB", FilePathParser.getFileSizeDescription(5L * 1024 * 1024 * 1024 * 1024 * 1024));
+    }
+
+    /**
+     * Sizes of 5 PiB and above are expected to be reported in PiB.
+     * NOTE(review): the first assertion repeats the last assertion of the tebibyte test; the
+     * sibling tests probe "threshold + 1" instead - consider doing the same here.
+     */
+    @Test
+    public void testGetFileSizeDescriptionPebibytes() {
+        assertEquals("5 PiB", FilePathParser.getFileSizeDescription(5L * 1024 * 1024 * 1024 * 1024 * 1024));
+        assertEquals("10 PiB", FilePathParser.getFileSizeDescription(10L * 1024 * 1024 * 1024 * 1024 * 1024));
+    }
+
+    /** Pins the exact points at which the expected unit switches (5 units of the next magnitude). */
+    @Test
+    public void testGetFileSizeDescriptionBoundary() {
+        assertEquals("5119 b", FilePathParser.getFileSizeDescription(5119));
+        assertEquals("5 KiB", FilePathParser.getFileSizeDescription(5120));
+        assertEquals("4999 KiB", FilePathParser.getFileSizeDescription(4999 * 1024));
+        assertEquals("5 MiB", FilePathParser.getFileSizeDescription(5L * 1024 * 1024));
+        assertEquals("4999 MiB", FilePathParser.getFileSizeDescription(4999L * 1024 * 1024));
+        assertEquals("5 GiB", FilePathParser.getFileSizeDescription(5L * 1024 * 1024 * 1024));
+    }
+
+}
\ No newline at end of file
diff --git a/src/test/java/eu/svjatoslav/commons/file/IOHelperTest.java b/src/test/java/eu/svjatoslav/commons/file/IOHelperTest.java
new file mode 100755
index 0000000..1c7b61d
--- /dev/null
+++ b/src/test/java/eu/svjatoslav/commons/file/IOHelperTest.java
@@ -0,0 +1,114 @@
+/*
+ * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko.
+ * This project is released under Creative Commons Zero (CC0) license.
+ */
+package eu.svjatoslav.commons.file;
+
+import org.junit.After;
+import org.junit.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.*;
+
+/**
+ * Unit tests for {@code IOHelper}: saving and reading files, recursive deletion and
+ * content-aware overwriting. The tests work on {@code testFile.txt} and {@code testDir}
+ * relative to the current working directory and remove them after every test.
+ */
+public class IOHelperTest {
+
+    private final File testDir = new File("testDir");
+    private final File testFile = new File("testFile.txt");
+
+    /**
+     * Removes the scratch file and directory after each test.
+     *
+     * @throws IOException if the scratch file exists but cannot be deleted, or if the
+     *                     recursive directory deletion reports a failure
+     */
+    @After
+    public void cleanup() throws IOException {
+        // deleteIfExists fails loudly; File.delete() would only return false, letting a
+        // stale file leak into the next test.
+        Files.deleteIfExists(testFile.toPath());
+        if (testDir.exists()) {
+            IOHelper.deleteRecursively(testDir);
+        }
+    }
+
+    /** First write and a write with new content report {@code true}; rewriting identical content reports {@code false}. */
+    @Test
+    public void testOverwriteFileIfContentDiffers() throws IOException {
+        assertTrue(IOHelper.overwriteFileIfContentDiffers(testFile, "aoa".getBytes(UTF_8)));
+        assertFalse(IOHelper.overwriteFileIfContentDiffers(testFile, "aoa".getBytes(UTF_8)));
+        assertTrue(IOHelper.overwriteFileIfContentDiffers(testFile, "1234".getBytes(UTF_8)));
+    }
+
+    /** Bytes written with {@code saveToFile} are read back unchanged by {@code getFileContents}. */
+    @Test
+    public void testSaveAndReadBytes() throws IOException {
+        final byte[] content = "hello world".getBytes(UTF_8);
+        IOHelper.saveToFile(testFile, content);
+
+        final byte[] readContent = IOHelper.getFileContents(testFile);
+        assertArrayEquals(content, readContent);
+    }
+
+    /** A string written with {@code saveToFile} is read back unchanged by {@code getFileContentsAsString}. */
+    @Test
+    public void testSaveAndReadString() throws IOException {
+        final String content = "hello world";
+        IOHelper.saveToFile(testFile, content);
+
+        final String readContent = IOHelper.getFileContentsAsString(testFile);
+        assertEquals(content, readContent);
+    }
+
+    /** An empty file is read back as an empty string. */
+    @Test
+    public void testGetFileContentsEmptyFile() throws IOException {
+        IOHelper.saveToFile(testFile, "");
+        final String content = IOHelper.getFileContentsAsString(testFile);
+        assertEquals("", content);
+    }
+
+    /** {@code deleteRecursively} removes a plain file. */
+    @Test
+    public void testDeleteRecursivelyFile() throws IOException {
+        IOHelper.saveToFile(testFile, "test");
+        assertTrue(testFile.exists());
+        IOHelper.deleteRecursively(testFile);
+        assertFalse(testFile.exists());
+    }
+
+    /** {@code deleteRecursively} removes a directory that contains a file and a populated sub-directory. */
+    @Test
+    public void testDeleteRecursivelyDirectory() throws IOException {
+        testDir.mkdirs();
+        final File subDir = new File(testDir, "subdir");
+        subDir.mkdirs();
+        IOHelper.saveToFile(new File(testDir, "file1.txt"), "content1");
+        IOHelper.saveToFile(new File(subDir, "file2.txt"), "content2");
+
+        assertTrue(testDir.exists());
+        assertTrue(subDir.exists());
+
+        IOHelper.deleteRecursively(testDir);
+        assertFalse(testDir.exists());
+    }
+
+    /** {@code deleteRecursively} removes an empty directory. */
+    @Test
+    public void testDeleteRecursivelyEmptyDirectory() throws IOException {
+        testDir.mkdirs();
+        assertTrue(testDir.exists());
+        IOHelper.deleteRecursively(testDir);
+        assertFalse(testDir.exists());
+    }
+
+    /** Deleting a path that does not exist must complete without an exception. */
+    @Test
+    public void testDeleteNonExistentFile() throws IOException {
+        final File nonExistent = new File("nonexistent_" + System.currentTimeMillis());
+        IOHelper.deleteRecursively(nonExistent);
+        assertFalse(nonExistent.exists());
+    }
+
+    /** Content of equal length but different bytes still counts as different. */
+    @Test
+    public void testOverwriteIdenticalContentLengthButDifferent() throws IOException {
+        final byte[] content1 = "abcd".getBytes(UTF_8);
+        final byte[] content2 = "dcba".getBytes(UTF_8);
+
+        IOHelper.saveToFile(testFile, content1);
+        assertTrue(IOHelper.overwriteFileIfContentDiffers(testFile, content2));
+    }
+
+    /** Content of a different length than the existing file is written and reported as changed. */
+    @Test
+    public void testOverwriteExistingFileDifferentLength() throws IOException {
+        IOHelper.saveToFile(testFile, "short");
+        assertTrue(IOHelper.overwriteFileIfContentDiffers(testFile, "longer content".getBytes(UTF_8)));
+    }
+
+}
\ No newline at end of file
diff --git a/src/test/java/eu/svjatoslav/commons/string/GlobMatcherTest.java b/src/test/java/eu/svjatoslav/commons/string/GlobMatcherTest.java
new file mode 100755
index 0000000..95a62f5
--- /dev/null
+++ b/src/test/java/eu/svjatoslav/commons/string/GlobMatcherTest.java
@@ -0,0 +1,110 @@
+/*
+ * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko.
+ * This project is released under Creative Commons Zero (CC0) license.
+ */
+package eu.svjatoslav.commons.string;
+
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+/**
+ * Unit tests for {@code GlobMatcher.match(input, pattern)} covering the {@code *} and
+ * {@code ?} wildcards, literal patterns, empty strings and {@code null} arguments.
+ */
+public class GlobMatcherTest {
+
+    /** Asserts that {@code input} is accepted by {@code pattern}. */
+    private static void assertMatches(final String input, final String pattern) {
+        assertTrue(GlobMatcher.match(input, pattern));
+    }
+
+    /** Asserts that {@code input} is rejected by {@code pattern}. */
+    private static void assertNoMatch(final String input, final String pattern) {
+        assertFalse(GlobMatcher.match(input, pattern));
+    }
+
+    /** {@code *} stands for any run of characters, including none; repeated stars are tolerated. */
+    @Test
+    public void testStarWildcard() {
+        assertMatches("IMG_9770.JPG", "*.J*");
+        assertMatches("1", "*");
+        assertMatches("Hello !", "Hell*!***");
+        assertMatches("Hello !", "Hell*!");
+        assertMatches("Hello !", "Hell*");
+        assertMatches("Hello !", "* *");
+        assertNoMatch("Hello !", "Hell");
+    }
+
+    /** {@code ?} stands for exactly one character. */
+    @Test
+    public void testQuestionWildcard() {
+        assertMatches("cat", "c?t");
+        assertMatches("bat", "?at");
+        assertMatches("cat", "ca?");
+        assertNoMatch("cart", "c?t");
+        assertNoMatch("at", "?at");
+    }
+
+    /** Several {@code ?} in a row fix the number of characters they cover. */
+    @Test
+    public void testMultipleQuestionWildcards() {
+        assertMatches("test", "t??t");
+        assertMatches("abcd", "????");
+        assertNoMatch("abc", "????");
+        assertNoMatch("abcde", "????");
+    }
+
+    /** {@code *} and {@code ?} combined in one pattern. */
+    @Test
+    public void testMixedWildcards() {
+        assertMatches("document.txt", "*.???");
+        assertMatches("file123.txt", "file*.???");
+        assertMatches("test123", "test?*");
+        assertMatches("a1b2c3", "?*?*?*");
+        assertNoMatch("ab", "?*?*?*");
+    }
+
+    /** Patterns without wildcards must equal the input, including letter case. */
+    @Test
+    public void testExactMatch() {
+        assertMatches("1", "1");
+        assertMatches("test", "test");
+        assertNoMatch("f", "1");
+        assertNoMatch("test", "Test");
+    }
+
+    /** Empty input matches only the empty pattern or {@code *}; an empty pattern rejects non-empty input. */
+    @Test
+    public void testEmptyStrings() {
+        assertMatches("", "");
+        assertMatches("", "*");
+        assertNoMatch("", "?");
+        assertNoMatch("test", "");
+    }
+
+    /** A {@code null} input or pattern never matches. */
+    @Test
+    public void testNullInputs() {
+        assertNoMatch(null, "*");
+        assertNoMatch("test", null);
+        assertNoMatch(null, null);
+    }
+
+    /** A pattern made only of stars accepts everything, the empty string included. */
+    @Test
+    public void testOnlyStars() {
+        assertMatches("anything", "***");
+        assertMatches("", "***");
+        assertMatches("x", "***");
+    }
+
+    /** A trailing star accepts any suffix, including an empty one. */
+    @Test
+    public void testStarAtEnd() {
+        assertMatches("filename.txt", "filename*");
+        assertMatches("filename", "filename*");
+        assertMatches("filename_xyz", "filename*");
+    }
+
+    /** A leading star accepts any prefix, including an empty one. */
+    @Test
+    public void testStarAtBeginning() {
+        assertMatches(".txt", "*.txt");
+        assertMatches("file.txt", "*.txt");
+        assertMatches("anything.txt", "*.txt");
+        assertNoMatch("anything.pdf", "*.txt");
+    }
+
+    /** A leading {@code ?} requires at least one character. */
+    @Test
+    public void testQuestionAtBeginning() {
+        assertMatches("a.txt", "?*.txt");
+        assertMatches("x", "?");
+        assertNoMatch("", "?");
+    }
+
+    /** File-name style patterns mixing literals with both wildcards. */
+    @Test
+    public void testComplexPatterns() {
+        assertMatches("IMG_9770.JPG", "IMG_*.JPG");
+        assertMatches("photo_2023.png", "photo_*.???");
+        assertMatches("test_file_v1.java", "test_*_v?.java");
+        assertNoMatch("test_file_v12.java", "test_*_v?.java");
+    }
+
+}
\ No newline at end of file
diff --git a/src/test/java/eu/svjatoslav/commons/string/String2Test.java b/src/test/java/eu/svjatoslav/commons/string/String2Test.java
new file mode 100755
index 0000000..593f09e
--- /dev/null
+++ b/src/test/java/eu/svjatoslav/commons/string/String2Test.java
@@ -0,0 +1,261 @@
+/*
+ * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko.
+ * This project is released under Creative Commons Zero (CC0) license.
+ */
+package eu.svjatoslav.commons.string;
+
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+/**
+ * Unit tests for the mutable string builder {@code String2}.
+ */
+public class String2Test {
+
+    /** Successive prefix trims operate on the already shortened value; over-trimming empties it. */
+    @Test
+    public void testTrimPrefix() {
+        final String2 text = new String2("this is a test");
+
+        assertEquals("is a test", text.trimPrefix(5).toString());
+        assertEquals("a test", text.trimPrefix(3).toString());
+        assertEquals("test", text.trimPrefix(2).toString());
+        assertEquals("", text.trimPrefix(500).toString());
+    }
+
+    /** Trimming zero characters from the front leaves the value untouched. */
+    @Test
+    public void testTrimPrefixZero() {
+        assertEquals("test", new String2("test").trimPrefix(0).toString());
+    }
+
+    /** A present prefix is removed; an absent one is ignored. */
+    @Test
+    public void testTrimPrefixIfExists() {
+        final String2 text = new String2("prefix_content");
+        text.trimPrefixIfExists("prefix_");
+        assertEquals("content", text.toString());
+
+        text.trimPrefixIfExists("nonexistent");
+        assertEquals("content", text.toString());
+    }
+
+    /** A {@code null} prefix is ignored. */
+    @Test
+    public void testTrimPrefixIfExistsNull() {
+        final String2 text = new String2("test");
+        text.trimPrefixIfExists(null);
+        assertEquals("test", text.toString());
+    }
+
+    /** Successive suffix trims operate on the already shortened value; over-trimming empties it. */
+    @Test
+    public void testTrimSuffix() {
+        final String2 text = new String2("this is a test");
+
+        assertEquals("this is a", text.trimSuffix(5).toString());
+        assertEquals("this is", text.trimSuffix(2).toString());
+        assertEquals("this", text.trimSuffix(3).toString());
+        assertEquals("", text.trimSuffix(500).toString());
+    }
+
+    /** Trimming zero characters from the end leaves the value untouched. */
+    @Test
+    public void testTrimSuffixZero() {
+        assertEquals("test", new String2("test").trimSuffix(0).toString());
+    }
+
+    /** A present suffix is removed, and this can be repeated on the shortened value. */
+    @Test
+    public void testTrimSuffixIfExists() {
+        final String2 text = new String2("content_suffix");
+        text.trimSuffixIfExists("_suffix");
+        assertEquals("content", text.toString());
+
+        text.trimSuffixIfExists("ent");
+        assertEquals("cont", text.toString());
+    }
+
+    /**
+     * Enforcing a shorter length truncates; enforcing a longer one pads.
+     * NOTE(review): after {@code enforceLength(5)} a five-character value would be expected,
+     * yet the literal below is four characters long - confirm whether a trailing space was
+     * lost when this text was captured.
+     */
+    @Test
+    public void testEnforceLength() {
+        final String2 text = new String2("12345678");
+        text.enforceLength(3);
+        assertEquals("123", text.toString());
+
+        text.enforceLength(5);
+        assertEquals("123 ", text.toString());
+    }
+
+    /** Enforcing the current length changes nothing. */
+    @Test
+    public void testEnforceLengthExact() {
+        final String2 text = new String2("test");
+        text.enforceLength(4);
+        assertEquals("test", text.toString());
+    }
+
+    /** {@code prepend} and {@code append} can be chained. */
+    @Test
+    public void testSuffixAndPrefix() {
+        final String2 text = new String2("experiment").prepend("The ").append(" !");
+
+        assertEquals("The experiment !", text.toString());
+    }
+
+    /** Prepending {@code null} is a no-op. */
+    @Test
+    public void testPrependNull() {
+        final String2 text = new String2("test");
+        text.prepend(null);
+        assertEquals("test", text.toString());
+    }
+
+    /** Appending {@code null} is a no-op. */
+    @Test
+    public void testAppendNull() {
+        final String2 text = new String2("test");
+        text.append(null);
+        assertEquals("test", text.toString());
+    }
+
+    /** Constructing from a {@code null} string yields an empty value. */
+    @Test
+    public void testConstructorNull() {
+        assertEquals("", new String2((String) null).toString());
+    }
+
+    /** {@code repeat(3)} leaves three copies of the original value. */
+    @Test
+    public void testRepeat() {
+        final String2 text = new String2("ab");
+        text.repeat(3);
+        assertEquals("ababab", text.toString());
+    }
+
+    /** {@code repeat(1)} leaves the value unchanged. */
+    @Test
+    public void testRepeatOnce() {
+        final String2 text = new String2("test");
+        text.repeat(1);
+        assertEquals("test", text.toString());
+    }
+
+    /** {@code repeat(0)} empties the value. */
+    @Test
+    public void testRepeatZero() {
+        final String2 text = new String2("test");
+        text.repeat(0);
+        assertEquals("", text.toString());
+    }
+
+    /** A negative repeat count empties the value as well. */
+    @Test
+    public void testRepeatNegative() {
+        final String2 text = new String2("test");
+        text.repeat(-5);
+        assertEquals("", text.toString());
+    }
+
+    /** The separator is inserted between existing content and each newly appended part. */
+    @Test
+    public void testAppendWithSeparator() {
+        final String2 text = new String2("a");
+
+        assertEquals("a,b", text.appendWithSeparator(",", "b").toString());
+        assertEquals("a,b,c", text.appendWithSeparator(",", "c").toString());
+    }
+
+    /** No separator is emitted when the value is still empty. */
+    @Test
+    public void testAppendWithSeparatorEmptyString() {
+        assertEquals("first", new String2().appendWithSeparator(",", "first").toString());
+    }
+
+    /** {@code append(value, count)} appends the value {@code count} times. */
+    @Test
+    public void testAppendMultipleTimes() {
+        final String2 text = new String2();
+        text.append("x", 3);
+        assertEquals("xxx", text.toString());
+    }
+
+    /** A count of zero appends nothing. */
+    @Test
+    public void testAppendMultipleTimesZero() {
+        final String2 text = new String2("test");
+        text.append("x", 0);
+        assertEquals("test", text.toString());
+    }
+
+    /** {@code hasPrefix} accepts leading fragments only. */
+    @Test
+    public void testHasPrefix() {
+        final String2 text = new String2("prefix_content");
+        assertTrue(text.hasPrefix("prefix_"));
+        assertTrue(text.hasPrefix("prefix"));
+        assertFalse(text.hasPrefix("content"));
+    }
+
+    /** A candidate prefix longer than the value is rejected. */
+    @Test
+    public void testHasPrefixLongerThanString() {
+        assertFalse(new String2("short").hasPrefix("very_long_prefix"));
+    }
+
+    /** {@code hasSuffix} accepts trailing fragments only. */
+    @Test
+    public void testHasSuffix() {
+        final String2 text = new String2("content_suffix");
+        assertTrue(text.hasSuffix("_suffix"));
+        assertTrue(text.hasSuffix("suffix"));
+        assertFalse(text.hasSuffix("other"));
+    }
+
+    /** {@code contains(fragment, index)} is true only when the fragment starts exactly at the index. */
+    @Test
+    public void testContains() {
+        final String2 text = new String2("hello world");
+        assertTrue(text.contains("hello", 0));
+        assertTrue(text.contains("world", 6));
+        assertTrue(text.contains("lo wo", 3));
+        assertFalse(text.contains("hello", 1));
+        assertFalse(text.contains("xyz", 0));
+    }
+
+    /** Default-constructed and empty-string instances are empty; others are not. */
+    @Test
+    public void testIsEmpty() {
+        assertTrue(new String2().isEmpty());
+        assertTrue(new String2("").isEmpty());
+        assertFalse(new String2("test").isEmpty());
+    }
+
+    /** {@code getLength} reports the number of characters held. */
+    @Test
+    public void testGetLength() {
+        assertEquals(0, new String2().getLength());
+        assertEquals(0, new String2("").getLength());
+        assertEquals(4, new String2("test").getLength());
+    }
+
+    /** {@code getSubString(start, end)} uses an inclusive start and exclusive end index. */
+    @Test
+    public void testGetSubString() {
+        final String2 text = new String2("hello world");
+        assertEquals("hello", text.getSubString(0, 5));
+        assertEquals("world", text.getSubString(6, 11));
+        assertEquals("lo wo", text.getSubString(3, 8));
+        assertEquals("", text.getSubString(5, 5));
+    }
+
+    /** {@code clear} empties the value. */
+    @Test
+    public void testClear() {
+        final String2 text = new String2("test");
+        text.clear();
+        assertEquals("", text.toString());
+        assertTrue(text.isEmpty());
+    }
+
+    /** {@code getGroups} returns the regular expression's capture groups in order. */
+    @Test
+    public void testGetGroups() {
+        final String[] captured = String2.getGroups("abc 123 def", "(\\w+)\\s+(\\w+)\\s+(\\w+)");
+        // Compares length and every element in one step.
+        assertArrayEquals(new String[]{"abc", "123", "def"}, captured);
+    }
+
+    /** Builder calls can be chained from construction to the final value. */
+    @Test
+    public void testFluentChaining() {
+        final String2 sentence = new String2().append("hello").appendWithSeparator(" ", "world");
+        sentence.prepend("Say: ").append("!");
+
+        assertEquals("Say: hello world!", sentence.toString());
+    }
+
+}
\ No newline at end of file
diff --git a/src/test/java/eu/svjatoslav/commons/string/tokenizer/TerminatorTest.java b/src/test/java/eu/svjatoslav/commons/string/tokenizer/TerminatorTest.java
new file mode 100644
index 0000000..023bf25
--- /dev/null
+++ b/src/test/java/eu/svjatoslav/commons/string/tokenizer/TerminatorTest.java
@@ -0,0 +1,116 @@
+/*
+ * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko.
+ * This project is released under Creative Commons Zero (CC0) license.
+ */
+package eu.svjatoslav.commons.string.tokenizer;
+
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+/**
+ * Unit tests for {@code Terminator}: pattern matching at a given position and the public
+ * {@code termination}, {@code group} and {@code active} fields.
+ */
+public class TerminatorTest {
+
+    /** Creates a terminator with the PRESERVE strategy for the given pattern and group. */
+    private static Terminator preserving(final String regexp, final String group) {
+        return new Terminator(Terminator.TerminationStrategy.PRESERVE, regexp, group);
+    }
+
+    /** A comment pattern is found at index 0 only, and not in a lone slash. */
+    @Test
+    public void testMatches() {
+        final Terminator comment = preserving("/\\*.+\\*/", "test");
+
+        assertTrue(comment.match("/* bla bla bla */", 0).find());
+        assertFalse(comment.match("/* bla bla bla */", 1).find());
+        assertFalse(comment.match("/", 0).find());
+    }
+
+    /** The PRESERVE strategy passed to the constructor is exposed through {@code termination}. */
+    @Test
+    public void testTerminationStrategyPreserve() {
+        final Terminator digits = preserving("\\d+", null);
+
+        assertEquals(Terminator.TerminationStrategy.PRESERVE, digits.termination);
+    }
+
+    /** The DROP strategy passed to the constructor is exposed through {@code termination}. */
+    @Test
+    public void testTerminationStrategyDrop() {
+        final Terminator whitespace =
+                new Terminator(Terminator.TerminationStrategy.DROP, "\\s+", null);
+
+        assertEquals(Terminator.TerminationStrategy.DROP, whitespace.termination);
+    }
+
+    /** The group passed to the constructor, {@code null} included, is exposed through {@code group}. */
+    @Test
+    public void testGroupProperty() {
+        assertEquals("number", preserving("\\d+", "number").group);
+        assertNull(preserving("\\d+", null).group);
+    }
+
+    /** A new terminator starts active and the flag can be switched off. */
+    @Test
+    public void testActiveFlag() {
+        final Terminator digits = preserving("\\d+", null);
+        assertTrue(digits.active);
+
+        digits.active = false;
+        assertFalse(digits.active);
+    }
+
+    /** {@code toString} mentions the pattern, the strategy, the group and the active flag. */
+    @Test
+    public void testToString() {
+        final Terminator digits = preserving("\\d+", "number");
+        digits.active = true;
+
+        final String description = digits.toString();
+        assertTrue(description.contains("\\d+"));
+        assertTrue(description.contains("PRESERVE"));
+        assertTrue(description.contains("number"));
+        assertTrue(description.contains("true"));
+    }
+
+    /** The pattern must begin exactly at the requested index. */
+    @Test
+    public void testMatchAtDifferentPositions() {
+        final Terminator abc = preserving("abc", null);
+
+        assertTrue(abc.match("abc", 0).find());
+        assertTrue(abc.match("xyzabc", 3).find());
+        assertFalse(abc.match("xyzabc", 0).find());
+        assertFalse(abc.match("xyzabc", 4).find());
+    }
+
+    /** A date pattern is found at the start, at an offset, and not in a partial date. */
+    @Test
+    public void testMatchWithComplexPattern() {
+        final Terminator date = preserving("\\d{4}-\\d{2}-\\d{2}", "date");
+
+        assertTrue(date.match("2023-01-15", 0).find());
+        assertTrue(date.match("date: 2023-01-15", 6).find());
+        assertFalse(date.match("2023", 0).find());
+    }
+
+}
\ No newline at end of file
diff --git a/src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerMatchTest.java b/src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerMatchTest.java
new file mode 100644
index 0000000..2982805
--- /dev/null
+++ b/src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerMatchTest.java
@@ -0,0 +1,128 @@
+/*
+ * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko.
+ * This project is released under Creative Commons Zero (CC0) license.
+ */
+package eu.svjatoslav.commons.string.tokenizer;
+
+import org.junit.Test;
+
+import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
+import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
+import static org.junit.Assert.*;
+
+/**
+ * Unit tests for {@code TokenizerMatch}, exercised through the first token returned by a
+ * {@code Tokenizer}.
+ */
+public class TokenizerMatchTest {
+
+    /** Fetches the next token from the tokenizer and asserts that one was returned. */
+    private static TokenizerMatch firstToken(final Tokenizer tokenizer) throws InvalidSyntaxException {
+        final TokenizerMatch found = tokenizer.getNextToken();
+        assertNotNull(found);
+        return found;
+    }
+
+    /** A plain token carries no terminator and belongs only to the {@code null} group. */
+    @Test
+    public void testIsGroupWithNoTerminator() throws InvalidSyntaxException {
+        final Tokenizer words = new Tokenizer("hello world");
+        words.addTerminator(DROP, "\\s");
+
+        final TokenizerMatch found = firstToken(words);
+        assertNull(found.terminator);
+        assertTrue(found.isGroup(null));
+        assertFalse(found.isGroup("someGroup"));
+    }
+
+    /** A token produced by a terminator without a group belongs only to the {@code null} group. */
+    @Test
+    public void testIsGroupWithTerminatorNoGroup() throws InvalidSyntaxException {
+        final Tokenizer quoted = new Tokenizer("'hello'");
+        quoted.addTerminator(PRESERVE, "'[^']*'");
+
+        final TokenizerMatch found = firstToken(quoted);
+        assertNotNull(found.terminator);
+        assertNull(found.terminator.group);
+        assertTrue(found.isGroup(null));
+        assertFalse(found.isGroup("string"));
+    }
+
+    /** A token produced by a grouped terminator belongs to that group and to no other. */
+    @Test
+    public void testIsGroupWithTerminatorWithGroup() throws InvalidSyntaxException {
+        final Tokenizer quoted = new Tokenizer("'hello'");
+        quoted.addTerminator(PRESERVE, "'[^']*'", "string");
+
+        final TokenizerMatch found = firstToken(quoted);
+        assertNotNull(found.terminator);
+        assertEquals("string", found.terminator.group);
+        assertTrue(found.isGroup("string"));
+        assertFalse(found.isGroup("number"));
+        assertFalse(found.isGroup(null));
+    }
+
+    /** Without a matcher, {@code getRegExpGroups} returns an empty array rather than {@code null}. */
+    @Test
+    public void testGetRegExpGroupsWithNoMatcher() throws InvalidSyntaxException {
+        final Tokenizer words = new Tokenizer("hello");
+        words.addTerminator(DROP, "\\s");
+
+        final TokenizerMatch found = firstToken(words);
+        assertNull(found.matcher);
+
+        final String[] captured = found.getRegExpGroups();
+        assertNotNull(captured);
+        assertEquals(0, captured.length);
+    }
+
+    /** Capture groups of the terminator pattern are returned in order. */
+    @Test
+    public void testGetRegExpGroupsWithGroups() throws InvalidSyntaxException {
+        final Tokenizer pair = new Tokenizer("123 abc");
+        pair.addTerminator(PRESERVE, "(\\d+)\\s+(\\w+)");
+
+        final TokenizerMatch found = firstToken(pair);
+        assertNotNull(found.matcher);
+
+        final String[] captured = found.getRegExpGroups();
+        assertNotNull(captured);
+        assertEquals(2, captured.length);
+        assertEquals("123", captured[0]);
+        assertEquals("abc", captured[1]);
+    }
+
+    /** A terminator pattern without capture groups yields an empty array. */
+    @Test
+    public void testGetRegExpGroupsWithNoGroups() throws InvalidSyntaxException {
+        final Tokenizer word = new Tokenizer("hello");
+        word.addTerminator(PRESERVE, "\\w+");
+
+        final TokenizerMatch found = firstToken(word);
+        assertNotNull(found.matcher);
+
+        final String[] captured = found.getRegExpGroups();
+        assertNotNull(captured);
+        assertEquals(0, captured.length);
+    }
+
+    /** The match refers back to the very tokenizer instance that produced it. */
+    @Test
+    public void testGetTokenizer() throws InvalidSyntaxException {
+        final Tokenizer source = new Tokenizer("hello");
+        source.addTerminator(DROP, "\\s");
+
+        assertSame(source, firstToken(source).getTokenizer());
+    }
+
+    /** The {@code token} field holds the text of the matched token. */
+    @Test
+    public void testTokenField() throws InvalidSyntaxException {
+        final Tokenizer words = new Tokenizer("hello world");
+        words.addTerminator(DROP, "\\s");
+
+        assertEquals("hello", firstToken(words).token);
+    }
+
+    /** {@code toString} includes the matched token text. */
+    @Test
+    public void testToString() throws InvalidSyntaxException {
+        final Tokenizer parenthesised = new Tokenizer("(abc)");
+        parenthesised.addTerminator(PRESERVE, "\\(([^)]+)\\)");
+
+        final String description = firstToken(parenthesised).toString();
+        assertTrue(description.contains("(abc)"));
+    }
+
+}
\ No newline at end of file
diff --git a/src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerTest.java b/src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerTest.java
new file mode
100644 index 0000000..0f202fe --- /dev/null +++ b/src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerTest.java @@ -0,0 +1,240 @@ +/* + * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko. + * This project is released under Creative Commons Zero (CC0) license. + */ +package eu.svjatoslav.commons.string.tokenizer; + +import org.junit.Test; + +import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP; +import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE; +import static org.junit.Assert.*; + +public class TokenizerTest { + + @Test + public void testPeeking() throws Exception { + Tokenizer tokenizer = new Tokenizer("this is a N'2015-03-18 09:48:54.360' test"); + tokenizer.addTerminator(DROP, "\\s"); + tokenizer.addTerminator(PRESERVE, "N'.*'"); + + tokenizer.expectAndConsumeNextStringToken("this"); + + assertEquals("is", tokenizer.peekNextToken().token); + assertEquals("is", tokenizer.peekNextToken().token); + + assertTrue(tokenizer.peekIsOneOf("maybe", "is", "that")); + } + + @Test + public void testTokenization() throws Exception { + Tokenizer tokenizer = new Tokenizer("\"hello world\" /** comment **/ (( is a N'2015-03-18 09:48:54.360' test"); + tokenizer.addTerminator(DROP, "\\s"); + tokenizer.addTerminator(PRESERVE, "\\("); + tokenizer.addTerminator(PRESERVE, "\\\".*\\\""); + tokenizer.addTerminator(PRESERVE, "N'.*'"); + tokenizer.addTerminator(DROP, "/\\*.*\\*/"); + + assertTokenEquals("\"hello world\"", tokenizer); + assertTokenEquals("(", tokenizer); + assertTokenEquals("(", tokenizer); + assertTokenEquals("is", tokenizer); + assertTokenEquals("a", tokenizer); + assertTokenEquals("N'2015-03-18 09:48:54.360'", tokenizer); + assertTokenEquals("test", tokenizer); + + assertNull(tokenizer.getNextToken()); + assertFalse(tokenizer.hasMoreContent()); + } + + @Test + public void testMultilineTokenization() throws Exception { + Tokenizer tokenizer = 
new Tokenizer("* heading\r\nnormal text\r\nnormal text 2"); + tokenizer.addTerminator(PRESERVE, "\\*[ \\t]+.*\\r?\\n"); + tokenizer.addTerminator(DROP, "\\r?\\n", "normalText"); + + assertTokenEquals("* heading\r\n", tokenizer); + assertTokenEquals("normal text", tokenizer); + assertTokenEquals("normal text 2", tokenizer); + + assertNull(tokenizer.getNextToken()); + assertFalse(tokenizer.hasMoreContent()); + } + + @Test + public void testConsumeIfNextTokenMatch() throws Exception { + Tokenizer tokenizer = new Tokenizer("hello world"); + tokenizer.addTerminator(DROP, "\\s"); + + assertTrue(tokenizer.consumeIfNextToken("hello")); + assertFalse(tokenizer.consumeIfNextToken("hello")); + assertTrue(tokenizer.consumeIfNextToken("world")); + assertFalse(tokenizer.hasMoreContent()); + } + + @Test + public void testConsumeIfNextTokenNoMatch() throws Exception { + Tokenizer tokenizer = new Tokenizer("hello world"); + tokenizer.addTerminator(DROP, "\\s"); + + assertFalse(tokenizer.consumeIfNextToken("wrong")); + assertEquals("hello", tokenizer.getNextToken().token); + } + + @Test + public void testUnreadToken() throws Exception { + Tokenizer tokenizer = new Tokenizer("a b c"); + tokenizer.addTerminator(DROP, "\\s"); + + assertEquals("a", tokenizer.getNextToken().token); + assertEquals("b", tokenizer.getNextToken().token); + tokenizer.unreadToken(); + assertEquals("b", tokenizer.getNextToken().token); + tokenizer.unreadToken(); + tokenizer.unreadToken(); + assertEquals("a", tokenizer.getNextToken().token); + } + + @Test + public void testSetSourceReset() throws Exception { + Tokenizer tokenizer = new Tokenizer("first"); + tokenizer.addTerminator(DROP, "\\s"); + + assertEquals("first", tokenizer.getNextToken().token); + assertFalse(tokenizer.hasMoreContent()); + + tokenizer.setSource("second source"); + assertTrue(tokenizer.hasMoreContent()); + assertEquals("second", tokenizer.getNextToken().token); + assertEquals("source", tokenizer.getNextToken().token); + } + + @Test + 
public void testEmptySource() throws Exception { + Tokenizer tokenizer = new Tokenizer(""); + tokenizer.addTerminator(DROP, "\\s"); + + assertFalse(tokenizer.hasMoreContent()); + assertNull(tokenizer.getNextToken()); + } + + @Test + public void testNullSource() { + Tokenizer tokenizer = new Tokenizer(); + tokenizer.addTerminator(DROP, "\\s"); + + assertFalse(tokenizer.hasMoreContent()); + } + + @Test + public void testNullSourceWithSetSource() throws Exception { + Tokenizer tokenizer = new Tokenizer(); + tokenizer.addTerminator(DROP, "\\s"); + + assertFalse(tokenizer.hasMoreContent()); + + tokenizer.setSource("hello"); + assertTrue(tokenizer.hasMoreContent()); + assertEquals("hello", tokenizer.getNextToken().token); + } + + @Test + public void testSkipUntilDataEnd() throws Exception { + Tokenizer tokenizer = new Tokenizer("a b c d e"); + tokenizer.addTerminator(DROP, "\\s"); + + assertEquals("a", tokenizer.getNextToken().token); + tokenizer.skipUntilDataEnd(); + assertFalse(tokenizer.hasMoreContent()); + } + + @Test + public void testExpectAndConsumeNextTerminatorToken() throws Exception { + Tokenizer tokenizer = new Tokenizer("(content)"); + Terminator openParen = tokenizer.addTerminator(PRESERVE, "\\(", "paren"); + Terminator closeParen = tokenizer.addTerminator(PRESERVE, "\\)", "paren"); + + TokenizerMatch match = tokenizer.expectAndConsumeNextTerminatorToken(openParen); + assertEquals("(", match.token); + assertEquals(openParen, match.terminator); + + assertEquals("content", tokenizer.getNextToken().token); + + match = tokenizer.expectAndConsumeNextTerminatorToken(closeParen); + assertEquals(")", match.token); + assertEquals(closeParen, match.terminator); + } + + @Test(expected = InvalidSyntaxException.class) + public void testExpectAndConsumeNextTerminatorTokenWrongTerminator() throws Exception { + Tokenizer tokenizer = new Tokenizer("a b"); + Terminator whitespace = tokenizer.addTerminator(DROP, "\\s"); + tokenizer.addTerminator(PRESERVE, "\\("); + + 
tokenizer.expectAndConsumeNextTerminatorToken(whitespace); + } + + @Test(expected = InvalidSyntaxException.class) + public void testExpectAndConsumeNextStringTokenWrongValue() throws Exception { + Tokenizer tokenizer = new Tokenizer("hello world"); + tokenizer.addTerminator(DROP, "\\s"); + + tokenizer.expectAndConsumeNextStringToken("wrong"); + } + + @Test + public void testPeekExpectNoneOf() throws Exception { + Tokenizer tokenizer = new Tokenizer("hello world"); + tokenizer.addTerminator(DROP, "\\s"); + + tokenizer.peekExpectNoneOf("wrong", "other"); + assertEquals("hello", tokenizer.getNextToken().token); + } + + @Test(expected = InvalidSyntaxException.class) + public void testPeekExpectNoneOfViolation() throws Exception { + Tokenizer tokenizer = new Tokenizer("hello world"); + tokenizer.addTerminator(DROP, "\\s"); + + tokenizer.peekExpectNoneOf("hello", "other"); + } + + @Test + public void testFindTerminatorMatch() throws Exception { + Tokenizer tokenizer = new Tokenizer("(hello)"); + tokenizer.addTerminator(PRESERVE, "\\("); + tokenizer.addTerminator(PRESERVE, "\\)"); + + TokenizerMatch match = tokenizer.findTerminatorMatch(); + assertNotNull(match); + assertEquals("(", match.token); + } + + @Test + public void testFindTerminatorMatchNoMatch() throws Exception { + Tokenizer tokenizer = new Tokenizer("hello"); + tokenizer.addTerminator(PRESERVE, "\\("); + + assertNull(tokenizer.findTerminatorMatch()); + } + + @Test + public void testTerminatorActiveFlag() throws Exception { + Tokenizer tokenizer = new Tokenizer("/* comment */ text"); + Terminator comment = tokenizer.addTerminator(DROP, "/\\*.*\\*/"); + tokenizer.addTerminator(DROP, "\\s"); + + comment.active = false; + assertTokenEquals("/*", tokenizer); + assertTokenEquals("comment", tokenizer); + + tokenizer.setSource("/* comment */ text"); + comment.active = true; + assertTokenEquals("text", tokenizer); + } + + private void assertTokenEquals(String expectedValue, Tokenizer tokenizer) { + 
assertEquals(expectedValue, tokenizer.getNextToken().token); + } + +} \ No newline at end of file