From 6b6927aafa42fefece54df3c05ebd9161993ef52 Mon Sep 17 00:00:00 2001 From: Svjatoslav Agejenko Date: Thu, 7 Feb 2013 14:40:12 +0200 Subject: [PATCH] Added module to strip byte order mark from UTF text files. --- .../meviz/bomremove/BomStrippingOptions.java | 23 +++ .../meviz/bomremove/CommandlineHandler.java | 65 +++++++++ .../eu/svjatoslav/meviz/bomremove/Main.java | 132 ++++++++++++++++++ .../meviz/encoder/CommandlineHandler.java | 129 +++++++++-------- .../meviz/htmlindexer/CommandlineHandler.java | 67 ++++----- .../meviz/renamer/CommandlineHandler.java | 32 ++--- .../textsplitter/CommandlineHandler.java | 22 ++- 7 files changed, 343 insertions(+), 127 deletions(-) create mode 100755 src/main/java/eu/svjatoslav/meviz/bomremove/BomStrippingOptions.java create mode 100755 src/main/java/eu/svjatoslav/meviz/bomremove/CommandlineHandler.java create mode 100755 src/main/java/eu/svjatoslav/meviz/bomremove/Main.java diff --git a/src/main/java/eu/svjatoslav/meviz/bomremove/BomStrippingOptions.java b/src/main/java/eu/svjatoslav/meviz/bomremove/BomStrippingOptions.java new file mode 100755 index 0000000..a97147d --- /dev/null +++ b/src/main/java/eu/svjatoslav/meviz/bomremove/BomStrippingOptions.java @@ -0,0 +1,23 @@ +/* + * Meviz - Various tools collection to work with multimedia. + * Copyright (C) 2012, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +package eu.svjatoslav.meviz.bomremove; + +import java.io.File; +import java.util.ArrayList; + +public class BomStrippingOptions { + + public ArrayList inputPatterns = new ArrayList(); + + public boolean recursive = false; + + public File targetDirectory; + +} diff --git a/src/main/java/eu/svjatoslav/meviz/bomremove/CommandlineHandler.java b/src/main/java/eu/svjatoslav/meviz/bomremove/CommandlineHandler.java new file mode 100755 index 0000000..3276e13 --- /dev/null +++ b/src/main/java/eu/svjatoslav/meviz/bomremove/CommandlineHandler.java @@ -0,0 +1,65 @@ +/* + * Meviz - Various tools collection to work with multimedia. + * Copyright (C) 2012, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +package eu.svjatoslav.meviz.bomremove; + +import java.io.File; + +import eu.svjatoslav.commons.commandline.parameterparser.Parameter; +import eu.svjatoslav.commons.commandline.parameterparser.Parser; +import eu.svjatoslav.commons.commandline.parameterparser.arguments.ExistingDirectory; +import eu.svjatoslav.commons.commandline.parameterparser.arguments.StringArgument; + +public class CommandlineHandler { + + Parameter recursiveParameter = new Parameter("Enable recursive mode.", + "-r", "--recursive"); + + Parameter inputPatternParameter = new Parameter(true, true, true, + new StringArgument(), "File input pattern.", "-i", + "--input-pattern"); + + Parameter workingDirectoryParameter = new Parameter(false, true, true, + new ExistingDirectory(), "Working directory.", "-w", + "--working-directory"); + + public Parser initParser() { + + final Parser parser = new Parser(); + parser.addParameter(recursiveParameter); + parser.addParameter(inputPatternParameter); + parser.addParameter(workingDirectoryParameter); + + return parser; + } + + public BomStrippingOptions parseCommandlineArguments(final String[] args) { + + final BomStrippingOptions options = new BomStrippingOptions(); + + final Parser parser = initParser(); + if (!parser.parse(args)) + return null; + + if (recursiveParameter.isParameterSpecified()) + options.recursive = true; + + if (workingDirectoryParameter.isParameterSpecified()) + options.targetDirectory = workingDirectoryParameter + .getArgumentsAsFiles().get(0); + else + options.targetDirectory = new File(System.getProperty("user.dir")); + + if (inputPatternParameter.isParameterSpecified()) + options.inputPatterns.addAll(inputPatternParameter + .getArgumentsAsStrings()); + + return options; + } +} diff --git a/src/main/java/eu/svjatoslav/meviz/bomremove/Main.java b/src/main/java/eu/svjatoslav/meviz/bomremove/Main.java new file mode 100755 index 0000000..a377f04 --- /dev/null +++ b/src/main/java/eu/svjatoslav/meviz/bomremove/Main.java @@ -0,0 +1,132 @@ +/* + * Meviz - Various tools collection to work with multimedia. + * Copyright (C) 2012, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +package eu.svjatoslav.meviz.bomremove; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.Arrays; + +import eu.svjatoslav.commons.file.IOHelper; +import eu.svjatoslav.commons.string.WildCardMatcher; +import eu.svjatoslav.meviz.Module; + +public class Main implements Module { + + byte[] bomHeader = new byte[] { (byte) 0xfe, (byte) 0xff }; + + CommandlineHandler commandlineHandler = new CommandlineHandler(); + + BomStrippingOptions options; + + public boolean contains(final byte[] header, final byte[] patternToSeek) { + + for (int i = 0; i < patternToSeek.length; i++) + if (header[i] != patternToSeek[i]) + return false; + + return true; + } + + public boolean fileContainsHeader(final File file) + throws FileNotFoundException, IOException { + + final FileInputStream fileInputStream = new FileInputStream(file); + + final byte[] currentFileHeader = new byte[2]; + fileInputStream.read(currentFileHeader); + fileInputStream.close(); + + return contains(currentFileHeader, bomHeader); + } + + public boolean fileMatchesInputPattern(final File file) { + final String fileName = file.getName().toLowerCase(); + + for (final String inputPattern : options.inputPatterns) + if (WildCardMatcher.match(fileName, inputPattern.toLowerCase())) + return true; + + return false; + } + + @Override + public String getDescription() { + return "Remove byte order mark (bom) from UTF text files of they are present."; + } + + @Override + public String getModuleCommand() { + return "stripbom"; + } + + public void processDirectory(final File directory) { + + for (final File subFile : directory.listFiles()) + if (subFile.isDirectory()) { + if (options.recursive) + processDirectory(subFile); + } else if (fileMatchesInputPattern(subFile)) + try { + processFile(subFile); + } catch (final IOException exception) { + System.out.println("Error processing file: " + + subFile.getAbsolutePath()); + System.out.println(" exception: " + + exception.getMessage()); + } + + } + + public void processFile(final File file) throws IOException { + + if (file.length() < 2) + return; + + if (!fileContainsHeader(file)) + return; + + stripFileFromHeader(file); + } + + @Override + public void run(final String[] args) throws IOException { + + options = commandlineHandler.parseCommandlineArguments(args); + + if (options == null) { + showCommandlineHelp(); + return; + } + + processDirectory(options.targetDirectory); + + } + + @Override + public void showCommandlineHelp() { + commandlineHandler.initParser().showHelp(); + } + + public void stripFileFromHeader(final File file) + throws FileNotFoundException, IOException { + // read entire file + final byte[] fileContents = IOHelper.getFileContents(file); + + // remove BOM header form file + final byte[] newFileContents = Arrays.copyOfRange(fileContents, + bomHeader.length, fileContents.length); + + // overwrite file with new contents + IOHelper.saveToFile(file, newFileContents); + } + +} diff --git a/src/main/java/eu/svjatoslav/meviz/encoder/CommandlineHandler.java b/src/main/java/eu/svjatoslav/meviz/encoder/CommandlineHandler.java index fd9d9e9..fbde5c5 100755 --- a/src/main/java/eu/svjatoslav/meviz/encoder/CommandlineHandler.java +++ b/src/main/java/eu/svjatoslav/meviz/encoder/CommandlineHandler.java @@ -11,94 +11,101 @@ package eu.svjatoslav.meviz.encoder; import eu.svjatoslav.commons.commandline.parameterparser.Parameter; import eu.svjatoslav.commons.commandline.parameterparser.Parser; +import eu.svjatoslav.commons.commandline.parameterparser.arguments.StringArgument; public class CommandlineHandler { - Parameter outputFormatParameter = new Parameter(true, true, true, - new eu.svjatoslav.commons.commandline.parameterparser.arguments.String(), "Encoding output format.", "-o", - "--output-format"); + Parameter outputFormatParameter = new Parameter(true, true, true, + new StringArgument(), "Encoding output format.", "-o", + "--output-format"); - Parameter terminalParameter = new Parameter("Enable popup terminal.", "--terminal"); + Parameter terminalParameter = new Parameter("Enable popup terminal.", + "--terminal"); - Parameter testParameter = new Parameter("Simulate file encoding.", "-t", "--test"); + Parameter testParameter = new Parameter("Simulate file encoding.", "-t", + "--test"); - Parameter recursiveParameter = new Parameter("Enable recursive mode.", "-r", "--recursive"); + Parameter recursiveParameter = new Parameter("Enable recursive mode.", + "-r", "--recursive"); - Parameter inputPatternParameter = new Parameter(true, true, true, - new eu.svjatoslav.commons.commandline.parameterparser.arguments.String(), "File input pattern.", "-i", - "--input-pattern"); + Parameter inputPatternParameter = new Parameter(true, true, true, + new StringArgument(), "File input pattern.", "-i", + "--input-pattern"); - Parameter workingDirectoryParameter = new Parameter(false, true, true, - new eu.svjatoslav.commons.commandline.parameterparser.arguments.ExistingDirectory(), "Working directory.", - "-w", "--working-directory"); + Parameter workingDirectoryParameter = new Parameter( + false, + true, + true, + new eu.svjatoslav.commons.commandline.parameterparser.arguments.ExistingDirectory(), + "Working directory.", "-w", "--working-directory"); - Parameter videoBitrateParameter = new Parameter(false, true, false, new Bitrate(), "Video bitrate.", "-v", - "--video-bitrate"); + Parameter videoBitrateParameter = new Parameter(false, true, false, + new Bitrate(), "Video bitrate.", "-v", "--video-bitrate"); - public Parser initParser() { - final Parser parser = new Parser(); + public Parser initParser() { + final Parser parser = new Parser(); - parser.addParameter(recursiveParameter); + parser.addParameter(recursiveParameter); - parser.addParameter(testParameter); + parser.addParameter(testParameter); - parser.addParameter(terminalParameter); + parser.addParameter(terminalParameter); - parser.addParameter(outputFormatParameter); + parser.addParameter(outputFormatParameter); - parser.addParameter(inputPatternParameter); + parser.addParameter(inputPatternParameter); - parser.addParameter(workingDirectoryParameter); + parser.addParameter(workingDirectoryParameter); - parser.addParameter(videoBitrateParameter); + parser.addParameter(videoBitrateParameter); - return parser; - } + return parser; + } - /** - * @return {@link EncodingOptions} if commandline arguments were - * successfully parsed, or null if parsing error - * occurred. - */ - public EncodingOptions parseCommandlineArguments(final String[] args) { + /** + * @return {@link EncodingOptions} if commandline arguments were + * successfully parsed, or null if parsing error + * occurred. + */ + public EncodingOptions parseCommandlineArguments(final String[] args) { - final EncodingOptions options = new EncodingOptions(); + final EncodingOptions options = new EncodingOptions(); - final Parser parser = initParser(); - parser.parse(args); + final Parser parser = initParser(); + parser.parse(args); - if (recursiveParameter.isParameterSpecified()) - options.recursive = true; + if (recursiveParameter.isParameterSpecified()) + options.recursive = true; - if (terminalParameter.isParameterSpecified()) - options.terminal = true; + if (terminalParameter.isParameterSpecified()) + options.terminal = true; - if (testParameter.isParameterSpecified()) - options.testOnly = true; + if (testParameter.isParameterSpecified()) + options.testOnly = true; - if (outputFormatParameter.isParameterSpecified()) { - options.outputFormats.addAll(outputFormatParameter.getArgumentsAsStrings()); - } + if (outputFormatParameter.isParameterSpecified()) + options.outputFormats.addAll(outputFormatParameter + .getArgumentsAsStrings()); - if (workingDirectoryParameter.isParameterSpecified()) { - options.workingDirectory = workingDirectoryParameter.getArgumentsAsFiles().get(0); - } + if (workingDirectoryParameter.isParameterSpecified()) + options.workingDirectory = workingDirectoryParameter + .getArgumentsAsFiles().get(0); - if (inputPatternParameter.isParameterSpecified()) { - options.inputPatterns.addAll(inputPatternParameter.getArgumentsAsStrings()); - } + if (inputPatternParameter.isParameterSpecified()) + options.inputPatterns.addAll(inputPatternParameter + .getArgumentsAsStrings()); - if (videoBitrateParameter.isParameterSpecified()) { + if (videoBitrateParameter.isParameterSpecified()) + try { + options.videoBitrate = Bitrate.bitrate + .valueOf(videoBitrateParameter.getArgumentAsString() + .toUpperCase()); + } catch (final Exception e) { + System.out + .println("Invalid video bitrate. Valid values are: LOW, MEDIUM, HIGH."); + return null; + } - try { - options.videoBitrate = Bitrate.bitrate.valueOf(videoBitrateParameter.getArgumentAsString() - .toUpperCase()); - } catch (final Exception e) { - System.out.println("Invalid video bitrate. Valid values are: LOW, MEDIUM, HIGH."); - return null; - } - } - - return options; - } + return options; + } } diff --git a/src/main/java/eu/svjatoslav/meviz/htmlindexer/CommandlineHandler.java b/src/main/java/eu/svjatoslav/meviz/htmlindexer/CommandlineHandler.java index 6095673..84bff07 100755 --- a/src/main/java/eu/svjatoslav/meviz/htmlindexer/CommandlineHandler.java +++ b/src/main/java/eu/svjatoslav/meviz/htmlindexer/CommandlineHandler.java @@ -13,51 +13,52 @@ import java.io.File; import eu.svjatoslav.commons.commandline.parameterparser.Parameter; import eu.svjatoslav.commons.commandline.parameterparser.Parser; +import eu.svjatoslav.commons.commandline.parameterparser.arguments.StringArgument; import eu.svjatoslav.meviz.encoder.EncodingOptions; public class CommandlineHandler { - Parameter galleryNameParameter = new Parameter(false, true, false, - new eu.svjatoslav.commons.commandline.parameterparser.arguments.String(), "Gallery title. (default is: " - + Constants.DEFAULT_GALLERY_TITLE + ").", "-t", "--gallery-title"); + Parameter galleryNameParameter = new Parameter(false, true, false, + new StringArgument(), "Gallery title. (default is: " + + Constants.DEFAULT_GALLERY_TITLE + ").", "-t", + "--gallery-title"); - Parameter workingDirectoryParameter = new Parameter(false, true, true, - new eu.svjatoslav.commons.commandline.parameterparser.arguments.String(), "Working directory.", "-w", - "--working-directory"); + Parameter workingDirectoryParameter = new Parameter(false, true, true, + new StringArgument(), "Working directory.", "-w", + "--working-directory"); - public Parser initParser() { + public Parser initParser() { - final Parser parser = new Parser(); - parser.addParameter(galleryNameParameter); - parser.addParameter(workingDirectoryParameter); + final Parser parser = new Parser(); + parser.addParameter(galleryNameParameter); + parser.addParameter(workingDirectoryParameter); - return parser; - } + return parser; + } - /** - * @return {@link EncodingOptions} if commandline arguments were - * successfully parsed, or null if parsing error - * occurred. - */ - public IndexingOptions parseCommandlineArguments(final String[] args) { + /** + * @return {@link EncodingOptions} if commandline arguments were + * successfully parsed, or null if parsing error + * occurred. + */ + public IndexingOptions parseCommandlineArguments(final String[] args) { - final IndexingOptions options = new IndexingOptions(); + final IndexingOptions options = new IndexingOptions(); - final Parser parser = initParser(); - if (!parser.parse(args)) { - return null; - } + final Parser parser = initParser(); + if (!parser.parse(args)) + return null; - if (galleryNameParameter.isParameterSpecified()) { - options.galleryTitle = galleryNameParameter.getArgumentsAsStrings().get(0); - } + if (galleryNameParameter.isParameterSpecified()) + options.galleryTitle = galleryNameParameter.getArgumentsAsStrings() + .get(0); - if (workingDirectoryParameter.isParameterSpecified()) { - options.workingDirectory = workingDirectoryParameter.getArgumentsAsFiles().get(0); - } else { - options.workingDirectory = new File(System.getProperty("user.dir")); - } + if (workingDirectoryParameter.isParameterSpecified()) + options.workingDirectory = workingDirectoryParameter + .getArgumentsAsFiles().get(0); + else + options.workingDirectory = new File(System.getProperty("user.dir")); - return options; - } + return options; + } } diff --git a/src/main/java/eu/svjatoslav/meviz/renamer/CommandlineHandler.java b/src/main/java/eu/svjatoslav/meviz/renamer/CommandlineHandler.java index 1c559c7..051aaa8 100755 --- a/src/main/java/eu/svjatoslav/meviz/renamer/CommandlineHandler.java +++ b/src/main/java/eu/svjatoslav/meviz/renamer/CommandlineHandler.java @@ -13,6 +13,8 @@ import java.io.File; import eu.svjatoslav.commons.commandline.parameterparser.Parameter; import eu.svjatoslav.commons.commandline.parameterparser.Parser; +import eu.svjatoslav.commons.commandline.parameterparser.arguments.ExistingDirectory; +import eu.svjatoslav.commons.commandline.parameterparser.arguments.StringArgument; import eu.svjatoslav.meviz.encoder.EncodingOptions; public class CommandlineHandler { @@ -24,26 +26,16 @@ public class CommandlineHandler { "Simulate renaming (no changes will be actually done).", "-t", "--test"); - Parameter outputPatternParameter = new Parameter( - true, - true, - true, - new eu.svjatoslav.commons.commandline.parameterparser.arguments.String(), - "Output pattern.", "-o", "--output-pattern"); - - Parameter inputPatternParameter = new Parameter( - true, - true, - true, - new eu.svjatoslav.commons.commandline.parameterparser.arguments.String(), - "File input pattern.", "-i", "--input-pattern"); - - Parameter workingDirectoryParameter = new Parameter( - false, - true, - true, - new eu.svjatoslav.commons.commandline.parameterparser.arguments.ExistingDirectory(), - "Working directory.", "-w", "--working-directory"); + Parameter outputPatternParameter = new Parameter(true, true, true, + new StringArgument(), "Output pattern.", "-o", "--output-pattern"); + + Parameter inputPatternParameter = new Parameter(true, true, true, + new StringArgument(), "File input pattern.", "-i", + "--input-pattern"); + + Parameter workingDirectoryParameter = new Parameter(false, true, true, + new ExistingDirectory(), "Working directory.", "-w", + "--working-directory"); public Parser initParser() { diff --git a/src/main/java/eu/svjatoslav/meviz/textsplitter/CommandlineHandler.java b/src/main/java/eu/svjatoslav/meviz/textsplitter/CommandlineHandler.java index 1364a13..15be77c 100755 --- a/src/main/java/eu/svjatoslav/meviz/textsplitter/CommandlineHandler.java +++ b/src/main/java/eu/svjatoslav/meviz/textsplitter/CommandlineHandler.java @@ -13,25 +13,21 @@ import java.io.File; import eu.svjatoslav.commons.commandline.parameterparser.Parameter; import eu.svjatoslav.commons.commandline.parameterparser.Parser; +import eu.svjatoslav.commons.commandline.parameterparser.arguments.ExistingDirectory; +import eu.svjatoslav.commons.commandline.parameterparser.arguments.StringArgument; public class CommandlineHandler { Parameter recursiveParameter = new Parameter("Enable recursive mode.", "-r", "--recursive"); - Parameter inputPatternParameter = new Parameter( - true, - true, - true, - new eu.svjatoslav.commons.commandline.parameterparser.arguments.String(), - "File input pattern.", "-i", "--input-pattern"); - - Parameter workingDirectoryParameter = new Parameter( - false, - true, - true, - new eu.svjatoslav.commons.commandline.parameterparser.arguments.ExistingDirectory(), - "Working directory.", "-w", "--working-directory"); + Parameter inputPatternParameter = new Parameter(true, true, true, + new StringArgument(), "File input pattern.", "-i", + "--input-pattern"); + + Parameter workingDirectoryParameter = new Parameter(false, true, true, + new ExistingDirectory(), "Working directory.", "-w", + "--working-directory"); public Parser initParser() { -- 2.20.1