added OCR capability
author Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>
Wed, 10 Jun 2015 20:12:36 +0000 (23:12 +0300)
committer Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>
Wed, 10 Jun 2015 20:12:36 +0000 (23:12 +0300)
src/main/java/eu/svjatoslav/meviz/encoder/FormatsRegistry.java
src/main/java/eu/svjatoslav/meviz/encoder/converters/Ocr.java [new file with mode: 0644]

index d023f02..6b8d129 100755 (executable)
@@ -19,6 +19,7 @@ import eu.svjatoslav.meviz.encoder.converters.Convert;
 import eu.svjatoslav.meviz.encoder.converters.Ffmpeg2theora;
 import eu.svjatoslav.meviz.encoder.converters.Flac;
 import eu.svjatoslav.meviz.encoder.converters.Midi2Wav;
+import eu.svjatoslav.meviz.encoder.converters.Ocr;
 import eu.svjatoslav.meviz.encoder.converters.Ogg2Wav;
 
 public class FormatsRegistry {
@@ -33,6 +34,9 @@ public class FormatsRegistry {
                // image conversion
                registerEncoder(new Convert());
 
+               // image to text (OCR)
+               registerEncoder(new Ocr());
+
                // audio conversion
                registerEncoder(new Ogg2Wav());
                registerEncoder(new Flac());
diff --git a/src/main/java/eu/svjatoslav/meviz/encoder/converters/Ocr.java b/src/main/java/eu/svjatoslav/meviz/encoder/converters/Ocr.java
new file mode 100644 (file)
index 0000000..0c20018
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Meviz - Various tools collection to work with multimedia.
+ * Copyright (C) 2012, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+package eu.svjatoslav.meviz.encoder.converters;
+
+import java.io.File;
+import java.util.List;
+
+import eu.svjatoslav.meviz.encoder.EncodingOptions;
+
+/**
+ * Converter that turns raster images into plain text by building a command
+ * line for the external {@code tesseract} OCR tool.
+ */
+public class Ocr extends AbstractConverter {
+
+       /** Suffix that tesseract appends to the output base name by itself. */
+       private static final String TESSERACT_SUFFIX = ".txt";
+
+       /**
+        * Builds the tesseract command line for a single image.
+        *
+        * @param inputFile
+        *            image file to run the recognition on
+        * @param targetFile
+        *            text file that shall receive the recognized text
+        * @param options
+        *            not used by this converter
+        * @param targetFormat
+        *            not used by this converter
+        * @return command line with the input path and the output base name,
+        *         each wrapped in double quotes
+        */
+       @Override
+       public String getCommand(final File inputFile, final File targetFile,
+                       final EncodingOptions options, String targetFormat) {
+
+               // tesseract always appends the txt suffix to the output name on its
+               // own, so it must be given the target path without that suffix.
+               final String targetAbsolutePath = targetFile.getAbsolutePath();
+               final int suffixStart = targetAbsolutePath.length()
+                               - TESSERACT_SUFFIX.length();
+
+               // Strip the suffix only when it is really present (in any letter
+               // case). regionMatches returns false for a negative offset, so a
+               // path shorter than the suffix is passed through unchanged instead
+               // of raising StringIndexOutOfBoundsException.
+               final boolean hasSuffix = targetAbsolutePath.regionMatches(true,
+                               suffixStart, TESSERACT_SUFFIX, 0,
+                               TESSERACT_SUFFIX.length());
+
+               final String outputBase = hasSuffix ? targetAbsolutePath.substring(
+                               0, suffixStart) : targetAbsolutePath;
+
+               return "tesseract \"" + inputFile.getAbsolutePath() + "\" \""
+                               + outputBase + "\"";
+       }
+
+       /**
+        * @return file extensions of the image formats accepted as OCR input
+        */
+       @Override
+       public List<String> getSourceFileExtensions() {
+               return toList("tif", "tiff", "png", "jpg", "jpeg");
+       }
+
+       /**
+        * @return file extensions this converter can produce; plain text only
+        */
+       @Override
+       public List<String> getTargetFileExtensions() {
+               return toList("txt");
+       }
+
+       /**
+        * @return always {@code false}
+        */
+       @Override
+       public boolean isTerminalMandatory() {
+               return false;
+       }
+
+}