added OCR capability

[meviz.git] / src / main / java / eu / svjatoslav / meviz / encoder / converters / Ocr.java
diff --git a/src/main/java/eu/svjatoslav/meviz/encoder/converters/Ocr.java b/src/main/java/eu/svjatoslav/meviz/encoder/converters/Ocr.java

new file mode 100644 (file)

index 0000000..0c20018
--- /dev/null
+++ b/src/main/java/eu/svjatoslav/meviz/encoder/converters/Ocr.java
@@ -0,0 +1,48 @@
+/*
+ * Meviz - Various tools collection to work with multimedia.
+ * Copyright (C) 2012, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+package eu.svjatoslav.meviz.encoder.converters;
+
+import java.io.File;
+import java.util.List;
+
+import eu.svjatoslav.meviz.encoder.EncodingOptions;
+
+/** Converter that assembles a tesseract command line writing a txt file. */
+public class Ocr extends AbstractConverter {
+       /** Builds the tesseract call; options and targetFormat are not read. */
+       @Override
+       public String getCommand(final File inputFile, final File targetFile,
+                       final EncodingOptions options, String targetFormat) {
+               // tesseract appends a txt suffix to its output argument on its own,
+               // so the target path is passed without its last four characters
+               // (presumably the ".txt" ending of the target file).
+               final String fullTarget = targetFile.getAbsolutePath();
+               final int baseLength = fullTarget.length() - 4;
+               final String outputBase = fullTarget.substring(0, baseLength);
+
+               final StringBuilder command = new StringBuilder("tesseract \"");
+               command.append(inputFile.getAbsolutePath()).append("\" \"");
+               command.append(outputBase).append("\"");
+               return command.toString();
+       }
+       /** Returns the source file extensions declared by this converter. */
+       @Override
+       public List<String> getSourceFileExtensions() {
+               return toList("tif", "tiff", "png", "jpg", "jpeg");
+       }
+       /** Returns the single target file extension, txt. */
+       @Override
+       public List<String> getTargetFileExtensions() {
+               return toList("txt");
+       }
+       /** Always reports false. */
+       @Override
+       public boolean isTerminalMandatory() { return false; }
+}