import eu.svjatoslav.meviz.encoder.converters.Ffmpeg2theora;
import eu.svjatoslav.meviz.encoder.converters.Flac;
import eu.svjatoslav.meviz.encoder.converters.Midi2Wav;
+import eu.svjatoslav.meviz.encoder.converters.Ocr;
import eu.svjatoslav.meviz.encoder.converters.Ogg2Wav;
public class FormatsRegistry {
// image conversion
registerEncoder(new Convert());
+ // image to text (OCR)
+ registerEncoder(new Ocr());
+
// audio conversion
registerEncoder(new Ogg2Wav());
registerEncoder(new Flac());
--- /dev/null
+/*
+ * Meviz - Various tools collection to work with multimedia.
+ * Copyright (C) 2012, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+package eu.svjatoslav.meviz.encoder.converters;
+
+import java.io.File;
+import java.util.List;
+
+import eu.svjatoslav.meviz.encoder.EncodingOptions;
+
+/**
+ * Converter that produces a plain text file from an image by invoking the
+ * external {@code tesseract} command line tool.
+ */
+public class Ocr extends AbstractConverter {
+
+    /**
+     * Suffix that tesseract appends to the output base name by itself; it
+     * must therefore not be part of the name passed on the command line.
+     */
+    private static final String AUTO_SUFFIX = ".txt";
+
+    /**
+     * Builds the tesseract command line for a single image.
+     *
+     * @param inputFile
+     *            image to run text recognition on
+     * @param targetFile
+     *            desired text file; a trailing {@code .txt} is removed from
+     *            its absolute path because tesseract adds it back
+     * @param options
+     *            not used by this converter
+     * @param targetFormat
+     *            not used by this converter
+     * @return command of the form {@code tesseract "<input>" "<output base>"}
+     */
+    @Override
+    public String getCommand(final File inputFile, final File targetFile,
+            final EncodingOptions options, final String targetFormat) {
+
+        final String targetPath = targetFile.getAbsolutePath();
+        final int baseLength = targetPath.length() - AUTO_SUFFIX.length();
+
+        // Strip the suffix only when it is really there. regionMatches
+        // returns false for a negative offset, so paths shorter than the
+        // suffix are left untouched instead of raising an exception.
+        final boolean endsWithSuffix = targetPath.regionMatches(true,
+                baseLength, AUTO_SUFFIX, 0, AUTO_SUFFIX.length());
+        final String outputBase = endsWithSuffix
+                ? targetPath.substring(0, baseLength)
+                : targetPath;
+
+        // NOTE(review): paths are only wrapped in double quotes; a path that
+        // itself contains a double quote would break the command - confirm
+        // how the caller executes this string.
+        return "tesseract \"" + inputFile.getAbsolutePath() + "\" \""
+                + outputBase + "\"";
+    }
+
+    /**
+     * @return image file extensions accepted as input
+     */
+    @Override
+    public List<String> getSourceFileExtensions() {
+        return toList("tif", "tiff", "png", "jpg", "jpeg");
+    }
+
+    /**
+     * @return the single supported output extension, {@code txt}
+     */
+    @Override
+    public List<String> getTargetFileExtensions() {
+        return toList("txt");
+    }
+
+    /**
+     * @return always {@code false}
+     */
+    @Override
+    public boolean isTerminalMandatory() {
+        return false;
+    }
+
+}