From ee31fdc650c6f9df91ec2331b24c289cb03b466b Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Sun, 20 Dec 2020 13:59:25 +0100 Subject: [PATCH] removed unused code --- src/paperless_tesseract/languages.py | 194 --------------------------- src/paperless_text/parsers.py | 12 -- 2 files changed, 206 deletions(-) delete mode 100644 src/paperless_tesseract/languages.py diff --git a/src/paperless_tesseract/languages.py b/src/paperless_tesseract/languages.py deleted file mode 100644 index 5ea560654..000000000 --- a/src/paperless_tesseract/languages.py +++ /dev/null @@ -1,194 +0,0 @@ -# Thanks to the Library of Congress and some creative use of sed and awk: -# http://www.loc.gov/standards/iso639-2/php/English_list.php - -ISO639 = { - - "aa": "aar", - "ab": "abk", - "ae": "ave", - "af": "afr", - "ak": "aka", - "am": "amh", - "an": "arg", - "ar": "ara", - "as": "asm", - "av": "ava", - "ay": "aym", - "az": "aze", - "ba": "bak", - "be": "bel", - "bg": "bul", - "bh": "bih", - "bi": "bis", - "bm": "bam", - "bn": "ben", - "bo": "bod", - "br": "bre", - "bs": "bos", - "ca": "cat", - "ce": "che", - "ch": "cha", - "co": "cos", - "cr": "cre", - "cs": "ces", - "cu": "chu", - "cv": "chv", - "cy": "cym", - "da": "dan", - "de": "deu", - "dv": "div", - "dz": "dzo", - "ee": "ewe", - "el": "ell", - "en": "eng", - "eo": "epo", - "es": "spa", - "et": "est", - "eu": "eus", - "fa": "fas", - "ff": "ful", - "fi": "fin", - "fj": "fij", - "fo": "fao", - "fr": "fra", - "fy": "fry", - "ga": "gle", - "gd": "gla", - "gl": "glg", - "gn": "grn", - "gu": "guj", - "gv": "glv", - "ha": "hau", - "he": "heb", - "hi": "hin", - "ho": "hmo", - "hr": "hrv", - "ht": "hat", - "hu": "hun", - "hy": "hye", - "hz": "her", - "ia": "ina", - "id": "ind", - "ie": "ile", - "ig": "ibo", - "ii": "iii", - "ik": "ipk", - "io": "ido", - "is": "isl", - "it": "ita", - "iu": "iku", - "ja": "jpn", - "jv": "jav", - "ka": "kat", - "kg": "kon", - "ki": "kik", - "kj": "kua", - "kk": "kaz", - "kl": "kal", - "km": "khm", - "kn": "kan", - "ko": "kor", - "kr": "kau", - "ks": "kas", - "ku": "kur", - "kv": "kom", - "kw": "cor", - "ky": "kir", - "la": "lat", - "lb": "ltz", - "lg": "lug", - "li": "lim", - "ln": "lin", - "lo": "lao", - "lt": "lit", - "lu": "lub", - "lv": "lav", - "mg": "mlg", - "mh": "mah", - "mi": "mri", - "mk": "mkd", - "ml": "mal", - "mn": "mon", - "mr": "mar", - "ms": "msa", - "mt": "mlt", - "my": "mya", - "na": "nau", - "nb": "nob", - "nd": "nde", - "ne": "nep", - "ng": "ndo", - "nl": "nld", - "no": "nor", - "nr": "nbl", - "nv": "nav", - "ny": "nya", - "oc": "oci", - "oj": "oji", - "om": "orm", - "or": "ori", - "os": "oss", - "pa": "pan", - "pi": "pli", - "pl": "pol", - "ps": "pus", - "pt": "por", - "qu": "que", - "rm": "roh", - "rn": "run", - "ro": "ron", - "ru": "rus", - "rw": "kin", - "sa": "san", - "sc": "srd", - "sd": "snd", - "se": "sme", - "sg": "sag", - "si": "sin", - "sk": "slk", - "sl": "slv", - "sm": "smo", - "sn": "sna", - "so": "som", - "sq": "sqi", - "sr": "srp", - "ss": "ssw", - "st": "sot", - "su": "sun", - "sv": "swe", - "sw": "swa", - "ta": "tam", - "te": "tel", - "tg": "tgk", - "th": "tha", - "ti": "tir", - "tk": "tuk", - "tl": "tgl", - "tn": "tsn", - "to": "ton", - "tr": "tur", - "ts": "tso", - "tt": "tat", - "tw": "twi", - "ty": "tah", - "ug": "uig", - "uk": "ukr", - "ur": "urd", - "uz": "uzb", - "ve": "ven", - "vi": "vie", - "vo": "vol", - "wa": "wln", - "wo": "wol", - "xh": "xho", - "yi": "yid", - "yo": "yor", - "za": "zha", - - # Tessdata contains two values for Chinese, "chi_sim" and "chi_tra". I - # have no idea which one is better, so I just picked the bigger file. - "zh": "chi_tra", - - "zu": "zul" - -} diff --git a/src/paperless_text/parsers.py b/src/paperless_text/parsers.py index 7e488ca37..030c2c2c2 100644 --- a/src/paperless_text/parsers.py +++ b/src/paperless_text/parsers.py @@ -35,15 +35,3 @@ class TextDocumentParser(DocumentParser): def parse(self, document_path, mime_type): with open(document_path, 'r') as f: self.text = f.read() - - -def run_command(*args): - environment = os.environ.copy() - if settings.CONVERT_MEMORY_LIMIT: - environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT - if settings.CONVERT_TMPDIR: - environment["MAGICK_TMPDIR"] = settings.CONVERT_TMPDIR - - if not subprocess.Popen(' '.join(args), env=environment, - shell=True).wait() == 0: - raise ParseError("Convert failed at {}".format(args))