diff options
author | Patrick Simianer <p@simianer.de> | 2017-12-03 14:51:08 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2017-12-03 14:51:08 +0100 |
commit | d6b8bb4a716a7f6220f4dab651b170535db52545 (patch) | |
tree | 030e7abcade1f87f0b5a1f6ce7c3d1bc5022e417 /lang | |
parent | ca5af986d1148276b5a69aae10e3bae1b4d9dd04 (diff) |
lang
Diffstat (limited to 'lang')
-rwxr-xr-x | lang | 63 |
1 files changed, 63 insertions, 0 deletions
#!/usr/bin/env python
"""Guess the language of every line of a text file using langdetect.

Usage: lang FILE|- [LANG [MIN_PROB]]

FILE      path of the text to read, or '-' to read standard input
LANG      preferred language code; it is reported for a line whenever it
          shows up among that line's candidates
MIN_PROB  probability (clamped to [0, 1]) the preferred language must
          reach to be reported; 0 or an unparsable value disables it

For each input line the candidate list is printed, then the chosen
language code, then a '---' separator.  Lines the detector rejects
produce the single line 'unk'.
"""

import sys

# Fixed seed handed to the detector factory.
SEED = 31337
# Printed when no language can be determined for a line.
UNKNOWN = "unk"


def parse_language(argv):
    """Return the stripped preferred language from argv[2], or None."""
    try:
        return argv[2].strip()
    except IndexError:
        return None


def parse_min_prob(argv):
    """Return the probability threshold from argv[3], or None.

    The value is clamped to [0.0, 1.0].  A missing or non-numeric
    argument, or a threshold of exactly 0.0, yields None.
    """
    try:
        value = float(argv[3].strip())
    except (IndexError, ValueError):
        return None
    clamped = max(0.0, min(1.0, value))
    return clamped or None


def choose_language(candidates, wanted=None, min_prob=None):
    """Pick the language code to report for one line.

    candidates -- objects exposing ``lang`` and ``prob``; the first entry
                  is used as the fallback answer
    wanted     -- preferred language code; ignored unless it is longer
                  than one character
    min_prob   -- if not None, ``wanted`` is only chosen when its
                  candidate has at least this probability

    Returns ``wanted`` when it is among the candidates (and passes the
    threshold), otherwise the first candidate's code, or 'unk' when
    there are no candidates at all.
    """
    if not candidates:
        # Previously an empty list crashed on candidates[0].
        return UNKNOWN
    if wanted and len(wanted) > 1:
        for candidate in candidates:
            if candidate.lang == wanted:
                if min_prob is None or candidate.prob >= min_prob:
                    return wanted
                # Only the first entry for the language is considered.
                break
    return candidates[0].lang


def build_factory():
    """Create a seeded langdetect factory with the bundled profiles."""
    # Imported here so usage errors and the pure helpers above do not
    # require the langdetect profiles to be importable/loaded.
    import langdetect

    factory = langdetect.detector_factory.DetectorFactory()
    factory.load_profile(langdetect.detector_factory.PROFILES_DIRECTORY)
    factory.set_seed(SEED)
    return factory


def label_lines(lines, wanted=None, min_prob=None):
    """Print candidates, chosen language and '---' for each line."""
    factory = build_factory()
    for line in lines:
        try:
            detector = factory.create()
            detector.append(line.strip())
            candidates = detector.get_probabilities()
        except Exception:
            # Best effort: a line the detector cannot handle is reported
            # as unknown, but Ctrl-C / SystemExit still propagate.
            print(UNKNOWN)
            continue
        print(candidates)
        print(choose_language(candidates, wanted, min_prob))
        print('---')


def main(argv=None):
    """Run the command-line tool on ``argv`` (defaults to sys.argv)."""
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.exit("usage: lang FILE|- [LANG [MIN_PROB]]")

    wanted = parse_language(argv)
    min_prob = parse_min_prob(argv)

    if argv[1] == '-':
        label_lines(sys.stdin, wanted, min_prob)
    else:
        # The with-block actually closes the file; the original ended
        # with a bare `f.close`, which never called the method.
        with open(argv[1], 'r') as handle:
            label_lines(handle, wanted, min_prob)


if __name__ == "__main__":
    main()