summary refs log tree commit diff
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2017-12-03 14:51:08 +0100
committer Patrick Simianer <p@simianer.de> 2017-12-03 14:51:08 +0100
commit d6b8bb4a716a7f6220f4dab651b170535db52545 (patch)
tree 030e7abcade1f87f0b5a1f6ce7c3d1bc5022e417
parent ca5af986d1148276b5a69aae10e3bae1b4d9dd04 (diff)
lang
-rwxr-xr-x lang 63
1 files changed, 63 insertions, 0 deletions
diff --git a/lang b/lang
new file mode 100755
index 0000000..b278a1e
--- /dev/null
+++ b/lang
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+"""Detect the language of every line of a text file using langdetect.
+
+Usage: lang <file|-> [lang] [min_prob]
+
+For each input line the script prints the list of candidate languages,
+then a single language code, then '---'.  Lines that cannot be analysed
+produce just 'unk'.  The code printed is `lang` when it is among the
+candidates (and, if a non-zero `min_prob` is given, reaches at least that
+probability); otherwise it is the first candidate in that list.
+"""
+
+import sys
+import langdetect
+
+if len(sys.argv) < 2:
+    # Exit with a usage hint rather than an IndexError traceback.
+    sys.exit("usage: %s <file|-> [lang] [min_prob]" % sys.argv[0])
+
+from_stdin = sys.argv[1] == '-'
+f = sys.stdin if from_stdin else open(sys.argv[1], 'r')
+
+# Optional preferred language code; None when the argument is absent.
+l = sys.argv[2].strip() if len(sys.argv) > 2 else None
+
+# Optional probability threshold, clamped to [0, 1].  A missing,
+# unparsable or zero value means "no threshold" (None).
+try:
+    min_p = max(0.0, min(1.0, float(sys.argv[3].strip()))) or None
+except (IndexError, ValueError):
+    min_p = None
+
+factory = langdetect.detector_factory.DetectorFactory()
+factory.load_profile(langdetect.detector_factory.PROFILES_DIRECTORY)
+factory.set_seed(31337)  # fixed seed, presumably for reproducible results
+
+try:
+    for line in f:
+        try:
+            detector = factory.create()
+            detector.append(line.strip())
+            ld = detector.get_probabilities()
+        except Exception:
+            # Detection failed for this line.  Catching Exception (not a
+            # bare except) lets Ctrl-C and SystemExit stop the script.
+            ld = []
+        if not ld:
+            # No candidates at all: report 'unk' instead of indexing ld[0].
+            print("unk")
+            continue
+        print(ld)
+        choice = ld[0].lang  # default: first candidate in the list
+        if l and len(l) > 1:
+            for cand in ld:
+                # Prefer `l` if listed and, with a threshold, likely enough.
+                if cand.lang == l and (min_p is None or cand.prob >= min_p):
+                    choice = l
+                    break
+        print(choice)
+        print('---')
+finally:
+    if not from_stdin:
+        f.close()