summaryrefslogtreecommitdiff
path: root/lang
blob: 5caebd13260cb35daf5a402d4088d903965daa88 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env python

import sys
import langdetect

# Select the input stream from the first positional argument: "-" reads
# lines from standard input, anything else is opened as a text file path.
if len(sys.argv) < 2:
    # Without this guard a missing argument surfaces as a bare IndexError
    # traceback; exit with a usage line instead (still a non-zero status).
    sys.exit("usage: %s FILE|- [LANG [MIN_PROB [STRICT]]]" % sys.argv[0])

# Remembered so the cleanup at the end of the script knows whether the
# stream was opened here or is the process-wide stdin.
from_stdin = sys.argv[1] == '-'
if from_stdin:
    f = sys.stdin
else:
    f = open(sys.argv[1], 'r')

# Optional positional arguments (each falls back to "not set" when it is
# missing or malformed, rather than aborting the run):
#   argv[2]  language code to prefer, stored in `l`
#   argv[3]  minimum probability, clamped to [0.0, 1.0]; 0 disables it
#   argv[4]  strict flag; any non-blank value turns it on
try:
    # A blank argument is normalised to None; later code only tests
    # truthiness of `l`, so "" and None mean the same thing.
    l = sys.argv[2].strip() or None
except IndexError:
    l = None

try:
    min_p = max(0.0, min(1.0, float(sys.argv[3].strip())))
except (IndexError, ValueError):
    # Argument absent, or not parseable as a float.
    min_p = None
else:
    if min_p == 0.0:
        # A zero threshold filters nothing, so treat it as "no threshold".
        min_p = None

try:
    # NOTE: this is plain string truthiness, so "0" or "false" also enable
    # strict mode; only an absent or blank argument leaves it off.
    strict = bool(sys.argv[4].strip())
except IndexError:
    strict = False

# Strict mode compares the top candidate against min_p, so it cannot apply
# when no threshold was given.
if strict and min_p is None:
    strict = False

# A single factory is built up front and reused for every input line. It
# loads the profiles found in langdetect's PROFILES_DIRECTORY and is given a
# fixed seed (presumably so repeated runs produce the same output).
detector_factory = langdetect.detector_factory
factory = detector_factory.DetectorFactory()
factory.load_profile(detector_factory.PROFILES_DIRECTORY)
factory.set_seed(31337)

# Emit exactly one label per input line: a language code, or "unk" when
# detection fails or (in strict mode) the best guess is below min_p.
for line in f:
    try:
        detector = factory.create()
        detector.append(line.strip())
        ld = detector.get_probabilities()
    except Exception:
        # Best effort: a line the detector cannot handle is reported as
        # unknown instead of aborting the whole run. Unlike a bare
        # `except:`, this still lets Ctrl-C / SystemExit through.
        print("unk")
        continue

    if not ld:
        # No candidate at all; indexing ld[0] below would raise IndexError
        # outside the try block and kill the script mid-stream.
        print("unk")
        continue

    # The preferred language is only looked for when there is more than one
    # candidate. It wins if it appears among the candidates and, when a
    # threshold is set, its probability reaches min_p.
    if l and len(ld) > 1:
        preferred = next((cand for cand in ld if cand.lang == l), None)
        if preferred is not None and (
                min_p is None or preferred.prob >= min_p):
            print(l)
            continue

    # Otherwise fall back to the first candidate (taken as the detector's
    # best guess). Strict mode additionally requires it to reach min_p;
    # strict is only ever True when min_p is set.
    top = ld[0]
    if not strict or top.prob >= min_p:
        print(top.lang)
    else:
        print("unk")

# Close the input only when this script opened it; stdin is left alone.
# The call parentheses matter: a bare `f.close` merely references the
# method and never closes the file.
if not from_stdin:
    f.close()