summaryrefslogtreecommitdiff
path: root/lang
blob: b278a1e5f0f667717a198ce0e519f013048e0a0a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env python

import sys
import langdetect

def parse_options(argv):
    """Extract the optional language preference and threshold from *argv*.

    argv[2] (optional) is the preferred language code; surrounding
    whitespace is stripped and an empty value counts as "no preference".

    argv[3] (optional) is the minimum probability the preferred language
    must reach.  It is clamped to the range [0.0, 1.0]; a missing,
    unparsable or zero value disables the threshold.

    Returns a ``(preferred, min_p)`` tuple in which either element may be
    None.
    """
    preferred = None
    if len(argv) > 2:
        preferred = argv[2].strip() or None

    min_p = None
    if len(argv) > 3:
        try:
            min_p = max(0.0, min(1.0, float(argv[3].strip())))
        except ValueError:
            min_p = None
    # A threshold of zero filters nothing, so treat it as "not set".
    if min_p == 0.0:
        min_p = None

    return preferred, min_p


def select_language(candidates, preferred=None, min_p=None):
    """Choose the language code to report for one line.

    *candidates* is a sequence of objects exposing ``.lang`` and ``.prob``;
    the first element is used as the fallback answer.

    If *preferred* is at least two characters long and occurs among the
    candidates (with probability >= *min_p* when a threshold is given),
    *preferred* is returned.  Otherwise the first candidate's language is
    returned.  An empty candidate list yields "unk" instead of raising
    IndexError.  The threshold is only consulted when a preferred language
    is given.
    """
    if not candidates:
        return "unk"

    if preferred and len(preferred) > 1:
        for cand in candidates:
            if cand.lang == preferred and (min_p is None or cand.prob >= min_p):
                return preferred

    return candidates[0].lang


def main(argv):
    """Run language detection on every line of the input.

    Usage: ``lang FILE|- [LANG [MIN_PROB]]`` -- "-" reads from stdin.

    For each line that can be analysed, three lines are printed: the list
    of candidates, the selected language code, and a "---" separator.  For
    a line on which detection raises, only "unk" is printed.

    Returns the process exit status (0 on success, 2 on a usage error).
    """
    if len(argv) < 2:
        prog = argv[0] if argv else "lang"
        sys.stderr.write("usage: %s FILE|- [LANG [MIN_PROB]]\n" % prog)
        return 2

    preferred, min_p = parse_options(argv)

    from_stdin = argv[1] == '-'
    f = sys.stdin if from_stdin else open(argv[1], 'r')
    try:
        factory = langdetect.detector_factory.DetectorFactory()
        factory.load_profile(langdetect.detector_factory.PROFILES_DIRECTORY)
        # Fixed seed -- presumably to make detection results reproducible
        # between runs; confirm against the langdetect documentation.
        factory.set_seed(31337)

        for line in f:
            try:
                detector = factory.create()
                detector.append(line.strip())
                ld = detector.get_probabilities()
            except Exception:
                # Best effort: a line the detector cannot handle is reported
                # as unknown.  KeyboardInterrupt/SystemExit still propagate.
                print("unk")
                continue

            print(ld)
            print(select_language(ld, preferred, min_p))
            print('---')
    finally:
        # Only close what this script opened; stdin is left alone.
        if not from_stdin:
            f.close()

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))