blob: ea6e86430ab513ceaf6db617bcb12a4ac8e2cfa2 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
#!/usr/bin/env ruby
require 'nlp_ruby'
require 'xmlsimple'
# Prints selected fields of the examples found in an XML corpus, one per line.
#
# fn   - path handed to XmlSimple.xml_in (defaults to './corpus.xml').
# lang - 'funql' or 'geo-prolog' prints the stripped 'content' of the first
#        'mrl' entry of each example; any other value prints the 'content' of
#        every 'nl' entry whose 'lang' equals it (defaults to 'en').
# ids  - collection of example ids to keep; when it is empty every example
#        is kept.
#
# Returns the list of examples that was iterated.
def extract(fn='./corpus.xml', lang='en', ids)
  wants_mrl = lang == 'funql' || lang == 'geo-prolog'

  XmlSimple.xml_in(fn)['example'].each do |entry|
    # Keep the example when it is listed, or when no filter was given at all.
    next unless ids.include?(entry['id']) || ids.size.zero?

    if wants_mrl
      puts entry['mrl'][0]['content'].to_s.strip
      next
    end

    matching = entry['nl'].select { |sentence| sentence['lang'] == lang }
    matching.each { |sentence| puts sentence['content'] }
  end
end
# Command-line entry point: forwards the script arguments to `extract`.
#
# ARGV[0] - corpus path (optional; `extract` applies its own default when
#           it is not supplied).
# ARGV[1] - language / representation name (optional, same as above).
# ARGV[2] - path of a file read through ReadFile.readlines_strip to obtain
#           the ids to keep (optional). Without it the id list is empty,
#           which `extract` treats as "keep every example".
#           NOTE(review): presumably one id per line - confirm against the
#           ReadFile helper.
def main
  ids_path = ARGV[2]
  # Only read the id file when a path was actually supplied; previously a
  # missing third argument was passed straight to the reader.
  ids = ids_path ? ReadFile.readlines_strip(ids_path) : []

  # Splat only the arguments that were given so that extract's parameter
  # defaults apply to the missing ones instead of explicit nils.
  extract(*ARGV.first(2), ids)
end
# Run the command-line entry point as soon as the file is loaded.
main
|