summary | refs | log | tree | commit | diff
path: root/scripts/geoquery/extract.rb
blob: ea6e86430ab513ceaf6db617bcb12a4ac8e2cfa2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#!/usr/bin/env ruby

require 'nlp_ruby'
require 'xmlsimple'


# Prints one representation of every selected corpus example to stdout.
#
# fn   - location handed to XmlSimple.xml_in.
# lang - 'funql' or 'geo-prolog' prints the example's meaning
#        representation; any other value prints the content of each 'nl'
#        entry whose 'lang' equals it.
# ids  - example ids to keep; an empty collection keeps every example.
#
# Returns the list of parsed examples (the receiver of the outer each).
def extract(fn='./corpus.xml', lang='en', ids)
  wants_mrl = %w[funql geo-prolog].include?(lang)

  XmlSimple.xml_in(fn)['example'].each do |entry|
    # Keep the example when no filter was given or its id is listed.
    next unless ids.empty? || ids.include?(entry['id'])

    if wants_mrl
      # NOTE(review): the first 'mrl' entry is printed for both 'funql' and
      # 'geo-prolog'; confirm that this is intended for the corpus in use.
      puts entry['mrl'][0]['content'].to_s.strip
    else
      entry['nl'].each do |sentence|
        puts sentence['content'] if sentence['lang'] == lang
      end
    end
  end
end

# Command-line entry point.
#
# ARGV[0] - corpus file (optional)
# ARGV[1] - language / representation to print (optional)
# ARGV[2] - file listing the example ids to keep (optional); without it
#           no id filter is applied.
#
# Returns whatever extract returns.
def main
  # Only touch the ids file when one was actually supplied; an empty list
  # tells extract to keep every example.
  ids = ARGV[2] ? ReadFile.readlines_strip(ARGV[2]) : []

  # Forward only the positional arguments that are present so that
  # extract's own defaults apply to the missing ones instead of nil.
  extract(*ARGV.first(2), ids)
end


# Invoked unconditionally at top level, so the extraction runs as soon as
# this file is executed (or loaded).
main