diff options
author | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2014-03-04 21:41:48 +0100 |
---|---|---|
committer | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2014-03-04 21:41:48 +0100 |
commit | 13c9f64b23f2610a233eb1ea778fda05329e9237 (patch) | |
tree | 12cb52303758de9507bf6311a7027ebe44975d67 /scripts | |
parent | 38bbcbd52cfd6dd6d7d20148e7fa0a760af90f0a (diff) |
data and scripts
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/geoquery/extract.rb | 33 | ||||
-rwxr-xr-x | scripts/geoquery/select.rb | 20 |
2 files changed, 53 insertions, 0 deletions
```diff
diff --git a/scripts/geoquery/extract.rb b/scripts/geoquery/extract.rb
new file mode 100755
index 0000000..ea6e864
--- /dev/null
+++ b/scripts/geoquery/extract.rb
@@ -0,0 +1,33 @@
+#!/usr/bin/env ruby
+
+require 'nlp_ruby'
+require 'xmlsimple'
+
+
+def extract fn='./corpus.xml', lang='en', ids
+  doc = XmlSimple.xml_in(fn)
+  doc['example'].each { |example|
+    next if (!ids.include? example['id']) && ids.size>0
+    if lang == 'funql' || lang == 'geo-prolog'
+      puts example['mrl'][0]['content'].to_s.strip
+    else
+      example['nl'].each { |nl|
+        if nl['lang'] == lang
+          puts nl['content']
+        else
+          next
+        end
+      }
+    end
+  }
+end
+
+def main
+  ids = []
+  ids = ReadFile.readlines_strip ARGV[2]
+  extract ARGV[0], ARGV[1], ids
+end
+
+
+main
+
diff --git a/scripts/geoquery/select.rb b/scripts/geoquery/select.rb
new file mode 100755
index 0000000..fbf8d14
--- /dev/null
+++ b/scripts/geoquery/select.rb
@@ -0,0 +1,20 @@
+#!/usr/bin/env ruby
+
+require 'nlp_ruby'
+
+
+def main
+  ids = []
+  ids = ReadFile.readlines_strip(ARGV[0]).map{ |i| i.strip.to_i } if ARGV[0]
+  delete_ids = []
+  delete_ids = ReadFile.readlines_strip(ARGV[1]).map{ |i| i.strip.to_i } if ARGV[1]
+  i = 0
+  while line = STDIN.gets
+    puts line if ids.include?(i)&&!delete_ids.include?(i)
+    i += 1
+  end
+end
+
+
+main
+
```