summary | refs | log | tree | commit | diff
path: root/scripts/geoquery
diff options
context:
space:
mode:
author Patrick Simianer <simianer@cl.uni-heidelberg.de> 2014-03-04 21:41:48 +0100
committer Patrick Simianer <simianer@cl.uni-heidelberg.de> 2014-03-04 21:41:48 +0100
commit 13c9f64b23f2610a233eb1ea778fda05329e9237 (patch)
tree 12cb52303758de9507bf6311a7027ebe44975d67 /scripts/geoquery
parent 38bbcbd52cfd6dd6d7d20148e7fa0a760af90f0a (diff)
data and scripts
Diffstat (limited to 'scripts/geoquery')
-rwxr-xr-x scripts/geoquery/extract.rb 33
-rwxr-xr-x scripts/geoquery/select.rb 20
2 files changed, 53 insertions, 0 deletions
diff --git a/scripts/geoquery/extract.rb b/scripts/geoquery/extract.rb
new file mode 100755
index 0000000..ea6e864
--- /dev/null
+++ b/scripts/geoquery/extract.rb
@@ -0,0 +1,33 @@
+#!/usr/bin/env ruby
+
+require 'nlp_ruby'
+require 'xmlsimple'
+
+
+# For each selected example in the XML file fn, prints the stripped first 'mrl'
+# content when lang is 'funql' or 'geo-prolog', otherwise the content of every
+# 'nl' entry whose 'lang' equals lang. An empty ids list selects all examples.
+# NOTE(review): both MRL langs read mrl[0]; confirm that is the intended entry.
+def extract(fn='./corpus.xml', lang='en', ids)
+  wants_mrl = ['funql', 'geo-prolog'].include?(lang)
+  doc = XmlSimple.xml_in(fn)
+  doc['example'].each do |ex|
+    next unless ids.empty? || ids.include?(ex['id'])
+    if wants_mrl
+      puts ex['mrl'][0]['content'].to_s.strip
+    else
+      matching = ex['nl'].select { |nl| nl['lang'] == lang }
+      matching.each { |nl| puts nl['content'] }
+    end
+  end
+end
+
+# Usage: extract.rb [corpus.xml [lang [ids-file]]]; without an ids file extract gets an empty id list.
+def main
+  ids = ARGV[2] ? ReadFile.readlines_strip(ARGV[2]) : [] # ARGV[2] is nil when the ids file is omitted
+  extract(*ARGV.first(2), ids) # splat: omitted args use extract's defaults instead of an explicit nil
+end
+
+
+main
+
diff --git a/scripts/geoquery/select.rb b/scripts/geoquery/select.rb
new file mode 100755
index 0000000..fbf8d14
--- /dev/null
+++ b/scripts/geoquery/select.rb
@@ -0,0 +1,20 @@
+#!/usr/bin/env ruby
+
+require 'nlp_ruby'
+
+
+# Echoes the STDIN lines whose 0-based index is listed in the file ARGV[0]
+# and not listed in the optional file ARGV[1]; with no ARGV[0] nothing is printed.
+def main
+  read_ids = lambda { |path| path ? ReadFile.readlines_strip(path).map { |s| s.strip.to_i } : [] }
+  wanted = read_ids.call(ARGV[0])
+  unwanted = read_ids.call(ARGV[1])
+  STDIN.each_line.with_index do |line, idx|
+    next if unwanted.include?(idx) || !wanted.include?(idx)
+    puts line
+  end
+end
+
+
+main
+