From 13c9f64b23f2610a233eb1ea778fda05329e9237 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Tue, 4 Mar 2014 21:41:48 +0100
Subject: data and scripts

---
 scripts/geoquery/extract.rb | 43 +++++++++++++++++++++++++++++++++++++++++++
 scripts/geoquery/select.rb  | 29 +++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+)
 create mode 100755 scripts/geoquery/extract.rb
 create mode 100755 scripts/geoquery/select.rb

(limited to 'scripts/geoquery')

diff --git a/scripts/geoquery/extract.rb b/scripts/geoquery/extract.rb
new file mode 100755
index 0000000..ea6e864
--- /dev/null
+++ b/scripts/geoquery/extract.rb
@@ -0,0 +1,43 @@
+#!/usr/bin/env ruby
+
+require 'nlp_ruby'
+require 'xmlsimple'
+
+
+# Prints the selected side of every chosen example in the XML corpus.
+#
+# fn   - path of the corpus file, parsed with XmlSimple.xml_in
+# lang - 'funql' or 'geo-prolog' prints the stripped content of the first
+#        'mrl' entry of an example; any other value prints the content of
+#        each 'nl' entry whose 'lang' equals it
+# ids  - example ids (compared against example['id']) to keep; an empty
+#        list keeps every example
+def extract fn='./corpus.xml', lang='en', ids
+  doc = XmlSimple.xml_in(fn)
+  doc['example'].each { |example|
+    next if !ids.empty? && !ids.include?(example['id'])
+    if lang == 'funql' || lang == 'geo-prolog'
+      # NOTE(review): always takes the first 'mrl' entry, whatever its
+      # language; presumably there is exactly one per example -- confirm.
+      puts example['mrl'][0]['content'].to_s.strip
+    else
+      example['nl'].each { |nl|
+        puts nl['content'] if nl['lang'] == lang
+      }
+    end
+  }
+end
+
+# Usage: extract.rb [corpus.xml [lang [ids-file]]]
+#
+# The ids file holds one example id per line and is optional: without it
+# every example is extracted. Omitted corpus/lang arguments fall back to
+# the defaults declared by extract.
+def main
+  ids = ARGV[2] ? ReadFile.readlines_strip(ARGV[2]) : []
+  extract(*ARGV.first(2), ids)
+end
+
+
+main
+
diff --git a/scripts/geoquery/select.rb b/scripts/geoquery/select.rb
new file mode 100755
index 0000000..fbf8d14
--- /dev/null
+++ b/scripts/geoquery/select.rb
@@ -0,0 +1,29 @@
+#!/usr/bin/env ruby
+
+require 'set'
+require 'nlp_ruby'
+
+
+# Reads one integer per line from the file +fn+; returns [] when +fn+ is nil.
+def read_ids(fn)
+  return [] unless fn
+  ReadFile.readlines_strip(fn).map(&:to_i)
+end
+
+# Usage: select.rb <ids-file> [delete-ids-file] < input
+#
+# Echoes those lines of stdin whose 0-based line number is listed in the ids
+# file and not listed in the delete-ids file. Without an ids file nothing is
+# printed.
+def main
+  # A Set makes the per-line membership test O(1) instead of a scan of two
+  # arrays for every input line.
+  keep = Set.new(read_ids(ARGV[0]) - read_ids(ARGV[1]))
+  STDIN.each_line.with_index { |line, i|
+    puts line if keep.include?(i)
+  }
+end
+
+
+main
+
-- 
cgit v1.2.3