#!/usr/bin/env ruby

# Provenance: scripts/geoquery/extract.rb, added by commit
# 13c9f64b23f2610a233eb1ea778fda05329e9237 ("data and scripts",
# Patrick Simianer, Tue, 4 Mar 2014 21:41:48 +0100).
#
# Usage: extract.rb [corpus.xml [lang [ids-file]]]

require 'nlp_ruby'
require 'xmlsimple'

# Prints, one per line, the text of every selected example in an XML corpus.
#
# The file is parsed with XmlSimple and every entry under the 'example' key
# is visited in document order.
#
# @param fn   path of the XML corpus to read (default './corpus.xml')
# @param lang which representation to print (default 'en'):
#             'funql' or 'geo-prolog' print the stripped content of the
#             first 'mrl' entry of each example; any other value prints the
#             content of every 'nl' entry whose 'lang' attribute equals it
# @param ids  collection of example ids to keep; when empty, every example
#             is printed. Presumably an Array of Strings matching the 'id'
#             attribute as parsed by XmlSimple -- verify against the caller.
# @return whatever the outer `each` returns; callers use this for its output
def extract(fn = './corpus.xml', lang = 'en', ids)
  # Both checks are loop-invariant, so evaluate them once up front.
  filter_by_id = !ids.empty?
  wants_mrl = %w[funql geo-prolog].include?(lang)

  XmlSimple.xml_in(fn)['example'].each do |example|
    next if filter_by_id && !ids.include?(example['id'])

    if wants_mrl
      # NOTE(review): both 'funql' and 'geo-prolog' print the FIRST 'mrl'
      # entry without looking at its language -- confirm this is intended.
      puts example['mrl'][0]['content'].to_s.strip
    else
      example['nl'].each do |nl|
        puts nl['content'] if nl['lang'] == lang
      end
    end
  end
end

# Command-line entry point: reads up to three positional arguments (corpus
# path, language, ids file) and runs `extract`.
#
# Arguments that were not given are left out of the call instead of being
# passed as nil, so the defaults declared on `extract` actually apply. When
# no ids file is given, the id filter is empty (print everything).
def main
  corpus_fn, lang, ids_fn = ARGV
  ids = ids_fn ? ReadFile.readlines_strip(ids_fn) : []
  # ARGV is positional, so `lang` can only be present when `corpus_fn` is;
  # compact therefore never shifts a language into the file-name slot.
  extract(*[corpus_fn, lang].compact, ids)
end

main