From 22b81757a97f89e6b3415782c0556ffaf4625baa Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Tue, 22 Jul 2014 16:22:56 +0200
Subject: collapse_tags.rb
---
collapse_tags.rb | 40 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 40 insertions(+)
create mode 100755 collapse_tags.rb
diff --git a/collapse_tags.rb b/collapse_tags.rb
new file mode 100755
index 0000000..75fcaf5
--- /dev/null
+++ b/collapse_tags.rb
@@ -0,0 +1,40 @@
+#!/usr/bin/env ruby
+
+# works with gigaword en v5
+
+STDIN.set_encoding 'utf-8'
+STDOUT.set_encoding 'utf-8'
+
+
+in_p = false
+in_dateline = false
+collect = []
+
+while line = STDIN.gets
+ line.strip!
+ if line.downcase == "
" and not in_p + in_p = true + collect = [] + next + elsif line.downcase == "
" and in_p + if collect.size > 0 + puts collect.join(" ").strip + end + in_p = false + next + elsif in_p + collect.push line + next + else + puts line + end +end + -- cgit v1.2.3