#!/usr/bin/env ruby
# works with gigaword en v5
STDIN.set_encoding 'utf-8'
STDOUT.set_encoding 'utf-8'

# Paragraph delimiters, written lowercase because each input line is
# downcased before being compared against them.
# NOTE(review): the tag text was missing from the literals in the previous
# revision (they contained only a newline, which can never equal a stripped
# line). It is restored here as <p> / </p> based on the `in_p` flag name and
# the Gigaword markup; confirm against the corpus.
PARAGRAPH_OPEN = '<p>'.freeze
PARAGRAPH_CLOSE = '</p>'.freeze

# Copies +input+ to +output+ line by line, collapsing every paragraph into a
# single output line.
#
# Every line is stripped of surrounding whitespace first. A line equal
# (case-insensitively) to the opening tag starts a paragraph; the following
# lines are buffered until a line equal to the closing tag is seen, at which
# point the buffered lines are written as one line joined by single spaces.
# The tag lines themselves are not written, and an empty paragraph produces
# no output. Lines outside a paragraph are written through (stripped).
#
# An opening tag seen while already inside a paragraph is treated as ordinary
# paragraph text, and a closing tag seen outside a paragraph is written
# through unchanged.
#
# @param input [#each_line] source of text lines
# @param output [#puts] destination for the rewritten lines
# @return [void]
def join_paragraphs(input, output)
  in_p = false
  collect = []

  input.each_line do |raw|
    line = raw.strip
    tag = line.downcase

    if !in_p && tag == PARAGRAPH_OPEN
      in_p = true
      collect = []
    elsif in_p && tag == PARAGRAPH_CLOSE
      output.puts collect.join(' ').strip unless collect.empty?
      in_p = false
    elsif in_p
      collect.push line
    else
      output.puts line
    end
  end

  # Input ended inside a paragraph (no closing tag): write what was buffered
  # rather than silently dropping it.
  output.puts collect.join(' ').strip if in_p && !collect.empty?
  nil
end

join_paragraphs(STDIN, STDOUT)