From 2b1d7f881c19c4d4b5afae194e02d3300c7675d0 Mon Sep 17 00:00:00 2001
From: Patrick Simianer 
Date: Tue, 5 Jul 2016 11:01:46 +0200
Subject: mv
---
 norm_german | 87 -------------------------------------------------------------
 1 file changed, 87 deletions(-)
 delete mode 100755 norm_german
(limited to 'norm_german')
diff --git a/norm_german b/norm_german
deleted file mode 100755
index cf9c060..0000000
--- a/norm_german
+++ /dev/null
@@ -1,87 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'thread'
-require 'trollop'
-
-STDIN.set_encoding 'utf-8'
-STDOUT.set_encoding 'utf-8'
-
-
-conf = Trollop::options do
-  banner "norm_german < "
-  opt :upper, "uppercase", :type => :bool, :default => false
-  opt :threads, "#threads", :type => :int, :default => 1, :short => '-h'
-  opt :shard_size, "shard size", :type => :int, :default => 1000
-  opt :train, "train", :type => :bool
-  opt :apply, "apply", :type => :bool
-end
-
-pairs_lower = [ ['ß','ss'], ['ue', 'ü'], ['ae','ä'], ['oe', 'ö'] ]
-pairs_upper = [ ['Ä', 'Ae'], ['Ö', 'Oe'], ['Ü', 'Ue'] ]
-if conf[:upper]
-  PAIRS = pairs_lower
-else
-  PAIRS = pairs_lower+pairs_upper
-end
-
-def get_key(old, new)
-  PAIRS.each { |i|
-    return old if new.gsub(i[0], i[1])==old
-    return old if new.gsub(i[1], i[0])==old
-  }
-  return nil
-end
-
-def build_partial(tokens)
-  h = {}
-  tokens.each { |tok|
-    found = false
-    h.keys.each { |i|
-      if get_key i, tok
-        h[i] << tok
-        found = true
-        break
-      end
-    }
-    h[tok] = [tok] if !found
-  }
-  return h
-end
-
-h = {}
-threads = []
-thread_n = 0
-counter = 0
-token_stock = []
-mutex = Mutex.new
-while tok = STDIN.gets
-  token_stock << [] if !token_stock[thread_n]
-  token_stock[thread_n] << tok.strip!
-  counter += 1
-  if token_stock[thread_n].size%conf[:shard_size]==0
-    STDERR.write "Starting thread ##{thread_n}\n"
-    threads << Thread.new(token_stock[thread_n]) { |tokens|
-      th = build_partial tokens
-      mutex.synchronize do
-        h.merge! th
-      end
-    }
-    threads.last.abort_on_exception = true
-    thread_n += 1
-  else
-    next
-  end
-  if thread_n==conf[:threads]
-    threads.each { |i|  i.join }
-    token_stock.each { |i| i.clear }
-    thread_n = 0
-  end
-  STDERR.write "#keys #{h.keys.size}\n"
-end
-
-token_stock.each { |i|
-  if i.size!=0
-    h.merge! build_partial i
-  end
-}
-
-- 
cgit v1.2.3