summary refs log tree commit diff
path: root/norm-german
diff options
context:
space:
mode:
author Patrick Simianer <patrick@lilt.com> 2026-02-26 10:05:59 +0000
committer Patrick Simianer <patrick@lilt.com> 2026-02-26 10:05:59 +0000
commit b31ace79ea5f6b3f279c544cd3a443d6fbf2a24d (patch)
tree 31f2b599fa5f6996aeb134390d58deb63eefe04a /norm-german
parent 8805e95ae94d798c6441f7e1b72c90e049563f17 (diff)
overhaul HEAD master
Diffstat (limited to 'norm-german')
-rwxr-xr-x norm-german 15
1 files changed, 7 insertions, 8 deletions
diff --git a/norm-german b/norm-german
index 85a39da..5c41f98 100755
--- a/norm-german
+++ b/norm-german
@@ -1,23 +1,23 @@
#!/usr/bin/env ruby
-require 'thread'
-require 'optimist'
+require "thread"
+require "optimist"
-STDIN.set_encoding 'utf-8'
-STDOUT.set_encoding 'utf-8'
+STDIN.set_encoding "utf-8"
+STDOUT.set_encoding "utf-8"
conf = Optimist::options do
banner "norm_german < <file w/ lowercased tokens>"
opt :upper, "uppercase", :type => :bool, :default => false
- opt :threads, "#threads", :type => :int, :default => 1, :short => '-h'
+ opt :threads, "#threads", :type => :int, :default => 1, :short => "-h"
opt :shard_size, "shard size", :type => :int, :default => 1000
opt :train, "train", :type => :bool
opt :apply, "apply", :type => :bool
end
-pairs_lower = [ ['ß','ss'], ['ue', 'ü'], ['ae','ä'], ['oe', 'ö'] ]
-pairs_upper = [ ['Ä', 'Ae'], ['Ö', 'Oe'], ['Ü', 'Ue'] ]
+pairs_lower = [ ["ß","ss"], ["ue", "ü"], ["ae","ä"], ["oe", "ö"] ]
+pairs_upper = [ ["Ä", "Ae"], ["Ö", "Oe"], ["Ü", "Ue"] ]
if conf[:upper]
PAIRS = pairs_lower
else
@@ -84,4 +84,3 @@ token_stock.each { |i|
h.merge! build_partial i
end
}
-