1 files changed, 144 insertions, 10 deletions
diff --git a/server.rb b/server.rb
index d6cbec7..752d0d5 100755
--- a/server.rb
+++ b/server.rb
@@ -15,7 +15,10 @@ require_relative './phrase2_extraction/phrase2_extraction'
 # #############################################################################
 # Load configuration file and setup global variables
 # #############################################################################
-require_relative "#{ARGV[0]}" # load configuration for this session
+NOLOO = nil                          # added later, warning
+OLM = nil                            # added later, warning
+$olm_pipe = nil
+require_relative "#{ARGV[0]}"        # load configuration for this session
 $lock                    = false     # lock if currently learning/translating
 $last_reply              = nil       # cache last reply
 $last_processed_postedit = ""        # to show to the user
@@ -73,9 +76,9 @@ end
 def start_daemon cmd, name, addr
   logmsg :server, "starting #{name} daemon"
   cmd.gsub! '__ADDR__', addr
-  pid = fork do
-    exec cmd
-  end
+  pid = spawn(cmd)
+  Process.detach pid
+  logmsg :server, "#{name} detached"
   sock = NanoMsg::PairSocket.new
   sock.connect addr
   logmsg :server, "< got #{sock.recv} from #{name}"
@@ -116,6 +119,11 @@ def init
                                                             # working directory
   `mkdir -p #{WORK_DIR}/`
   `mkdir #{WORK_DIR}/g`
+
+  if OLM
+    `mkfifo #{WORK_DIR}/refp`
+  end
+
                                              # setup environment, start daemons
   port = BEGIN_PORT_RANGE
   $daemons.each { |name,cmd|
@@ -124,7 +132,19 @@ def init
     port += 1
   }
 
+  if OLM
+    logmsg :server, "writing to OLM pipe"
+    $olm_pipe = File.new "#{WORK_DIR}/refp", "w"
+    $olm_pipe.write " \n"
+    $olm_pipe.flush
+    logmsg :server, "writing to OLM pipe, done!"
+  end
+
   send_recv :truecaser, "lOaD iT"
+
+  #if OLM
+  #  $olm_pipe = File.new "#{WORK_DIR}/refp", "w"
+  #end
                                                                    #  lock file
   `touch #{LOCK_FILE}`
   $status = "Initialized"                                              # status
@@ -220,6 +240,7 @@ def process_next reply
 # 5b. backward alignment
 # 5c. symmetrize alignment
 # 5d. actual update
+# 5e. update LM
 # 6. update database
   if data["EDIT"]
     $status = "Processing post-edit"                                   # status
@@ -245,6 +266,10 @@ def process_next reply
       f = []
       data["source_raw"].each { |i| f << URI.decode(i) }
 
+      # no loo rules
+      no_loo_known_rules = []
+      no_loo_new_rules = []
+
       if !NOGRAMMAR
                                                         # 2.5 new rule extraction
         $status = "Extracting rules from post edit"                      # status
@@ -255,6 +280,46 @@ def process_next reply
           s = splitpipe(r.to_s)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
           current_grammar_ids[s] = true
         }
+        # no loo rules
+        no_loo_known_rules = []
+        no_loo_new_rules = []
+        if NOLOO
+          tmp_rules = []
+          logmsg :server, "rule diff: #{data['rule_diff'].to_s}"
+          data["rule_diff"].each_key { |k|
+            x = k.split(",").map{|i|i.to_i}.sort
+            tgt_a = data["rule_diff"][k]["tgt_a"]
+            tgt_first,src,tgt = splitpipe data["rule_diff"][k]
+            tgt_first = tgt_first.lstrip.strip
+            src = src.lstrip.strip
+            tgt = tgt.lstrip.strip
+            prev = tgt[0]
+            logmsg :server, "tgt_first #{tgt_first}"
+            tgt = send_recv :truecaser, tgt
+            tgt[0] = prev if tgt_first=="false"
+            if x.first == 0
+              src[0] = data["source_value"][0]
+            end
+            tmp_rules << [src, tgt]
+          }
+          tmp_rules_new = tmp_rules.reject { |r|
+              current_grammar_ids.has_key? r
+          }
+          tmp_rules_known = tmp_rules - tmp_rules_new
+          tmp_rules_known.each { |i| no_loo_known_rules << "[X] ||| #{i[0]} ||| #{i[1]} ||| KnownRule=1 ||| 0-0" }
+          tmp_rules_new.each { |i|
+            a = []
+            i[0].strip.lstrip.split.each_with_index { |s,ii|
+              i[1].strip.lstrip.split.each_with_index { |t,j|
+                if !s.match /\[X,\d+\]/ and !t.match /\[X,\d+\]/
+                  a << "#{ii}-#{j}"
+                end
+              }
+            }
+            no_loo_new_rules << "[X] ||| #{i[0]} ||| #{i[1]} ||| NewRule=1 ||| #{a.join ' '}"
+          }
+        end
+        # regular
         new_rules = PhrasePhraseExtraction.extract_rules f, e, data["align"], true
         new_rules_ids = {}
         $new_rules.each { |r|
@@ -269,6 +334,7 @@ def process_next reply
           current_grammar_ids.has_key?(s) || new_rules_ids.has_key?(s)
         }
         $new_rules += new_rules
+        $new_rules += no_loo_new_rules
         $new_rules.uniq! { |rs|
           splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
         }
@@ -277,6 +343,7 @@ def process_next reply
         f.close
         logmsg :server, "# rules after filtering #{new_rules.size}"
         add_known_rules = _-new_rules
+        add_known_rules += no_loo_known_rules
         add_known_rules.reject! { |rs|
           s = splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
           new_rules_ids.has_key?(s)
@@ -333,7 +400,45 @@ def process_next reply
       grammar = "#{SESSION_DIR}/g/grammar.#{$db['progress']}"
       annotated_source = "<seg grammar=\"#{grammar}\"> #{source} </seg>"
       $status = "Learning from post-edit"                              # status
-      send_recv :dtrain, "#{annotated_source} ||| #{post_edit}"
+      if NOLOO
+        `cp #{grammar} #{grammar}.pass0`
+        match = {}
+        no_loo_known_rules.each { |r|
+          _,src,tgt,_,_ = splitpipe r
+          match["#{src.strip.lstrip} ||| #{tgt.strip.lstrip}".hash] = true
+        }
+        all_rules = ReadFile.readlines_strip grammar
+        all_rules.each_with_index { |r,j|
+          nt,src,tgt,f,a = splitpipe(r).map { |i| i.strip.lstrip }
+          if match["#{src} ||| #{tgt}".hash]
+            ar = "#{nt} ||| #{src} ||| #{tgt} ||| #{f} KnownRule=1 ||| #{a}"
+            logmsg :server, "replacing rule '#{r}' with '#{ar}'"
+            all_rules[j] = ar
+          end
+        }
+        if no_loo_new_rules.size > 0
+          all_rules += no_loo_new_rules
+        end
+        f = WriteFile.new(grammar)
+        f.write(all_rules.join("\n")+"\n")
+        f.close
+        logmsg :server, "adding rules and re-translate"
+        if OLM # again ..
+          $status = "Updating language model"
+          logmsg :server, "fake updating lm"
+          $olm_pipe.write " \n"
+          $olm_pipe.flush
+        end
+        `cp #{WORK_DIR}/dtrain.debug.json \
+          #{WORK_DIR}/#{$db['progress']}.dtrain.debug.json.pass0`
+        send_recv :dtrain, "act:translate_learn ||| #{annotated_source} ||| #{post_edit}"
+        `cp #{WORK_DIR}/dtrain.debug.json \
+          #{WORK_DIR}/#{$db['progress']}.dtrain.debug.json.pass1`
+      else
+        send_recv :dtrain, "act:learn ||| #{annotated_source} ||| #{post_edit}"
+        `cp #{WORK_DIR}/dtrain.debug.json \
+          #{WORK_DIR}/#{$db['progress']}.dtrain.debug.json.pass0`
+      end
                                                   # 5. update grammar extractor
       if !$pregenerated_grammars
                                                     # 5a. get forward alignment
@@ -350,12 +455,26 @@ def process_next reply
         msg = "default_context ||| #{source} ||| #{post_edit} ||| #{a}"
         send_recv :extractor, msg
       end
+                                                           # 5e update LM
+      if OLM
+        $status = "Updating language model"
+        logmsg :server, "updating lm"
+        #`echo "#{post_edit}" >> #{WORK_DIR}/refp`
+        $olm_pipe.write "#{post_edit}\n"
+        $olm_pipe.flush
+      end
                                                            # 6. update database
       $db['updated'] << true
-      `cp #{WORK_DIR}/dtrain.debug.json \
-        #{WORK_DIR}/#{$db['progress']}.dtrain.debug.json`
     else
+      `cp #{WORK_DIR}/dtrain.debug.json \
+        #{WORK_DIR}/#{$db['progress']}.dtrain.debug.json.nolearn`
       $db['updated'] << false
+      if OLM
+        $status = "Updating language model"
+        logmsg :server, "fake updating lm"
+        $olm_pipe.write " \n"
+        $olm_pipe.flush
+      end
     end
     logmsg :db, "updating database"
     update_database
@@ -532,19 +651,33 @@ end
 
 get '/debug' do                                                    # debug view
   data = {}
-  data = JSON.parse ReadFile.read(DB_FILE).force_encoding("UTF-8")
+  s = File.binread(DB_FILE).encode('UTF-8', 'UTF-8', :invalid => :replace, :replace => "__INVALID__")
+  data = JSON.parse s
   if data["durations"].size == 0
     data["durations"] << -1
   end
 
-  fn = "#{WORK_DIR}/dtrain.debug.json"
+  fn = "#{WORK_DIR}/#{$db["progress"]-1}.dtrain.debug.json.pass"
+  pass = 0
+  if File.exist? fn+"1"
+    fn += "1"
+    pass = 1
+  else
+    fn += "0"
+    pass = 0
+  end
   pairwise_ranking_data                     = {}
   pairwise_ranking_data["kbest"]            = []
   pairwise_ranking_data["weights_before"]   = {}
   pairwise_ranking_data["weights_after"]    = {}
   pairwise_ranking_data["best_match_score"] = 0
   if File.exist? fn
-    pairwise_ranking_data = JSON.parse ReadFile.read(fn).force_encoding("UTF-8")
+    s = File.binread(fn).encode('UTF-8', 'UTF-8', :invalid => :replace, :replace => "__INVALID__").force_encoding("utf-8")
+    begin
+      pairwise_ranking_data = JSON.parse s
+    rescue
+      logmsg :server, s.encoding
+    end
   end
 
   admin = false
@@ -555,6 +688,7 @@ get '/debug' do                                                    # debug view
   haml :debug, :locals => { :data => data,
                             :pairwise_ranking_data => pairwise_ranking_data, \
                             :progress => $db["progress"]-1,
+                            :pass => pass,
                             :new_rules => $new_rules, \
                             :known_rules => $known_rules, \
                             :session_key => SESSION_KEY, \