#!/usr/bin/env ruby

require 'nlp_ruby'
require 'trollop'
require 'tempfile'
require 'memcached'
require 'shellwords'
require_relative './hopefear'


# Shell command prefix for the semantic parser; `exec` appends the sentence to
# parse as a final command-line argument. stderr of the parser is discarded.
SMT_SEMPARSE = 'python /workspace/grounded/smt-semparse-cp/decode_sentence.py /workspace/grounded/smt-semparse-cp/working/full_dataset 2>/dev/null'
# Prolog file handed to `swipl -s` by `exec` when running a parsed query.
EVAL_PL = '/workspace/grounded/wasp-1.0/data/geo-funql/eval/eval.pl'
# Global memcached client (server on localhost:11211) that `exec` uses to
# memoize parser and execution results per sentence.
$cache = Memcached.new("localhost:11211")

# Parses a natural-language sentence with SMT_SEMPARSE, executes the resulting
# query through swipl/EVAL_PL and compares the answer with a reference answer.
#
# The parse (`func`) and the execution result (`output`) depend only on the
# sentence, so they are memoized in $cache under keys derived from the
# sentence. The comparison with `reference_output` is recomputed on every
# call: the same sentence may be checked against different references, so a
# cached verdict would be stale.
#
# natural_language_string - sentence to parse
# reference_output        - expected answer; compared to the output with ==
# no_output               - when true, the diagnostic lines on STDERR are
#                           suppressed
#
# Returns [feedback, func, output]:
#   feedback - true iff output == reference_output
#   func     - stripped stdout of the parser command
#   output   - text following the first 'X = ' in the executor's output, or
#              nil when no 'X =' line was produced
def exec natural_language_string, reference_output, no_output=false
  key_prefix = natural_language_string.encode("ASCII", :invalid => :replace, :undef => :replace, :replace => "?").gsub(/ /,'_')
  func = output = nil
  begin
    func   = $cache.get key_prefix+"__FUNC"
    output = $cache.get key_prefix+"__OUTPUT"
  rescue Memcached::NotFound
    # Escape the sentence so quotes, '$', backticks etc. reach the parser
    # verbatim instead of being interpreted by the shell.
    func   = `#{SMT_SEMPARSE} #{Shellwords.escape natural_language_string}`.strip
    output = `echo "execute_funql_query(#{func}, X)." | swipl -s #{EVAL_PL} 2>&1  | grep "X ="`.strip.split('X = ')[1]
    begin
      $cache.set key_prefix+"__FUNC", func
      $cache.set key_prefix+"__OUTPUT", output
      # No longer read back here; still written so the key layout stays the
      # same for any other reader of this cache.
      $cache.set key_prefix+"__FEEDBACK", output==reference_output
    rescue SystemExit, Interrupt => e
      # Do not leave a half-written entry behind, then let the exit/interrupt
      # continue instead of swallowing it.
      %w[__FUNC __OUTPUT __FEEDBACK].each { |suffix|
        begin
          $cache.delete key_prefix+suffix
        rescue Memcached::NotFound
          # this key had not been written yet
        end
      }
      raise e
    end
  end
  feedback = output==reference_output
  unless no_output
    STDERR.write "        nrl: #{natural_language_string}\n"
    STDERR.write "        mrl: #{func}\n"
    STDERR.write "     output: #{output}\n"
    STDERR.write "   correct?: #{feedback}\n"
  end
  return feedback, func, output
end

# Running counters for one category of candidates (the name is only used as a
# label in the report): how many had a parse, how many produced an output and
# how many produced the correct output.
class Stats

  # name - label printed in front of every report line
  def initialize name
    @name = name
    # Floats, so that the ratios computed in #to_s are not integer divisions.
    @with_parse = 0.0
    @with_output = 0.0
    @with_correct_output = 0.0
  end

  # Records one result.
  #
  # feedback - counted as correct only when it is exactly true
  # func     - parse; nil, '' and 'None' mean "no parse"
  # output   - execution result; nil, '' and 'null' mean "no output"
  def update feedback, func, output
    @with_parse += 1 if present? func, 'None'
    @with_output += 1 if present? output, 'null'
    @with_correct_output += 1 if feedback==true
  end

  # Renders the three report lines.
  #
  # total - number of examples the percentages are relative to
  #
  # 'adj' is the share of correct outputs among the results that had a parse.
  # Ratios with a zero denominator are reported as 0.0.
  def to_s total
<<-eos
         #{@name} with parse #{percent(@with_parse, total)}% abs=#{@with_parse}
        #{@name} with output #{percent(@with_output, total)}% abs=#{@with_output}
#{@name} with correct output #{percent(@with_correct_output, total)}% adj=#{percent(@with_correct_output, @with_parse)} abs=#{@with_correct_output}
eos
  end

  private

  # True unless value is nil, empty or the given placeholder string.
  def present? value, placeholder
    !(value.nil? || value == '' || value == placeholder)
  end

  # part/whole as a percentage rounded to two digits; 0.0 when whole is zero.
  def percent part, whole
    return 0.0 if whole == 0
    ((part/whole)*100).round 2
  end
end

# Rescales the model scores of a k-best list in place so that they lie within
# [0, factor]: the lowest score maps to 0 and the highest to factor.
#
# kbest  - list of entries responding to #score and #score=
# factor - upper end of the target range
#
# When all scores are equal (e.g. a list with a single entry) there is no
# range to scale by; every score is then set to 0.0 instead of dividing by
# zero. An empty list is returned untouched.
#
# Returns kbest.
def adjust_model_scores kbest, factor
  return kbest if kbest.empty?
  lowest, highest = kbest.map { |k| k.score }.minmax
  span = highest-lowest
  kbest.each { |k| k.score = span == 0 ? 0.0 : factor*((k.score-lowest)/span) }
end

# One additive weight update: moves the model along the difference between the
# hope and the fear feature values (their #f), scaled by the learning rate.
#
# Returns model + (hope.f - fear.f) * eta; the result of that `+` is returned
# and the caller is expected to use it as the new model.
def update model, hope, fear, eta
  model + (hope.f - fear.f) * eta
end

# Command-line entry point of the learner.
#
# For every epoch and every input line it
#   1. writes the current weights to a temporary file and asks CDEC::kbest for
#      a k-best list under these weights,
#   2. scores every entry with per-sentence BLEU against the reference and
#      rescales the model scores,
#   3. executes the 1-best through `exec` and picks a hope/fear pair with the
#      gethopefear_* routine selected by --variant,
#   4. updates (and optionally normalizes) the weights,
# and after each epoch writes the weights and a statistics report to STDERR.
# The 1-best translations of the last epoch are printed to STDOUT.
def main
  cfg = Trollop::options do
    # data
    opt :k,             "k",                      :type => :int,    :default => 10000, :short => '-k'
    opt :input,         "'foreign' input",        :type => :string, :required => true, :short => '-i'
    opt :references,    "(parseable) references", :type => :string, :required => true, :short => '-r'
    opt :gold,          "gold output",            :type => :string, :required => true, :short => '-g'
    opt :gold_mrl,      "gold parse",             :type => :string, :required => true, :short => '-h'
    opt :init_weights,  "initial weights",        :type => :string, :required => true, :short => '-w'
    opt :cdec_ini,      "cdec config file",       :type => :string, :required => true, :short => '-c'
    # output
    opt :output_weights, "output file for final weights", :type => :string, :required => true, :short => '-o'
    opt :debug,          "debug output",                  :type => :bool,   :default => false, :short => '-d'
    opt :print_kbest,    "print full kbest lists",        :type => :bool,   :default => false, :short => '-l'
    # learning parameters
    opt :eta,                    "learning rate",                                              :type => :float, :default => 0.01,  :short => '-e'
    opt :iterate,                "iteration X epochs",                                         :type => :int,   :default => 1,     :short => '-j'
    opt :stop_after,             "stop after x examples",                                      :type => :int,   :default => -1,    :short => '-s'
    opt :scale_model,            "scale model scores by this factor",                          :type => :float, :default => 1.0,   :short => '-m'
    opt :normalize,              "normalize weights after each update",                        :type => :bool,  :default => false, :short => '-n'
    opt :skip_on_no_proper_gold, "skip, if the reference didn't produce a proper gold output", :type => :bool,  :default => false, :short => '-x'
    opt :no_update,              "don't update weights",                                       :type => :bool,  :default => false, :short => '-y'
    opt :hope_fear_max,          "FIXME",                                                      :type => :int,   :default => 32,    :short => '-q'
    opt :variant, "standard, rampion, fear_no_exec, fear_no_exec_skip, fear_no_exec_hope_exec, fear_no_exec_hope_exec_skip, only_exec", :default => 'standard', :short => '-v'
  end

  STDERR.write "CONFIGURATION\n"
  cfg.each_pair { |k,v| STDERR.write " #{k}=#{v}\n" }

  # The four data files are indexed in parallel: line j of each belongs to
  # example j.
  input      = ReadFile.new(cfg[:input]).readlines_strip
  references = ReadFile.new(cfg[:references]).readlines_strip
  gold       = ReadFile.new(cfg[:gold]).readlines_strip
  gold_mrl   = ReadFile.new(cfg[:gold_mrl]).readlines_strip # FIXME => prolog!
  stopwords  = ReadFile.new('prototype/d/stopwords.en').readlines_strip

  # Only the 'only_exec' variant keeps per-example references of its own;
  # they start out empty and are filled in by gethopefear_only_exec.
  own_references = nil
  own_references = references.map{ |i| nil } if cfg[:variant]=='only_exec'

  w = SparseVector.new
  w.from_kv_file cfg[:init_weights]

  cfg[:iterate].times { |iter|

    # numerous counters
    count                 = 0
    without_translation   = 0
    no_proper_gold_output = 0
    top1_stats = Stats.new 'top1'
    hope_stats = Stats.new 'hope'
    fear_stats = Stats.new 'fear'
    refs_stats = Stats.new 'refs'
    type1_updates     = 0
    type2_updates     = 0
    top1_hit          = 0
    top1_variant      = 0
    top1_true_variant = 0
    hope_hit          = 0
    hope_variant      = 0
    hope_true_variant = 0
    kbest_sz          = 0

    input.each_with_index { |i,j|
      # Checked up front so the limit also holds when an example leaves the
      # iteration early through one of the `next` statements below.
      break if cfg[:stop_after]>0 && j>=cfg[:stop_after]
      count += 1

      # The decoder gets the current weights through a file.
      tmp_file      = Tempfile.new('rampion')
      tmp_file_path = tmp_file.path
      tmp_file.write w.to_kv ' '
      tmp_file.close

      kbest = CDEC::kbest i, cfg[:cdec_ini], tmp_file_path, cfg[:k]
      kbest_sz += kbest.size

      STDERR.write "\n=================\n"
      STDERR.write "    EXAMPLE: #{j}\n"
      STDERR.write "   GOLD MRL: #{gold_mrl[j]}\n"
      STDERR.write "GOLD OUTPUT: #{gold[j]}\n"

      if kbest.size == 0
        without_translation += 1
        STDERR.write "NO MT OUTPUT, skipping example\n"
        next
      end

      # These gold values are treated as "no proper gold output".
      if gold[j] == '[]' || gold[j] == '[...]' || gold[j] == '[].'
        no_proper_gold_output += 1
        if cfg[:skip_on_no_proper_gold]
          STDERR.write "NO PROPER GOLD OUTPUT, skipping example\n"
          next
        end
      end

      kbest.each { |k| k.other_score = BLEU::per_sentence_bleu k.s, references[j] }

      if cfg[:print_kbest]
        STDERR.write "\n<<< KBEST\n"
        kbest.each { |k| STDERR.write k.to_s+"\n" }
        STDERR.write ">>>\n"
      end

      adjust_model_scores kbest, cfg[:scale_model]

      STDERR.write "\n [TOP1]\n"
      STDERR.write "#{kbest[0].s}\n"
      # STDOUT only carries the 1-best translations of the final epoch.
      puts "#{kbest[0].s}" if iter+1==cfg[:iterate]

      feedback, func, output = exec kbest[0].s, gold[j]
      top1_stats.update feedback, func, output


      hope = fear = new_reference = nil
      type1 = type2 = skip = false
      case cfg[:variant]
      when 'standard'
        hope, fear, skip, type1, type2 = gethopefear_standard kbest, feedback
      when 'rampion'
        hope, fear, skip, type1, type2 = gethopefear_rampion kbest, references[j]
      when 'fear_no_exec_skip'
        hope, fear, skip, type1, type2 = gethopefear_fear_no_exec_skip kbest, feedback, gold[j]
      when 'fear_no_exec'
        hope, fear, skip, type1, type2 = gethopefear_fear_no_exec kbest, feedback, gold[j], cfg[:hope_fear_max]
      when 'fear_no_exec_hope_exec'
        hope, fear, skip, type1, type2 = gethopefear_fear_no_exec_hope_exec kbest, feedback, gold[j], cfg[:hope_fear_max]
      when 'fear_no_exec_hope_exec_skip'
        hope, fear, skip, type1, type2 = gethopefear_fear_no_exec_hope_exec_skip kbest, feedback, gold[j], cfg[:hope_fear_max]
      when 'only_exec'
        hope, fear, skip, type1, type2, new_reference = gethopefear_only_exec kbest, feedback, gold[j], cfg[:hope_fear_max], own_references[j]
      else
        STDERR.write "NO SUCH VARIANT, exiting.\n"
        exit 1
      end

      if new_reference
        own_references[j] = new_reference
      end

      type1_updates+=1 if type1
      type2_updates+=1 if type2

      ref_words = bag_of_words references[j], stopwords

      # A "variant" is a string that differs from the reference; it is a
      # "true variant" when its bag of words is not a subset of the
      # reference's bag of words.
      if kbest[0].s == references[j]
        top1_hit += 1
      else
        top1_variant += 1
        top1_true_variant += 1 if !bag_of_words(kbest[0].s, stopwords).is_subset_of?(ref_words)
      end
      if hope && hope.s==references[j]
        hope_hit += 1
      elsif hope
        hope_variant += 1
        hope_true_variant += 1 if !bag_of_words(hope.s, stopwords).is_subset_of?(ref_words)
      end

      STDERR.write "\n [HOPE]\n"
      if hope
        feedback, func, output =  exec hope.s, gold[j]
        hope_stats.update feedback, func, output
      end
      STDERR.write "\n [FEAR]\n"
      if fear
        feedback, func, output = exec fear.s, gold[j]
        fear_stats.update  feedback, func, output
      end
      STDERR.write "\n [REFERENCE]\n"
      feedback, func, output = exec references[j], gold[j]
      refs_stats.update feedback, func, output

      if skip || !hope || !fear
        STDERR.write "NO GOOD HOPE/FEAR, skipping example\n\n"
        next
      end

      w = update w, hope, fear, cfg[:eta] if !cfg[:no_update]
      w.normalize! if cfg[:normalize]
    }

    # Save the weights as they are after the last update of this epoch. They
    # are serialized from `w` directly rather than copied from the last
    # per-example temp file: that file predates the final update and is
    # removed once its Tempfile object is garbage-collected.
    final_weights = w.to_kv ' '
    if cfg[:iterate] > 1
      WriteFile.new("#{cfg[:output_weights]}.#{iter}.gz").write(final_weights)
    else
      File.write(cfg[:output_weights], final_weights)
    end

    STDERR.write  <<-eos

---
  iteration ##{iter+1}/#{cfg[:iterate]}: #{count} examples
        type1 updates: #{type1_updates}
        type2 updates: #{type2_updates}
            top1 hits: #{top1_hit}
         top1 variant: #{top1_variant}
    top1 true variant: #{top1_true_variant}
            hope hits: #{hope_hit}
         hope variant: #{hope_variant}
    hope true variant: #{hope_true_variant}
           kbest size: #{(kbest_sz.to_f/count).round 2}
    #{((without_translation.to_f/count)*100).round 2}% without translations (abs: #{without_translation})
    #{((no_proper_gold_output.to_f/count)*100).round 2}% no good gold output (abs: #{no_proper_gold_output})

#{top1_stats.to_s count}
#{hope_stats.to_s count}
#{fear_stats.to_s count}
#{refs_stats.to_s count}

eos

  }
end


# Run the learner when the script is executed.
main