diff options
-rwxr-xr-x | bold_reranking.rb | 75 | ||||
-rw-r--r-- | example/example.ini | 2 |
2 files changed, 34 insertions, 43 deletions
diff --git a/bold_reranking.rb b/bold_reranking.rb index 3041ced..8f8cfab 100755 --- a/bold_reranking.rb +++ b/bold_reranking.rb @@ -29,7 +29,7 @@ class FeatureFactory @filter_features = false if cfg['filter_features'] @filter_features = true - @stopwords_target = ReadFile.new(cfg['filter_features']).readlines.map{ |i| i.strip.downcase } + @stopwords_target = ReadFile.readlines(cfg['filter_features']).map{ |i| i.strip.downcase } end end @@ -107,7 +107,7 @@ class MosesKbestEntryWithPhraseAlignment < Translation def initialize super - @other_score = -1.0/0 + @scores[:rr] = -1.0/0 end def get_phrases @@ -126,11 +126,8 @@ class MosesKbestEntryWithPhraseAlignment < Translation @raw.scan(/\|-?\d+\||\|\d+-\d+\|/).map{ |i| i[1..-2] }.map{ |i| _span i } end - def other_score model=nil - if model - @other_score = model.dot(@f) - end - return @other_score + def score model + @scores[:rr] = model.dot(@f) end end @@ -140,34 +137,27 @@ class ConstrainedSearchOracle < MosesKbestEntryWithPhraseAlignment @id = -1 @raw = s.strip.split(' : ', 2)[1].gsub(/(\[|\])/, '|') @s = @raw.gsub(/\s*\|\d+-\d+\||\|-?\d+\|\s*/, ' ').gsub(/\s+/, ' ') - @score = 1.0/0 - @other_score = -1.0/0 + @scores[:rr] = -1.0/0 end end -def structured_update model, hypothesis, oracle +def structured_update model, hypothesis, oracle, learning_rate if hypothesis.s != oracle.s - model += oracle.f - hypothesis.f + model += (oracle.f - hypothesis.f) * learning_rate return [model, 1] end return [model, 0] end -def ranking_update w, hypothesis, oracle - if oracle.other_score <= hypothesis.other_score \ +def ranking_update w, hypothesis, oracle, learning_rate + if oracle.scores[:rr] <= hypothesis.scores[:rr] \ && oracle.s != hypothesis.s - model += oracle.f - hypothesis.f + model += (oracle.f - hypothesis.f) * learning_rate return [model, 1] end return [model, 0] end -def write_model fn, w - f = WriteFile.new fn - f.write w.to_s+"\n" - f.close -end - def read_additional_phrase_pairs fn f = ReadFile.new fn add = {} @@ -192,17 +182,19 @@ end def main usage if ARGV.size != 1 - cfg = read_cfg ARGV[0] - - sources = ReadFile.new(cfg['sources']).readlines - oracles = ReadFile.new(cfg['oracles']).readlines - kbest_lists = read_kbest_lists cfg['kbest_lists'], MosesKbestEntryWithPhraseAlignment - iterations = cfg['iterate'].to_i - output = WriteFile.new cfg['output'] - output_model = cfg['output_model'] - silent = true if cfg['silent'] - verbose = true if cfg['verbose'] - cheat = true if cfg['cheat'] + cfg = read_config ARGV[0] + + sources = ReadFile.readlines cfg['sources'] + oracles = ReadFile.readlines cfg['oracles'] + kbest_lists = read_kbest_lists cfg['kbest_lists'], MosesKbestEntryWithPhraseAlignment + learning_rate = cfg['learning_rate'].to_f + learning_rate = 1.0 if !learning_rate + iterations = cfg['iterate'].to_i + output = WriteFile.new cfg['output'] + output_model = cfg['output_model'] + silent = true if cfg['silent'] + verbose = true if cfg['verbose'] + cheat = true if cfg['cheat'] additional_phrase_pairs = nil if cfg['additional_phrase_pairs'] @@ -218,7 +210,7 @@ def main model = SparseVector.new if cfg['init_model'] - model.from_s ReadFile.new(cfg['init_model']).read + model.from_s ReadFile.read cfg['init_model'] end sz = sources.size @@ -235,26 +227,25 @@ def main kbest = kbest_lists[j] kbest.each { |k| k.f = ff.produce k, sources[j] - k.other_score model + k.score model } - hypothesis = kbest[ kbest.map{ |k| k.other_score }.max_index ] + hypothesis = kbest[ kbest.map{ |k| k.scores[:rr] }.max_index ] if !cheat output.write "#{hypothesis.s}\n" end - oracle = ConstrainedSearchOracle.new - oracle.from_s oracles[j] + oracle = ConstrainedSearchOracle.from_s oracles[j] oracle.f = ff.produce oracle, sources[j] - oracle.other_score model + oracle.score model err = 0 case cfg['update'] when 'structured' - model, err = structured_update model, hypothesis, oracle + model, err = structured_update model, hypothesis, oracle, learning_rate when 'ranking' - model, err = ranking_update model, hypothesis, oracle + model, err = ranking_update model, hypothesis, oracle, learning_rate else STDERR.write "Don't know update method '#{cfg['update']}', exiting.\n" exit 1 @@ -262,8 +253,8 @@ def main overall_errors += err if cheat - kbest.each { |k| k.other_score model } - hypothesis = kbest[ kbest.map{ |k| k.other_score }.max_index ] + kbest.each { |k| k.score model } + hypothesis = kbest[ kbest.map{ |k| k.scores[:rr] }.max_index ] output.write "#{hypothesis.s}\n" end @@ -279,7 +270,7 @@ def main elapsed = Time.now - start STDERR.write"#{elapsed.round 2} s, #{(elapsed/Float(sz)).round 2} s per kbest; model size: #{model.size}\n\n" if !silent - write_model(output_model, model) if output_model + WriteFile.write model.to_s+"\n", output_model if output_model output.close end diff --git a/example/example.ini b/example/example.ini index 46686bd..8c08a5d 100644 --- a/example/example.ini +++ b/example/example.ini @@ -7,7 +7,7 @@ ff_target_ngrams = 4 # 4 fix ff_phrase_pairs = true # true /path/to/phrase_table #filter_features = /path/to/target/stopwords_file binary_feature_values = true -iterate = 3 +iterate = 1 output = - output_model = /dev/null |