From 8b399cb09513cd79ed4182be9f75882c1e1b336a Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Fri, 18 Jan 2013 14:36:51 +0100 Subject: parallelize enhancements --- training/dtrain/dtrain.h | 2 +- training/dtrain/parallelize.rb | 99 +++++++++++++++++------- training/dtrain/test/parallelize/cdec.ini | 2 +- training/dtrain/test/parallelize/in | 20 ++--- training/dtrain/test/parallelize/refs | 20 ++--- training/dtrain/test/parallelize/test/cdec.ini | 22 ------ training/dtrain/test/parallelize/test/dtrain.ini | 15 ---- training/dtrain/test/parallelize/test/in | 10 --- training/dtrain/test/parallelize/test/refs | 10 --- 9 files changed, 91 insertions(+), 109 deletions(-) delete mode 100644 training/dtrain/test/parallelize/test/cdec.ini delete mode 100644 training/dtrain/test/parallelize/test/dtrain.ini delete mode 100644 training/dtrain/test/parallelize/test/in delete mode 100644 training/dtrain/test/parallelize/test/refs (limited to 'training') diff --git a/training/dtrain/dtrain.h b/training/dtrain/dtrain.h index 4b6f415c..572fd613 100644 --- a/training/dtrain/dtrain.h +++ b/training/dtrain/dtrain.h @@ -3,7 +3,7 @@ #undef DTRAIN_FASTER_PERCEPTRON // only look at misranked pairs // DO NOT USE WITH SVM! -//#define DTRAIN_LOCAL +#define DTRAIN_LOCAL #define DTRAIN_DOTS 10 // after how many inputs to display a '.' #define DTRAIN_GRAMMAR_DELIM "########EOS########" #define DTRAIN_SCALE 100000 diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb index eb4148f5..92ce1f6f 100755 --- a/training/dtrain/parallelize.rb +++ b/training/dtrain/parallelize.rb @@ -1,80 +1,119 @@ #!/usr/bin/env ruby -if ARGV.size != 5 +if ARGV.size != 7 STDERR.write "Usage: " - STDERR.write "ruby parallelize.rb <#shards> \n" + STDERR.write "ruby parallelize.rb <#shards|predef> \n" exit end -cdec_dir = '/path/to/cdec_dir' -dtrain_bin = "#{cdec_dir}/training/dtrain/dtrain_local" +cdec_dir = '~/mt/cdec-dtrain/' +dtrain_bin = "~/bin/dtrain_local" ruby = '/usr/bin/ruby' lplp_rb = "#{cdec_dir}/training/dtrain/hstreaming/lplp.rb" lplp_args = 'l2 select_k 100000' -gzip = '/bin/gzip' - -num_shards = ARGV[0].to_i -input = ARGV[1] -refs = ARGV[2] -epochs = ARGV[3].to_i -ini = ARGV[4] +ini = ARGV[0] +epochs = ARGV[1].to_i +rand = false +rand = true if ARGV[2]=='true' +predefined_shards = false +if ARGV[3] == 'predef' + predefined_shards = true + num_shards = -1 +else + num_shards = ARGV[3].to_i +end +shards_at_once = ARGV[4].to_i +input = ARGV[5] +refs = ARGV[6] `mkdir work` -def make_shards(input, refs, num_shards) +def make_shards(input, refs, num_shards, epoch, rand) lc = `wc -l #{input}`.split.first.to_i + index = (0..lc-1).to_a + index.reverse! + index.shuffle! 
if rand shard_sz = lc / num_shards leftover = lc % num_shards in_f = File.new input, 'r' + in_lines = in_f.readlines refs_f = File.new refs, 'r' + refs_lines = refs_f.readlines shard_in_files = [] shard_refs_files = [] + in_fns = [] + refs_fns = [] 0.upto(num_shards-1) { |shard| - shard_in = File.new "work/shard.#{shard}.in", 'w+' - shard_refs = File.new "work/shard.#{shard}.refs", 'w+' + in_fn = "work/shard.#{shard}.#{epoch}.in" + shard_in = File.new in_fn, 'w+' + in_fns << in_fn + refs_fn = "work/shard.#{shard}.#{epoch}.refs" + shard_refs = File.new refs_fn, 'w+' + refs_fns << refs_fn 0.upto(shard_sz-1) { |i| - shard_in.write in_f.gets - shard_refs.write refs_f.gets + j = index.pop + shard_in.write in_lines[j] + shard_refs.write refs_lines[j] } shard_in_files << shard_in shard_refs_files << shard_refs } while leftover > 0 - shard_in_files[-1].write in_f.gets - shard_refs_files[-1].write refs_f.gets + j = index.pop + shard_in_files[-1].write in_lines[j] + shard_refs_files[-1].write refs_lines[j] leftover -= 1 end (shard_in_files + shard_refs_files).each do |f| f.close end in_f.close refs_f.close + return [in_fns, refs_fns] end -make_shards input, refs, num_shards +input_files = [] +refs_files = [] +if predefined_shards + input_files = File.new(input).readlines.map {|i| i.strip } + refs_files = File.new(refs).readlines.map {|i| i.strip } + num_shards = input_files.size +else + input_files, refs_files = make_shards input, refs, num_shards, 0, rand +end 0.upto(epochs-1) { |epoch| + puts "epoch #{epoch+1}" pids = [] input_weights = '' if epoch > 0 then input_weights = "--input_weights work/weights.#{epoch-1}" end weights_files = [] - 0.upto(num_shards-1) { |shard| - pids << Kernel.fork { - `#{dtrain_bin} -c #{ini}\ - --input work/shard.#{shard}.in\ - --refs work/shard.#{shard}.refs #{input_weights}\ - --output work/weights.#{shard}.#{epoch}\ - &> work/out.#{shard}.#{epoch}` + shard = 0 + remaining_shards = num_shards + while remaining_shards > 0 + shards_at_once.times { + pids << Kernel.fork { + `#{dtrain_bin} -c #{ini}\ + --input #{input_files[shard]}\ + --refs #{refs_files[shard]} #{input_weights}\ + --output work/weights.#{shard}.#{epoch}\ + &> work/out.#{shard}.#{epoch}` + } + weights_files << "work/weights.#{shard}.#{epoch}" + shard += 1 + remaining_shards -= 1 } - weights_files << "work/weights.#{shard}.#{epoch}" - } - pids.each { |pid| Process.wait(pid) } + pids.each { |pid| Process.wait(pid) } + pids.clear + end cat = File.new('work/weights_cat', 'w+') weights_files.each { |f| cat.write File.new(f, 'r').read } cat.close `#{ruby} #{lplp_rb} #{lplp_args} #{num_shards} < work/weights_cat &> work/weights.#{epoch}` + if rand and epoch+1!=epochs + input_files, refs_files = make_shards input, refs, num_shards, epoch+1, rand + end } `rm work/weights_cat` -`#{gzip} work/*` diff --git a/training/dtrain/test/parallelize/cdec.ini b/training/dtrain/test/parallelize/cdec.ini index 72e99dc5..e118374b 100644 --- a/training/dtrain/test/parallelize/cdec.ini +++ b/training/dtrain/test/parallelize/cdec.ini @@ -4,7 +4,7 @@ intersection_strategy=cube_pruning cubepruning_pop_limit=200 scfg_max_span_limit=15 feature_function=WordPenalty -feature_function=KLanguageModel /stor/dat/wmt12/en/news_only/m/wmt12.news.en.3.kenv5 +feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz #feature_function=ArityPenalty #feature_function=CMR2008ReorderingFeatures #feature_function=Dwarf diff --git a/training/dtrain/test/parallelize/in b/training/dtrain/test/parallelize/in index a312809f..3b7dec39 100644 --- 
a/training/dtrain/test/parallelize/in +++ b/training/dtrain/test/parallelize/in @@ -1,10 +1,10 @@ -barack obama erhält als vierter us @-@ präsident den frieden nobelpreis -der amerikanische präsident barack obama kommt für 26 stunden nach oslo , norwegen , um hier als vierter us @-@ präsident in der geschichte den frieden nobelpreis entgegen zunehmen . -darüber hinaus erhält er das diplom sowie die medaille und einen scheck über 1,4 mio. dollar für seine außer gewöhnlichen bestrebungen um die intensivierung der welt diplomatie und zusammen arbeit unter den völkern . -der chef des weißen hauses kommt morgen zusammen mit seiner frau michelle in der nordwegischen metropole an und wird die ganze zeit beschäftigt sein . -zunächst stattet er dem nobel @-@ institut einen besuch ab , wo er überhaupt zum ersten mal mit den fünf ausschuss mitglieder zusammen trifft , die ihn im oktober aus 172 leuten und 33 organisationen gewählt haben . -das präsidenten paar hat danach ein treffen mit dem norwegischen könig harald v. und königin sonja eingeplant . -nachmittags erreicht dann der besuch seinen höhepunkt mit der zeremonie , bei der obama den prestige preis übernimmt . -diesen erhält er als der vierte us @-@ präsident , aber erst als der dritte , der den preis direkt im amt entgegen nimmt . -das weiße haus avisierte schon , dass obama bei der übernahme des preises über den afghanistan krieg sprechen wird . -der präsident will diesem thema nicht ausweichen , weil er weiß , dass er den preis als ein präsident übernimmt , der zur zeit krieg in zwei ländern führt . +europas nach rassen geteiltes haus +ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen . +der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln . +während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden . +eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern . +die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden . +das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt . +die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen . +der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken . +genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen . 
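Since the resharding logic this patch introduces is spread across a diff, here is a hedged, self-contained sketch of the same idea: a line index is reversed (so `pop` walks the corpus front to back) and shuffled only when randomization is requested, then dealt out to `work/shard.<shard>.<epoch>` files. The condensed leftover handling and the exact helper signature are illustrative, not the literal patch.

```
# minimal sketch of the resharding scheme in parallelize.rb;
# expects ./work to exist (the driver runs `mkdir work`)
def make_shards input, refs, num_shards, epoch, rand
  in_lines   = File.readlines input
  refs_lines = File.readlines refs
  index = (0...in_lines.size).to_a
  index.reverse!            # pop then yields lines in corpus order
  index.shuffle! if rand    # random shards only when requested
  shard_sz = in_lines.size / num_shards
  leftover = in_lines.size % num_shards
  in_fns, refs_fns = [], []
  0.upto(num_shards-1) do |shard|
    in_fn   = "work/shard.#{shard}.#{epoch}.in"
    refs_fn = "work/shard.#{shard}.#{epoch}.refs"
    # the last shard additionally receives the leftover lines
    n = shard_sz + (shard == num_shards-1 ? leftover : 0)
    File.open(in_fn, 'w') do |i_f|
      File.open(refs_fn, 'w') do |r_f|
        n.times do
          j = index.pop
          i_f.write in_lines[j]
          r_f.write refs_lines[j]
        end
      end
    end
    in_fns << in_fn
    refs_fns << refs_fn
  end
  [in_fns, refs_fns]
end
```

Keying the shard files by epoch as well as shard id is what lets later epochs reshard without clobbering the files a previous epoch trained on.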
diff --git a/training/dtrain/test/parallelize/refs b/training/dtrain/test/parallelize/refs index 4d3128cb..632e27b0 100644 --- a/training/dtrain/test/parallelize/refs +++ b/training/dtrain/test/parallelize/refs @@ -1,10 +1,10 @@ -barack obama becomes the fourth american president to receive the nobel peace prize -the american president barack obama will fly into oslo , norway for 26 hours to receive the nobel peace prize , the fourth american president in history to do so . -he will receive a diploma , medal and cheque for 1.4 million dollars for his exceptional efforts to improve global diplomacy and encourage international cooperation , amongst other things . -the head of the white house will be flying into the norwegian city in the morning with his wife michelle and will have a busy schedule . -first , he will visit the nobel institute , where he will have his first meeting with the five committee members who selected him from 172 people and 33 organisations . -the presidential couple then has a meeting scheduled with king harald v and queen sonja of norway . -then , in the afternoon , the visit will culminate in a grand ceremony , at which obama will receive the prestigious award . -he will be the fourth american president to be awarded the prize , and only the third to have received it while actually in office . -the white house has stated that , when he accepts the prize , obama will speak about the war in afghanistan . -the president does not want to skirt around this topic , as he realises that he is accepting the prize as a president whose country is currently at war in two countries . +europe 's divided racial house +a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge . +the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them . +while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon . +an aging population at home and ever more open borders imply increasing racial fragmentation in european countries . +mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear . +it will not , as america 's racial history clearly shows . +race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes . +the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths . +this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us . 
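The rewritten epoch loop above throttles parallelism with `shards_at_once`: one child is forked per shard, and each batch is fully reaped before the next starts. Below is a minimal sketch of that scheduling under stated assumptions — `run_in_batches` and `run_shard` are illustrative names, with `run_shard` standing in for the actual shell-out to dtrain (under the `--qsub` mode added in a later patch, that command is merely wrapped in `qsub -cwd -sync y -b y …`, so the same fork/wait barrier still applies).

```
# sketch of batched forking: at most shards_at_once children run
# concurrently; Process.wait on each pid is the barrier between batches
def run_in_batches num_shards, shards_at_once
  shard = 0
  remaining = num_shards
  while remaining > 0
    pids = []
    [shards_at_once, remaining].min.times do
      s = shard                          # capture this child's shard id
      pids << Kernel.fork { run_shard s }
      shard += 1
      remaining -= 1
    end
    pids.each { |pid| Process.wait pid } # reap the whole batch
  end
end

# illustrative stand-in for running dtrain on one shard
def run_shard shard
  sleep 0.1
end

run_in_batches 5, 2   # example: 5 shards, 2 at a time
```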
diff --git a/training/dtrain/test/parallelize/test/cdec.ini b/training/dtrain/test/parallelize/test/cdec.ini deleted file mode 100644 index 72e99dc5..00000000 --- a/training/dtrain/test/parallelize/test/cdec.ini +++ /dev/null @@ -1,22 +0,0 @@ -formalism=scfg -add_pass_through_rules=true -intersection_strategy=cube_pruning -cubepruning_pop_limit=200 -scfg_max_span_limit=15 -feature_function=WordPenalty -feature_function=KLanguageModel /stor/dat/wmt12/en/news_only/m/wmt12.news.en.3.kenv5 -#feature_function=ArityPenalty -#feature_function=CMR2008ReorderingFeatures -#feature_function=Dwarf -#feature_function=InputIndicator -#feature_function=LexNullJump -#feature_function=NewJump -#feature_function=NgramFeatures -#feature_function=NonLatinCount -#feature_function=OutputIndicator -#feature_function=RuleIdentityFeatures -#feature_function=RuleNgramFeatures -#feature_function=RuleShape -#feature_function=SourceSpanSizeFeatures -#feature_function=SourceWordPenalty -#feature_function=SpanFeatures diff --git a/training/dtrain/test/parallelize/test/dtrain.ini b/training/dtrain/test/parallelize/test/dtrain.ini deleted file mode 100644 index 03f9d240..00000000 --- a/training/dtrain/test/parallelize/test/dtrain.ini +++ /dev/null @@ -1,15 +0,0 @@ -k=100 -N=4 -learning_rate=0.0001 -gamma=0 -loss_margin=0 -epochs=1 -scorer=stupid_bleu -sample_from=kbest -filter=uniq -pair_sampling=XYX -hi_lo=0.1 -select_weights=last -print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough -tmp=/tmp -decoder_config=cdec.ini diff --git a/training/dtrain/test/parallelize/test/in b/training/dtrain/test/parallelize/test/in deleted file mode 100644 index a312809f..00000000 --- a/training/dtrain/test/parallelize/test/in +++ /dev/null @@ -1,10 +0,0 @@ -barack obama erhält als vierter us @-@ präsident den frieden nobelpreis -der amerikanische präsident barack obama kommt für 26 stunden nach oslo , norwegen , um hier als vierter us @-@ präsident in der geschichte den frieden nobelpreis entgegen zunehmen . -darüber hinaus erhält er das diplom sowie die medaille und einen scheck über 1,4 mio. dollar für seine außer gewöhnlichen bestrebungen um die intensivierung der welt diplomatie und zusammen arbeit unter den völkern . -der chef des weißen hauses kommt morgen zusammen mit seiner frau michelle in der nordwegischen metropole an und wird die ganze zeit beschäftigt sein . -zunächst stattet er dem nobel @-@ institut einen besuch ab , wo er überhaupt zum ersten mal mit den fünf ausschuss mitglieder zusammen trifft , die ihn im oktober aus 172 leuten und 33 organisationen gewählt haben . -das präsidenten paar hat danach ein treffen mit dem norwegischen könig harald v. und königin sonja eingeplant . -nachmittags erreicht dann der besuch seinen höhepunkt mit der zeremonie , bei der obama den prestige preis übernimmt . -diesen erhält er als der vierte us @-@ präsident , aber erst als der dritte , der den preis direkt im amt entgegen nimmt . -das weiße haus avisierte schon , dass obama bei der übernahme des preises über den afghanistan krieg sprechen wird . -der präsident will diesem thema nicht ausweichen , weil er weiß , dass er den preis als ein präsident übernimmt , der zur zeit krieg in zwei ländern führt . 
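After each epoch the driver concatenates the per-shard weights and merges them with lplp.rb using the default arguments `l2 select_k 100000`: rank every feature by the l2 norm of its per-shard weight column, keep the strongest k features, and emit the per-shard mean as the merged weight. A hedged sketch of that selection follows; the `merge` helper and the inline feature hash are made up for illustration, while `l2` mirrors the norm in lplp.rb.

```
# sketch of lplp.rb-style weight merging ("l2 select_k")
def l2 column
  Math.sqrt column.map { |w| w.abs2 }.reduce(:+)
end

def merge weights_by_feature, num_shards, k
  ranked = weights_by_feature.sort_by { |_, col| -l2(col) }
  ranked.first(k).each do |feat, col|
    # mean over shards is the merged weight
    puts "#{feat}\t#{col.reduce(:+) / num_shards}"
  end
end

# example: columns as read from the concatenated shard weight files
merge({ 'Glue' => [0.1, -0.2], 'WordPenalty' => [-2.0, -1.8] }, 2, 100000)
```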
diff --git a/training/dtrain/test/parallelize/test/refs b/training/dtrain/test/parallelize/test/refs deleted file mode 100644 index 4d3128cb..00000000 --- a/training/dtrain/test/parallelize/test/refs +++ /dev/null @@ -1,10 +0,0 @@ -barack obama becomes the fourth american president to receive the nobel peace prize -the american president barack obama will fly into oslo , norway for 26 hours to receive the nobel peace prize , the fourth american president in history to do so . -he will receive a diploma , medal and cheque for 1.4 million dollars for his exceptional efforts to improve global diplomacy and encourage international cooperation , amongst other things . -the head of the white house will be flying into the norwegian city in the morning with his wife michelle and will have a busy schedule . -first , he will visit the nobel institute , where he will have his first meeting with the five committee members who selected him from 172 people and 33 organisations . -the presidential couple then has a meeting scheduled with king harald v and queen sonja of norway . -then , in the afternoon , the visit will culminate in a grand ceremony , at which obama will receive the prestigious award . -he will be the fourth american president to be awarded the prize , and only the third to have received it while actually in office . -the white house has stated that , when he accepts the prize , obama will speak about the war in afghanistan . -the president does not want to skirt around this topic , as he realises that he is accepting the prize as a president whose country is currently at war in two countries . -- cgit v1.2.3 From 0c02f35192e7cec1298c94065dee4a32a6730252 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Thu, 24 Jan 2013 15:28:03 +0100 Subject: enable qsub use --- environment/LocalConfig.pm | 7 +++++++ training/dtrain/parallelize.rb | 25 +++++++++++++++---------- 2 files changed, 22 insertions(+), 10 deletions(-) (limited to 'training') diff --git a/environment/LocalConfig.pm b/environment/LocalConfig.pm index b9549c6e..627f7f8c 100644 --- a/environment/LocalConfig.pm +++ b/environment/LocalConfig.pm @@ -12,6 +12,7 @@ my $host = domainname; # keys are: HOST_REGEXP, MERTMem, QSubQueue, QSubMemFlag, QSubExtraFlags my $CCONFIG = { + 'StarCluster' => { 'HOST_REGEXP' => qr/compute-\d+\.internal$/, 'JobControl' => 'qsub', @@ -67,6 +68,12 @@ my $CCONFIG = { 'JobControl' => 'fork', 'DefaultJobs' => 12, }, + 'cluster.cl.uni-heidelberg.de' => { + 'HOST_REGEXP' => qr/node25/, + 'JobControl' => 'qsub', + 'QSubMemFlag' => '-l h_vmem=', + 'DefaultJobs' => 13, + }, 'LOCAL' => { # LOCAL must be last in the list!!! 
'HOST_REGEXP' => qr//, 'QSubMemFlag' => ' ', diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb index 92ce1f6f..6e30cf9d 100755 --- a/training/dtrain/parallelize.rb +++ b/training/dtrain/parallelize.rb @@ -3,15 +3,16 @@ if ARGV.size != 7 STDERR.write "Usage: " - STDERR.write "ruby parallelize.rb <#shards|predef> \n" + STDERR.write "ruby parallelize.rb <#shards|predef> \n" exit end -cdec_dir = '~/mt/cdec-dtrain/' -dtrain_bin = "~/bin/dtrain_local" +cdec_dir = '~/MAREC/cdec-dtrain/' +dtrain_bin = "~/MAREC/cdec-dtrain/training/dtrain/dtrain" ruby = '/usr/bin/ruby' lplp_rb = "#{cdec_dir}/training/dtrain/hstreaming/lplp.rb" lplp_args = 'l2 select_k 100000' +cat = '/bin/cat' ini = ARGV[0] epochs = ARGV[1].to_i @@ -27,6 +28,8 @@ end shards_at_once = ARGV[4].to_i input = ARGV[5] refs = ARGV[6] +use_qsub = false +use_qsub = true if ARGV[7] `mkdir work` @@ -92,12 +95,16 @@ end remaining_shards = num_shards while remaining_shards > 0 shards_at_once.times { + qsub_str_start = qsub_str_end = '' + if use_qsub + qsub_str_start = "qsub -cwd -sync y -b y -j y -o work/out.#{shard}.#{epoch} -N dtrain.#{shard}.#{epoch} \"" + qsub_str_end = "\"" + end pids << Kernel.fork { - `#{dtrain_bin} -c #{ini}\ + `#{qsub_str_start}#{dtrain_bin} -c #{ini}\ --input #{input_files[shard]}\ --refs #{refs_files[shard]} #{input_weights}\ - --output work/weights.#{shard}.#{epoch}\ - &> work/out.#{shard}.#{epoch}` + --output work/weights.#{shard}.#{epoch}#{qsub_str_end}` } weights_files << "work/weights.#{shard}.#{epoch}" shard += 1 @@ -106,10 +113,8 @@ end pids.each { |pid| Process.wait(pid) } pids.clear end - cat = File.new('work/weights_cat', 'w+') - weights_files.each { |f| cat.write File.new(f, 'r').read } - cat.close - `#{ruby} #{lplp_rb} #{lplp_args} #{num_shards} < work/weights_cat &> work/weights.#{epoch}` + `#{cat} work/weights.*.#{epoch} > work/weights_cat` + `#{ruby} #{lplp_rb} #{lplp_args} #{num_shards} < work/weights_cat > work/weights.#{epoch}` if rand and epoch+1!=epochs input_files, refs_files = make_shards input, refs, num_shards, epoch+1, rand end -- cgit v1.2.3 From 7c4a9e0825b15ce6c08c45c7654c614d542cf93a Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Thu, 24 Jan 2013 16:28:23 +0100 Subject: made examples work again --- training/dtrain/parallelize.rb | 12 ++++++++---- training/dtrain/test/example/README | 4 ++-- training/dtrain/test/example/cdec.ini | 2 +- training/dtrain/test/example/dtrain.ini | 4 ++-- training/dtrain/test/parallelize/README | 5 +++++ training/dtrain/test/parallelize/cdec.ini | 2 +- training/dtrain/test/parallelize/g/grammar.out.0.gz | Bin 0 -> 8318 bytes training/dtrain/test/parallelize/g/grammar.out.1.gz | Bin 0 -> 358560 bytes training/dtrain/test/parallelize/g/grammar.out.2.gz | Bin 0 -> 1014466 bytes training/dtrain/test/parallelize/g/grammar.out.3.gz | Bin 0 -> 391811 bytes training/dtrain/test/parallelize/g/grammar.out.4.gz | Bin 0 -> 149590 bytes training/dtrain/test/parallelize/g/grammar.out.5.gz | Bin 0 -> 537024 bytes training/dtrain/test/parallelize/g/grammar.out.6.gz | Bin 0 -> 291286 bytes training/dtrain/test/parallelize/g/grammar.out.7.gz | Bin 0 -> 1038140 bytes training/dtrain/test/parallelize/g/grammar.out.8.gz | Bin 0 -> 419889 bytes training/dtrain/test/parallelize/g/grammar.out.9.gz | Bin 0 -> 409140 bytes 16 files changed, 19 insertions(+), 10 deletions(-) create mode 100644 training/dtrain/test/parallelize/README create mode 100644 training/dtrain/test/parallelize/g/grammar.out.0.gz create mode 100644 
training/dtrain/test/parallelize/g/grammar.out.1.gz create mode 100644 training/dtrain/test/parallelize/g/grammar.out.2.gz create mode 100644 training/dtrain/test/parallelize/g/grammar.out.3.gz create mode 100644 training/dtrain/test/parallelize/g/grammar.out.4.gz create mode 100644 training/dtrain/test/parallelize/g/grammar.out.5.gz create mode 100644 training/dtrain/test/parallelize/g/grammar.out.6.gz create mode 100644 training/dtrain/test/parallelize/g/grammar.out.7.gz create mode 100644 training/dtrain/test/parallelize/g/grammar.out.8.gz create mode 100644 training/dtrain/test/parallelize/g/grammar.out.9.gz (limited to 'training') diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb index 6e30cf9d..9b0923f6 100755 --- a/training/dtrain/parallelize.rb +++ b/training/dtrain/parallelize.rb @@ -7,10 +7,10 @@ if ARGV.size != 7 exit end -cdec_dir = '~/MAREC/cdec-dtrain/' -dtrain_bin = "~/MAREC/cdec-dtrain/training/dtrain/dtrain" +dtrain_dir = File.expand_path File.dirname(__FILE__) +dtrain_bin = "#{dtrain_dir}/dtrain" ruby = '/usr/bin/ruby' -lplp_rb = "#{cdec_dir}/training/dtrain/hstreaming/lplp.rb" +lplp_rb = "#{dtrain_dir}/hstreaming/lplp.rb" lplp_args = 'l2 select_k 100000' cat = '/bin/cat' @@ -96,15 +96,19 @@ end while remaining_shards > 0 shards_at_once.times { qsub_str_start = qsub_str_end = '' + local_end = '' if use_qsub qsub_str_start = "qsub -cwd -sync y -b y -j y -o work/out.#{shard}.#{epoch} -N dtrain.#{shard}.#{epoch} \"" qsub_str_end = "\"" + local_end = '' + else + local_end = "&>work/out.#{shard}.#{epoch}" end pids << Kernel.fork { `#{qsub_str_start}#{dtrain_bin} -c #{ini}\ --input #{input_files[shard]}\ --refs #{refs_files[shard]} #{input_weights}\ - --output work/weights.#{shard}.#{epoch}#{qsub_str_end}` + --output work/weights.#{shard}.#{epoch}#{qsub_str_end} #{local_end}` } weights_files << "work/weights.#{shard}.#{epoch}" shard += 1 diff --git a/training/dtrain/test/example/README b/training/dtrain/test/example/README index 6937b11b..2df77086 100644 --- a/training/dtrain/test/example/README +++ b/training/dtrain/test/example/README @@ -1,8 +1,8 @@ Small example of input format for distributed training. -Call dtrain from cdec/dtrain/ with ./dtrain -c test/example/dtrain.ini . +Call dtrain from this folder with ../../dtrain -c test/example/dtrain.ini . For this to work, undef 'DTRAIN_LOCAL' in dtrain.h and recompile. 
-Data is here: http://simianer.de/#dtrain +data can be found here: http://simianer.de/#dtrain diff --git a/training/dtrain/test/example/cdec.ini b/training/dtrain/test/example/cdec.ini index d5955f0e..068ebd4d 100644 --- a/training/dtrain/test/example/cdec.ini +++ b/training/dtrain/test/example/cdec.ini @@ -4,7 +4,7 @@ scfg_max_span_limit=15 intersection_strategy=cube_pruning cubepruning_pop_limit=30 feature_function=WordPenalty -feature_function=KLanguageModel test/example/nc-wmt11.en.srilm.gz +feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz # all currently working feature functions for translation: # (with those features active that were used in the ACL paper) #feature_function=ArityPenalty diff --git a/training/dtrain/test/example/dtrain.ini b/training/dtrain/test/example/dtrain.ini index 72d50ca1..97fce7f0 100644 --- a/training/dtrain/test/example/dtrain.ini +++ b/training/dtrain/test/example/dtrain.ini @@ -1,7 +1,7 @@ -input=test/example/nc-wmt11.1k.gz # use '-' for STDIN +input=./nc-wmt11.1k.gz # use '-' for STDIN output=- # a weights file (add .gz for gzip compression) or STDOUT '-' select_weights=VOID # don't output weights -decoder_config=test/example/cdec.ini # config for cdec +decoder_config=./cdec.ini # config for cdec # weights for these features will be printed on each iteration print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough tmp=/tmp diff --git a/training/dtrain/test/parallelize/README b/training/dtrain/test/parallelize/README new file mode 100644 index 00000000..89715105 --- /dev/null +++ b/training/dtrain/test/parallelize/README @@ -0,0 +1,5 @@ +run for example + ../../parallelize.rb ./dtrain.ini 4 false 2 2 ./in ./refs + +final weights will be in the file work/weights.3 + diff --git a/training/dtrain/test/parallelize/cdec.ini b/training/dtrain/test/parallelize/cdec.ini index e118374b..e43ba1c4 100644 --- a/training/dtrain/test/parallelize/cdec.ini +++ b/training/dtrain/test/parallelize/cdec.ini @@ -4,7 +4,7 @@ intersection_strategy=cube_pruning cubepruning_pop_limit=200 scfg_max_span_limit=15 feature_function=WordPenalty -feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz +feature_function=KLanguageModel ../example/nc-wmt11.en.srilm.gz #feature_function=ArityPenalty #feature_function=CMR2008ReorderingFeatures #feature_function=Dwarf diff --git a/training/dtrain/test/parallelize/g/grammar.out.0.gz b/training/dtrain/test/parallelize/g/grammar.out.0.gz new file mode 100644 index 00000000..1e28a24b Binary files /dev/null and b/training/dtrain/test/parallelize/g/grammar.out.0.gz differ diff --git a/training/dtrain/test/parallelize/g/grammar.out.1.gz b/training/dtrain/test/parallelize/g/grammar.out.1.gz new file mode 100644 index 00000000..372f5675 Binary files /dev/null and b/training/dtrain/test/parallelize/g/grammar.out.1.gz differ diff --git a/training/dtrain/test/parallelize/g/grammar.out.2.gz b/training/dtrain/test/parallelize/g/grammar.out.2.gz new file mode 100644 index 00000000..145d0dc0 Binary files /dev/null and b/training/dtrain/test/parallelize/g/grammar.out.2.gz differ diff --git a/training/dtrain/test/parallelize/g/grammar.out.3.gz b/training/dtrain/test/parallelize/g/grammar.out.3.gz new file mode 100644 index 00000000..105593ff Binary files /dev/null and b/training/dtrain/test/parallelize/g/grammar.out.3.gz differ diff --git a/training/dtrain/test/parallelize/g/grammar.out.4.gz 
b/training/dtrain/test/parallelize/g/grammar.out.4.gz new file mode 100644 index 00000000..30781f48 Binary files /dev/null and b/training/dtrain/test/parallelize/g/grammar.out.4.gz differ diff --git a/training/dtrain/test/parallelize/g/grammar.out.5.gz b/training/dtrain/test/parallelize/g/grammar.out.5.gz new file mode 100644 index 00000000..834ee759 Binary files /dev/null and b/training/dtrain/test/parallelize/g/grammar.out.5.gz differ diff --git a/training/dtrain/test/parallelize/g/grammar.out.6.gz b/training/dtrain/test/parallelize/g/grammar.out.6.gz new file mode 100644 index 00000000..2e76f348 Binary files /dev/null and b/training/dtrain/test/parallelize/g/grammar.out.6.gz differ diff --git a/training/dtrain/test/parallelize/g/grammar.out.7.gz b/training/dtrain/test/parallelize/g/grammar.out.7.gz new file mode 100644 index 00000000..3741a887 Binary files /dev/null and b/training/dtrain/test/parallelize/g/grammar.out.7.gz differ diff --git a/training/dtrain/test/parallelize/g/grammar.out.8.gz b/training/dtrain/test/parallelize/g/grammar.out.8.gz new file mode 100644 index 00000000..ebf6bd0c Binary files /dev/null and b/training/dtrain/test/parallelize/g/grammar.out.8.gz differ diff --git a/training/dtrain/test/parallelize/g/grammar.out.9.gz b/training/dtrain/test/parallelize/g/grammar.out.9.gz new file mode 100644 index 00000000..c1791059 Binary files /dev/null and b/training/dtrain/test/parallelize/g/grammar.out.9.gz differ -- cgit v1.2.3 From b89fd90083b22e6d4ab469af001a1f15fbcd7da9 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Mon, 11 Feb 2013 17:10:29 +0100 Subject: fixed l1 regularization iteration silliness --- training/dtrain/dtrain.cc | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) (limited to 'training') diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc index 18286668..b317c365 100644 --- a/training/dtrain/dtrain.cc +++ b/training/dtrain/dtrain.cc @@ -246,7 +246,7 @@ main(int argc, char** argv) cerr << setw(25) << "k " << k << endl; cerr << setw(25) << "N " << N << endl; cerr << setw(25) << "T " << T << endl; - cerr << setw(25) << "scorer '" << scorer_str << "'" << endl; + cerr << setw(26) << "scorer '" << scorer_str << "'" << endl; if (scorer_str == "approx_bleu") cerr << setw(25) << "approx. 
B discount " << approx_bleu_d << endl; cerr << setw(25) << "sample from " << "'" << sample_from << "'" << endl; @@ -459,35 +459,40 @@ main(int argc, char** argv) } // l1 regularization + // please note that this penalizes _all_ weights + // (contrary to only the ones changed by the last update) + // after a _sentence_ (not after each example/pair) if (l1naive) { - for (unsigned d = 0; d < lambdas.size(); d++) { - weight_t v = lambdas.get(d); - lambdas.set_value(d, v - sign(v) * l1_reg); + FastSparseVector::iterator it = lambdas.begin(); + for (; it != lambdas.end(); ++it) { + it->second -= sign(it->second) * l1_reg; } } else if (l1clip) { - for (unsigned d = 0; d < lambdas.size(); d++) { - if (lambdas.nonzero(d)) { - weight_t v = lambdas.get(d); + FastSparseVector::iterator it = lambdas.begin(); + for (; it != lambdas.end(); ++it) { + if (it->second != 0) { + weight_t v = it->second; if (v > 0) { - lambdas.set_value(d, max(0., v - l1_reg)); + it->second = max(0., v - l1_reg); } else { - lambdas.set_value(d, min(0., v + l1_reg)); + it->second = min(0., v + l1_reg); } } } } else if (l1cumul) { weight_t acc_penalty = (ii+1) * l1_reg; // ii is the index of the current input - for (unsigned d = 0; d < lambdas.size(); d++) { - if (lambdas.nonzero(d)) { - weight_t v = lambdas.get(d); - weight_t penalty = 0; + FastSparseVector::iterator it = lambdas.begin(); + for (; it != lambdas.end(); ++it) { + if (it->second != 0) { + weight_t v = it->second; + weight_t penalized = 0.; if (v > 0) { - penalty = max(0., v-(acc_penalty + cumulative_penalties.get(d))); + penalized = max(0., v-(acc_penalty + cumulative_penalties.get(it->first))); } else { - penalty = min(0., v+(acc_penalty - cumulative_penalties.get(d))); + penalized = min(0., v+(acc_penalty - cumulative_penalties.get(it->first))); } - lambdas.set_value(d, penalty); - cumulative_penalties.set_value(d, cumulative_penalties.get(d)+penalty); + it->second = penalized; + cumulative_penalties.set_value(it->first, cumulative_penalties.get(it->first)+penalized); } } } -- cgit v1.2.3 From 349ee7d5599bb973506c8bbb56926cf9d366b564 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sun, 3 Mar 2013 12:06:25 +0100 Subject: dtrain parallelize.rb fixes --- training/dtrain/parallelize.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'training') diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb index 9b0923f6..23f2a7ed 100755 --- a/training/dtrain/parallelize.rb +++ b/training/dtrain/parallelize.rb @@ -1,7 +1,7 @@ #!/usr/bin/env ruby -if ARGV.size != 7 +if ARGV.size != 8 STDERR.write "Usage: " STDERR.write "ruby parallelize.rb <#shards|predef> \n" exit @@ -95,6 +95,7 @@ end remaining_shards = num_shards while remaining_shards > 0 shards_at_once.times { + break if remaining_shards==0 qsub_str_start = qsub_str_end = '' local_end = '' if use_qsub -- cgit v1.2.3 From bf9ee90b00ebc1fc4f3ce16cb33bdbd1032675c9 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Mon, 11 Mar 2013 15:30:09 +0100 Subject: parallelize.rb: proper command line arguments --- training/dtrain/parallelize.rb | 46 +++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 14 deletions(-) (limited to 'training') diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb index 23f2a7ed..50c966d7 100755 --- a/training/dtrain/parallelize.rb +++ b/training/dtrain/parallelize.rb @@ -1,12 +1,30 @@ #!/usr/bin/env ruby +require 'trollop' -if ARGV.size != 8 - STDERR.write "Usage: " - STDERR.write "ruby parallelize.rb 
<#shards|predef> \n" - exit +def usage + if ARGV.size != 8 + STDERR.write "Usage: " + STDERR.write "ruby parallelize.rb -c -e [--randomize/-z] -s <#shards|0> -p -i -r [--qsub/-q]\n" + exit 1 + end +end +usage if not [12, 13, 14].include? ARGV.size + +opts = Trollop::options do + opt :config, "dtrain config file", :type => :string + opt :epochs, "number of epochs", :type => :int + opt :randomize, "randomize shards before each epoch", :type => :bool, :short => '-z', :default => false + opt :shards, "number of shards", :type => :int + opt :processes_at_once, "have this number (max) running at the same time", :type => :int, :default => 9999 + opt :input, "input", :type => :string + opt :references, "references", :type => :string + opt :qsub, "use qsub", :type => :bool, :default => false end +puts opts.to_s + + dtrain_dir = File.expand_path File.dirname(__FILE__) dtrain_bin = "#{dtrain_dir}/dtrain" ruby = '/usr/bin/ruby' @@ -14,22 +32,22 @@ lplp_rb = "#{dtrain_dir}/hstreaming/lplp.rb" lplp_args = 'l2 select_k 100000' cat = '/bin/cat' -ini = ARGV[0] -epochs = ARGV[1].to_i +ini = opts[:config] +epochs = opts[:epochs] rand = false -rand = true if ARGV[2]=='true' +rand = true if opts[:randomize] predefined_shards = false -if ARGV[3] == 'predef' +if opts[:shards] == 0 predefined_shards = true - num_shards = -1 + num_shards = 0 else - num_shards = ARGV[3].to_i + num_shards = opts[:shards] end -shards_at_once = ARGV[4].to_i -input = ARGV[5] -refs = ARGV[6] +shards_at_once = opts[:processes_at_once] +input = opts[:input] +refs = opts[:references] use_qsub = false -use_qsub = true if ARGV[7] +use_qsub = true if opts[:qsub] `mkdir work` -- cgit v1.2.3 From 5125f56e6f0f5ee5427f2687eb5f962589ae4c5e Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Mon, 11 Mar 2013 15:35:41 +0100 Subject: parallelize.rb: proper command line arguments --- training/dtrain/parallelize.rb | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'training') diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb index 50c966d7..acfd7290 100755 --- a/training/dtrain/parallelize.rb +++ b/training/dtrain/parallelize.rb @@ -5,7 +5,7 @@ require 'trollop' def usage if ARGV.size != 8 STDERR.write "Usage: " - STDERR.write "ruby parallelize.rb -c -e [--randomize/-z] -s <#shards|0> -p -i -r [--qsub/-q]\n" + STDERR.write "ruby parallelize.rb -c -e [--randomize/-z] -s <#shards|0> -p -i -r [--qsub/-q] --dtrain_binary \n" exit 1 end end @@ -20,13 +20,18 @@ opts = Trollop::options do opt :input, "input", :type => :string opt :references, "references", :type => :string opt :qsub, "use qsub", :type => :bool, :default => false + opt :dtrain_binary, "path to dtrain binary", :type => :string end puts opts.to_s dtrain_dir = File.expand_path File.dirname(__FILE__) -dtrain_bin = "#{dtrain_dir}/dtrain" +if not opts[:dtrain_binary] + dtrain_bin = "#{dtrain_dir}/dtrain" +else + dtrain_bin = opts[:dtrain_binary] +end ruby = '/usr/bin/ruby' lplp_rb = "#{dtrain_dir}/hstreaming/lplp.rb" lplp_args = 'l2 select_k 100000' -- cgit v1.2.3 From 2482bc590bc38b0256322c52e135672a222e84d0 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Mon, 11 Mar 2013 15:57:34 +0100 Subject: parallelize.rb: proper command line arguments --- training/dtrain/parallelize.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'training') diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb index acfd7290..30fb0008 100755 --- a/training/dtrain/parallelize.rb +++ b/training/dtrain/parallelize.rb @@ -21,6 
+21,7 @@ opts = Trollop::options do opt :references, "references", :type => :string opt :qsub, "use qsub", :type => :bool, :default => false opt :dtrain_binary, "path to dtrain binary", :type => :string + opt :lplp_args, "arguments for lplp arguments", :type => :string, :default => "l2 select_k 100000" end puts opts.to_s @@ -34,7 +35,7 @@ else end ruby = '/usr/bin/ruby' lplp_rb = "#{dtrain_dir}/hstreaming/lplp.rb" -lplp_args = 'l2 select_k 100000' +lplp_args = opts[:lplp_args] cat = '/bin/cat' ini = opts[:config] -- cgit v1.2.3 From a72761780f54734ba20800fd5f099032fa1cd947 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Mon, 11 Mar 2013 16:00:42 +0100 Subject: bla --- training/dtrain/parallelize.rb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'training') diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb index 30fb0008..a1826e98 100755 --- a/training/dtrain/parallelize.rb +++ b/training/dtrain/parallelize.rb @@ -9,7 +9,7 @@ def usage exit 1 end end -usage if not [12, 13, 14].include? ARGV.size +usage if not [11, 12, 13, 14].include? ARGV.size opts = Trollop::options do opt :config, "dtrain config file", :type => :string @@ -24,8 +24,6 @@ opts = Trollop::options do opt :lplp_args, "arguments for lplp arguments", :type => :string, :default => "l2 select_k 100000" end -puts opts.to_s - dtrain_dir = File.expand_path File.dirname(__FILE__) if not opts[:dtrain_binary] -- cgit v1.2.3 From fe8ad704d0b5ecf06c798d75d54789e6532fd3c1 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Fri, 15 Mar 2013 09:28:04 +0100 Subject: resharding --- training/dtrain/parallelize.rb | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'training') diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb index a1826e98..fca9b10d 100755 --- a/training/dtrain/parallelize.rb +++ b/training/dtrain/parallelize.rb @@ -5,16 +5,16 @@ require 'trollop' def usage if ARGV.size != 8 STDERR.write "Usage: " - STDERR.write "ruby parallelize.rb -c -e [--randomize/-z] -s <#shards|0> -p -i -r [--qsub/-q] --dtrain_binary \n" + STDERR.write "ruby parallelize.rb -c -e [--randomize/-z] [--reshard/-y] -s <#shards|0> -p -i -r [--qsub/-q] --dtrain_binary -l \"l2 select_k 100000\"\n" exit 1 end end -usage if not [11, 12, 13, 14].include? 
ARGV.size opts = Trollop::options do opt :config, "dtrain config file", :type => :string opt :epochs, "number of epochs", :type => :int opt :randomize, "randomize shards before each epoch", :type => :bool, :short => '-z', :default => false + opt :reshard, "reshard after each epoch", :type => :bool, :short => '-y', :default => false opt :shards, "number of shards", :type => :int opt :processes_at_once, "have this number (max) running at the same time", :type => :int, :default => 9999 opt :input, "input", :type => :string @@ -40,6 +40,8 @@ ini = opts[:config] epochs = opts[:epochs] rand = false rand = true if opts[:randomize] +reshard = false +reshard = true if opts[:reshard] predefined_shards = false if opts[:shards] == 0 predefined_shards = true @@ -142,7 +144,7 @@ end end `#{cat} work/weights.*.#{epoch} > work/weights_cat` `#{ruby} #{lplp_rb} #{lplp_args} #{num_shards} < work/weights_cat > work/weights.#{epoch}` - if rand and epoch+1!=epochs + if rand and reshard and epoch+1!=epochs input_files, refs_files = make_shards input, refs, num_shards, epoch+1, rand end } -- cgit v1.2.3 From 72b07dfc1534862aea06c102b4382513183ce253 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Fri, 15 Mar 2013 09:56:26 +0100 Subject: added fixed BLEU+1 --- training/dtrain/dtrain.cc | 2 ++ training/dtrain/score.cc | 31 ++++++++++++++++++++++++++++++- training/dtrain/score.h | 5 +++++ 3 files changed, 37 insertions(+), 1 deletion(-) (limited to 'training') diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc index b317c365..53487d34 100644 --- a/training/dtrain/dtrain.cc +++ b/training/dtrain/dtrain.cc @@ -163,6 +163,8 @@ main(int argc, char** argv) scorer = dynamic_cast(new BleuScorer); } else if (scorer_str == "stupid_bleu") { scorer = dynamic_cast(new StupidBleuScorer); + } else if (scorer_str == "fixed_stupid_bleu") { + scorer = dynamic_cast(new FixedStupidBleuScorer); } else if (scorer_str == "smooth_bleu") { scorer = dynamic_cast(new SmoothBleuScorer); } else if (scorer_str == "sum_bleu") { diff --git a/training/dtrain/score.cc b/training/dtrain/score.cc index 34fc86a9..96d6e10a 100644 --- a/training/dtrain/score.cc +++ b/training/dtrain/score.cc @@ -49,7 +49,7 @@ BleuScorer::Score(vector& hyp, vector& ref, * for Machine Translation" * (Lin & Och '04) * - * NOTE: 0 iff no 1gram match + * NOTE: 0 iff no 1gram match ('grounded') */ score_t StupidBleuScorer::Score(vector& hyp, vector& ref, @@ -73,6 +73,35 @@ StupidBleuScorer::Score(vector& hyp, vector& ref, return brevity_penalty(hyp_len, ref_len) * exp(sum); } +/* + * fixed 'stupid' bleu + * + * as in "Optimizing for Sentence-Level BLEU+1 + * Yields Short Translations" + * (Nakov et al. 
'12) + */ +score_t +FixedStupidBleuScorer::Score(vector& hyp, vector& ref, + const unsigned /*rank*/, const unsigned /*src_len*/) +{ + unsigned hyp_len = hyp.size(), ref_len = ref.size(); + if (hyp_len == 0 || ref_len == 0) return 0.; + NgramCounts counts = make_ngram_counts(hyp, ref, N_); + unsigned M = N_; + vector v = w_; + if (ref_len < N_) { + M = ref_len; + for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M); + } + score_t sum = 0, add = 0; + for (unsigned i = 0; i < M; i++) { + if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.; + if (i == 1) add = 1; + sum += v[i] * log(((score_t)counts.clipped_[i] + add)/((counts.sum_[i] + add))); + } + return brevity_penalty(hyp_len, ref_len+1) * exp(sum); // <- fix +} + /* * smooth bleu * diff --git a/training/dtrain/score.h b/training/dtrain/score.h index f317c903..bddaa071 100644 --- a/training/dtrain/score.h +++ b/training/dtrain/score.h @@ -148,6 +148,11 @@ struct StupidBleuScorer : public LocalScorer score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); }; +struct FixedStupidBleuScorer : public LocalScorer +{ + score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); +}; + struct SmoothBleuScorer : public LocalScorer { score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); -- cgit v1.2.3 From 529c8f0671ce0b09c2a797278a8f84242c86465d Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Fri, 15 Mar 2013 10:29:13 +0100 Subject: removed hadoop/hstreaming mode --- training/dtrain/README.md | 28 +---- training/dtrain/dtrain.cc | 121 +------------------ training/dtrain/dtrain.h | 8 +- training/dtrain/hstreaming/avg.rb | 32 ----- training/dtrain/hstreaming/cdec.ini | 22 ---- training/dtrain/hstreaming/dtrain.ini | 15 --- training/dtrain/hstreaming/dtrain.sh | 9 -- training/dtrain/hstreaming/hadoop-streaming-job.sh | 30 ----- training/dtrain/hstreaming/lplp.rb | 131 --------------------- training/dtrain/hstreaming/red-test | 9 -- training/dtrain/lplp.rb | 131 +++++++++++++++++++++ training/dtrain/parallelize.rb | 4 +- training/dtrain/test/example/cdec.ini | 2 +- 13 files changed, 144 insertions(+), 398 deletions(-) delete mode 100755 training/dtrain/hstreaming/avg.rb delete mode 100644 training/dtrain/hstreaming/cdec.ini delete mode 100644 training/dtrain/hstreaming/dtrain.ini delete mode 100755 training/dtrain/hstreaming/dtrain.sh delete mode 100755 training/dtrain/hstreaming/hadoop-streaming-job.sh delete mode 100755 training/dtrain/hstreaming/lplp.rb delete mode 100644 training/dtrain/hstreaming/red-test create mode 100755 training/dtrain/lplp.rb (limited to 'training') diff --git a/training/dtrain/README.md b/training/dtrain/README.md index 7edabbf1..2ab2f232 100644 --- a/training/dtrain/README.md +++ b/training/dtrain/README.md @@ -13,36 +13,18 @@ Builds when building cdec, see ../BUILDING . To build only parts needed for dtrain do ``` autoreconf -ifv - ./configure [--disable-gtest] - cd dtrain/; make + ./configure + cd training/dtrain/; make ``` Running ------- -To run this on a dev set locally: -``` - #define DTRAIN_LOCAL -``` -otherwise remove that line or undef, then recompile. You need a single -grammar file or input annotated with per-sentence grammars (psg) as you -would use with cdec. Additionally you need to give dtrain a file with -references (--refs) when running locally. 
- -The input for use with hadoop streaming looks like this: -``` - \t\t\t -``` -To convert a psg to this format you need to replace all "\n" -by "\t". Make sure there are no tabs in your data. - -For an example of local usage (with the 'distributed' format) -the see test/example/ . This expects dtrain to be built without -DTRAIN_LOCAL. +See directories under test/ . Legal ----- -Copyright (c) 2012 by Patrick Simianer +Copyright (c) 2012-2013 by Patrick Simianer -See the file ../LICENSE.txt for the licensing terms that this software is +See the file LICENSE.txt in the root folder for the licensing terms that this software is released under. diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc index 53487d34..dfb5b351 100644 --- a/training/dtrain/dtrain.cc +++ b/training/dtrain/dtrain.cc @@ -12,9 +12,7 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg) ("decoder_config", po::value(), "configuration file for cdec") ("print_weights", po::value(), "weights to print on each iteration") ("stop_after", po::value()->default_value(0), "stop after X input sentences") - ("tmp", po::value()->default_value("/tmp"), "temp dir to use") ("keep", po::value()->zero_tokens(), "keep weights files for each iteration") - ("hstreaming", po::value(), "run in hadoop streaming mode, arg is a task id") ("epochs", po::value()->default_value(10), "# of iterations T (per shard)") ("k", po::value()->default_value(100), "how many translations to sample") ("sample_from", po::value()->default_value("kbest"), "where to sample translations from: 'kbest', 'forest'") @@ -28,16 +26,14 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg) ("gamma", po::value()->default_value(0.), "gamma for SVM (0 for perceptron)") ("select_weights", po::value()->default_value("last"), "output best, last, avg weights ('VOID' to throw away)") ("rescale", po::value()->zero_tokens(), "rescale weight vector after each input") - ("l1_reg", po::value()->default_value("none"), "apply l1 regularization as in 'Tsuroka et al' (2010)") + ("l1_reg", po::value()->default_value("none"), "apply l1 regularization as in 'Tsuroka et al' (2010) UNTESTED") ("l1_reg_strength", po::value(), "l1 regularization strength") ("fselect", po::value()->default_value(-1), "select top x percent (or by threshold) of features after each epoch NOT IMPLEMENTED") // TODO ("approx_bleu_d", po::value()->default_value(0.9), "discount for approx. BLEU") ("scale_bleu_diff", po::value()->zero_tokens(), "learning rate <- bleu diff of a misranked pair") ("loss_margin", po::value()->default_value(0.), "update if no error in pref pair but model scores this near") ("max_pairs", po::value()->default_value(std::numeric_limits::max()), "max. # of pairs per Sent.") -#ifdef DTRAIN_LOCAL ("refs,r", po::value(), "references in local mode") -#endif ("noup", po::value()->zero_tokens(), "do not update weights"); po::options_description cl("Command Line Options"); cl.add_options() @@ -55,16 +51,6 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg) cerr << cl << endl; return false; } - if (cfg->count("hstreaming") && (*cfg)["output"].as() != "-") { - cerr << "When using 'hstreaming' the 'output' param should be '-'." << endl; - return false; - } -#ifdef DTRAIN_LOCAL - if ((*cfg)["input"].as() == "-") { - cerr << "Can't use stdin as input with this binary. 
Recompile without DTRAIN_LOCAL" << endl; - return false; - } -#endif if ((*cfg)["sample_from"].as() != "kbest" && (*cfg)["sample_from"].as() != "forest") { cerr << "Wrong 'sample_from' param: '" << (*cfg)["sample_from"].as() << "', use 'kbest' or 'forest'." << endl; @@ -111,17 +97,8 @@ main(int argc, char** argv) if (cfg.count("verbose")) verbose = true; bool noup = false; if (cfg.count("noup")) noup = true; - bool hstreaming = false; - string task_id; - if (cfg.count("hstreaming")) { - hstreaming = true; - quiet = true; - task_id = cfg["hstreaming"].as(); - cerr.precision(17); - } bool rescale = false; if (cfg.count("rescale")) rescale = true; - HSReporter rep(task_id); bool keep = false; if (cfg.count("keep")) keep = true; @@ -224,16 +201,8 @@ main(int argc, char** argv) // buffer input for t > 0 vector src_str_buf; // source strings (decoder takes only strings) vector > ref_ids_buf; // references as WordID vecs - // where temp files go - string tmp_path = cfg["tmp"].as(); -#ifdef DTRAIN_LOCAL string refs_fn = cfg["refs"].as(); ReadFile refs(refs_fn); -#else - string grammar_buf_fn = gettmpf(tmp_path, "dtrain-grammars"); - ogzstream grammar_buf_out; - grammar_buf_out.open(grammar_buf_fn.c_str()); -#endif unsigned in_sz = std::numeric_limits::max(); // input index, input size vector > all_scores; @@ -270,9 +239,7 @@ main(int argc, char** argv) cerr << setw(25) << "max pairs " << max_pairs << endl; cerr << setw(25) << "cdec cfg " << "'" << cfg["decoder_config"].as() << "'" << endl; cerr << setw(25) << "input " << "'" << input_fn << "'" << endl; -#ifdef DTRAIN_LOCAL cerr << setw(25) << "refs " << "'" << refs_fn << "'" << endl; -#endif cerr << setw(25) << "output " << "'" << output_fn << "'" << endl; if (cfg.count("input_weights")) cerr << setw(25) << "weights in " << "'" << cfg["input_weights"].as() << "'" << endl; @@ -285,14 +252,10 @@ main(int argc, char** argv) for (unsigned t = 0; t < T; t++) // T epochs { - if (hstreaming) cerr << "reporter:status:Iteration #" << t+1 << " of " << T << endl; - time_t start, end; time(&start); -#ifndef DTRAIN_LOCAL igzstream grammar_buf_in; if (t > 0) grammar_buf_in.open(grammar_buf_fn.c_str()); -#endif score_t score_sum = 0.; score_t model_sum(0); unsigned ii = 0, rank_errors = 0, margin_violations = 0, npairs = 0, f_count = 0, list_sz = 0; @@ -340,52 +303,6 @@ main(int argc, char** argv) // getting input vector ref_ids; // reference as vector -#ifndef DTRAIN_LOCAL - vector in_split; // input: sid\tsrc\tref\tpsg - if (t == 0) { - // handling input - split_in(in, in_split); - if (hstreaming && ii == 0) cerr << "reporter:counter:" << task_id << ",First ID," << in_split[0] << endl; - // getting reference - vector ref_tok; - boost::split(ref_tok, in_split[2], boost::is_any_of(" ")); - register_and_convert(ref_tok, ref_ids); - ref_ids_buf.push_back(ref_ids); - // process and set grammar - bool broken_grammar = true; // ignore broken grammars - for (string::iterator it = in.begin(); it != in.end(); it++) { - if (!isspace(*it)) { - broken_grammar = false; - break; - } - } - if (broken_grammar) { - cerr << "Broken grammar for " << ii+1 << "! Ignoring this input." 
<< endl; - continue; - } - boost::replace_all(in, "\t", "\n"); - in += "\n"; - grammar_buf_out << in << DTRAIN_GRAMMAR_DELIM << " " << in_split[0] << endl; - decoder.AddSupplementalGrammarFromString(in); - src_str_buf.push_back(in_split[1]); - // decode - observer->SetRef(ref_ids); - decoder.Decode(in_split[1], observer); - } else { - // get buffered grammar - string grammar_str; - while (true) { - string rule; - getline(grammar_buf_in, rule); - if (boost::starts_with(rule, DTRAIN_GRAMMAR_DELIM)) break; - grammar_str += rule + "\n"; - } - decoder.AddSupplementalGrammarFromString(grammar_str); - // decode - observer->SetRef(ref_ids_buf[ii]); - decoder.Decode(src_str_buf[ii], observer); - } -#else if (t == 0) { string r_; getline(*refs, r_); @@ -402,7 +319,6 @@ main(int argc, char** argv) decoder.Decode(in, observer); else decoder.Decode(src_str_buf[ii], observer); -#endif // get (scored) samples vector* samples = observer->GetSamples(); @@ -505,11 +421,6 @@ main(int argc, char** argv) ++ii; - if (hstreaming) { - rep.update_counter("Seen #"+boost::lexical_cast(t+1), 1u); - rep.update_counter("Seen", 1u); - } - } // input loop if (average) w_average += lambdas; @@ -518,21 +429,8 @@ main(int argc, char** argv) if (t == 0) { in_sz = ii; // remember size of input (# lines) - if (hstreaming) { - rep.update_counter("|Input|", ii); - rep.update_gcounter("|Input|", ii); - rep.update_gcounter("Shards", 1u); - } } -#ifndef DTRAIN_LOCAL - if (t == 0) { - grammar_buf_out.close(); - } else { - grammar_buf_in.close(); - } -#endif - // print some stats score_t score_avg = score_sum/(score_t)in_sz; score_t model_avg = model_sum/(score_t)in_sz; @@ -546,7 +444,7 @@ main(int argc, char** argv) } unsigned nonz = 0; - if (!quiet || hstreaming) nonz = (unsigned)lambdas.num_nonzero(); + if (!quiet) nonz = (unsigned)lambdas.num_nonzero(); if (!quiet) { cerr << _p5 << _p << "WEIGHTS" << endl; @@ -571,16 +469,6 @@ main(int argc, char** argv) cerr << " avg f count: " << f_count/(float)list_sz << endl; } - if (hstreaming) { - rep.update_counter("Score 1best avg #"+boost::lexical_cast(t+1), (unsigned)(score_avg*DTRAIN_SCALE)); - rep.update_counter("Model 1best avg #"+boost::lexical_cast(t+1), (unsigned)(model_avg*DTRAIN_SCALE)); - rep.update_counter("Pairs avg #"+boost::lexical_cast(t+1), (unsigned)((npairs/(weight_t)in_sz)*DTRAIN_SCALE)); - rep.update_counter("Rank errors avg #"+boost::lexical_cast(t+1), (unsigned)((rank_errors/(weight_t)in_sz)*DTRAIN_SCALE)); - rep.update_counter("Margin violations avg #"+boost::lexical_cast(t+1), (unsigned)((margin_violations/(weight_t)in_sz)*DTRAIN_SCALE)); - rep.update_counter("Non zero feature count #"+boost::lexical_cast(t+1), nonz); - rep.update_gcounter("Non zero feature count #"+boost::lexical_cast(t+1), nonz); - } - pair remember; remember.first = score_avg; remember.second = model_avg; @@ -611,10 +499,6 @@ main(int argc, char** argv) if (average) w_average /= (weight_t)T; -#ifndef DTRAIN_LOCAL - unlink(grammar_buf_fn.c_str()); -#endif - if (!noup) { if (!quiet) cerr << endl << "Writing weights file to '" << output_fn << "' ..." 
<< endl; if (select_weights == "last" || average) { // last, average @@ -651,7 +535,6 @@ main(int argc, char** argv) } } } - if (output_fn == "-" && hstreaming) cout << "__SHARD_COUNT__\t1" << endl; if (!quiet) cerr << "done" << endl; } diff --git a/training/dtrain/dtrain.h b/training/dtrain/dtrain.h index 572fd613..f368d810 100644 --- a/training/dtrain/dtrain.h +++ b/training/dtrain/dtrain.h @@ -1,14 +1,12 @@ #ifndef _DTRAIN_H_ #define _DTRAIN_H_ -#undef DTRAIN_FASTER_PERCEPTRON // only look at misranked pairs - // DO NOT USE WITH SVM! -#define DTRAIN_LOCAL +#undef DTRAIN_FASTER_PERCEPTRON // only consider actually misranked pairs + // DO NOT ENABLE WITH SVM (gamma > 0) OR loss_margin! + #define DTRAIN_DOTS 10 // after how many inputs to display a '.' -#define DTRAIN_GRAMMAR_DELIM "########EOS########" #define DTRAIN_SCALE 100000 - #include #include #include diff --git a/training/dtrain/hstreaming/avg.rb b/training/dtrain/hstreaming/avg.rb deleted file mode 100755 index 2599c732..00000000 --- a/training/dtrain/hstreaming/avg.rb +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env ruby -# first arg may be an int of custom shard count - -shard_count_key = "__SHARD_COUNT__" - -STDIN.set_encoding 'utf-8' -STDOUT.set_encoding 'utf-8' - -w = {} -c = {} -w.default = 0 -c.default = 0 -while line = STDIN.gets - key, val = line.split /\s/ - w[key] += val.to_f - c[key] += 1 -end - -if ARGV.size == 0 - shard_count = w["__SHARD_COUNT__"] -else - shard_count = ARGV[0].to_f -end -w.each_key { |k| - if k == shard_count_key - next - else - puts "#{k}\t#{w[k]/shard_count}" - #puts "# #{c[k]}" - end -} - diff --git a/training/dtrain/hstreaming/cdec.ini b/training/dtrain/hstreaming/cdec.ini deleted file mode 100644 index d4f5cecd..00000000 --- a/training/dtrain/hstreaming/cdec.ini +++ /dev/null @@ -1,22 +0,0 @@ -formalism=scfg -add_pass_through_rules=true -scfg_max_span_limit=15 -intersection_strategy=cube_pruning -cubepruning_pop_limit=30 -feature_function=WordPenalty -feature_function=KLanguageModel nc-wmt11.en.srilm.gz -#feature_function=ArityPenalty -#feature_function=CMR2008ReorderingFeatures -#feature_function=Dwarf -#feature_function=InputIndicator -#feature_function=LexNullJump -#feature_function=NewJump -#feature_function=NgramFeatures -#feature_function=NonLatinCount -#feature_function=OutputIndicator -#feature_function=RuleIdentityFeatures -#feature_function=RuleNgramFeatures -#feature_function=RuleShape -#feature_function=SourceSpanSizeFeatures -#feature_function=SourceWordPenalty -#feature_function=SpanFeatures diff --git a/training/dtrain/hstreaming/dtrain.ini b/training/dtrain/hstreaming/dtrain.ini deleted file mode 100644 index a2c219a1..00000000 --- a/training/dtrain/hstreaming/dtrain.ini +++ /dev/null @@ -1,15 +0,0 @@ -input=- -output=- -decoder_config=cdec.ini -tmp=/var/hadoop/mapred/local/ -epochs=1 -k=100 -N=4 -learning_rate=0.0001 -gamma=0 -scorer=stupid_bleu -sample_from=kbest -filter=uniq -pair_sampling=XYX -pair_threshold=0 -select_weights=last diff --git a/training/dtrain/hstreaming/dtrain.sh b/training/dtrain/hstreaming/dtrain.sh deleted file mode 100755 index 877ff94c..00000000 --- a/training/dtrain/hstreaming/dtrain.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash -# script to run dtrain with a task id - -pushd . &>/dev/null -cd .. -ID=$(basename $(pwd)) # attempt_... 
-popd &>/dev/null -./dtrain -c dtrain.ini --hstreaming $ID - diff --git a/training/dtrain/hstreaming/hadoop-streaming-job.sh b/training/dtrain/hstreaming/hadoop-streaming-job.sh deleted file mode 100755 index 92419956..00000000 --- a/training/dtrain/hstreaming/hadoop-streaming-job.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/sh - -EXP=a_simple_test - -# change these vars to fit your hadoop installation -HADOOP_HOME=/usr/lib/hadoop-0.20 -JAR=contrib/streaming/hadoop-streaming-0.20.2-cdh3u1.jar -HSTREAMING="$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/$JAR" - - IN=input_on_hdfs -OUT=output_weights_on_hdfs - -# you can -reducer to NONE if you want to -# do feature selection/averaging locally (e.g. to -# keep weights of all epochs) -$HSTREAMING \ - -mapper "dtrain.sh" \ - -reducer "ruby lplp.rb l2 select_k 100000" \ - -input $IN \ - -output $OUT \ - -file dtrain.sh \ - -file lplp.rb \ - -file ../dtrain \ - -file dtrain.ini \ - -file cdec.ini \ - -file ../test/example/nc-wmt11.en.srilm.gz \ - -jobconf mapred.reduce.tasks=30 \ - -jobconf mapred.max.map.failures.percent=0 \ - -jobconf mapred.job.name="dtrain $EXP" - diff --git a/training/dtrain/hstreaming/lplp.rb b/training/dtrain/hstreaming/lplp.rb deleted file mode 100755 index f0cd58c5..00000000 --- a/training/dtrain/hstreaming/lplp.rb +++ /dev/null @@ -1,131 +0,0 @@ -# lplp.rb - -# norms -def l0(feature_column, n) - if feature_column.size >= n then return 1 else return 0 end -end - -def l1(feature_column, n=-1) - return feature_column.map { |i| i.abs }.reduce { |sum,i| sum+i } -end - -def l2(feature_column, n=-1) - return Math.sqrt feature_column.map { |i| i.abs2 }.reduce { |sum,i| sum+i } -end - -def linfty(feature_column, n=-1) - return feature_column.map { |i| i.abs }.max -end - -# stats -def median(feature_column, n) - return feature_column.concat(0.step(n-feature_column.size-1).map{|i|0}).sort[feature_column.size/2] -end - -def mean(feature_column, n) - return feature_column.reduce { |sum, i| sum+i } / n -end - -# selection -def select_k(weights, norm_fun, n, k=10000) - weights.sort{|a,b| norm_fun.call(b[1], n) <=> norm_fun.call(a[1], n)}.each { |p| - puts "#{p[0]}\t#{mean(p[1], n)}" - k -= 1 - if k == 0 then break end - } -end - -def cut(weights, norm_fun, n, epsilon=0.0001) - weights.each { |k,v| - if norm_fun.call(v, n).abs >= epsilon - puts "#{k}\t#{mean(v, n)}" - end - } -end - -# test -def _test() - puts - w = {} - w["a"] = [1, 2, 3] - w["b"] = [1, 2] - w["c"] = [66] - w["d"] = [10, 20, 30] - n = 3 - puts w.to_s - puts - puts "select_k" - puts "l0 expect ad" - select_k(w, method(:l0), n, 2) - puts "l1 expect cd" - select_k(w, method(:l1), n, 2) - puts "l2 expect c" - select_k(w, method(:l2), n, 1) - puts - puts "cut" - puts "l1 expect cd" - cut(w, method(:l1), n, 7) - puts - puts "median" - a = [1,2,3,4,5] - puts a.to_s - puts median(a, 5) - puts - puts "#{median(a, 7)} <- that's because we add missing 0s:" - puts a.concat(0.step(7-a.size-1).map{|i|0}).to_s - puts - puts "mean expect bc" - w.clear - w["a"] = [2] - w["b"] = [2.1] - w["c"] = [2.2] - cut(w, method(:mean), 1, 2.05) - exit -end -#_test() - -# actually do something -def usage() - puts "lplp.rb [n] < " - puts " l0...: norms for selection" - puts "select_k: only output top k (according to the norm of their column vector) features" - puts " cut: output features with weight >= threshold" - puts " n: if we do not have a shard count use this number for averaging" - exit -end - -if ARGV.size < 3 then usage end -norm_fun = method(ARGV[0].to_sym) -type = ARGV[1] -x = ARGV[2].to_f - 
-shard_count_key = "__SHARD_COUNT__" - -STDIN.set_encoding 'utf-8' -STDOUT.set_encoding 'utf-8' - -w = {} -shard_count = 0 -while line = STDIN.gets - key, val = line.split /\s+/ - if key == shard_count_key - shard_count += 1 - next - end - if w.has_key? key - w[key].push val.to_f - else - w[key] = [val.to_f] - end -end - -if ARGV.size == 4 then shard_count = ARGV[3].to_f end - -if type == 'cut' - cut(w, norm_fun, shard_count, x) -elsif type == 'select_k' - select_k(w, norm_fun, shard_count, x) -else - puts "oh oh" -end - diff --git a/training/dtrain/hstreaming/red-test b/training/dtrain/hstreaming/red-test deleted file mode 100644 index 2623d697..00000000 --- a/training/dtrain/hstreaming/red-test +++ /dev/null @@ -1,9 +0,0 @@ -a 1 -b 2 -c 3.5 -a 1 -b 2 -c 3.5 -d 1 -e 2 -__SHARD_COUNT__ 2 diff --git a/training/dtrain/lplp.rb b/training/dtrain/lplp.rb new file mode 100755 index 00000000..f0cd58c5 --- /dev/null +++ b/training/dtrain/lplp.rb @@ -0,0 +1,131 @@ +# lplp.rb + +# norms +def l0(feature_column, n) + if feature_column.size >= n then return 1 else return 0 end +end + +def l1(feature_column, n=-1) + return feature_column.map { |i| i.abs }.reduce { |sum,i| sum+i } +end + +def l2(feature_column, n=-1) + return Math.sqrt feature_column.map { |i| i.abs2 }.reduce { |sum,i| sum+i } +end + +def linfty(feature_column, n=-1) + return feature_column.map { |i| i.abs }.max +end + +# stats +def median(feature_column, n) + return feature_column.concat(0.step(n-feature_column.size-1).map{|i|0}).sort[feature_column.size/2] +end + +def mean(feature_column, n) + return feature_column.reduce { |sum, i| sum+i } / n +end + +# selection +def select_k(weights, norm_fun, n, k=10000) + weights.sort{|a,b| norm_fun.call(b[1], n) <=> norm_fun.call(a[1], n)}.each { |p| + puts "#{p[0]}\t#{mean(p[1], n)}" + k -= 1 + if k == 0 then break end + } +end + +def cut(weights, norm_fun, n, epsilon=0.0001) + weights.each { |k,v| + if norm_fun.call(v, n).abs >= epsilon + puts "#{k}\t#{mean(v, n)}" + end + } +end + +# test +def _test() + puts + w = {} + w["a"] = [1, 2, 3] + w["b"] = [1, 2] + w["c"] = [66] + w["d"] = [10, 20, 30] + n = 3 + puts w.to_s + puts + puts "select_k" + puts "l0 expect ad" + select_k(w, method(:l0), n, 2) + puts "l1 expect cd" + select_k(w, method(:l1), n, 2) + puts "l2 expect c" + select_k(w, method(:l2), n, 1) + puts + puts "cut" + puts "l1 expect cd" + cut(w, method(:l1), n, 7) + puts + puts "median" + a = [1,2,3,4,5] + puts a.to_s + puts median(a, 5) + puts + puts "#{median(a, 7)} <- that's because we add missing 0s:" + puts a.concat(0.step(7-a.size-1).map{|i|0}).to_s + puts + puts "mean expect bc" + w.clear + w["a"] = [2] + w["b"] = [2.1] + w["c"] = [2.2] + cut(w, method(:mean), 1, 2.05) + exit +end +#_test() + +# actually do something +def usage() + puts "lplp.rb [n] < " + puts " l0...: norms for selection" + puts "select_k: only output top k (according to the norm of their column vector) features" + puts " cut: output features with weight >= threshold" + puts " n: if we do not have a shard count use this number for averaging" + exit +end + +if ARGV.size < 3 then usage end +norm_fun = method(ARGV[0].to_sym) +type = ARGV[1] +x = ARGV[2].to_f + +shard_count_key = "__SHARD_COUNT__" + +STDIN.set_encoding 'utf-8' +STDOUT.set_encoding 'utf-8' + +w = {} +shard_count = 0 +while line = STDIN.gets + key, val = line.split /\s+/ + if key == shard_count_key + shard_count += 1 + next + end + if w.has_key? 
key + w[key].push val.to_f + else + w[key] = [val.to_f] + end +end + +if ARGV.size == 4 then shard_count = ARGV[3].to_f end + +if type == 'cut' + cut(w, norm_fun, shard_count, x) +elsif type == 'select_k' + select_k(w, norm_fun, shard_count, x) +else + puts "oh oh" +end + diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb index fca9b10d..24e7f49e 100755 --- a/training/dtrain/parallelize.rb +++ b/training/dtrain/parallelize.rb @@ -80,7 +80,7 @@ def make_shards(input, refs, num_shards, epoch, rand) shard_refs = File.new refs_fn, 'w+' refs_fns << refs_fn 0.upto(shard_sz-1) { |i| - j = index.pop + j = index.pop shard_in.write in_lines[j] shard_refs.write refs_lines[j] } @@ -125,7 +125,7 @@ end if use_qsub qsub_str_start = "qsub -cwd -sync y -b y -j y -o work/out.#{shard}.#{epoch} -N dtrain.#{shard}.#{epoch} \"" qsub_str_end = "\"" - local_end = '' + local_end = '' else local_end = "&>work/out.#{shard}.#{epoch}" end diff --git a/training/dtrain/test/example/cdec.ini b/training/dtrain/test/example/cdec.ini index 068ebd4d..0215416d 100644 --- a/training/dtrain/test/example/cdec.ini +++ b/training/dtrain/test/example/cdec.ini @@ -2,7 +2,7 @@ formalism=scfg add_pass_through_rules=true scfg_max_span_limit=15 intersection_strategy=cube_pruning -cubepruning_pop_limit=30 +cubepruning_pop_limit=200 feature_function=WordPenalty feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz # all currently working feature functions for translation: -- cgit v1.2.3 From 2a48d73eb794fdd736d1df035c8a31af887cde0a Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Fri, 15 Mar 2013 11:31:18 +0100 Subject: overhauled ruby scripts and examples --- training/dtrain/dtrain.cc | 2 - training/dtrain/examples/parallelized/README | 5 + training/dtrain/examples/parallelized/cdec.ini | 22 + training/dtrain/examples/parallelized/dtrain.ini | 16 + .../examples/parallelized/grammar/grammar.out.0.gz | Bin 0 -> 8318 bytes .../examples/parallelized/grammar/grammar.out.1.gz | Bin 0 -> 358560 bytes .../examples/parallelized/grammar/grammar.out.2.gz | Bin 0 -> 1014466 bytes .../examples/parallelized/grammar/grammar.out.3.gz | Bin 0 -> 391811 bytes .../examples/parallelized/grammar/grammar.out.4.gz | Bin 0 -> 149590 bytes .../examples/parallelized/grammar/grammar.out.5.gz | Bin 0 -> 537024 bytes .../examples/parallelized/grammar/grammar.out.6.gz | Bin 0 -> 291286 bytes .../examples/parallelized/grammar/grammar.out.7.gz | Bin 0 -> 1038140 bytes .../examples/parallelized/grammar/grammar.out.8.gz | Bin 0 -> 419889 bytes .../examples/parallelized/grammar/grammar.out.9.gz | Bin 0 -> 409140 bytes training/dtrain/examples/parallelized/in | 10 + training/dtrain/examples/parallelized/refs | 10 + training/dtrain/examples/parallelized/work/out.0.0 | 61 + training/dtrain/examples/parallelized/work/out.0.1 | 62 + training/dtrain/examples/parallelized/work/out.1.0 | 61 + training/dtrain/examples/parallelized/work/out.1.1 | 62 + .../dtrain/examples/parallelized/work/shard.0.0.in | 5 + .../examples/parallelized/work/shard.0.0.refs | 5 + .../dtrain/examples/parallelized/work/shard.1.0.in | 5 + .../examples/parallelized/work/shard.1.0.refs | 5 + .../dtrain/examples/parallelized/work/weights.0 | 12 + .../dtrain/examples/parallelized/work/weights.0.0 | 12 + .../dtrain/examples/parallelized/work/weights.0.1 | 12 + .../dtrain/examples/parallelized/work/weights.1 | 12 + .../dtrain/examples/parallelized/work/weights.1.0 | 11 + .../dtrain/examples/parallelized/work/weights.1.1 | 12 + training/dtrain/examples/standard/README | 2 + 
training/dtrain/examples/standard/cdec.ini | 26 + training/dtrain/examples/standard/dtrain.ini | 24 + training/dtrain/examples/standard/expected-output | 1206 ++++++++++++++++++++ training/dtrain/examples/standard/nc-wmt11.de.gz | Bin 0 -> 58324 bytes training/dtrain/examples/standard/nc-wmt11.en.gz | Bin 0 -> 49600 bytes .../dtrain/examples/standard/nc-wmt11.en.srilm.gz | Bin 0 -> 16017291 bytes .../dtrain/examples/standard/nc-wmt11.grammar.gz | Bin 0 -> 1399924 bytes training/dtrain/examples/toy/cdec.ini | 3 + training/dtrain/examples/toy/dtrain.ini | 13 + training/dtrain/examples/toy/expected-output | 77 ++ training/dtrain/examples/toy/grammar.gz | Bin 0 -> 219 bytes training/dtrain/examples/toy/src | 2 + training/dtrain/examples/toy/tgt | 2 + training/dtrain/lplp.rb | 18 +- training/dtrain/parallelize.rb | 26 +- training/dtrain/test/example/README | 8 - training/dtrain/test/example/cdec.ini | 25 - training/dtrain/test/example/dtrain.ini | 22 - training/dtrain/test/example/expected-output | 89 -- training/dtrain/test/parallelize/README | 5 - training/dtrain/test/parallelize/cdec.ini | 22 - training/dtrain/test/parallelize/dtrain.ini | 15 - .../dtrain/test/parallelize/g/grammar.out.0.gz | Bin 8318 -> 0 bytes .../dtrain/test/parallelize/g/grammar.out.1.gz | Bin 358560 -> 0 bytes .../dtrain/test/parallelize/g/grammar.out.2.gz | Bin 1014466 -> 0 bytes .../dtrain/test/parallelize/g/grammar.out.3.gz | Bin 391811 -> 0 bytes .../dtrain/test/parallelize/g/grammar.out.4.gz | Bin 149590 -> 0 bytes .../dtrain/test/parallelize/g/grammar.out.5.gz | Bin 537024 -> 0 bytes .../dtrain/test/parallelize/g/grammar.out.6.gz | Bin 291286 -> 0 bytes .../dtrain/test/parallelize/g/grammar.out.7.gz | Bin 1038140 -> 0 bytes .../dtrain/test/parallelize/g/grammar.out.8.gz | Bin 419889 -> 0 bytes .../dtrain/test/parallelize/g/grammar.out.9.gz | Bin 409140 -> 0 bytes training/dtrain/test/parallelize/in | 10 - training/dtrain/test/parallelize/refs | 10 - training/dtrain/test/toy/cdec.ini | 2 - training/dtrain/test/toy/dtrain.ini | 12 - training/dtrain/test/toy/input | 2 - 68 files changed, 1771 insertions(+), 252 deletions(-) create mode 100644 training/dtrain/examples/parallelized/README create mode 100644 training/dtrain/examples/parallelized/cdec.ini create mode 100644 training/dtrain/examples/parallelized/dtrain.ini create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.0.gz create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.1.gz create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.2.gz create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.3.gz create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.4.gz create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.5.gz create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.6.gz create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.7.gz create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.8.gz create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.9.gz create mode 100644 training/dtrain/examples/parallelized/in create mode 100644 training/dtrain/examples/parallelized/refs create mode 100644 training/dtrain/examples/parallelized/work/out.0.0 create mode 100644 training/dtrain/examples/parallelized/work/out.0.1 create mode 100644 training/dtrain/examples/parallelized/work/out.1.0 create mode 100644 training/dtrain/examples/parallelized/work/out.1.1 
create mode 100644 training/dtrain/examples/parallelized/work/shard.0.0.in create mode 100644 training/dtrain/examples/parallelized/work/shard.0.0.refs create mode 100644 training/dtrain/examples/parallelized/work/shard.1.0.in create mode 100644 training/dtrain/examples/parallelized/work/shard.1.0.refs create mode 100644 training/dtrain/examples/parallelized/work/weights.0 create mode 100644 training/dtrain/examples/parallelized/work/weights.0.0 create mode 100644 training/dtrain/examples/parallelized/work/weights.0.1 create mode 100644 training/dtrain/examples/parallelized/work/weights.1 create mode 100644 training/dtrain/examples/parallelized/work/weights.1.0 create mode 100644 training/dtrain/examples/parallelized/work/weights.1.1 create mode 100644 training/dtrain/examples/standard/README create mode 100644 training/dtrain/examples/standard/cdec.ini create mode 100644 training/dtrain/examples/standard/dtrain.ini create mode 100644 training/dtrain/examples/standard/expected-output create mode 100644 training/dtrain/examples/standard/nc-wmt11.de.gz create mode 100644 training/dtrain/examples/standard/nc-wmt11.en.gz create mode 100644 training/dtrain/examples/standard/nc-wmt11.en.srilm.gz create mode 100644 training/dtrain/examples/standard/nc-wmt11.grammar.gz create mode 100644 training/dtrain/examples/toy/cdec.ini create mode 100644 training/dtrain/examples/toy/dtrain.ini create mode 100644 training/dtrain/examples/toy/expected-output create mode 100644 training/dtrain/examples/toy/grammar.gz create mode 100644 training/dtrain/examples/toy/src create mode 100644 training/dtrain/examples/toy/tgt delete mode 100644 training/dtrain/test/example/README delete mode 100644 training/dtrain/test/example/cdec.ini delete mode 100644 training/dtrain/test/example/dtrain.ini delete mode 100644 training/dtrain/test/example/expected-output delete mode 100644 training/dtrain/test/parallelize/README delete mode 100644 training/dtrain/test/parallelize/cdec.ini delete mode 100644 training/dtrain/test/parallelize/dtrain.ini delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.0.gz delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.1.gz delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.2.gz delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.3.gz delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.4.gz delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.5.gz delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.6.gz delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.7.gz delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.8.gz delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.9.gz delete mode 100644 training/dtrain/test/parallelize/in delete mode 100644 training/dtrain/test/parallelize/refs delete mode 100644 training/dtrain/test/toy/cdec.ini delete mode 100644 training/dtrain/test/toy/dtrain.ini delete mode 100644 training/dtrain/test/toy/input (limited to 'training') diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc index dfb5b351..fcb46db2 100644 --- a/training/dtrain/dtrain.cc +++ b/training/dtrain/dtrain.cc @@ -254,8 +254,6 @@ main(int argc, char** argv) time_t start, end; time(&start); - igzstream grammar_buf_in; - if (t > 0) grammar_buf_in.open(grammar_buf_fn.c_str()); score_t score_sum = 0.; score_t model_sum(0); unsigned ii = 0, rank_errors = 0, margin_violations = 0, npairs = 0, f_count = 0, list_sz = 0; diff --git 
a/training/dtrain/examples/parallelized/README b/training/dtrain/examples/parallelized/README new file mode 100644 index 00000000..89715105 --- /dev/null +++ b/training/dtrain/examples/parallelized/README @@ -0,0 +1,5 @@ +run for example + ../../parallelize.rb ./dtrain.ini 4 false 2 2 ./in ./refs + +final weights will be in the file work/weights.3 + diff --git a/training/dtrain/examples/parallelized/cdec.ini b/training/dtrain/examples/parallelized/cdec.ini new file mode 100644 index 00000000..e43ba1c4 --- /dev/null +++ b/training/dtrain/examples/parallelized/cdec.ini @@ -0,0 +1,22 @@ +formalism=scfg +add_pass_through_rules=true +intersection_strategy=cube_pruning +cubepruning_pop_limit=200 +scfg_max_span_limit=15 +feature_function=WordPenalty +feature_function=KLanguageModel ../example/nc-wmt11.en.srilm.gz +#feature_function=ArityPenalty +#feature_function=CMR2008ReorderingFeatures +#feature_function=Dwarf +#feature_function=InputIndicator +#feature_function=LexNullJump +#feature_function=NewJump +#feature_function=NgramFeatures +#feature_function=NonLatinCount +#feature_function=OutputIndicator +#feature_function=RuleIdentityFeatures +#feature_function=RuleNgramFeatures +#feature_function=RuleShape +#feature_function=SourceSpanSizeFeatures +#feature_function=SourceWordPenalty +#feature_function=SpanFeatures diff --git a/training/dtrain/examples/parallelized/dtrain.ini b/training/dtrain/examples/parallelized/dtrain.ini new file mode 100644 index 00000000..f19ef891 --- /dev/null +++ b/training/dtrain/examples/parallelized/dtrain.ini @@ -0,0 +1,16 @@ +k=100 +N=4 +learning_rate=0.0001 +gamma=0 +loss_margin=1.0 +epochs=1 +scorer=stupid_bleu +sample_from=kbest +filter=uniq +pair_sampling=XYX +hi_lo=0.1 +select_weights=last +print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough +# newer version of the grammar extractor use different feature names: +#print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough +decoder_config=cdec.ini diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.0.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.0.gz new file mode 100644 index 00000000..1e28a24b Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.0.gz differ diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.1.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.1.gz new file mode 100644 index 00000000..372f5675 Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.1.gz differ diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.2.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.2.gz new file mode 100644 index 00000000..145d0dc0 Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.2.gz differ diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.3.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.3.gz new file mode 100644 index 00000000..105593ff Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.3.gz differ diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.4.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.4.gz new file mode 100644 index 00000000..30781f48 Binary files /dev/null and 
b/training/dtrain/examples/parallelized/grammar/grammar.out.4.gz differ diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.5.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.5.gz new file mode 100644 index 00000000..834ee759 Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.5.gz differ diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.6.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.6.gz new file mode 100644 index 00000000..2e76f348 Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.6.gz differ diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.7.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.7.gz new file mode 100644 index 00000000..3741a887 Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.7.gz differ diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.8.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.8.gz new file mode 100644 index 00000000..ebf6bd0c Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.8.gz differ diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.9.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.9.gz new file mode 100644 index 00000000..c1791059 Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.9.gz differ diff --git a/training/dtrain/examples/parallelized/in b/training/dtrain/examples/parallelized/in new file mode 100644 index 00000000..51d01fe7 --- /dev/null +++ b/training/dtrain/examples/parallelized/in @@ -0,0 +1,10 @@ +europas nach rassen geteiltes haus +ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen . +der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln . +während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden . +eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern . +die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden . +das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt . +die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen . +der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken . +genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . 
diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen . diff --git a/training/dtrain/examples/parallelized/refs b/training/dtrain/examples/parallelized/refs new file mode 100644 index 00000000..632e27b0 --- /dev/null +++ b/training/dtrain/examples/parallelized/refs @@ -0,0 +1,10 @@ +europe 's divided racial house +a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge . +the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them . +while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon . +an aging population at home and ever more open borders imply increasing racial fragmentation in european countries . +mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear . +it will not , as america 's racial history clearly shows . +race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes . +the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths . +this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us . diff --git a/training/dtrain/examples/parallelized/work/out.0.0 b/training/dtrain/examples/parallelized/work/out.0.0 new file mode 100644 index 00000000..7a00ed0f --- /dev/null +++ b/training/dtrain/examples/parallelized/work/out.0.0 @@ -0,0 +1,61 @@ + cdec cfg 'cdec.ini' +Loading the LM will be faster if you build a binary file. +Reading ../example/nc-wmt11.en.srilm.gz +----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 +**************************************************************************************************** +Seeding random number sequence to 3121929377 + +dtrain +Parameters: + k 100 + N 4 + T 1 + scorer 'stupid_bleu' + sample from 'kbest' + filter 'uniq' + learning rate 0.0001 + gamma 0 + loss margin 1 + pairs 'XYX' + hi lo 0.1 + pair threshold 0 + select weights 'last' + l1 reg 0 'none' + max pairs 4294967295 + cdec cfg 'cdec.ini' + input 'work/shard.0.0.in' + refs 'work/shard.0.0.refs' + output 'work/weights.0.0' +(a dot represents 10 inputs) +Iteration #1 of 1. + 5 +WEIGHTS + Glue = +0.2663 + WordPenalty = -0.0079042 + LanguageModel = +0.44782 + LanguageModel_OOV = -0.0401 + PhraseModel_0 = -0.193 + PhraseModel_1 = +0.71321 + PhraseModel_2 = +0.85196 + PhraseModel_3 = -0.43986 + PhraseModel_4 = -0.44803 + PhraseModel_5 = -0.0538 + PhraseModel_6 = -0.1788 + PassThrough = -0.1477 + --- + 1best avg score: 0.17521 (+0.17521) + 1best avg model score: 21.556 (+21.556) + avg # pairs: 1671.2 + avg # rank err: 1118.6 + avg # margin viol: 552.6 + non0 feature count: 12 + avg list sz: 100 + avg f count: 11.32 +(time 0.37 min, 4.4 s/S) + +Writing weights file to 'work/weights.0.0' ... +done + +--- +Best iteration: 1 [SCORE 'stupid_bleu'=0.17521]. +This took 0.36667 min. 
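The epoch-0 runs each write their own weight file (work/weights.0.0 and work/weights.1.0); before the next epoch starts, parallelize.rb concatenates these and pipes them through the lplp.rb added by this commit, with the same 'l2 select_k 100000' arguments the deleted hadoop-streaming reducer used. The result is the merged work/weights.0 that the out.*.1 runs below pick up as "weights in 'work/weights.0'". A minimal sketch of that merge step, assuming this example's two-shard work/ layout and that lplp.rb is reachable in the current directory:

  # Sketch of the between-epoch merge (2 shards, epoch 0); the trailing 2
  # passed to lplp.rb is the shard count used for averaging.
  File.open('work/weights_cat', 'w') do |cat|
    ['work/weights.0.0', 'work/weights.1.0'].each { |f| cat.write File.read(f) }
  end
  system("ruby lplp.rb l2 select_k 100000 2 < work/weights_cat > work/weights.0")

select_k sorts features by the given norm of their weight column before printing, which is why work/weights.0 further down lists LanguageModel (the column with the largest l2 norm across the two shards) first.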
diff --git a/training/dtrain/examples/parallelized/work/out.0.1 b/training/dtrain/examples/parallelized/work/out.0.1 new file mode 100644 index 00000000..e2bd6649 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/out.0.1 @@ -0,0 +1,62 @@ + cdec cfg 'cdec.ini' +Loading the LM will be faster if you build a binary file. +Reading ../example/nc-wmt11.en.srilm.gz +----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 +**************************************************************************************************** +Seeding random number sequence to 2767202922 + +dtrain +Parameters: + k 100 + N 4 + T 1 + scorer 'stupid_bleu' + sample from 'kbest' + filter 'uniq' + learning rate 0.0001 + gamma 0 + loss margin 1 + pairs 'XYX' + hi lo 0.1 + pair threshold 0 + select weights 'last' + l1 reg 0 'none' + max pairs 4294967295 + cdec cfg 'cdec.ini' + input 'work/shard.0.0.in' + refs 'work/shard.0.0.refs' + output 'work/weights.0.1' + weights in 'work/weights.0' +(a dot represents 10 inputs) +Iteration #1 of 1. + 5 +WEIGHTS + Glue = -0.2699 + WordPenalty = +0.080605 + LanguageModel = -0.026572 + LanguageModel_OOV = -0.30025 + PhraseModel_0 = -0.32076 + PhraseModel_1 = +0.67451 + PhraseModel_2 = +0.92 + PhraseModel_3 = -0.36402 + PhraseModel_4 = -0.592 + PhraseModel_5 = -0.0269 + PhraseModel_6 = -0.28755 + PassThrough = -0.33285 + --- + 1best avg score: 0.26638 (+0.26638) + 1best avg model score: 53.197 (+53.197) + avg # pairs: 2028.6 + avg # rank err: 998.2 + avg # margin viol: 918.8 + non0 feature count: 12 + avg list sz: 100 + avg f count: 10.496 +(time 0.32 min, 3.8 s/S) + +Writing weights file to 'work/weights.0.1' ... +done + +--- +Best iteration: 1 [SCORE 'stupid_bleu'=0.26638]. +This took 0.31667 min. diff --git a/training/dtrain/examples/parallelized/work/out.1.0 b/training/dtrain/examples/parallelized/work/out.1.0 new file mode 100644 index 00000000..6e790e38 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/out.1.0 @@ -0,0 +1,61 @@ + cdec cfg 'cdec.ini' +Loading the LM will be faster if you build a binary file. +Reading ../example/nc-wmt11.en.srilm.gz +----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 +**************************************************************************************************** +Seeding random number sequence to 1432415010 + +dtrain +Parameters: + k 100 + N 4 + T 1 + scorer 'stupid_bleu' + sample from 'kbest' + filter 'uniq' + learning rate 0.0001 + gamma 0 + loss margin 1 + pairs 'XYX' + hi lo 0.1 + pair threshold 0 + select weights 'last' + l1 reg 0 'none' + max pairs 4294967295 + cdec cfg 'cdec.ini' + input 'work/shard.1.0.in' + refs 'work/shard.1.0.refs' + output 'work/weights.1.0' +(a dot represents 10 inputs) +Iteration #1 of 1. + 5 +WEIGHTS + Glue = -0.3815 + WordPenalty = +0.20064 + LanguageModel = +0.95304 + LanguageModel_OOV = -0.264 + PhraseModel_0 = -0.22362 + PhraseModel_1 = +0.12254 + PhraseModel_2 = +0.26328 + PhraseModel_3 = +0.38018 + PhraseModel_4 = -0.48654 + PhraseModel_5 = +0 + PhraseModel_6 = -0.3645 + PassThrough = -0.2216 + --- + 1best avg score: 0.10863 (+0.10863) + 1best avg model score: -4.9841 (-4.9841) + avg # pairs: 1345.4 + avg # rank err: 822.4 + avg # margin viol: 501 + non0 feature count: 11 + avg list sz: 100 + avg f count: 11.814 +(time 0.45 min, 5.4 s/S) + +Writing weights file to 'work/weights.1.0' ... +done + +--- +Best iteration: 1 [SCORE 'stupid_bleu'=0.10863]. +This took 0.45 min. 
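The merged file can be checked by hand against the weight files listed further down: lplp.rb's mean(v, n) divides each feature's summed weight by the shard count n, so for Glue

  # Reproducing the Glue entry of work/weights.0 from the two epoch-0
  # shard weight files (values as in the weights files below; n = 2):
  glue = (0.26629999999999948 + -0.38150000000000084) / 2
  # => -0.05760000000000068, exactly the Glue line of work/weights.0

A feature absent from one shard contributes nothing to its column but is still divided by the full shard count: PhraseModel_5 does not appear in work/weights.1.0, so work/weights.0 carries exactly half the PhraseModel_5 value of work/weights.0.0.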
diff --git a/training/dtrain/examples/parallelized/work/out.1.1 b/training/dtrain/examples/parallelized/work/out.1.1 new file mode 100644 index 00000000..0b984761 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/out.1.1 @@ -0,0 +1,62 @@ + cdec cfg 'cdec.ini' +Loading the LM will be faster if you build a binary file. +Reading ../example/nc-wmt11.en.srilm.gz +----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 +**************************************************************************************************** +Seeding random number sequence to 1771918374 + +dtrain +Parameters: + k 100 + N 4 + T 1 + scorer 'stupid_bleu' + sample from 'kbest' + filter 'uniq' + learning rate 0.0001 + gamma 0 + loss margin 1 + pairs 'XYX' + hi lo 0.1 + pair threshold 0 + select weights 'last' + l1 reg 0 'none' + max pairs 4294967295 + cdec cfg 'cdec.ini' + input 'work/shard.1.0.in' + refs 'work/shard.1.0.refs' + output 'work/weights.1.1' + weights in 'work/weights.0' +(a dot represents 10 inputs) +Iteration #1 of 1. + 5 +WEIGHTS + Glue = -0.3178 + WordPenalty = +0.11092 + LanguageModel = +0.17269 + LanguageModel_OOV = -0.13485 + PhraseModel_0 = -0.45371 + PhraseModel_1 = +0.38789 + PhraseModel_2 = +0.75311 + PhraseModel_3 = -0.38163 + PhraseModel_4 = -0.58817 + PhraseModel_5 = -0.0269 + PhraseModel_6 = -0.27315 + PassThrough = -0.16745 + --- + 1best avg score: 0.13169 (+0.13169) + 1best avg model score: 24.226 (+24.226) + avg # pairs: 1951.2 + avg # rank err: 985.4 + avg # margin viol: 951 + non0 feature count: 12 + avg list sz: 100 + avg f count: 11.224 +(time 0.42 min, 5 s/S) + +Writing weights file to 'work/weights.1.1' ... +done + +--- +Best iteration: 1 [SCORE 'stupid_bleu'=0.13169]. +This took 0.41667 min. diff --git a/training/dtrain/examples/parallelized/work/shard.0.0.in b/training/dtrain/examples/parallelized/work/shard.0.0.in new file mode 100644 index 00000000..92f9c78e --- /dev/null +++ b/training/dtrain/examples/parallelized/work/shard.0.0.in @@ -0,0 +1,5 @@ +europas nach rassen geteiltes haus +ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen . +der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln . +während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden . +eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern . diff --git a/training/dtrain/examples/parallelized/work/shard.0.0.refs b/training/dtrain/examples/parallelized/work/shard.0.0.refs new file mode 100644 index 00000000..bef68fee --- /dev/null +++ b/training/dtrain/examples/parallelized/work/shard.0.0.refs @@ -0,0 +1,5 @@ +europe 's divided racial house +a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge . 
+the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them . +while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon . +an aging population at home and ever more open borders imply increasing racial fragmentation in european countries . diff --git a/training/dtrain/examples/parallelized/work/shard.1.0.in b/training/dtrain/examples/parallelized/work/shard.1.0.in new file mode 100644 index 00000000..b7695ce7 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/shard.1.0.in @@ -0,0 +1,5 @@ +die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden . +das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt . +die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen . +der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken . +genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen . diff --git a/training/dtrain/examples/parallelized/work/shard.1.0.refs b/training/dtrain/examples/parallelized/work/shard.1.0.refs new file mode 100644 index 00000000..6076f6d5 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/shard.1.0.refs @@ -0,0 +1,5 @@ +mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear . +it will not , as america 's racial history clearly shows . +race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes . +the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths . +this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us . 
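These shard files also show what the non-random split does: with the shuffle flag off (presumably the 'false' in the README invocation above), the 10 input lines and their references are dealt out in order, 5 per shard, so shard.0.0.* hold lines 1-5 and shard.1.0.* lines 6-10. A minimal sketch of that split, ignoring the leftover lines make_shards distributes when the corpus size is not a multiple of the shard count:

  # Deterministic 2-way split of parallel in/refs (epoch-0 names as above)
  in_lines   = File.readlines('in')
  ref_lines  = File.readlines('refs')
  num_shards = 2
  sz = in_lines.size / num_shards               # 10 / 2 => 5
  num_shards.times do |s|
    File.open("work/shard.#{s}.0.in",   'w') { |f| f.write in_lines[s * sz, sz].join }
    File.open("work/shard.#{s}.0.refs", 'w') { |f| f.write ref_lines[s * sz, sz].join }
  end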
diff --git a/training/dtrain/examples/parallelized/work/weights.0 b/training/dtrain/examples/parallelized/work/weights.0 new file mode 100644 index 00000000..ddd595a8 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/weights.0 @@ -0,0 +1,12 @@ +LanguageModel 0.7004298992212881 +PhraseModel_2 0.5576194336478857 +PhraseModel_1 0.41787318415343155 +PhraseModel_4 -0.46728502545635164 +PhraseModel_3 -0.029839521598455515 +Glue -0.05760000000000068 +PhraseModel_6 -0.2716499999999978 +PhraseModel_0 -0.20831031065605327 +LanguageModel_OOV -0.15205000000000077 +PassThrough -0.1846500000000006 +WordPenalty 0.09636994553433414 +PhraseModel_5 -0.026900000000000257 diff --git a/training/dtrain/examples/parallelized/work/weights.0.0 b/training/dtrain/examples/parallelized/work/weights.0.0 new file mode 100644 index 00000000..c9370b18 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/weights.0.0 @@ -0,0 +1,12 @@ +WordPenalty -0.0079041595706392243 +LanguageModel 0.44781580828279532 +LanguageModel_OOV -0.04010000000000042 +Glue 0.26629999999999948 +PhraseModel_0 -0.19299677809125185 +PhraseModel_1 0.71321026861732773 +PhraseModel_2 0.85195540993310537 +PhraseModel_3 -0.43986310822842656 +PhraseModel_4 -0.44802855630415955 +PhraseModel_5 -0.053800000000000514 +PhraseModel_6 -0.17879999999999835 +PassThrough -0.14770000000000036 diff --git a/training/dtrain/examples/parallelized/work/weights.0.1 b/training/dtrain/examples/parallelized/work/weights.0.1 new file mode 100644 index 00000000..8fad3de8 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/weights.0.1 @@ -0,0 +1,12 @@ +WordPenalty 0.080605055841244472 +LanguageModel -0.026571720531022844 +LanguageModel_OOV -0.30024999999999141 +Glue -0.26989999999999842 +PhraseModel_2 0.92000295209089566 +PhraseModel_1 0.67450748692470841 +PhraseModel_4 -0.5920000014976784 +PhraseModel_3 -0.36402437203127397 +PhraseModel_6 -0.28754999999999603 +PhraseModel_0 -0.32076244202907672 +PassThrough -0.33284999999999004 +PhraseModel_5 -0.026900000000000257 diff --git a/training/dtrain/examples/parallelized/work/weights.1 b/training/dtrain/examples/parallelized/work/weights.1 new file mode 100644 index 00000000..03058a16 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/weights.1 @@ -0,0 +1,12 @@ +PhraseModel_2 0.8365578543552836 +PhraseModel_4 -0.5900840266009169 +PhraseModel_1 0.5312000609786991 +PhraseModel_0 -0.3872342271319619 +PhraseModel_3 -0.3728279676912084 +Glue -0.2938500000000036 +PhraseModel_6 -0.2803499999999967 +PassThrough -0.25014999999999626 +LanguageModel_OOV -0.21754999999999702 +LanguageModel 0.07306061161169894 +WordPenalty 0.09576193325966899 +PhraseModel_5 -0.026900000000000257 diff --git a/training/dtrain/examples/parallelized/work/weights.1.0 b/training/dtrain/examples/parallelized/work/weights.1.0 new file mode 100644 index 00000000..6a6a65c1 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/weights.1.0 @@ -0,0 +1,11 @@ +WordPenalty 0.20064405063930751 +LanguageModel 0.9530439901597807 +LanguageModel_OOV -0.26400000000000112 +Glue -0.38150000000000084 +PhraseModel_0 -0.22362384322085468 +PhraseModel_1 0.12253609968953538 +PhraseModel_2 0.26328345736266612 +PhraseModel_3 0.38018406503151553 +PhraseModel_4 -0.48654149460854373 +PhraseModel_6 -0.36449999999999722 +PassThrough -0.22160000000000085 diff --git a/training/dtrain/examples/parallelized/work/weights.1.1 b/training/dtrain/examples/parallelized/work/weights.1.1 new file mode 100644 index 00000000..f56ea4a2 --- /dev/null +++ 
b/training/dtrain/examples/parallelized/work/weights.1.1 @@ -0,0 +1,12 @@ +WordPenalty 0.1109188106780935 +LanguageModel 0.17269294375442074 +LanguageModel_OOV -0.13485000000000266 +Glue -0.3178000000000088 +PhraseModel_2 0.75311275661967159 +PhraseModel_1 0.38789263503268989 +PhraseModel_4 -0.58816805170415531 +PhraseModel_3 -0.38163156335114284 +PhraseModel_6 -0.27314999999999739 +PhraseModel_0 -0.45370601223484697 +PassThrough -0.16745000000000249 +PhraseModel_5 -0.026900000000000257 diff --git a/training/dtrain/examples/standard/README b/training/dtrain/examples/standard/README new file mode 100644 index 00000000..ce37d31a --- /dev/null +++ b/training/dtrain/examples/standard/README @@ -0,0 +1,2 @@ +Call `dtrain` from this folder with ../../dtrain -c dtrain.ini . + diff --git a/training/dtrain/examples/standard/cdec.ini b/training/dtrain/examples/standard/cdec.ini new file mode 100644 index 00000000..e1edc68d --- /dev/null +++ b/training/dtrain/examples/standard/cdec.ini @@ -0,0 +1,26 @@ +formalism=scfg +add_pass_through_rules=true +scfg_max_span_limit=15 +intersection_strategy=cube_pruning +cubepruning_pop_limit=200 +grammar=nc-wmt11.grammar.gz +feature_function=WordPenalty +feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz +# all currently working feature functions for translation: +# (with those features active that were used in the ACL paper) +#feature_function=ArityPenalty +#feature_function=CMR2008ReorderingFeatures +#feature_function=Dwarf +#feature_function=InputIndicator +#feature_function=LexNullJump +#feature_function=NewJump +#feature_function=NgramFeatures +#feature_function=NonLatinCount +#feature_function=OutputIndicator +feature_function=RuleIdentityFeatures +feature_function=RuleSourceBigramFeatures +feature_function=RuleTargetBigramFeatures +feature_function=RuleShape +#feature_function=SourceSpanSizeFeatures +#feature_function=SourceWordPenalty +#feature_function=SpanFeatures diff --git a/training/dtrain/examples/standard/dtrain.ini b/training/dtrain/examples/standard/dtrain.ini new file mode 100644 index 00000000..a05e9c29 --- /dev/null +++ b/training/dtrain/examples/standard/dtrain.ini @@ -0,0 +1,24 @@ +input=./nc-wmt11.de.gz +refs=./nc-wmt11.en.gz +output=- # a weights file (add .gz for gzip compression) or STDOUT '-' +select_weights=avg # output average (over epochs) weight vector +decoder_config=./cdec.ini # config for cdec +# weights for these features will be printed on each iteration +print_weights= EgivenFCoherent SampleCountF CountEF MaxLexFgivenE MaxLexEgivenF IsSingletonF IsSingletonFE Glue WordPenalty PassThrough LanguageModel LanguageModel_OOV +# newer version of the grammar extractor use different feature names: +#print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough +stop_after=10 # stop epoch after 10 inputs + +# interesting stuff +epochs=2 # run over input 2 times +k=100 # use 100best lists +N=4 # optimize (approx) BLEU4 +scorer=stupid_bleu # use 'stupid' BLEU+1 +learning_rate=1.0 # learning rate, don't care if gamma=0 (perceptron) +gamma=0 # use SVM reg +sample_from=kbest # use kbest lists (as opposed to forest) +filter=uniq # only unique entries in kbest (surface form) +pair_sampling=XYX # +hi_lo=0.1 # 10 vs 80 vs 10 and 80 vs 10 here +pair_threshold=0 # minimum distance in BLEU (here: > 0) +loss_margin=0 diff --git a/training/dtrain/examples/standard/expected-output b/training/dtrain/examples/standard/expected-output new 
file mode 100644 index 00000000..8d72f4c3 --- /dev/null +++ b/training/dtrain/examples/standard/expected-output @@ -0,0 +1,1206 @@ + cdec cfg './cdec.ini' +Loading the LM will be faster if you build a binary file. +Reading ./nc-wmt11.en.srilm.gz +----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 +**************************************************************************************************** + Example feature: Shape_S00000_T00000 +Seeding random number sequence to 1511823303 + +dtrain +Parameters: + k 100 + N 4 + T 2 + scorer 'stupid_bleu' + sample from 'kbest' + filter 'uniq' + learning rate 1 + gamma 0 + loss margin 0 + pairs 'XYX' + hi lo 0.1 + pair threshold 0 + select weights 'avg' + l1 reg 0 'none' + max pairs 4294967295 + cdec cfg './cdec.ini' + input './nc-wmt11.de.gz' + refs './nc-wmt11.en.gz' + output '-' + stop_after 10 +(a dot represents 10 inputs) +Iteration #1 of 2. + . 10 +Stopping after 10 input sentences. +WEIGHTS + EgivenFCoherent = +0 + SampleCountF = +0 + CountEF = +0 + MaxLexFgivenE = +0 + MaxLexEgivenF = +0 + IsSingletonF = +0 + IsSingletonFE = +0 + Glue = -576 + WordPenalty = +417.79 + PassThrough = -1455 + LanguageModel = +5117.5 + LanguageModel_OOV = -1307 + --- + 1best avg score: 0.27697 (+0.27697) + 1best avg model score: -47918 (-47918) + avg # pairs: 1129.8 + avg # rank err: 581.9 + avg # margin viol: 0 + non0 feature count: 703 + avg list sz: 90.9 + avg f count: 100.09 +(time 0.33 min, 2 s/S) + +Iteration #2 of 2. + . 10 +WEIGHTS + EgivenFCoherent = +0 + SampleCountF = +0 + CountEF = +0 + MaxLexFgivenE = +0 + MaxLexEgivenF = +0 + IsSingletonF = +0 + IsSingletonFE = +0 + Glue = -622 + WordPenalty = +898.56 + PassThrough = -2578 + LanguageModel = +8066.2 + LanguageModel_OOV = -2590 + --- + 1best avg score: 0.37119 (+0.094226) + 1best avg model score: -1.3174e+05 (-83822) + avg # pairs: 1214.9 + avg # rank err: 584.1 + avg # margin viol: 0 + non0 feature count: 1115 + avg list sz: 91.3 + avg f count: 90.755 +(time 0.27 min, 1.6 s/S) + +Writing weights file to '-' ... +R:X:NX_sein:N1_its 61.5 +WordPenalty 658.17328732437022 +LanguageModel 6591.8747593425214 +LanguageModel_OOV -1948.5 +R:X:das_NX:this_N1 12 +R:X:NX_sein_NX:N1_from_ever_being_able_to_N2 30 +R:X:NX_bemühen:N1_effort 2.5 +RBS:X_bemühen 2.5 +R:X:sich:sich -17.5 +RBT:_sich -17.5 +RBT:sich_ -17.5 +RBS:sich_X 17.5 +RBS:_als 147 +RBS:als_ -59 +Shape_S10000_T10000 -1711.5 +RBT:_when 84 +R:X:zum_NX:as_N1 -134 +RBS:_zum -30 +R:X:als_NX:as_N1 63 +R:X:zum_NX:'s_N1 33 +R:X:zum_NX:the_N1 24 +RBS:X_sich -12 +R:X:zum_NX:to_N1 -36 +R:X:zum_NX:with_the_N1 83 +R:X:NX_zum:N1_the -66 +R:X:NX_zum:N1_to 66 +R:X:als_NX:when_N1 84 +RBS:als_das 59 +RBS:X_das -104 +R:X:NX_das:N1_a 28.5 +R:X:er_sich_NX:he_N1 86.5 +RBS:er_sich 29.5 +R:X:NX_das:N1_it -6 +R:X:er_sich_NX:him_N1 -57 +RBT:_declared -488 +R:X:NX_das:N1_that -5 +RBT:declared_ -8 +R:X:NX_das:N1_the -57 +R:X:NX_das:N1_this -17 +R:X:NX_.:N1_. -323 +RBS:X_. 134 +R:X:NX_.:N1_debate_. 6.5 +R:X:NX_.:N1_disruptions_. -14.5 +R:X:NX_.:N1_established_. 7.5 +R:X:NX_.:N1_heading_. 17 +R:X:NX_.:N1_on_. 94 +R:X:NX_.:N1_pace_. 51.5 +R:X:NX_das_NX:N1_a_growing_N2 -45 +R:X:general:general -23.5 +R:X:NX_.:N1_politics_. 84 +R:X:NX_das_NX:N1_a_N2 -0.5 +R:X:NX_.:N1_power_. -99.5 +RBS:general_ -23.5 +R:X:NX_.:N1_-_range_missiles_. -28.5 +Shape_S11000_T11000 40 +RBT:general_ -23.5 +RBT:_. -645 +R:X:betrat:entered -91 +R:X:NX_.:N1_war_. 
68.5 +RBS:_betrat 23.5 +Shape_S11000_T01100 475.5 +RBT:_entered -91 +RBT:entered_ -91 +R:X:NX_das_NX:N1_the_N2 -2 +R:X:betrat:betrat 114.5 +RBT:_betrat 114.5 +RBT:betrat_ 114.5 +R:X:12:12 79 +R:X:maßnahmen:action 24 +R:X:.:. -566 +RBS:12_ 79 +RBS:_maßnahmen -44.5 +RBS:_. -645 +RBT:._ -566 +RBT:_action 24 +RBT:12_ 79 +RBT:action_ 24 +R:X:maßnahmen:actions -13 +RBT:_actions -13 +RBT:actions_ -13 +R:X:12_NX:12_N1 -79 +RBT:declared_a -428 +RBS:12_X -79 +RBT:a_state -428 +RBT:state_of -428 +R:X:maßnahmen:maßnahmen -55.5 +R:X:internationale_NX:global_N1 -270 +RBS:X_am 316.5 +RBT:_maßnahmen -55.5 +RBS:am_ 267.5 +RBT:maßnahmen_ -55.5 +RBS:_den 883 +R:X:internationale_NX:international_N1 270 +RBS:den_X -286.5 +R:X:NX_am:N1_of 267.5 +R:X:NX_als:N1_a -273.5 +RBS:am_X -281 +R:X:den_NX:'s_N1 -31 +R:X:NX_am_NX:N1_of_N2 -30 +R:X:NX_am_NX:N1_on_N2 79 +R:X:NX_als:N1_'s 273.5 +R:X:NX_betrat:N1_entered -23.5 +R:X:ins_NX:into_the_N1 -32.5 +RBS:X_betrat -23.5 +RBT:into_the -55 +R:X:ins_NX:into_N1 32.5 +RBT:_their 303 +R:X:general_NX:general_N1 23.5 +RBS:general_X 23.5 +RBS:_am -316.5 +R:X:den_NX:the_N1 89 +R:X:den_NX_.:the_N1_. 86.5 +R:X:NX_und:and_N1 -216 +RBS:X_und -203.5 +RBS:und_ 522.5 +RBT:_and 438.5 +R:X:am_NX:at_N1 23 +R:X:NX_als_das:N1_than_the 59 +R:X:NX_und:N1_- -114 +R:X:NX_und:N1_, 114 +R:X:am_NX:of_N1 -4 +R:X:am_NX:on_N1 -158.5 +R:X:am_NX:the_N1 -190 +RBS:_seine -16.5 +RBS:seine_ 39 +R:X:oktober:october -79.5 +R:X:seine:his -5.5 +RBS:_oktober -79.5 +R:X:seine:its 50 +RBT:_october -79.5 +RBT:october_ -79.5 +R:X:seine_NX:a_N1 7.5 +RBS:seine_X -39 +R:X:NX_und_NX:and_N1_N2 -22 +RBS:und_X 160.5 +R:X:seine_NX:his_N1 -97 +R:X:seine_NX:its_N1 102.5 +R:X:NX_und_NX:N1_,_and_N2 -4 +R:X:NX_maßnahmen:N1_actions 44.5 +RBS:X_maßnahmen 44.5 +R:X:seine_NX_als:his_N1_than 5.5 +R:X:seine_NX_als:its_N1_as -64.5 +R:X:NX_und_NX:N1_,_N2 -7 +Shape_S01100_T11000 -312.5 +RBS:und_den -822.5 +Shape_S01100_T01100 -537.5 +Shape_S01100_T11100 15 +R:X:NX_seine:'s_N1 -5.5 +RBS:X_seine 16.5 +RBS:X_den -38 +R:X:amerika_NX_sich_NX:america_N1_N2 -12 +R:X:NX_seine_NX:'s_N1_N2 22 +R:X:auf_NX_den_NX:to_N1_the_N2 -23 +R:X:auf_NX_den_NX:to_N1_N2 -23 +RBS:_unterstützen -716 +RBS:unterstützen_ -1 +Shape_S11100_T11000 783.5 +Shape_S11100_T01100 -716 +Shape_S11100_T11100 488 +R:X:unterstützen:unterstützen -1 +RBT:_unterstützen -1 +RBT:unterstützen_ -1 +R:X:unterstützen_NX:support_N1 -715 +RBS:unterstützen_X -715 +RBT:_will -6 +RBS:X_unterstützen 716 +RBT:_if 35 +R:X:NX_den_NX_.:N1_N2_. 41 +R:X:verfassung:constitution 15 +RBS:_verfassung -43 +RBT:_constitution 15 +RBT:constitution_ 15 +R:X:verfassung:constitutional 9.5 +RBT:_constitutional 9.5 +RBS:unterstützen_. 716 +RBT:constitutional_ 9.5 +R:X:NX_unterstützen_.:N1_. 716 +R:X:verfassung:verfassung -67.5 +R:X:eine_NX:an_N1 162 +RBT:_verfassung -67.5 +RBT:verfassung_ -67.5 +R:X:und:, -21.5 +R:X:,_NX_zu_NX:to_N2_N1 -153 +RBS:_und -389.5 +R:X:und:and -35 +RBS:angeführten_ -716 +RBT:and_ -35 +RBT:_as 63 +RBS:versucht_ 68 +R:X:und:with -3 +R:X:eine_NX:is_N1 -162 +RBS:angeführten_X 716 +R:X:und:und 91 +RBT:_und 91 +RBT:und_ 91 +R:X:versucht:tried 68 +RBT:tried_ 68 +RBS:versucht_X -68 +R:X:versucht_NX:tried_N1 -68 +R:X:und_NX:and_N1 250 +R:X:und_NX:with_N1 -18 +R:X:und_NX:,_N1 -7 +R:X:und_NX:N1_and -12 +R:X:und_den_NX:and_N1 -716 +R:X:er:he 17 +R:X:NX_eine:N1_is -7 +RBS:_er -47.5 +RBS:er_ 54 +RBT:_he 485.5 +RBT:he_ 17 +RBT:_him -1 +R:X:und_NX_.:,_N1_. -3 +R:X:er:his 91 +R:X:und_den_NX_.:and_the_N1_. 88 +R:X:NX_eine:N1_will 7 +R:X:er:it 3 +R:X:und_den_NX_.:and_N1_. 
-216.5 +R:X:er:er -196 +RBT:_er -196 +RBT:er_ -196 +RBS:er_X 8 +R:X:er_NX:he_N1 399 +R:X:er_NX:it_N1 -379 +Shape_S01010_T01010 -599 +RBS:pakistanischen_ 43 +R:X:NX_versucht:N1_tried 196 +RBT:_pakistan -43 +RBT:_pakistani 2 +R:X:er_NX_,_NX:he_N1_N2 -12 +R:X:NX_hat_er:N1_,_he_has 196 +RBS:hat_er 196 +R:X:NX_er:he_N1 -17 +RBS:X_er -148.5 +RBS:pakistanischen_X -43 +R:X:NX_er:it_N1 -7 +RBS:X_verfassung 43 +R:X:NX_verfassung:N1_'s_constitution 43 +R:X:NX_hat_NX_versucht:N1_N2_has_tried -190 +R:X:NX_hat_NX_versucht:N1_,_N2_has_tried -6 +RBS:der_pakistanischen 43 +RBS:X_pakistanischen -43 +RBS:_aber 46 +RBS:,_als -147 +RBT:_but -321 +R:X:aber_NX:but_N1 46 +R:X:von_NX_angeführten:N1_-_led -716 +R:X:von_NX_angeführten_NX:N1_-_led_N2 716 +RBS:,_aber -114 +RBS:X_aber 68 +R:X:,_als_NX:,_as_N1 -40 +R:X:NX_aber_NX_,:N1_N2_to 68 +R:X:NX_pakistanischen_NX_.:pakistan_N1_N2_. -43 +R:X:NX_,_aber_NX:N1_,_N2 -114 +RBS:_rahmen 43 +RBS:rahmen_ 43 +R:X:rahmen:within 20 +R:X:rahmen:rahmen 23 +RBT:_rahmen 23 +RBT:rahmen_ 23 +Shape_S01110_T11010 35.5 +R:X:NX_der_pakistanischen:N1_pakistan 43 +Shape_S01110_T01110 -1195 +Shape_S01110_T11110 -6.5 +R:X:NX_,_NX_er:N1_N2_he -33 +RBS:geben_X -577.5 +RBS:_gestalten 196 +Shape_S01110_T01011 278 +RBS:gestalten_ 196 +RBS:geben_und 577.5 +R:X:gestalten:more 221 +Shape_S01110_T01111 -181.5 +RBT:_more 221 +RBT:more_ 221 +R:X:gestalten:gestalten -25 +RBT:_gestalten -25 +RBT:gestalten_ -25 +R:X:effektiver:effectively -151 +RBS:_effektiver 54 +RBS:effektiver_ -221 +RBT:_effectively -151 +RBT:effectively_ -151 +R:X:effektiver:effektiver -99 +RBT:_effektiver -99 +RBT:effektiver_ -99 +Shape_S11110_T11010 -1130 +RBS:zu_geben -107.5 +R:X:effektiver_zu_NX:N1_effectively 304 +RBS:effektiver_zu 221 +RBS:X_geben 107.5 +Shape_S11110_T01110 621 +Shape_S11110_T11110 -75 +RBS:X_gestalten -196 +R:X:NX_gestalten_.:N1_. -196 +RBS:gestalten_. 
-196 +R:X:terror:terror 672 +RBS:_terror -16 +RBS:terror_ 640 +R:X:den:- -4 +RBT:_terror 136 +RBT:terror_ 646 +RBS:den_ 42.5 +R:X:den:for -11.5 +R:X:terror:terrorism -54 +RBT:_terrorism -54 +Shape_S11110_T11011 -4.5 +RBT:terrorism_ -54 +R:X:terror_NX:terror_N1 -634 +R:X:den:of -17 +RBS:terror_X -640 +R:X:den:'s 32.5 +Shape_S11110_T01111 -1.5 +R:X:NX_effektiver:N1_more_effectively 29 +RBS:X_effektiver -54 +R:X:den:the 68 +R:X:NX_geben_und:N1_and 107.5 +R:X:NX_effektiver_zu_NX:N1_N2_effectively -83 +R:X:den:to -33 +RBS:1999_ -302.5 +R:X:,_NX_zu_geben_NX:to_N1_N2 -577.5 +R:X:den:with -10 +RBS:X_terror -4.5 +R:X:,_NX_zu_geben_und:to_N1_and 470 +R:X:NX_1999:N1_1999 -302.5 +R:X:NX_1999_NX:N2_N1_1999 302.5 +RBS:1999_X 302.5 +R:X:den_NX_zu:to_N1 783.5 +R:X:NX_rahmen_der:N1_the -43 +RBS:X_rahmen -43 +RBS:rahmen_der -43 +RBS:gegen_ 22.5 +R:X:gegen:against -2 +RBT:_against -2 +RBT:against_ -2 +R:X:._NX:._N1 -79 +RBS:._X -79.5 +RBS:gegen_den -22.5 +R:X:NX_._oktober:october_N1 79.5 +RBS:._oktober 79.5 +R:X:am_NX_._NX:the_N2_N1 -0.5 +R:X:gegen_den_NX:on_N1 2 +RBS:den_terror 20.5 +RBT:on_terror -26 +R:X:NX_den_terror:the_N1_terror 29 +R:X:den_NX_den_NX:the_N1_N2 -110.5 +R:X:den_NX_den_NX:N2_the_N1 -95 +RBT:_the -1.5 +R:X:krieg:war -4.5 +RBS:_krieg -22 +R:X:musharraf:musharraf 43 +RBS:krieg_ -4.5 +RBT:_war -22 +RBS:_musharraf 66.5 +RBS:musharraf_ -23.5 +RBT:war_ -4.5 +R:X:musharraf_NX:musharraf_imposed_N1 23.5 +RBS:musharraf_X 23.5 +RBT:musharraf_imposed 23.5 +RBS:krieg_gegen 4.5 +R:X:musharraf_NX:musharraf_N1 107 +R:X:krieg_gegen:war_on 24.5 +RBT:war_on -17.5 +RBS:X_gegen -4.5 +R:X:musharraf_NX_,_als_NX:musharraf_N1_as_N2 -20 +R:X:musharraf_NX_,_als_NX:musharraf_N1_N2 -87 +R:X:krieg_gegen_den_NX:war_on_N1 -16 +R:X:krieg_gegen_den_terror:war_on_terror -26 +R:X:pervez:pervez 22 +RBS:_pervez 22 +RBS:pervez_ 57.5 +RBS:X_krieg 22 +RBT:_pervez 22 +RBT:pervez_ 22 +RBS:pervez_musharraf -57.5 +RBS:X_musharraf -9 +R:X:NX_musharraf:N1_musharraf -9 +R:X:den_NX_gegen_den:the_N1_on -4.5 +R:X:den_NX_den_terror:the_N1_terror -3 +R:X:NX_krieg_gegen_den_terror:N1_war_on_terror 22 +R:X:den_NX_den_terror_NX:N2_the_N1_terror -1.5 +RBT:_project 91 +RBS:hat_ 2 +RBS:X_- 14 +R:X:NX_-:,_N1 48.5 +R:X:NX_-:N1_months_of 32 +R:X:NX_-:N1_relief_and 64 +R:X:NX_-:N1_'s -144.5 +RBS:hat_X -198 +R:X:und_NX_terror_NX:and_N2_N1_terror -4.5 +RBT:and_ -4.5 +R:X:sorgen:bring -19 +RBS:X_pervez -22 +RBT:_bring -19 +RBT:bring_ -19 +R:X:sorgen:ensure 19 +RBT:_ensure 19 +RBT:ensure_ 19 +R:X:NX_-_NX:N1_N2_security -4 +R:X:NX_projekt_NX:N2_N1_project -156 +R:X:NX_-_NX_.:N1_N2_. 18 +R:X:NX_projekt_NX_.:N2_N1_project_. 156 +RBS:_- -14 +RBT:to_ensure 0.5 +R:X:NX_hat:has_N1 -5 +R:X:NX_hat:N1_, 3 +R:X:NX_hat:,_N1 21.5 +R:X:NX_hat:N1_has -17 +R:X:NX_hat:N1_is -0.5 +R:X:-_NX:of_N1 -26 +R:X:-_NX:'s_N1 -58 +R:X:NX_hat_NX:N1_,_N2 -73 +R:X:NX_hat_NX:N1_N2_has 28 +R:X:-_NX:-_N1 122 +R:X:NX_hat_NX:N1_,_N2_has 21 +R:X:-_NX:--_N1 -21 +R:X:-_NX:,_N1 -31 +R:X:stabilität:stability -118 +RBS:_stabilität -129 +RBT:_stability -118 +RBT:stability_ -118 +R:X:stabilität:stabilität -11 +RBT:_stabilität -11 +RBT:stabilität_ -11 +RBT:_country 253 +RBS:_für 101 +RBS:für_ 129 +RBS:X_ihres -16 +R:X:NX_ihres_NX:N1_of_their_N2 -16 +R:X:für:that 129 +RBT:_political -16 +RBS:für_X -129 +R:X:,_NX_und_NX:,_N1_N2 -2 +R:X:für_NX:to_N1 -28 +R:X:NX_stabilität:N1_stability 129 +RBS:X_stabilität 129 +RBS:X_für 22 +RBT:_with -109 +RBS:,_für -123 +R:X:,_für_NX:,_N1 15.5 +R:X:,_NX_den_NX_zu:to_N2_N1 69 +R:X:NX_für_NX_.:N1_N2_. 
22 +RBS:_ihres 16 +R:X:ihres_NX:its_N1 -50 +R:X:ihres_NX:their_N1 66 +R:X:NX_zu_verkaufen_NX:sell_N1_N2 140.5 +RBS:verkaufen_X 140.5 +RBS:_würde -204 +RBS:würde_ -117 +R:X:würde:would -204 +RBS:würde_X 126 +R:X:in_NX_hat_NX:in_N1_N2 22 +R:X:NX_dem_NX_pervez:N1_N2_pervez 35.5 +RBS:_halten 284 +RBS:halten_ 204 +R:X:NX_dem_NX_pervez_musharraf:N1_N2_pervez_musharraf -57.5 +Shape_S01111_T01011 560.5 +Shape_S01111_T11011 -20.5 +Shape_S01111_T01111 -5 +RBT:_maintain 30 +R:X:halten:halten 284 +RBT:_halten 284 +RBT:halten_ 284 +RBS:halten_X -204 +R:X:NX_würde:if_N1 35 +RBS:X_würde 204 +R:X:NX_würde:will_N1 -6 +Shape_S11111_T11010 69 +R:X:NX_würde:would_face_a_N1 -9.5 +RBT:would_face -18.5 +RBT:face_a -18.5 +Shape_S11111_T11110 -57 +R:X:NX_würde:would_N1 78 +R:X:NX_würde:N1_will -10.5 +R:X:NX_würde_NX:would_N1_N2 126 +R:X:NX_würde_.:would_face_a_N1_. -9 +RBS:würde_. -9 +PhraseModel_0 -2973.8953021225416 +R:X:vielleicht:may -177 +PhraseModel_1 -4012.0052074229625 +PhraseModel_2 -1203.5725821427027 +RBS:vielleicht_ -284 +PhraseModel_3 2747.8420998127522 +PhraseModel_4 -3205.3163436680484 +PhraseModel_5 720.5 +PhraseModel_6 275 +R:X:vielleicht:vielleicht -107 +RBT:_vielleicht -107 +RBT:vielleicht_ -107 +R:X:vielleicht_NX:perhaps_N1 284 +RBS:vielleicht_X 284 +R:X:NX_halten:maintain_the_N1 -29 +RBS:X_halten -284 +RBT:maintain_the -174 +R:X:NX_halten:N1_hold -51 +R:X:NX_halten_NX:N2_maintain_the_N1 -204 +RBT:_maintain -204 +RBS:_versprechen 30 +RBS:versprechen_ -75 +RBT:_commitment 107 +R:X:versprechen_NX:commitment_N1 30 +RBS:versprechen_X 75 +R:X:NX_versprechen:N1_commitment -75 +RBS:X_versprechen -30 +R:X:NX_,_für_NX:N1_,_N2 -138.5 +R:X:NX_versprechen_NX:N1_commitment_N2 45 +RBS:_dass -451 +RBS:dass_ -91.5 +R:X:dass_NX:that_N1 -451 +RBS:dass_X 91.5 +R:X:NX_er_sein:N1_to_make_up_for_his -91.5 +RBS:er_sein -91.5 +R:X:seine_NX_und:a_N1_, -15 +R:X:NX_,_NX_und:N1_N2_, 129 +RBS:,_dass 851.5 +R:X:NX_,_dass:N1_keep -27 +R:X:NX_,_dass:N1_said_that -0.5 +R:X:NX_,_dass:N1_to_let -9.5 +R:X:NX_dass:that_N1 -8.5 +RBS:X_dass -400.5 +R:X:NX_dass:N1_let -51.5 +R:X:NX_dass:N1_see -243.5 +R:X:NX_dass:N1_thought -97 +R:X:NX_,_dass_NX:N1_that_N2 134 +Glue -599 +PassThrough -2016.5 +R:X:musharrafs:his 2 +RBS:musharrafs_ -29 +R:X:NX_und_den:N1_and_the 22 +RBT:_his 250.5 +RBT:his_ 160.5 +R:X:musharrafs:musharraf -1.5 +RBT:_musharraf 135.5 +RBT:musharraf_ 41.5 +R:X:NX_,_dass_NX_.:N1_N2_. 91.5 +R:X:musharrafs:musharrafs -29.5 +RBT:_musharrafs -29.5 +RBT:musharrafs_ -29.5 +RBS:sie_X 346 +RBS:_X -1369.5 +R:X:dies:so -74.5 +RBS:X_ -1743 +RBS:dies_ -348 +R:X:dies:so_,_this 47 +RBT:so_, 47 +R:X:sie_NX:it_N1 22 +RBT:,_this 47 +R:X:dies:that -256.5 +R:X:NX_?:N1_? -134.5 +R:X:dies:these -5.5 +RBS:X_? -235 +RBT:_these -5.5 +RBT:these_ -5.5 +R:X:NX_?:N1_consulting_? 
-100.5 +R:X:dies:this -58.5 +R:X:letzter_NX:last_N1 -14 +RBS:_letzter -20 +RBS:letzter_X 19.5 +RBT:_last -2 +R:X:letzter:last 7 +RBS:letzter_ -19.5 +R:X:sein:be 1.5 +RBT:last_ 7 +R:X:letzter:late 11.5 +RBT:_they -6 +RBS:sein_ 68 +RBT:_late 11.5 +R:X:ist_NX:be_N1 464.5 +RBT:_be -10.5 +RBT:late_ 11.5 +R:X:sie_NX:they_N1 -22 +RBS:_ist 415.5 +RBT:be_ 120 +R:X:letzter:letzter -24.5 +RBS:ist_X 8 +R:X:sein:being -16 +RBT:_letzter -24.5 +R:X:ist_NX:has_N1 16 +RBT:_being -79 +RBT:letzter_ -24.5 +R:X:ist_NX:is_at_N1 6 +RBT:being_ -16 +R:X:musharrafs_NX:his_N1 -25 +R:X:sein:his 73 +RBS:musharrafs_X 29 +R:X:ist_NX:is_well_N1 6 +R:X:sein:its -15.5 +R:X:musharrafs_NX:musharraf_'s_N1 77.5 +R:X:sein:sein 55 +RBT:musharraf_'s 55.5 +R:X:ist_NX:is_N1 23 +RBT:_sein 55 +R:X:musharrafs_NX:musharraf_N1 -23.5 +R:X:ist_NX:more_N1 -130.5 +RBT:sein_ 55 +R:X:NX_letzter:N1_late -26.5 +R:X:ist_NX:N1_be 176 +R:X:ziel:aim -32.5 +RBS:X_letzter 20 +R:X:ist_NX:N1_has -67 +RBS:_ziel -143 +R:X:NX_letzter:N1_'s_last 13 +R:X:ist_NX:N1_is -19 +RBS:ziel_ -219 +R:S:NS_NX:N1_N2 -599 +R:X:ist_NX:N1_,_is 18 +RBT:_aim -32.5 +RBS:_S -599 +R:X:ist_NX:N1_it_is 49 +RBT:aim_ -32.5 +RBS:S_X -599 +R:X:ist:are -65.5 +R:X:ziel:goal 45 +R:X:NX_letzter_NX:N1_'s_last_N2 33.5 +RBS:ist_ -8 +RBT:_goal 45 +R:X:?:? 235 +RBT:goal_ 45 +RBS:_? 235 +R:X:ziel:target -22.5 +RBT:_? 235 +RBS:X__ -347 +RBT:_target -22.5 +RBT:?_ 235 +RBT:target_ -22.5 +R:X:ist:'s -61 +R:X:ziel:targets -18 +RBS:in_ -22 +RBT:_targets -18 +RBT:targets_ -18 +RBT:_, 24.5 +R:X:ziel:ziel -125 +RBT:,_ -38 +R:X:NX___NX:N1___N2 -347 +R:X:dies_NX:so_N1 200 +RBT:_ziel -125 +RBS:dies_X 256 +RBT:ziel_ -125 +RBT:_at 23 +R:X:dies_NX:this_to_N1 156.5 +R:X:ziel_NX:goal_N1 49 +RBT:this_to 156.5 +RBS:ziel_X 219 +R:X:dies_NX:this_N1 -100.5 +R:X:ziel_NX:targets_N1 -19 +R:X:dies_ist:could_be 118.5 +R:X:ziel_NX:target_N1 -20 +RBS:dies_ist 92 +R:X:sein_NX:being_able_to_N1 -71.5 +RBT:in_ -65.5 +R:X:in:for 31 +RBT:_could 118.5 +RBS:sein_X -68 +RBT:could_be 118.5 +RBT:being_able -63 +RBT:_for 14.5 +RBT:able_to -63 +RBT:for_ 14.5 +R:X:sein_NX:be_N1 -10 +R:X:sein_NX:his_N1 184.5 +RBS:X_ist -507.5 +R:X:sein_NX:its_N1 -26.5 +R:X:in:in -53 +R:X:sein_NX:N1_be -174.5 +R:X:NX_ziel:N1_aim -32.5 +RBT:_in -75.5 +RBS:X_ziel 143 +R:X:NX_ziel:N1_goal 20 +R:X:NX_ziel:N1_target -26.5 +R:X:NX_ziel:N1_targets -27 +RBT:_into -270 +R:X:NX_ziel_NX:N1_goal_N2 60 +R:X:NX_ziel_NX:N1_targets_N2 -6 +R:X:NX_sie_NX_,_dass:N1_N2_that 346 +R:X:NX_ziel_NX:N1_target_N2 -6 +R:X:dies_ist_NX:this_is_N1 -26.5 +R:X:NX_ziel_NX:N2_N1_goal 161 +RBT:_of -38 +RBT:of_ -17 +R:X:NX_ist_NX:is_N1_N2 -129 +RBS:_die 428.5 +R:X:NX_ist_NX:is_N1_,_N2 16.5 +RBS:die_ -116 +RBT:_on -653.5 +RBT:on_ 84.5 +R:X:NX_ist_NX:'s_N1_N2 -41.5 +R:X:die:, -9 +RBT:_over 45 +R:X:die:a -5 +R:X:NX_ist_NX:N1_has_N2 -104.5 +R:X:blieben_NX:remained_N1 135 +R:X:die:an -123 +R:X:NX_ist_NX:N1_is_at_N2 -5.5 +RBS:_blieben 187.5 +R:X:NX_ist_NX:N1_is_well_N2 -5 +RBS:blieben_X -13 +RBT:_are -65.5 +RBT:_'s 16 +R:X:NX_ist_NX:N1_is_N2 -31 +RBT:are_ -65.5 +RBT:'s_ -28.5 +R:X:blieben_NX:N1_remained 81.5 +R:X:NX_ist_NX:N1_,_is_N2 59.5 +R:X:die:by -10 +R:X:die:its 302.5 +RBS:_pakistanis 57 +RBS:pakistanis_ 116.5 +RBT:_to 93.5 +RBT:_pakistanis 161 +R:X:NX_ist_NX:N1_N2_has -75 +R:X:die:the -28 +RBT:to_ 18 +R:X:NX_ist_NX:N1_N2_is -97.5 +R:X:pakistanis_NX:pakistanis_N1 57 +R:X:NX_ist_NX:N1_,_N2_is -1 +RBT:_those -6 +RBT:_within 20 +RBT:within_ 20 +RBS:pakistanis_X -116.5 +R:X:NX_blieben_NX:N1_,_N2_remained -229.5 +R:X:NX_ist_NX:N2_is_N1 -47 +RBS:X_blieben -187.5 +RBT:_is -21 
+R:X:NX_pakistanis:pakistanis_,_N1 235.5 +RBS:X_pakistanis -57 +RBT:pakistanis_, 104 +R:X:NX_pakistanis:N1_pakistanis -119 +R:X:NX_ist_NX:N2_N1_is -46.5 +RBS:blieben_ 13 +RBT:_is -251 +R:X:blieben:blieben -29 +RBT:_blieben -29 +RBT:blieben_ -29 +R:X:NX_pakistanis_NX:pakistanis_,_N1_,_N2 -23 +RBS:_zu -560 +R:X:NX_pakistanis_NX:N1_pakistanis_N2 -150.5 +RBS:zu_X -717.5 +R:X:NX_blieben:N1_,_remained 42 +RBS:__ 347 +RBS:_ein 37.5 +RBS:ein_ -9.5 +RBS:der_ -88.5 +R:X:zu_NX:for_N1 43 +R:X:__NX:__N1 -97 +RBT:_- 113 +RBT:-_ -4 +R:X:__NX:,_N1 444 +R:X:zu_NX:in_N1 37.5 +RBT:_a -27.5 +RBT:a_ -5 +RBS:sie_ -346 +RBT:the_ 40 +R:X:zu_NX:to_N1 -716 +R:X:zu_NX:with_N1 40.5 +R:X:zu_NX:N1_on 30 +RBT:_the 324.5 +R:X:NX_sie:but_N1 -346 +RBS:X_ein -37.5 +RBT:be_transformed -12 +R:X:medien:media 299.5 +RBS:_medien -71.5 +RBT:_with 54.5 +RBS:medien_ -156 +RBT:with_ -19 +RBT:_media 299.5 +R:X:NX_ein:N1_has_an -3.5 +RBT:media_ 299.5 +R:X:NX_ein:N1_put_forward_a -6 +R:X:medien:medien -371 +RBT:_medien -371 +RBT:medien_ -371 +RBS:der_X 45 +RBS:medien_X 156 +R:X:NX_zu_NX:in_N2_N1 -9.5 +RBS:X_zu 339 +RBT:in_ -2.5 +R:X:NX_zu_NX:of_N2_N1 -52.5 +RBT:to_ -102.5 +RBT:_to 30 +R:X:,_dass_NX:that_N1 317 +R:X:NX_zu_NX:to_N2_N1 19 +R:X:NX_zu_NX:N1_in_N2 -2 +R:X:NX_zu_NX:N1_is_N2 -2 +RBS:X_macht -0.5 +R:X:NX_zu_NX:N1_to_N2 48 +R:X:NX_macht_NX:N1_N2_does -0.5 +R:X:NX_zu_NX:N2_N1_to -28 +R:X:NX_zu_NX_.:to_N2_N1_. 22.5 +RBS:an_ 28 +R:X:NX_zu_NX_.:N1_is_N2_. -3.5 +R:X:NX_zu_NX_.:N1_to_N2_. 7.5 +R:X:NX_zu_NX_.:N1_with_N2_. -3 +R:X:NX_zu_NX_.:N1_N2_. -221.5 +R:X:NX_zu_NX_.:N2_N1_. 4.5 +R:X:freien:free -83.5 +RBS:_freien -118 +RBS:freien_ -201.5 +RBT:_free 210 +RBT:free_ -83.5 +R:X:freien:freien -276 +RBT:_freien -276 +RBT:freien_ -276 +RBT:_an 31.5 +R:X:freien_NX:free_N1 248 +RBT:an_ -123 +RBS:freien_X 201.5 +R:X:NX_medien:N1_media -90 +RBS:X_medien 71.5 +R:X:amerika:america 193 +RBS:_amerika -36 +R:X:NX_medien_NX:N2_N1_media 5 +R:X:an_NX:in_N1 210 +R:X:freien_NX_.:free_N1_. -6.5 +RBS:amerika_ -131 +R:X:NX_medien_NX_.:N2_N1_media_. 
151 +RBT:_america 283.5 +RBT:america_ 193 +R:X:die_NX:an_N1 -7.5 +R:X:amerika:american -3 +RBS:die_X -45.5 +RBT:_american -3 +RBT:american_ -3 +R:X:amerika:amerika -321 +RBS:_jener 62.5 +R:X:die_NX:a_N1 19 +RBT:_amerika -321 +RBS:jener_X 62.5 +RBT:amerika_ -321 +R:X:jener_NX:the_N1 62.5 +R:X:an_NX:to_N1 -210 +RBS:X_jener -62.5 +RBS:amerika_X 131 +R:X:amerika_NX:america_N1 107 +R:X:die_NX:is_N1 -2.5 +RBS:an_der -28 +R:X:auf:, -5 +R:X:die_NX:its_N1 -14 +RBS:auf_ 46.5 +R:X:die_NX:'s_N1 46.5 +RBS:X_der 71 +R:X:NX_der:N1_for -74 +R:X:NX_der:N1_in -43 +R:X:auf:in -5.5 +RBT:_choice -103 +R:X:die_NX:the_N1 -86.5 +RBT:_decision 103 +R:X:auf:on 60 +R:X:die_NX:those_N1 -6 +R:X:NX_der:N1_to 72 +R:X:entscheidung_NX:choice_is_N1 -103 +R:X:die_NX:with_N1 73.5 +R:X:auf:auf -3 +RBT:choice_is -103 +RBT:_auf -3 +R:X:entscheidung_NX:decision_N1 103 +R:X:die_NX:,_N1 57 +R:X:die_NX:N1_is -0.5 +RBT:auf_ -3 +R:X:die_NX:N1_'s -1 +RBS:auf_X -46.5 +R:X:die_NX:N1_the -1 +R:X:NX_freien:N1_free 158 +RBT:of_ -13 +RBS:X_freien 118 +R:X:NX_der_NX:over_N2_N1 45 +R:X:NX_freien_NX:N1_free_N2 -34 +R:X:NX_freien_NX:N1_free_,_N2 -6 +RBT:over_ 45 +R:X:die_NX_medien:the_N1_media 5.5 +R:X:auf_NX:in_N1 -46.5 +RBT:the_ -0.5 +R:X:auf_NX:on_N1 66 +R:X:auf_NX:to_N1 -2 +R:X:auf_NX:,_N1 -18 +RBS:X_amerika 36 +RBT:_may -177 +RBS:und_die 139.5 +RBT:may_ -177 +RBT:_ 585.5 +RBT:_would -18.5 +RBS:X_die -568 +RBT:would_ -204 +R:X:NX_die:the_N1 34.5 +R:X:NX_amerika_NX:N2_N1_america 36 +R:X:terroranschläge:terrorist -22 +R:X:NX_die:,_N1 -42 +R:X:NX_die:N1_, -173 +RBS:_terroranschläge -161.5 +RBS:der_macht 0.5 +R:X:NX_die:-_N1 -5 +RBS:terroranschläge_ -46 +R:X:NX_die:N1_a -1 +RBT:_terrorist -119.5 +R:X:NX_der_macht_NX:N1_hold_N2_power 28 +RBT:terrorist_ -22 +R:X:,:, -2.5 +RBT:terrorist_attacks 77.5 +RBS:_, -182 +RBT:attacks_ 28 +RBS:,_ -160.5 +R:X:terroranschläge:terroranschläge -52 +RBT:_terroranschläge -52 +RBT:__ -139 +RBT:terroranschläge_ -52 +R:X:NX_die:N1_its -128.5 +RBS:terroranschläge_X 46 +RBT:_-- -64 +R:X:terroranschläge_NX:terrorist_attacks_N1 -87.5 +RBT:_by -10 +RBT:by_ -10 +R:X:,:out -3.5 +RBT:_out -3.5 +R:X:und_die_NX:and_N1 218 +RBT:out_ -3.5 +RBT:_that -261.5 +R:X:NX_die_NX:the_N1_N2 -1 +RBT:that_ -127.5 +R:X:NX_die_NX:the_N2_N1 -4 +RBS:,_X -335 +RBT:,_as -40 +R:X:,_NX:in_N1 -239 +R:X:,_NX:of_N1 -4 +R:X:,_NX:on_N1 -166 +R:X:,_NX:to_N1 649 +R:X:NX_die_NX:N1_the_N2 -4 +R:X:,_NX:,_N1 -399 +R:X:,_NX:__N1 -42 +R:X:,_NX:--_N1 -102 +R:X:,_an:to 28 +RBS:,_an 28 +R:X:NX_die_NX:N1_,_N2 -5 +R:X:NX_die_NX:N1_N2_the -4 +RBS:X_an -28 +RBS:die_terroranschläge 161.5 +R:X:die_terroranschläge:,_terrorist_attacks 28 +RBT:,_terrorist 175 +R:X:die_terroranschläge_NX:,_terrorist_attacks_N1 147 +R:X:NX_so:N1_as -1.5 +R:X:justiz:judiciary -90 +RBS:_justiz -1 +RBS:justiz_ -220.5 +R:X:NX_so:N1_that -14 +RBT:_judiciary 215 +R:X:NX_so:N1_the 15.5 +RBT:judiciary_ -90 +R:X:justiz:justiz -216 +RBT:_justiz -216 +RBT:justiz_ -216 +R:X:justiz_NX:judiciary_N1 305 +RBS:justiz_X 205 +RBS:_brachten -28 +RBS:justiz_und 15.5 +RBS:brachten_ -175 +R:X:NX_und_die:'s_N1_and -5 +R:X:brachten:brachten -175 +RBT:_brachten -175 +RBT:brachten_ -175 +R:X:NX_an_der:N1_the -0.5 +R:X:brachten_NX:N1_brought 147 +RBS:brachten_X 175 +R:X:NX_die_terroranschläge_NX:,_terrorist_attacks_N2_N1 -13.5 +R:X:NX_und_die:N1_'s -12 +R:X:NX_und_die_NX:'s_N2_N1 -16 +RBS:_2001 -14.5 +RBS:2001_ 28 +RBT:_2001 37.5 +R:X:NX_und_die_NX:N1_and_N2 -159 +RBT:2001_ 28 +R:X:2001_NX:2001_N1 147 +RBS:2001_X -28 +R:X:NX_brachten_NX:N1_N2_brought 28 +RBS:X_brachten 28 +RBT:,_ -109.5 
+R:X:2001_NX_die_NX:2001_,_N2_N1 -161.5 +R:X:unabhängige:independent 38 +RBT:2001_, -109.5 +RBS:_unabhängige 127 +RBS:unabhängige_ -197 +RBT:_independent 343 +RBT:independent_ 38 +RBT:_september -13.5 +R:X:unabhängige:unabhängige -198 +RBT:_unabhängige -198 +RBS:ein_X 9.5 +RBT:unabhängige_ -198 +RBS:september_X -14.5 +R:X:unabhängige_NX:independent_N1 287 +R:X:ein_NX:an_N1 132 +R:X:ein_NX:any_N1 25 +RBS:unabhängige_X 197 +R:X:NX_justiz:N1_judiciary 85.5 +R:X:NX_an_der_macht_NX:N1_of_power_N2 -27.5 +RBS:X_justiz 1 +R:X:NX_justiz_NX:N1_judiciary_N2 -43 +R:X:NX_justiz_und:N1_judiciary_and 15.5 +RBS:_11 -13.5 +R:X:NX_unabhängige:N1_independent -37 +R:X:ein_NX:a_N1 -93 +RBS:X_unabhängige -127 +R:X:ein_NX:one_N1 -15 +R:X:NX_unabhängige_NX:N1_independent_N2 -90 +R:X:ein_NX:-_N1 -11.5 +R:X:NX_ein_NX:an_N1_N2 -6 +R:X:NX_ein_NX:be_transformed_N1_N2 -22 +RBS:X_, -3.5 +RBS:september_2001 14.5 +RBT:,_2001 14.5 +R:X:NX_,:to_N1 68 +R:X:NX_,:N1__ 1 +R:X:NX_,:N1_-- -172.5 +R:X:11_._september_2001_NX:september_11_,_2001_N1 -13.5 +R:X:die_NX_und_NX:the_N1_N2 -10 +R:X:NX_,:N1_for -127.5 +R:X:NX_,:N1_in -13.5 +R:X:NX_,:N1_of -55 +R:X:NX_,:N1_on 257.5 +R:X:NX_,:N1_out -58 +RBS:am_11 13.5 +R:X:die_NX_justiz_NX_die:the_N1_judiciary_N2 -57 +R:X:NX_,:N1_refuses_to -232.5 +R:X:die_NX_und_die:the_N1_and 148 +R:X:die_NX_und_die:the_N1_and_the -2.5 +RBT:the_september 13.5 +R:X:die_NX_die_NX:the_N1_N2 -3 +R:X:am_11_._september_NX:the_september_11_,_N1 -14.5 +R:X:die_NX_und_die_NX:the_N1_and_N2 -32 +RBS:zu_ 672 +R:X:NX_,_NX:N1_,_N2 -78 +R:X:NX_,_NX:N1_N2_, 80 +R:X:am_11_._september_2001:the_september_11_,_2001 28 +R:X:zu:for -5 +R:X:zu:in -7 +R:X:zu:to 23 +R:X:taliban:taliban -251.5 +RBS:_taliban -223.5 +RBS:taliban_ -157.5 +R:X:zu:with -6 +R:X:verzweifelten:desperate 28.5 +RBT:_taliban -205.5 +RBT:_desperate 28.5 +RBT:taliban_ -107 +RBT:desperate_ 28.5 +R:X:taliban_NX:taliban_N1 28 +R:X:verzweifelten:verzweifelten -28.5 +RBS:taliban_X 157.5 +R:X:NX_zu:to_N1 -229 +RBT:_verzweifelten -28.5 +R:X:den_taliban:the_taliban 144.5 +RBT:verzweifelten_ -28.5 +RBS:den_taliban 223.5 +RBT:the_taliban 144.5 +R:X:NX_zu:N1_for -152 +R:X:NX_zu:N1_in -6 +R:X:NX_zu:N1_is 251 +R:X:NX_zu:N1_of -49.5 +RBS:_dem 22 +RBT:_its 458 +RBT:its_ 337 +R:X:NX_den_taliban:N1_taliban -50.5 +R:X:NX_den_taliban_NX:N1_taliban_N2 -2.5 +R:X:NX_den_taliban_NX:N2_N1_taliban 132 +R:X:erklärte:declared -8 +RBS:_erklärte -185.5 +RBS:erklärte_ -124.5 +RBT:_declaring -9 +R:X:erklärte:erklärte -116.5 +RBT:_erklärte -116.5 +RBT:erklärte_ -116.5 +R:X:erklärte_NX:declared_N1 -52 +RBS:erklärte_X -61 +RBS:jener_ -62.5 +R:X:erklärte_NX:declaring_N1 -9 +RBS:erklärte_, 185.5 +R:X:NX_jener:N1_of -62.5 +R:X:dem_NX:the_N1 22 +R:X:verkaufen:sell -153 +RBS:_verkaufen -153 +RBS:verkaufen_ -140.5 +RBT:sell_ -153 +RBS:bereit_ 86 +RBS:zu_verkaufen 153 +RBS:_bemühen -2.5 +R:X:bereit:bereit 86 +RBT:_bereit 86 +RBT:bereit_ 86 +R:X:bereit_NX:ready_N1 -31 +RBS:bereit_X -86 +R:X:bereit_NX:N1_ready -55 +RBS:X_zum 30 +R:X:bemühen:bemühen -2.5 +R:X:NX_erklärte_,:N1_, 110 +RBT:_bemühen -2.5 +RBS:X_erklärte 185.5 +RBT:bemühen_ -2.5 +R:X:NX_erklärte_,_NX:N1_,_N2 75.5 +RBS:in_X 22 +RBS:_sich -17.5 +R:X:NX_zu_verkaufen:sell_N1 12.5 +RBS:sich_ -17.5 +R:X:NX_zum_NX:N2_to_further_N1 30 +RBS:_das 45 +RBS:das_ 2.5 +RBT:to_further 30 +RBT:_it -381 +RBT:it_ 3 +RBT:_so 172.5 +RBT:so_ -74.5 +RBT:_this 9.5 +RBT:this_ -11.5 +RBS:X_dem -22 +R:X:das_NX:a_growing_N1 77 +RBS:das_X -2.5 +RBT:a_growing -41 +R:X:das_NX:be_N1 169 +R:X:das_NX:its_N1 -95 +R:X:das_NX:so_N1 -38 +RBS:X_sein 91.5 +R:X:das_NX:the_N1 -80 
+done + +--- +Best iteration: 2 [SCORE 'stupid_bleu'=0.37119]. +This took 0.6 min. diff --git a/training/dtrain/examples/standard/nc-wmt11.de.gz b/training/dtrain/examples/standard/nc-wmt11.de.gz new file mode 100644 index 00000000..0741fd92 Binary files /dev/null and b/training/dtrain/examples/standard/nc-wmt11.de.gz differ diff --git a/training/dtrain/examples/standard/nc-wmt11.en.gz b/training/dtrain/examples/standard/nc-wmt11.en.gz new file mode 100644 index 00000000..1c0bd401 Binary files /dev/null and b/training/dtrain/examples/standard/nc-wmt11.en.gz differ diff --git a/training/dtrain/examples/standard/nc-wmt11.en.srilm.gz b/training/dtrain/examples/standard/nc-wmt11.en.srilm.gz new file mode 100644 index 00000000..7ce81057 Binary files /dev/null and b/training/dtrain/examples/standard/nc-wmt11.en.srilm.gz differ diff --git a/training/dtrain/examples/standard/nc-wmt11.grammar.gz b/training/dtrain/examples/standard/nc-wmt11.grammar.gz new file mode 100644 index 00000000..ce4024a1 Binary files /dev/null and b/training/dtrain/examples/standard/nc-wmt11.grammar.gz differ diff --git a/training/dtrain/examples/toy/cdec.ini b/training/dtrain/examples/toy/cdec.ini new file mode 100644 index 00000000..b14f4819 --- /dev/null +++ b/training/dtrain/examples/toy/cdec.ini @@ -0,0 +1,3 @@ +formalism=scfg +add_pass_through_rules=true +grammar=grammar.gz diff --git a/training/dtrain/examples/toy/dtrain.ini b/training/dtrain/examples/toy/dtrain.ini new file mode 100644 index 00000000..cd715f26 --- /dev/null +++ b/training/dtrain/examples/toy/dtrain.ini @@ -0,0 +1,13 @@ +decoder_config=cdec.ini +input=src +refs=tgt +output=- +print_weights=logp shell_rule house_rule small_rule little_rule PassThrough +k=4 +N=4 +epochs=2 +scorer=bleu +sample_from=kbest +filter=uniq +pair_sampling=all +learning_rate=1 diff --git a/training/dtrain/examples/toy/expected-output b/training/dtrain/examples/toy/expected-output new file mode 100644 index 00000000..1da2aadd --- /dev/null +++ b/training/dtrain/examples/toy/expected-output @@ -0,0 +1,77 @@ +Warning: hi_lo only works with pair_sampling XYX. + cdec cfg 'cdec.ini' +Seeding random number sequence to 1664825829 + +dtrain +Parameters: + k 4 + N 4 + T 2 + scorer 'bleu' + sample from 'kbest' + filter 'uniq' + learning rate 1 + gamma 0 + loss margin 0 + pairs 'all' + pair threshold 0 + select weights 'last' + l1 reg 0 'none' + max pairs 4294967295 + cdec cfg 'cdec.ini' + input 'src' + refs 'tgt' + output '-' +(a dot represents 10 inputs) +Iteration #1 of 2. + 2 +WEIGHTS + logp = +0 + shell_rule = -1 + house_rule = +2 + small_rule = -2 + little_rule = +3 + PassThrough = -5 + --- + 1best avg score: 0.5 (+0.5) + 1best avg model score: 2.5 (+2.5) + avg # pairs: 4 + avg # rank err: 1.5 + avg # margin viol: 0 + non0 feature count: 6 + avg list sz: 4 + avg f count: 2.875 +(time 0 min, 0 s/S) + +Iteration #2 of 2. + 2 +WEIGHTS + logp = +0 + shell_rule = -1 + house_rule = +2 + small_rule = -2 + little_rule = +3 + PassThrough = -5 + --- + 1best avg score: 1 (+0.5) + 1best avg model score: 5 (+2.5) + avg # pairs: 5 + avg # rank err: 0 + avg # margin viol: 0 + non0 feature count: 6 + avg list sz: 4 + avg f count: 3 +(time 0 min, 0 s/S) + +Writing weights file to '-' ... +house_rule 2 +little_rule 3 +Glue -4 +PassThrough -5 +small_rule -2 +shell_rule -1 +done + +--- +Best iteration: 2 [SCORE 'bleu'=1]. +This took 0 min. 
diff --git a/training/dtrain/examples/toy/grammar.gz b/training/dtrain/examples/toy/grammar.gz new file mode 100644 index 00000000..8eb0d29e Binary files /dev/null and b/training/dtrain/examples/toy/grammar.gz differ diff --git a/training/dtrain/examples/toy/src b/training/dtrain/examples/toy/src new file mode 100644 index 00000000..87e39ef2 --- /dev/null +++ b/training/dtrain/examples/toy/src @@ -0,0 +1,2 @@ +ich sah ein kleines haus +ich fand ein kleines haus diff --git a/training/dtrain/examples/toy/tgt b/training/dtrain/examples/toy/tgt new file mode 100644 index 00000000..174926b3 --- /dev/null +++ b/training/dtrain/examples/toy/tgt @@ -0,0 +1,2 @@ +i saw a little house +i found a little house diff --git a/training/dtrain/lplp.rb b/training/dtrain/lplp.rb index f0cd58c5..86e835e8 100755 --- a/training/dtrain/lplp.rb +++ b/training/dtrain/lplp.rb @@ -84,34 +84,28 @@ def _test() end #_test() -# actually do something + def usage() - puts "lplp.rb <l0,l1,l2,linfty,mean,median> <cut|select_k> <k|threshold> [n] < <input>" + puts "lplp.rb <l0,l1,l2,linfty,mean,median> <cut|select_k> <k|threshold> <#shards> < <input>" puts " l0...: norms for selection" puts "select_k: only output top k (according to the norm of their column vector) features" puts " cut: output features with weight >= threshold" puts " n: if we do not have a shard count use this number for averaging" - exit + exit 1 end -if ARGV.size < 3 then usage end +if ARGV.size < 4 then usage end norm_fun = method(ARGV[0].to_sym) type = ARGV[1] x = ARGV[2].to_f - -shard_count_key = "__SHARD_COUNT__" +shard_count = ARGV[3].to_f STDIN.set_encoding 'utf-8' STDOUT.set_encoding 'utf-8' w = {} -shard_count = 0 while line = STDIN.gets key, val = line.split /\s+/ - if key == shard_count_key - shard_count += 1 - next - end if w.has_key? key w[key].push val.to_f else @@ -119,8 +113,6 @@ while line = STDIN.gets end end -if ARGV.size == 4 then shard_count = ARGV[3].to_f end - if type == 'cut' cut(w, norm_fun, shard_count, x) elsif type == 'select_k' diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb index 24e7f49e..e661416e 100755 --- a/training/dtrain/parallelize.rb +++ b/training/dtrain/parallelize.rb @@ -3,16 +3,15 @@ require 'trollop' def usage - if ARGV.size != 8 - STDERR.write "Usage: " - STDERR.write "ruby parallelize.rb -c <dtrain.ini> -e <epochs> [--randomize/-z] [--reshard/-y] -s <#shards|0> -p <at once> -i <input> -r <refs> [--qsub/-q] --dtrain_binary <path to dtrain binary> -l \"l2 select_k 100000\"\n" - exit 1 - end + STDERR.write "Usage: " + STDERR.write "ruby parallelize.rb -c <dtrain.ini> [-e <epochs>] [--randomize/-z] [--reshard/-y] -s <#shards|0> [-p <at once>] -i <input> -r <refs> [--qsub/-q] [--dtrain_binary <path to dtrain binary>] [-l \"l2 select_k 100000\"]\n" + exit 1 end opts = Trollop::options do opt :config, "dtrain config file", :type => :string - opt :epochs, "number of epochs", :type => :int + opt :epochs, "number of epochs", :type => :int, :default => 10 + opt :lplp_args, "arguments for lplp.rb", :type => :string, :default => "l2 select_k 100000" opt :randomize, "randomize shards before each epoch", :type => :bool, :short => '-z', :default => false opt :reshard, "reshard after each epoch", :type => :bool, :short => '-y', :default => false opt :shards, "number of shards", :type => :int @@ -21,8 +20,8 @@ opts = Trollop::options do opt :references, "references", :type => :string opt :qsub, "use qsub", :type => :bool, :default => false opt :dtrain_binary, "path to dtrain binary", :type => :string - opt :lplp_args, "arguments for lplp arguments", :type => :string, :default => "l2 select_k 100000" end +usage if not opts[:config]&&opts[:shards]&&opts[:input]&&opts[:references] dtrain_dir = File.expand_path File.dirname(__FILE__) @@ -32,16 +31,14 @@ else dtrain_bin =
opts[:dtrain_binary] end ruby = '/usr/bin/ruby' -lplp_rb = "#{dtrain_dir}/hstreaming/lplp.rb" +lplp_rb = "#{dtrain_dir}/lplp.rb" lplp_args = opts[:lplp_args] cat = '/bin/cat' ini = opts[:config] epochs = opts[:epochs] -rand = false -rand = true if opts[:randomize] -reshard = false -reshard = true if opts[:reshard] +rand = opts[:randomize] +reshard = opts[:reshard] predefined_shards = false if opts[:shards] == 0 predefined_shards = true @@ -49,11 +46,10 @@ if opts[:shards] == 0 else num_shards = opts[:shards] end -shards_at_once = opts[:processes_at_once] input = opts[:input] refs = opts[:references] -use_qsub = false -use_qsub = true if opts[:qsub] +use_qsub = opts[:qsub] +shards_at_once = opts[:processes_at_once] `mkdir work` diff --git a/training/dtrain/test/example/README b/training/dtrain/test/example/README deleted file mode 100644 index 2df77086..00000000 --- a/training/dtrain/test/example/README +++ /dev/null @@ -1,8 +0,0 @@ -Small example of input format for distributed training. -Call dtrain from this folder with ../../dtrain -c test/example/dtrain.ini . - -For this to work, undef 'DTRAIN_LOCAL' in dtrain.h -and recompile. - -data can be found here: http://simianer.de/#dtrain - diff --git a/training/dtrain/test/example/cdec.ini b/training/dtrain/test/example/cdec.ini deleted file mode 100644 index 0215416d..00000000 --- a/training/dtrain/test/example/cdec.ini +++ /dev/null @@ -1,25 +0,0 @@ -formalism=scfg -add_pass_through_rules=true -scfg_max_span_limit=15 -intersection_strategy=cube_pruning -cubepruning_pop_limit=200 -feature_function=WordPenalty -feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz -# all currently working feature functions for translation: -# (with those features active that were used in the ACL paper) -#feature_function=ArityPenalty -#feature_function=CMR2008ReorderingFeatures -#feature_function=Dwarf -#feature_function=InputIndicator -#feature_function=LexNullJump -#feature_function=NewJump -#feature_function=NgramFeatures -#feature_function=NonLatinCount -#feature_function=OutputIndicator -feature_function=RuleIdentityFeatures -feature_function=RuleSourceBigramFeatures -feature_function=RuleTargetBigramFeatures -feature_function=RuleShape -#feature_function=SourceSpanSizeFeatures -#feature_function=SourceWordPenalty -#feature_function=SpanFeatures diff --git a/training/dtrain/test/example/dtrain.ini b/training/dtrain/test/example/dtrain.ini deleted file mode 100644 index 97fce7f0..00000000 --- a/training/dtrain/test/example/dtrain.ini +++ /dev/null @@ -1,22 +0,0 @@ -input=./nc-wmt11.1k.gz # use '-' for STDIN -output=- # a weights file (add .gz for gzip compression) or STDOUT '-' -select_weights=VOID # don't output weights -decoder_config=./cdec.ini # config for cdec -# weights for these features will be printed on each iteration -print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough -tmp=/tmp -stop_after=10 # stop epoch after 10 inputs - -# interesting stuff -epochs=2 # run over input 2 times -k=100 # use 100best lists -N=4 # optimize (approx) BLEU4 -scorer=stupid_bleu # use 'stupid' BLEU+1 -learning_rate=1.0 # learning rate, don't care if gamma=0 (perceptron) -gamma=0 # use SVM reg -sample_from=kbest # use kbest lists (as opposed to forest) -filter=uniq # only unique entries in kbest (surface form) -pair_sampling=XYX -hi_lo=0.1 # 10 vs 80 vs 10 and 80 vs 10 here -pair_threshold=0 # minimum distance in BLEU (this will still only use pairs 
with diff > 0) -loss_margin=0 diff --git a/training/dtrain/test/example/expected-output b/training/dtrain/test/example/expected-output deleted file mode 100644 index 05326763..00000000 --- a/training/dtrain/test/example/expected-output +++ /dev/null @@ -1,89 +0,0 @@ - cdec cfg 'test/example/cdec.ini' -Loading the LM will be faster if you build a binary file. -Reading test/example/nc-wmt11.en.srilm.gz -----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 -**************************************************************************************************** - Example feature: Shape_S00000_T00000 -Seeding random number sequence to 2912000813 - -dtrain -Parameters: - k 100 - N 4 - T 2 - scorer 'stupid_bleu' - sample from 'kbest' - filter 'uniq' - learning rate 1 - gamma 0 - loss margin 0 - pairs 'XYX' - hi lo 0.1 - pair threshold 0 - select weights 'VOID' - l1 reg 0 'none' - max pairs 4294967295 - cdec cfg 'test/example/cdec.ini' - input 'test/example/nc-wmt11.1k.gz' - output '-' - stop_after 10 -(a dot represents 10 inputs) -Iteration #1 of 2. - . 10 -Stopping after 10 input sentences. -WEIGHTS - Glue = -637 - WordPenalty = +1064 - LanguageModel = +1175.3 - LanguageModel_OOV = -1437 - PhraseModel_0 = +1935.6 - PhraseModel_1 = +2499.3 - PhraseModel_2 = +964.96 - PhraseModel_3 = +1410.8 - PhraseModel_4 = -5977.9 - PhraseModel_5 = +522 - PhraseModel_6 = +1089 - PassThrough = -1308 - --- - 1best avg score: 0.16963 (+0.16963) - 1best avg model score: 64485 (+64485) - avg # pairs: 1494.4 - avg # rank err: 702.6 - avg # margin viol: 0 - non0 feature count: 528 - avg list sz: 85.7 - avg f count: 102.75 -(time 0.083 min, 0.5 s/S) - -Iteration #2 of 2. - . 10 -WEIGHTS - Glue = -1196 - WordPenalty = +809.52 - LanguageModel = +3112.1 - LanguageModel_OOV = -1464 - PhraseModel_0 = +3895.5 - PhraseModel_1 = +4683.4 - PhraseModel_2 = +1092.8 - PhraseModel_3 = +1079.6 - PhraseModel_4 = -6827.7 - PhraseModel_5 = -888 - PhraseModel_6 = +142 - PassThrough = -1335 - --- - 1best avg score: 0.277 (+0.10736) - 1best avg model score: -3110.5 (-67595) - avg # pairs: 1144.2 - avg # rank err: 529.1 - avg # margin viol: 0 - non0 feature count: 859 - avg list sz: 74.9 - avg f count: 112.84 -(time 0.067 min, 0.4 s/S) - -Writing weights file to '-' ... -done - ---- -Best iteration: 2 [SCORE 'stupid_bleu'=0.277]. -This took 0.15 min. 
diff --git a/training/dtrain/test/parallelize/README b/training/dtrain/test/parallelize/README deleted file mode 100644 index 89715105..00000000 --- a/training/dtrain/test/parallelize/README +++ /dev/null @@ -1,5 +0,0 @@ -run for example - ../../parallelize.rb ./dtrain.ini 4 false 2 2 ./in ./refs - -final weights will be in the file work/weights.3 - diff --git a/training/dtrain/test/parallelize/cdec.ini b/training/dtrain/test/parallelize/cdec.ini deleted file mode 100644 index e43ba1c4..00000000 --- a/training/dtrain/test/parallelize/cdec.ini +++ /dev/null @@ -1,22 +0,0 @@ -formalism=scfg -add_pass_through_rules=true -intersection_strategy=cube_pruning -cubepruning_pop_limit=200 -scfg_max_span_limit=15 -feature_function=WordPenalty -feature_function=KLanguageModel ../example/nc-wmt11.en.srilm.gz -#feature_function=ArityPenalty -#feature_function=CMR2008ReorderingFeatures -#feature_function=Dwarf -#feature_function=InputIndicator -#feature_function=LexNullJump -#feature_function=NewJump -#feature_function=NgramFeatures -#feature_function=NonLatinCount -#feature_function=OutputIndicator -#feature_function=RuleIdentityFeatures -#feature_function=RuleNgramFeatures -#feature_function=RuleShape -#feature_function=SourceSpanSizeFeatures -#feature_function=SourceWordPenalty -#feature_function=SpanFeatures diff --git a/training/dtrain/test/parallelize/dtrain.ini b/training/dtrain/test/parallelize/dtrain.ini deleted file mode 100644 index 03f9d240..00000000 --- a/training/dtrain/test/parallelize/dtrain.ini +++ /dev/null @@ -1,15 +0,0 @@ -k=100 -N=4 -learning_rate=0.0001 -gamma=0 -loss_margin=0 -epochs=1 -scorer=stupid_bleu -sample_from=kbest -filter=uniq -pair_sampling=XYX -hi_lo=0.1 -select_weights=last -print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough -tmp=/tmp -decoder_config=cdec.ini diff --git a/training/dtrain/test/parallelize/g/grammar.out.0.gz b/training/dtrain/test/parallelize/g/grammar.out.0.gz deleted file mode 100644 index 1e28a24b..00000000 Binary files a/training/dtrain/test/parallelize/g/grammar.out.0.gz and /dev/null differ diff --git a/training/dtrain/test/parallelize/g/grammar.out.1.gz b/training/dtrain/test/parallelize/g/grammar.out.1.gz deleted file mode 100644 index 372f5675..00000000 Binary files a/training/dtrain/test/parallelize/g/grammar.out.1.gz and /dev/null differ diff --git a/training/dtrain/test/parallelize/g/grammar.out.2.gz b/training/dtrain/test/parallelize/g/grammar.out.2.gz deleted file mode 100644 index 145d0dc0..00000000 Binary files a/training/dtrain/test/parallelize/g/grammar.out.2.gz and /dev/null differ diff --git a/training/dtrain/test/parallelize/g/grammar.out.3.gz b/training/dtrain/test/parallelize/g/grammar.out.3.gz deleted file mode 100644 index 105593ff..00000000 Binary files a/training/dtrain/test/parallelize/g/grammar.out.3.gz and /dev/null differ diff --git a/training/dtrain/test/parallelize/g/grammar.out.4.gz b/training/dtrain/test/parallelize/g/grammar.out.4.gz deleted file mode 100644 index 30781f48..00000000 Binary files a/training/dtrain/test/parallelize/g/grammar.out.4.gz and /dev/null differ diff --git a/training/dtrain/test/parallelize/g/grammar.out.5.gz b/training/dtrain/test/parallelize/g/grammar.out.5.gz deleted file mode 100644 index 834ee759..00000000 Binary files a/training/dtrain/test/parallelize/g/grammar.out.5.gz and /dev/null differ diff --git a/training/dtrain/test/parallelize/g/grammar.out.6.gz 
b/training/dtrain/test/parallelize/g/grammar.out.6.gz deleted file mode 100644 index 2e76f348..00000000 Binary files a/training/dtrain/test/parallelize/g/grammar.out.6.gz and /dev/null differ diff --git a/training/dtrain/test/parallelize/g/grammar.out.7.gz b/training/dtrain/test/parallelize/g/grammar.out.7.gz deleted file mode 100644 index 3741a887..00000000 Binary files a/training/dtrain/test/parallelize/g/grammar.out.7.gz and /dev/null differ diff --git a/training/dtrain/test/parallelize/g/grammar.out.8.gz b/training/dtrain/test/parallelize/g/grammar.out.8.gz deleted file mode 100644 index ebf6bd0c..00000000 Binary files a/training/dtrain/test/parallelize/g/grammar.out.8.gz and /dev/null differ diff --git a/training/dtrain/test/parallelize/g/grammar.out.9.gz b/training/dtrain/test/parallelize/g/grammar.out.9.gz deleted file mode 100644 index c1791059..00000000 Binary files a/training/dtrain/test/parallelize/g/grammar.out.9.gz and /dev/null differ diff --git a/training/dtrain/test/parallelize/in b/training/dtrain/test/parallelize/in deleted file mode 100644 index 3b7dec39..00000000 --- a/training/dtrain/test/parallelize/in +++ /dev/null @@ -1,10 +0,0 @@ -europas nach rassen geteiltes haus -ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen . -der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln . -während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden . -eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern . -die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden . -das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt . -die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen . -der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken . -genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen . diff --git a/training/dtrain/test/parallelize/refs b/training/dtrain/test/parallelize/refs deleted file mode 100644 index 632e27b0..00000000 --- a/training/dtrain/test/parallelize/refs +++ /dev/null @@ -1,10 +0,0 @@ -europe 's divided racial house -a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge . 
-the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them . -while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon . -an aging population at home and ever more open borders imply increasing racial fragmentation in european countries . -mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear . -it will not , as america 's racial history clearly shows . -race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes . -the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths . -this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us . diff --git a/training/dtrain/test/toy/cdec.ini b/training/dtrain/test/toy/cdec.ini deleted file mode 100644 index 98b02d44..00000000 --- a/training/dtrain/test/toy/cdec.ini +++ /dev/null @@ -1,2 +0,0 @@ -formalism=scfg -add_pass_through_rules=true diff --git a/training/dtrain/test/toy/dtrain.ini b/training/dtrain/test/toy/dtrain.ini deleted file mode 100644 index a091732f..00000000 --- a/training/dtrain/test/toy/dtrain.ini +++ /dev/null @@ -1,12 +0,0 @@ -decoder_config=test/toy/cdec.ini -input=test/toy/input -output=- -print_weights=logp shell_rule house_rule small_rule little_rule PassThrough -k=4 -N=4 -epochs=2 -scorer=bleu -sample_from=kbest -filter=uniq -pair_sampling=all -learning_rate=1 diff --git a/training/dtrain/test/toy/input b/training/dtrain/test/toy/input deleted file mode 100644 index 4d10a9ea..00000000 --- a/training/dtrain/test/toy/input +++ /dev/null @@ -1,2 +0,0 @@ -0 ich sah ein kleines haus i saw a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 house_rule=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 shell_rule=1 [JJ] ||| kleines ||| small ||| logp=0 small_rule=1 [JJ] ||| kleines ||| little ||| logp=0 little_rule=1 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0 -1 ich fand ein kleines haus i found a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 house_rule=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 shell_rule=1 [JJ] ||| kleines ||| small ||| logp=0 small_rule=1 [JJ] ||| kleines ||| little ||| logp=0 little_rule=1 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0 -- cgit v1.2.3 From ce2f5608e15a3d3e080ab4b26b5f263fead215e2 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Fri, 15 Mar 2013 12:46:03 +0100 Subject: make perceptron automatically faster 
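
The compile-time DTRAIN_FASTER_PERCEPTRON switch becomes a runtime decision: when gamma==0 (no SVM term) and loss_margin==0, a plain perceptron only ever updates on misranked pairs, so the pair samplers can skip correctly ranked pairs up front and no recompile is needed. The following standalone sketch is illustrative only and not part of this patch; the Hyp struct and the numbers are invented, and the real test lives in pairsampling.h:

#include <iostream>
#include <utility>
#include <vector>

// toy stand-in for dtrain's scored hypotheses: metric score vs. model score
struct Hyp { double score; double model; };

int main() {
  // three hypotheses for one source sentence (invented values)
  Hyp a = {0.9, 2.0}, b = {0.5, 3.0}, c = {0.2, 1.0};
  std::vector<Hyp> kbest;
  kbest.push_back(a); kbest.push_back(b); kbest.push_back(c);
  bool misranked_only = true; // what gamma==0 && loss_margin==0 now enables
  std::vector<std::pair<Hyp,Hyp> > pairs;
  for (size_t i = 0; i < kbest.size(); i++) {
    for (size_t j = 0; j < kbest.size(); j++) {
      if (!(kbest[i].score > kbest[j].score)) continue; // first element must be better by the metric
      // the same test this patch adds to the samplers:
      // keep the pair only if the model currently ranks it the wrong way round
      if (misranked_only && !(kbest[i].model <= kbest[j].model)) continue;
      pairs.push_back(std::make_pair(kbest[i], kbest[j]));
    }
  }
  std::cout << pairs.size() << " update pair(s)" << std::endl; // prints: 1 update pair(s)
  return 0;
}

Making this a runtime flag costs one branch per candidate pair, but the same binary now serves both the fast perceptron path and the exact SVM/margin path.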
--- training/dtrain/dtrain.cc | 36 ++++++++++++++++++++---------------- training/dtrain/pairsampling.h | 21 ++++++--------------- 2 files changed, 26 insertions(+), 31 deletions(-) (limited to 'training') diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc index fcb46db2..2bb4ec98 100644 --- a/training/dtrain/dtrain.cc +++ b/training/dtrain/dtrain.cc @@ -6,7 +6,8 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg) { po::options_description ini("Configuration File Options"); ini.add_options() - ("input", po::value<string>()->default_value("-"), "input file") + ("input", po::value<string>()->default_value("-"), "input file (src)") + ("refs,r", po::value<string>(), "references") ("output", po::value<string>()->default_value("-"), "output weights file, '-' for STDOUT") ("input_weights", po::value<string>(), "input weights file (e.g. from previous iteration)") ("decoder_config", po::value<string>(), "configuration file for cdec") @@ -33,8 +34,8 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg) ("scale_bleu_diff", po::value<bool>()->zero_tokens(), "learning rate <- bleu diff of a misranked pair") ("loss_margin", po::value<weight_t>()->default_value(0.), "update if no error in pref pair but model scores this near") ("max_pairs", po::value<unsigned>()->default_value(std::numeric_limits<unsigned>::max()), "max. # of pairs per Sent.") - ("refs,r", po::value<string>(), "references in local mode") - ("noup", po::value<bool>()->zero_tokens(), "do not update weights"); + ("noup", po::value<bool>()->zero_tokens(), "do not update weights") + ("pair_stats", po::value<bool>()->zero_tokens(), "stats about correctly ranked/misranked pairs even if loss_margin=0 and gamma=0"); po::options_description cl("Command Line Options"); cl.add_options() ("config,c", po::value<string>(), "dtrain config file") @@ -124,6 +125,10 @@ main(int argc, char** argv) vector<string> print_weights; if (cfg.count("print_weights")) boost::split(print_weights, cfg["print_weights"].as<string>(), boost::is_any_of(" ")); + bool pair_stats = false; + if (cfg.count("pair_stats")) pair_stats = true; + bool faster_perceptron = false; + if (gamma==0 && loss_margin==0 && !pair_stats) faster_perceptron = true; // setup decoder register_feature_functions(); @@ -346,25 +351,26 @@ main(int argc, char** argv) // get pairs vector<pair<ScoredHyp,ScoredHyp> > pairs; if (pair_sampling == "all") - all_pairs(samples, pairs, pair_threshold, max_pairs); + all_pairs(samples, pairs, pair_threshold, max_pairs, faster_perceptron); if (pair_sampling == "XYX") - partXYX(samples, pairs, pair_threshold, max_pairs, hi_lo); + partXYX(samples, pairs, pair_threshold, max_pairs, faster_perceptron, hi_lo); if (pair_sampling == "PRO") PROsampling(samples, pairs, pair_threshold, max_pairs); npairs += pairs.size(); for (vector<pair<ScoredHyp,ScoredHyp> >::iterator it = pairs.begin(); it != pairs.end(); it++) { -#ifdef DTRAIN_FASTER_PERCEPTRON - bool rank_error = true; // pair sampling already did this for us - rank_errors++; - score_t margin = std::numeric_limits<score_t>::max(); -#else - bool rank_error = it->first.model <= it->second.model; + bool rank_error; + score_t margin; + if (faster_perceptron) { // we only consider misranked pairs here + rank_error = true; // pair sampling already did this for us + margin = std::numeric_limits<score_t>::max(); + } else { + rank_error = it->first.model <= it->second.model; + margin = fabs(fabs(it->first.model) - fabs(it->second.model)); + if (!rank_error && margin < loss_margin) margin_violations++; + } if (rank_error) rank_errors++; - score_t margin = fabs(fabs(it->first.model) - fabs(it->second.model)); - if (!rank_error && margin < loss_margin) margin_violations++; -#endif if
(scale_bleu_diff) eta = it->first.score - it->second.score; if (rank_error || margin < loss_margin) { SparseVector<weight_t> diff_vec = it->first.f - it->second.f; @@ -458,10 +464,8 @@ main(int argc, char** argv) cerr << _np << npairs/(float)in_sz << endl; cerr << " avg # rank err: "; cerr << rank_errors/(float)in_sz << endl; -#ifndef DTRAIN_FASTER_PERCEPTRON cerr << " avg # margin viol: "; cerr << margin_violations/(float)in_sz << endl; -#endif cerr << " non0 feature count: " << nonz << endl; cerr << " avg list sz: " << list_sz/(float)in_sz << endl; cerr << " avg f count: " << f_count/(float)list_sz << endl; diff --git a/training/dtrain/pairsampling.h b/training/dtrain/pairsampling.h index 84be1efb..3f67e209 100644 --- a/training/dtrain/pairsampling.h +++ b/training/dtrain/pairsampling.h @@ -19,7 +19,7 @@ cmp_hyp_by_score_d(ScoredHyp a, ScoredHyp b) } inline void -all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, float _unused=1) +all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, bool misranked_only, float _unused=1) { sort(s->begin(), s->end(), cmp_hyp_by_score_d); unsigned sz = s->size(); @@ -27,6 +27,7 @@ all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, sc unsigned count = 0; for (unsigned i = 0; i < sz-1; i++) { for (unsigned j = i+1; j < sz; j++) { + if (misranked_only && !((*s)[i].model <= (*s)[j].model)) continue; if (threshold > 0) { if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) training.push_back(make_pair((*s)[i], (*s)[j])); @@ -51,7 +52,7 @@ all_pairs(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, sc */ inline void -partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, float hi_lo) +partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, bool misranked_only, float hi_lo) { unsigned sz = s->size(); if (sz < 2) return; @@ -64,9 +65,7 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor unsigned count = 0; for (unsigned i = 0; i < sep_hi; i++) { for (unsigned j = sep_hi; j < sz; j++) { -#ifdef DTRAIN_FASTER_PERCEPTRON - if ((*s)[i].model <= (*s)[j].model) { -#endif + if (misranked_only && !((*s)[i].model <= (*s)[j].model)) continue; if (threshold > 0) { if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) training.push_back(make_pair((*s)[i], (*s)[j])); @@ -78,9 +77,6 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor b = true; break; } -#ifdef DTRAIN_FASTER_PERCEPTRON - } -#endif } if (b) break; } @@ -88,9 +84,7 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor while (sep_lo > 0 && (*s)[sep_lo-1].score == (*s)[sep_lo].score) --sep_lo; for (unsigned i = sep_hi; i < sz-sep_lo; i++) { for (unsigned j = sz-sep_lo; j < sz; j++) { -#ifdef DTRAIN_FASTER_PERCEPTRON - if ((*s)[i].model <= (*s)[j].model) { -#endif + if (misranked_only && !((*s)[i].model <= (*s)[j].model)) continue; if (threshold > 0) { if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) training.push_back(make_pair((*s)[i], (*s)[j])); @@ -99,9 +93,6 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor training.push_back(make_pair((*s)[i], (*s)[j])); } if (++count == max) return; -#ifdef DTRAIN_FASTER_PERCEPTRON - } -#endif } } } @@ -119,7 +110,7 @@ _PRO_cmp_pair_by_diff_d(pair<ScoredHyp,ScoredHyp> a, pair<ScoredHyp,ScoredHyp> b return (fabs(a.first.score - a.second.score)) > (fabs(b.first.score - b.second.score)); } inline void -PROsampling(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, float _unused=1) +PROsampling(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, bool _unused=false, float _also_unused=0) { unsigned max_count = 5000, count = 0, sz = s->size(); bool b = false; --
cgit v1.2.3 From d2b1c3d182863b7d39d22b589661d71608bebac8 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Fri, 15 Mar 2013 16:06:05 +0100 Subject: fix --- training/dtrain/dtrain.cc | 18 +- training/dtrain/dtrain.h | 3 - training/dtrain/examples/standard/dtrain.ini | 8 +- training/dtrain/examples/standard/expected-output | 1163 +-------------------- 4 files changed, 39 insertions(+), 1153 deletions(-) (limited to 'training') diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc index 2bb4ec98..149f87d4 100644 --- a/training/dtrain/dtrain.cc +++ b/training/dtrain/dtrain.cc @@ -34,8 +34,7 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg) ("scale_bleu_diff", po::value<bool>()->zero_tokens(), "learning rate <- bleu diff of a misranked pair") ("loss_margin", po::value<weight_t>()->default_value(0.), "update if no error in pref pair but model scores this near") ("max_pairs", po::value<unsigned>()->default_value(std::numeric_limits<unsigned>::max()), "max. # of pairs per Sent.") - ("noup", po::value<bool>()->zero_tokens(), "do not update weights") - ("pair_stats", po::value<bool>()->zero_tokens(), "stats about correctly ranked/misranked pairs even if loss_margin=0 and gamma=0"); + ("noup", po::value<bool>()->zero_tokens(), "do not update weights"); po::options_description cl("Command Line Options"); cl.add_options() ("config,c", po::value<string>(), "dtrain config file") @@ -125,10 +124,7 @@ main(int argc, char** argv) vector<string> print_weights; if (cfg.count("print_weights")) boost::split(print_weights, cfg["print_weights"].as<string>(), boost::is_any_of(" ")); - bool pair_stats = false; - if (cfg.count("pair_stats")) pair_stats = true; - bool faster_perceptron = false; - if (gamma==0 && loss_margin==0 && !pair_stats) faster_perceptron = true; + // setup decoder register_feature_functions(); @@ -185,6 +181,11 @@ main(int argc, char** argv) weight_t eta = cfg["learning_rate"].as<weight_t>(); weight_t gamma = cfg["gamma"].as<weight_t>(); + // faster perceptron: consider only misranked pairs, see + // DO NOT ENABLE WITH SVM (gamma > 0) OR loss_margin! + bool faster_perceptron = false; + if (gamma==0 && loss_margin==0) faster_perceptron = true; + // l1 regularization bool l1naive = false; bool l1clip = false; @@ -232,6 +233,7 @@ main(int argc, char** argv) else cerr << setw(25) << "learning rate " << "bleu diff" << endl; cerr << setw(25) << "gamma " << gamma << endl; cerr << setw(25) << "loss margin " << loss_margin << endl; + cerr << setw(25) << "faster perceptron " << faster_perceptron << endl; cerr << setw(25) << "pairs " << "'" << pair_sampling << "'" << endl; if (pair_sampling == "XYX") cerr << setw(25) << "hi lo " << hi_lo << endl; @@ -461,7 +463,9 @@ main(int argc, char** argv) cerr << _np << " 1best avg model score: " << model_avg; cerr << _p << " (" << model_diff << ")" << endl; cerr << " avg # pairs: "; - cerr << _np << npairs/(float)in_sz << endl; + cerr << _np << npairs/(float)in_sz; + if (faster_perceptron) cerr << " (meaningless)"; + cerr << endl; cerr << " avg # rank err: "; cerr << rank_errors/(float)in_sz << endl; cerr << " avg # margin viol: "; diff --git a/training/dtrain/dtrain.h b/training/dtrain/dtrain.h index f368d810..eb0b9f17 100644 --- a/training/dtrain/dtrain.h +++ b/training/dtrain/dtrain.h @@ -1,9 +1,6 @@ #ifndef _DTRAIN_H_ #define _DTRAIN_H_ -#undef DTRAIN_FASTER_PERCEPTRON // only consider actually misranked pairs - // DO NOT ENABLE WITH SVM (gamma > 0) OR loss_margin! - #define DTRAIN_DOTS 10 // after how many inputs to display a '.'
#define DTRAIN_SCALE 100000 diff --git a/training/dtrain/examples/standard/dtrain.ini b/training/dtrain/examples/standard/dtrain.ini index a05e9c29..e1072d30 100644 --- a/training/dtrain/examples/standard/dtrain.ini +++ b/training/dtrain/examples/standard/dtrain.ini @@ -1,12 +1,12 @@ input=./nc-wmt11.de.gz refs=./nc-wmt11.en.gz output=- # a weights file (add .gz for gzip compression) or STDOUT '-' -select_weights=avg # output average (over epochs) weight vector +select_weights=VOID # don't output weights decoder_config=./cdec.ini # config for cdec # weights for these features will be printed on each iteration -print_weights= EgivenFCoherent SampleCountF CountEF MaxLexFgivenE MaxLexEgivenF IsSingletonF IsSingletonFE Glue WordPenalty PassThrough LanguageModel LanguageModel_OOV +print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough # newer version of the grammar extractor use different feature names: -#print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough +#print_weights= EgivenFCoherent SampleCountF CountEF MaxLexFgivenE MaxLexEgivenF IsSingletonF IsSingletonFE Glue WordPenalty PassThrough LanguageModel LanguageModel_OOV stop_after=10 # stop epoch after 10 inputs # interesting stuff @@ -21,4 +21,4 @@ filter=uniq # only unique entries in kbest (surface form) pair_sampling=XYX # hi_lo=0.1 # 10 vs 80 vs 10 and 80 vs 10 here pair_threshold=0 # minimum distance in BLEU (here: > 0) -loss_margin=0 +loss_margin=0 # update if correctly ranked, but within this margin diff --git a/training/dtrain/examples/standard/expected-output b/training/dtrain/examples/standard/expected-output index 8d72f4c3..7cd09dbf 100644 --- a/training/dtrain/examples/standard/expected-output +++ b/training/dtrain/examples/standard/expected-output @@ -4,7 +4,7 @@ Reading ./nc-wmt11.en.srilm.gz ----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 **************************************************************************************************** Example feature: Shape_S00000_T00000 -Seeding random number sequence to 1511823303 +Seeding random number sequence to 2679584485 dtrain Parameters: @@ -17,10 +17,11 @@ Parameters: learning rate 1 gamma 0 loss margin 0 + faster perceptron 1 pairs 'XYX' hi lo 0.1 pair threshold 0 - select weights 'avg' + select weights 'VOID' l1 reg 0 'none' max pairs 4294967295 cdec cfg './cdec.ini' @@ -33,1174 +34,58 @@ Iteration #1 of 2. . 10 Stopping after 10 input sentences. WEIGHTS - EgivenFCoherent = +0 - SampleCountF = +0 - CountEF = +0 - MaxLexFgivenE = +0 - MaxLexEgivenF = +0 - IsSingletonF = +0 - IsSingletonFE = +0 Glue = -576 WordPenalty = +417.79 - PassThrough = -1455 LanguageModel = +5117.5 LanguageModel_OOV = -1307 + PhraseModel_0 = -1612 + PhraseModel_1 = -2159.6 + PhraseModel_2 = -677.36 + PhraseModel_3 = +2663.8 + PhraseModel_4 = -1025.9 + PhraseModel_5 = -8 + PhraseModel_6 = +70 + PassThrough = -1455 --- 1best avg score: 0.27697 (+0.27697) 1best avg model score: -47918 (-47918) - avg # pairs: 1129.8 + avg # pairs: 581.9 (meaningless) avg # rank err: 581.9 avg # margin viol: 0 non0 feature count: 703 avg list sz: 90.9 avg f count: 100.09 -(time 0.33 min, 2 s/S) +(time 0.25 min, 1.5 s/S) Iteration #2 of 2. .
10 WEIGHTS
-  EgivenFCoherent = +0
-  SampleCountF = +0
-  CountEF = +0
-  MaxLexFgivenE = +0
-  MaxLexEgivenF = +0
-  IsSingletonF = +0
-  IsSingletonFE = +0
   Glue = -622
   WordPenalty = +898.56
-  PassThrough = -2578
   LanguageModel = +8066.2
   LanguageModel_OOV = -2590
+  PhraseModel_0 = -4335.8
+  PhraseModel_1 = -5864.4
+  PhraseModel_2 = -1729.8
+  PhraseModel_3 = +2831.9
+  PhraseModel_4 = -5384.8
+  PhraseModel_5 = +1449
+  PhraseModel_6 = +480
+  PassThrough = -2578
 ---
 1best avg score: 0.37119 (+0.094226)
 1best avg model score: -1.3174e+05 (-83822)
-avg # pairs: 1214.9
+avg # pairs: 584.1 (meaningless)
 avg # rank err: 584.1
 avg # margin viol: 0
 non0 feature count: 1115
 avg list sz: 91.3
 avg f count: 90.755
-(time 0.27 min, 1.6 s/S)
+(time 0.3 min, 1.8 s/S)
 
 Writing weights file to '-' ...
[... several hundred deleted per-feature weight lines from the old expected output elided: sparse rule features (R:X:*), rule-bigram features (RBS:*, RBT:*), Shape_* features, and the dense Glue, PassThrough, WordPenalty, LanguageModel, and PhraseModel_0-6 weights ...]
 done
 ---
 Best iteration: 2 [SCORE 'stupid_bleu'=0.37119].
-This took 0.6 min.
+This took 0.55 min.
--
cgit v1.2.3


From a931d2df4bad5ecc220b62874fb63dc3b8d00ee9 Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Wed, 20 Mar 2013 12:56:46 -0400
Subject: switch to new score interface for mira

---
 training/mira/kbest_mira.cc | 41 +++++++++++++++++++++++++++--------------
 1 file changed, 27 insertions(+), 14 deletions(-)

(limited to 'training')

diff --git a/training/mira/kbest_mira.cc b/training/mira/kbest_mira.cc
index 8b7993dd..bcb261c9 100644
--- a/training/mira/kbest_mira.cc
+++ b/training/mira/kbest_mira.cc
@@ -8,9 +8,11 @@
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
 
+#include "stringlib.h"
 #include "hg_sampler.h"
 #include "sentence_metadata.h"
-#include "scorer.h"
+#include "ns.h"
+#include "ns_docscorer.h"
 #include "verbose.h"
 #include "viterbi.h"
 #include "hg.h"
@@ -91,8 +93,9 @@ struct GoodBadOracle {
 };
 
 struct TrainingObserver : public DecoderObserver {
-  TrainingObserver(const int k, const DocScorer& d, bool sf, vector<GoodBadOracle>* o) : ds(d), oracles(*o), kbest_size(k), sample_forest(sf) {}
-  const DocScorer& ds;
+  TrainingObserver(const int k, const DocumentScorer& d, const EvaluationMetric& m, bool sf, vector<GoodBadOracle>* o) : ds(d), metric(m), oracles(*o), kbest_size(k), sample_forest(sf) {}
+  const DocumentScorer& ds;
+  const EvaluationMetric& metric;
   vector<GoodBadOracle>& oracles;
   std::tr1::shared_ptr<HypothesisInfo> cur_best;
   const int kbest_size;
@@ -121,13 +124,16 @@ struct TrainingObserver : public DecoderObserver {
     if (sample_forest) {
       vector<WordID> cur_prediction;
       ViterbiESentence(forest, &cur_prediction);
-      float sentscore = ds[sent_id]->ScoreCandidate(cur_prediction)->ComputeScore();
+      SufficientStats sstats;
+      ds[sent_id]->Evaluate(cur_prediction, &sstats);
+      float sentscore = metric.ComputeScore(sstats);
       cur_best = MakeHypothesisInfo(ViterbiFeatures(forest), sentscore);
 
       vector<HypergraphSampler::Hypothesis> samples;
       HypergraphSampler::sample_hypotheses(forest, kbest_size, &*rng, &samples);
       for (unsigned i = 0; i < samples.size(); ++i) {
-        sentscore = ds[sent_id]->ScoreCandidate(samples[i].words)->ComputeScore();
+        ds[sent_id]->Evaluate(samples[i].words, &sstats);
+        float sentscore = metric.ComputeScore(sstats);
         if (invert_score) sentscore *= -1.0;
         if (!cur_good || sentscore > cur_good->mt_metric)
           cur_good = MakeHypothesisInfo(samples[i].fmap, sentscore);
@@ -136,11 +142,13 @@
       }
     } else {
       KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(forest, kbest_size);
+      SufficientStats sstats;
       for (int i = 0; i < kbest_size; ++i) {
         const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d =
           kbest.LazyKthBest(forest.nodes_.size() - 1, i);
         if (!d) break;
-        float sentscore = ds[sent_id]->ScoreCandidate(d->yield)->ComputeScore();
+        ds[sent_id]->Evaluate(d->yield, &sstats);
+        float sentscore = metric.ComputeScore(sstats);
         if (invert_score) sentscore *= -1.0;
         // cerr << TD::GetString(d->yield) << " ||| " << d->score << " ||| " << sentscore << endl;
         if (i == 0)
@@ -192,15 +200,20 @@ int main(int argc, char** argv) {
   }
   vector<string> corpus;
   ReadTrainingCorpus(conf["source"].as<string>(), &corpus);
-  const string metric_name = conf["mt_metric"].as<string>();
-  ScoreType type = ScoreTypeFromString(metric_name);
-  if (type == TER) {
-    invert_score = true;
-  } else {
-    invert_score = false;
+
+  string metric_name = UppercaseString(conf["evaluation_metric"].as<string>());
+  if (metric_name == "COMBI") {
+    cerr << "WARNING: 'combi' metric is no longer supported, switching to 'COMB:TER=-0.5;IBM_BLEU=0.5'\n";
+    metric_name = "COMB:TER=-0.5;IBM_BLEU=0.5";
+  } else if (metric_name == "BLEU") {
+    cerr << "WARNING: 'BLEU' is ambiguous, assuming 'IBM_BLEU'\n";
+    metric_name = "IBM_BLEU";
   }
-  DocScorer ds(type, conf["reference"].as<vector<string> >(), "");
+  EvaluationMetric* metric = EvaluationMetric::Instance(metric_name);
+  DocumentScorer ds(metric, conf["reference"].as<vector<string> >());
   cerr << "Loaded " << ds.size() << " references for scoring with " << metric_name << endl;
+  invert_score = metric->IsErrorMetric();
+
   if (ds.size() != corpus.size()) {
     cerr << "Mismatched number of references (" << ds.size() << ") and sources (" << corpus.size() << ")\n";
     return 1;
@@ -221,7 +234,7 @@ int main(int argc, char** argv) {
 
   assert(corpus.size() > 0);
   vector<GoodBadOracle> oracles(corpus.size());
-  TrainingObserver observer(conf["k_best_size"].as<int>(), ds, sample_forest, &oracles);
+  TrainingObserver observer(conf["k_best_size"].as<int>(), ds, *metric, sample_forest, &oracles);
   int cur_sent = 0;
   int lcount = 0;
   int normalizer = 0;
--
cgit v1.2.3


From da52ee6fa4af02b811b8b558ec8437384d2ba5bd Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Wed, 20 Mar 2013 13:00:22 -0400
Subject: bug fix

---
 training/mira/kbest_mira.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'training')

diff --git a/training/mira/kbest_mira.cc b/training/mira/kbest_mira.cc
index bcb261c9..d59b4224 100644
--- a/training/mira/kbest_mira.cc
+++ b/training/mira/kbest_mira.cc
@@ -201,7 +201,7 @@ int main(int argc, char** argv) {
   vector<string> corpus;
   ReadTrainingCorpus(conf["source"].as<string>(), &corpus);
 
-  string metric_name = UppercaseString(conf["evaluation_metric"].as<string>());
+  string metric_name = UppercaseString(conf["mt_metric"].as<string>());
   if (metric_name == "COMBI") {
     cerr << "WARNING: 'combi' metric is no longer supported, switching to 'COMB:TER=-0.5;IBM_BLEU=0.5'\n";
     metric_name = "COMB:TER=-0.5;IBM_BLEU=0.5";
--
cgit v1.2.3


From 2e589c5b297e27a82729084991841d8ab1e1d336 Mon Sep 17 00:00:00 2001
From: Avneesh Saluja
Date: Thu, 28 Mar 2013 18:58:31 -0700
Subject: latent SVM

---
 training/latent_svm/Makefile.am   |   6 +
 training/latent_svm/latent_svm.cc | 412 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 418 insertions(+)
 create mode 100644 training/latent_svm/Makefile.am
 create mode 100644 training/latent_svm/latent_svm.cc

(limited to 'training')

diff --git a/training/latent_svm/Makefile.am b/training/latent_svm/Makefile.am
new file mode 100644
index 00000000..673b9159
--- /dev/null
+++ b/training/latent_svm/Makefile.am
@@ -0,0 +1,6 @@
+bin_PROGRAMS = latent_svm
+
+latent_svm_SOURCES = latent_svm.cc
+latent_svm_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
+
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
diff --git a/training/latent_svm/latent_svm.cc b/training/latent_svm/latent_svm.cc
new file mode 100644
index 00000000..ab9c1d5d
--- /dev/null
+++ b/training/latent_svm/latent_svm.cc
@@ -0,0 +1,412 @@
+/*
+Points to note regarding variable names:
+total_loss and prev_loss actually refer not to loss, but to the metric (usually BLEU)
+*/
+#include <sstream>
+#include <iostream>
+#include <vector>
+#include <cassert>
+#include <cmath>
+
+//boost libraries
+#include <boost/shared_ptr.hpp>
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+//cdec libraries
+#include "config.h"
+#include "hg_sampler.h"
+#include "sentence_metadata.h"
+#include "scorer.h"
+#include "verbose.h"
+#include "viterbi.h"
+#include "hg.h"
+#include "prob.h"
+#include "kbest.h"
+#include "ff_register.h"
+#include "decoder.h"
+#include "filelib.h"
+#include "fdict.h"
+#include "weights.h"
+#include "sparse_vector.h"
+#include "sampler.h"
+
+using namespace std;
+using boost::shared_ptr;
+namespace po = boost::program_options;
+
+bool invert_score;
+boost::shared_ptr<MT19937> rng; //random seed ptr
+
+void RandomPermutation(int len, vector<int>* p_ids) {
+  vector<int>& ids = *p_ids;
+  ids.resize(len);
+  for (int i = 0; i < len; ++i) ids[i] = i;
+  for (int i = len; i > 0; --i) {
+    int j = rng->next() * i;
+    if (j == i) i--;
+    swap(ids[i-1], ids[j]);
+  }
+}
+
+bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description opts("Configuration options");
+  opts.add_options()
+    ("weights,w",po::value<string>(),"[REQD] Input feature weights file")
+    ("input,i",po::value<string>(),"[REQD] Input source file for development set")
+    ("passes,p", po::value<int>()->default_value(15), "Number of passes through the training data")
+    ("weights_write_interval,n", po::value<int>()->default_value(1000), "Number of lines between writing out weights")
+    ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation(s) (tokenized text file)")
+    ("mt_metric,m",po::value<string>()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)")
+    ("regularizer_strength,C", po::value<double>()->default_value(0.01), "regularization strength")
+    ("mt_metric_scale,s", po::value<double>()->default_value(1.0), "Cost function is -mt_metric_scale*BLEU")
+    ("costaug_log_bleu,l", "Flag converts BLEU to log space. Cost function is thus -mt_metric_scale*log(BLEU). Not on by default")
+    ("average,A", "Average the weights (this is a weighted average due to the scaling factor)")
+    ("mu,u", po::value<double>()->default_value(0.0), "weight (between 0 and 1) to scale model score by for oracle selection")
+    ("stepsize_param,a", po::value<double>()->default_value(0.01), "Stepsize parameter, during optimization")
+    ("stepsize_reduce,t", "Divide step size by sqrt(number of examples seen so far), as per Ratliff et al., 2007")
+    ("metric_threshold,T", po::value<double>()->default_value(0.0), "Threshold for diff between oracle BLEU and cost-aug BLEU for updating the weights")
+    ("check_positive,P", "Check that the loss is positive before updating")
+    ("k_best_size,k", po::value<int>()->default_value(250), "Size of hypothesis list to search for oracles")
+    ("best_ever,b", "Keep track of the best hypothesis we've ever seen (metric score), and use that as the reference")
+    ("random_seed,S", po::value<uint32_t>(), "Random seed (if not specified, /dev/random will be used)")
+    ("decoder_config,c",po::value<string>(),"Decoder configuration file");
+  po::options_description clo("Command line options");
+  clo.add_options()
+    ("config", po::value<string>(), "Configuration file")
+    ("help,h", "Print this help message and exit");
+  po::options_description dconfig_options, dcmdline_options;
+  dconfig_options.add(opts);
+  dcmdline_options.add(opts).add(clo);
+
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  if (conf->count("config")) {
+    ifstream config((*conf)["config"].as<string>().c_str());
+    po::store(po::parse_config_file(config, dconfig_options), *conf);
+  }
+  po::notify(*conf);
+
+  if (conf->count("help") || !conf->count("weights") || !conf->count("input") || !conf->count("decoder_config") || !conf->count("reference")) {
+    cerr << dcmdline_options << endl;
+    return false;
+  }
+  return true;
+}
+
+double scaling_trick = 1; // see http://blog.smola.org/post/940672544/fast-quadratic-regularization-for-online-learning
+/*computes and returns cost augmented score for negative example selection*/
+double cost_augmented_score(const LogVal<double> model_score, const double mt_metric_score, const double mt_metric_scale, const bool logbleu) {
+  if(logbleu) {
+    if(mt_metric_score != 0)
+      // NOTE: log(model_score) is just the model score feature weights * features
+      return log(model_score) * scaling_trick + (- mt_metric_scale * log(mt_metric_score));
+    else
+      return -1000000;
+  }
+  // NOTE: log(model_score) is just the model score feature weights * features
+  return log(model_score) * scaling_trick + (- mt_metric_scale * mt_metric_score);
+}
+
+/*computes and returns mu score, for oracle selection*/
+double muscore(const vector<weight_t>& feature_weights, const SparseVector<double>& feature_values, const double mt_metric_score, const double mu, const bool logbleu) {
+  if(logbleu) {
+    if(mt_metric_score != 0)
+      return feature_values.dot(feature_weights) * mu + (1 - mu) * log(mt_metric_score);
+    else
+      return feature_values.dot(feature_weights) * mu + (1 - mu) * (-1000000); // log(0) is -inf
+  }
+  return feature_values.dot(feature_weights) * mu + (1 - mu) * mt_metric_score;
+}
+
+static const double kMINUS_EPSILON = -1e-6;
+
+struct HypothesisInfo {
+  SparseVector<double> features;
+  double mt_metric_score;
+  // The model score changes when the feature weights change, so it is not stored here
+  // It must be recomputed every time
+};
+
+struct GoodOracle {
+  shared_ptr<HypothesisInfo> good;
+};
+
+struct TrainingObserver : public DecoderObserver {
+  TrainingObserver(const int k,
+                   const DocScorer& d,
+                   vector<GoodOracle>* o,
+                   const vector<weight_t>& feat_weights,
+                   const double metric_scale,
+                   const double Mu,
+                   const bool bestever,
+                   const bool LogBleu) : ds(d), feature_weights(feat_weights), oracles(*o), kbest_size(k), mt_metric_scale(metric_scale), mu(Mu), best_ever(bestever), log_bleu(LogBleu) {}
+  const DocScorer& ds;
+  const vector<weight_t>& feature_weights;
+  vector<GoodOracle>& oracles;
+  shared_ptr<HypothesisInfo> cur_best;
+  shared_ptr<HypothesisInfo> cur_costaug_best;
+  shared_ptr<HypothesisInfo> cur_ref;
+  const int kbest_size;
+  const double mt_metric_scale;
+  const double mu;
+  const bool best_ever;
+  const bool log_bleu;
+
+  const HypothesisInfo& GetCurrentBestHypothesis() const {
+    return *cur_best;
+  }
+
+  const HypothesisInfo& GetCurrentCostAugmentedHypothesis() const {
+    return *cur_costaug_best;
+  }
+
+  const HypothesisInfo& GetCurrentReference() const {
+    return *cur_ref;
+  }
+
+  virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) {
+    UpdateOracles(smeta.GetSentenceID(), *hg);
+  }
+
+  shared_ptr<HypothesisInfo> MakeHypothesisInfo(const SparseVector<double>& feats, const double metric) {
+    shared_ptr<HypothesisInfo> h(new HypothesisInfo);
+    h->features = feats;
+    h->mt_metric_score = metric;
+    return h;
+  }
+
+  void UpdateOracles(int sent_id, const Hypergraph& forest) {
+    //shared_ptr<HypothesisInfo>& cur_ref = oracles[sent_id].good;
+    cur_ref = oracles[sent_id].good;
+    if(!best_ever)
+      cur_ref.reset();
+
+    KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(forest, kbest_size);
+    double costaug_best_score = 0;
+
+    for (int i = 0; i < kbest_size; ++i) {
+      const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d =
+        kbest.LazyKthBest(forest.nodes_.size() - 1, i);
+      if (!d) break;
+      double mt_metric_score = ds[sent_id]->ScoreCandidate(d->yield)->ComputeScore(); //this might need to change!!
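// [Illustrative aside, not part of the patch] The selection code below picks, per sentence,
// a cost-augmented "fear" hypothesis and a mu-mixed "hope" oracle, using the
// cost_augmented_score and muscore helpers defined above. A minimal sketch of the two
// objectives being maximized (model_score = w*f(y), B = sentence-level metric score):

// fear: high model score but low metric; cost = -mt_metric_scale * B
double fear_objective(double model_score, double B, double scale) {
  return model_score - scale * B;
}

// hope: interpolate model score and metric with mu in [0,1]; mu = 0 selects on metric alone
double hope_objective(double model_score, double B, double mu) {
  return mu * model_score + (1.0 - mu) * B;
}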
+      const SparseVector<double>& feature_vals = d->feature_values;
+      double costaugmented_score = cost_augmented_score(d->score, mt_metric_score, mt_metric_scale, log_bleu); //note that d->score, i.e., model score, is passed in
+      if (i == 0) { //i.e., setting up cur_best to be model score highest, and initializing costaug_best
+        cur_best = MakeHypothesisInfo(feature_vals, mt_metric_score);
+        cur_costaug_best = cur_best;
+        costaug_best_score = costaugmented_score;
+      }
+      if (costaugmented_score > costaug_best_score) { // kbest_mira's cur_bad, i.e., "fear" derivation
+        cur_costaug_best = MakeHypothesisInfo(feature_vals, mt_metric_score);
+        costaug_best_score = costaugmented_score;
+      }
+      double cur_muscore = mt_metric_score;
+      if (!cur_ref) // kbest_mira's cur_good, i.e., "hope" derivation
+        cur_ref = MakeHypothesisInfo(feature_vals, cur_muscore);
+      else {
+        double cur_ref_muscore = cur_ref->mt_metric_score;
+        if(mu > 0) { //select oracle with mixture of model score and BLEU
+          cur_ref_muscore = muscore(feature_weights, cur_ref->features, cur_ref->mt_metric_score, mu, log_bleu);
+          cur_muscore = muscore(feature_weights, d->feature_values, mt_metric_score, mu, log_bleu);
+        }
+        if (cur_muscore > cur_ref_muscore) //replace oracle
+          cur_ref = MakeHypothesisInfo(feature_vals, mt_metric_score);
+      }
+    }
+  }
+};
+
+void ReadTrainingCorpus(const string& fname, vector<string>* c) {
+  ReadFile rf(fname);
+  istream& in = *rf.stream();
+  string line;
+  while(in) {
+    getline(in, line);
+    if (!in) break;
+    c->push_back(line);
+  }
+}
+
+bool ApproxEqual(double a, double b) {
+  if (a == b) return true;
+  return (fabs(a-b)/fabs(b)) < 0.000001;
+}
+
+int main(int argc, char** argv) {
+  register_feature_functions();
+  SetSilent(true);  // turn off verbose decoder output
+
+  po::variables_map conf;
+  if (!InitCommandLine(argc, argv, &conf)) return 1;
+
+  if (conf.count("random_seed"))
+    rng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
+  else
+    rng.reset(new MT19937);
+
+  const bool best_ever = conf.count("best_ever") > 0;
+  vector<string> corpus;
+  ReadTrainingCorpus(conf["input"].as<string>(), &corpus);
+
+  const string metric_name = conf["mt_metric"].as<string>(); //set up scoring; this may need to be changed!!
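// [Illustrative aside, not part of the patch] The training loop below applies L2
// regularization lazily through the global scaling_trick (the Smola trick cited above):
// rather than shrinking every weight by (1 - 2*eta*C) after each update, one scalar
// accumulates the shrinkage and the updates and dot products are rescaled by it.
// A minimal standalone sketch (names are hypothetical; types come from this file's headers):

double scale = 1.0;  // the true weight vector is scale * w

void RegularizedUpdate(SparseVector<double>* w, const SparseVector<double>& grad,
                       double eta, double C) {
  // divide by ((1 - 2*eta*C) * scale) so that scale * w moves by exactly eta * grad
  // after the decay below is applied
  *w -= grad * (eta / ((1.0 - 2.0 * eta * C) * scale));
  scale *= (1.0 - 2.0 * eta * C);  // lazy L2 shrinkage of all weights at once
}
// before reading out or saving weights, fold the scale back in: *w *= scale; scale = 1.0;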
+
+  ScoreType type = ScoreTypeFromString(metric_name);
+  if (type == TER) {
+    invert_score = true;
+  } else {
+    invert_score = false;
+  }
+  DocScorer ds(type, conf["reference"].as<vector<string> >(), "");
+  cerr << "Loaded " << ds.size() << " references for scoring with " << metric_name << endl;
+  if (ds.size() != corpus.size()) {
+    cerr << "Mismatched number of references (" << ds.size() << ") and sources (" << corpus.size() << ")\n";
+    return 1;
+  }
+
+  ReadFile ini_rf(conf["decoder_config"].as<string>());
+  Decoder decoder(ini_rf.stream());
+
+  // load initial weights
+  vector<weight_t>& decoder_weights = decoder.CurrentWeightVector(); //equivalent to "dense_weights" vector in kbest_mira.cc
+  SparseVector<weight_t> sparse_weights; //equivalent to kbest_mira.cc "lambdas"
+  Weights::InitFromFile(conf["weights"].as<string>(), &decoder_weights);
+  Weights::InitSparseVector(decoder_weights, &sparse_weights);
+
+  //initializing other algorithm and output parameters
+  const double c = conf["regularizer_strength"].as<double>();
+  const int weights_write_interval = conf["weights_write_interval"].as<int>();
+  const double mt_metric_scale = conf["mt_metric_scale"].as<double>();
+  const double mu = conf["mu"].as<double>();
+  const double metric_threshold = conf["metric_threshold"].as<double>();
+  const double stepsize_param = conf["stepsize_param"].as<double>(); //step size in structured SGD optimization step
+  const bool stepsize_reduce = conf.count("stepsize_reduce") > 0;
+  const bool costaug_log_bleu = conf.count("costaug_log_bleu") > 0;
+  const bool average = conf.count("average") > 0;
+  const bool checkpositive = conf.count("check_positive") > 0;
+
+  assert(corpus.size() > 0);
+  vector<GoodOracle> oracles(corpus.size());
+  TrainingObserver observer(conf["k_best_size"].as<int>(),  // kbest size
+                            ds,                             // doc scorer
+                            &oracles,
+                            decoder_weights,
+                            mt_metric_scale,
+                            mu,
+                            best_ever,
+                            costaug_log_bleu);
+  int cur_sent = 0;
+  int line_count = 0;
+  int normalizer = 0;
+  double total_loss = 0;
+  double prev_loss = 0;
+  int dots = 0;  // progress bar
+  int cur_pass = 0;
+  SparseVector<weight_t> tot;
+  tot += sparse_weights; //add initial weights to total
+  normalizer++; //add 1 to normalizer
+  int max_iteration = conf["passes"].as<int>();
+  string msg = "# LatentSVM tuned weights";
+  vector<int> order;
+  int interval_counter = 0;
+  RandomPermutation(corpus.size(), &order); //shuffle corpus
+  while (line_count <= max_iteration * corpus.size()) { //loop over all (passes * num sentences) examples
+    //if ((interval_counter * 40 / weights_write_interval) > dots) { ++dots; cerr << '.'; } //check this
+    if ((cur_sent * 40 / corpus.size()) > dots) { ++dots; cerr << '.';}
+    if (interval_counter == weights_write_interval) { //i.e., we need to write out weights
+      sparse_weights *= scaling_trick;
+      tot *= scaling_trick;
+      scaling_trick = 1;
+      cerr << " [SENTENCE NUMBER= " << cur_sent << "\n";
+      cerr << " [AVG METRIC LAST INTERVAL =" << ((total_loss - prev_loss) / weights_write_interval) << "]\n";
+      cerr << " [AVG METRIC THIS PASS THUS FAR =" << (total_loss / cur_sent) << "]\n";
+      cerr << " [TOTAL LOSS: =" << total_loss << "\n";
+      Weights::ShowLargestFeatures(decoder_weights);
+      //dots = 0;
+      interval_counter = 0;
+      prev_loss = total_loss;
+      if (average){
+        SparseVector<weight_t> x = tot;
+        x /= normalizer;
+        ostringstream sa;
+        sa << "weights.latentsvm-" << line_count/weights_write_interval << "-avg.gz";
+        x.init_vector(&decoder_weights);
+        Weights::WriteToFile(sa.str(), decoder_weights, true, &msg);
+      }
+      else {
+        ostringstream os;
+        os << "weights.latentsvm-" << line_count/weights_write_interval << ".gz";
+        sparse_weights.init_vector(&decoder_weights);
+        Weights::WriteToFile(os.str(), decoder_weights, true, &msg);
+      }
+    }
+    if (corpus.size() == cur_sent) { //i.e., finished a pass
+      //cerr << " [AVG METRIC LAST PASS=" << (document_metric_score / corpus.size()) << "]\n";
+      cerr << " [AVG METRIC LAST PASS=" << (total_loss / corpus.size()) << "]\n";
+      cerr << " TOTAL LOSS: " << total_loss << "\n";
+      Weights::ShowLargestFeatures(decoder_weights);
+      cur_sent = 0;
+      total_loss = 0;
+      dots = 0;
+      if(average) {
+        SparseVector<weight_t> x = tot;
+        x /= normalizer;
+        ostringstream sa;
+        sa << "weights.latentsvm-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << "-avg.gz";
+        x.init_vector(&decoder_weights);
+        Weights::WriteToFile(sa.str(), decoder_weights, true, &msg);
+      }
+      else {
+        ostringstream os;
+        os << "weights.latentsvm-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << ".gz";
+        Weights::WriteToFile(os.str(), decoder_weights, true, &msg);
+      }
+      cur_pass++;
+      RandomPermutation(corpus.size(), &order);
+    }
+    if (cur_sent == 0) { //i.e., starting a new pass
+      cerr << "PASS " << (line_count / corpus.size() + 1) << endl;
+    }
+    sparse_weights.init_vector(&decoder_weights);  // copy sparse_weights to the decoder weights
+    decoder.SetId(order[cur_sent]); //assign current sentence
+    decoder.Decode(corpus[order[cur_sent]], &observer);  // decode/update oracles
+
+    const HypothesisInfo& cur_best = observer.GetCurrentBestHypothesis(); //model score best
+    const HypothesisInfo& cur_costaug = observer.GetCurrentCostAugmentedHypothesis(); //(model + cost) best; cost = -metric_scale*log(BLEU) or -metric_scale*BLEU
+    //const HypothesisInfo& cur_ref = *oracles[order[cur_sent]].good; //this oracle-best line only picks based on BLEU
+    const HypothesisInfo& cur_ref = observer.GetCurrentReference(); //if mu > 0, this mu-mixed oracle will be picked; otherwise, only on BLEU
+    total_loss += cur_best.mt_metric_score;
+
+    double step_size = stepsize_param;
+    if (stepsize_reduce){ // w_{t+1} = w_t - stepsize_t * grad(Loss)
+      step_size /= (sqrt(cur_sent+1.0));
+    }
+    //actual update step - compute gradient, and modify sparse_weights
+    if(cur_ref.mt_metric_score - cur_costaug.mt_metric_score > metric_threshold) {
+      const double loss = (cur_costaug.features.dot(decoder_weights) - cur_ref.features.dot(decoder_weights)) * scaling_trick + mt_metric_scale * (cur_ref.mt_metric_score - cur_costaug.mt_metric_score);
+      if (!checkpositive || loss > 0.0) { //can update either all the time if check positive is off, or only when loss > 0 if it's on
+        sparse_weights -= cur_costaug.features * step_size / ((1.0-2.0*step_size*c)*scaling_trick); // cost augmented hyp orig -
+        sparse_weights += cur_ref.features * step_size / ((1.0-2.0*step_size*c)*scaling_trick); // ref orig +
+      }
+    }
+    scaling_trick *= (1.0 - 2.0 * step_size * c);
+
+    tot += sparse_weights; //for averaging purposes
+    normalizer++; //for averaging purposes
+    line_count++;
+    interval_counter++;
+    cur_sent++;
+  }
+  cerr << endl;
+  if(average) {
+    tot /= normalizer;
+    tot.init_vector(&decoder_weights);
+    msg = "# Latent SSVM tuned weights (averaged vector)";
+    Weights::WriteToFile("weights.latentsvm-final-avg.gz", decoder_weights, true, &msg);
+    cerr << "Optimization complete.\n" << "AVERAGED WEIGHTS: weights.latentsvm-final-avg.gz\n";
+  } else {
+    Weights::WriteToFile("weights.latentsvm-final.gz", decoder_weights, true, &msg);
+    cerr << "Optimization complete.\n";
+  }
+  return 0;
+}
+
--
cgit v1.2.3


From 4c805062bf38fc1c97bf1f28adaa2dc8f160caad Mon Sep 17 00:00:00 2001
From: Avneesh Saluja
Date: Thu, 28 Mar 2013 19:01:24 -0700
Subject: updated Makefiles

---
 training/Makefile.am            | 1 +
 training/latent_svm/Makefile.am | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'training')

diff --git a/training/Makefile.am b/training/Makefile.am
index e95e045f..8ef3c939 100644
--- a/training/Makefile.am
+++ b/training/Makefile.am
@@ -6,6 +6,7 @@ SUBDIRS = \
 	dpmert \
 	pro \
 	dtrain \
+	latent_svm \
 	mira \
 	rampion

diff --git a/training/latent_svm/Makefile.am b/training/latent_svm/Makefile.am
index 673b9159..65c5e038 100644
--- a/training/latent_svm/Makefile.am
+++ b/training/latent_svm/Makefile.am
@@ -1,6 +1,6 @@
 bin_PROGRAMS = latent_svm
 
 latent_svm_SOURCES = latent_svm.cc
-latent_svm_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
+latent_svm_LDADD = ../..//decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a
 
 AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval
--
cgit v1.2.3


From 9a4f693870214e56d51aa22ceb97a67b34b7a0d0 Mon Sep 17 00:00:00 2001
From: vlade
Date: Sat, 13 Apr 2013 00:48:10 -0400
Subject: initial commit of mira code

---
 training/mira/kbest_mirav5.cc | 1148 +++++++++++++++++++++++++++++++++++++++++
 training/mira/run_mira.pl     |  548 ++++++++++++++++++++
 2 files changed, 1696 insertions(+)
 create mode 100644 training/mira/kbest_mirav5.cc
 create mode 100755 training/mira/run_mira.pl

(limited to 'training')

diff --git a/training/mira/kbest_mirav5.cc b/training/mira/kbest_mirav5.cc
new file mode 100644
index 00000000..cea5cf67
--- /dev/null
+++ b/training/mira/kbest_mirav5.cc
@@ -0,0 +1,1148 @@
+#include <sstream>
+#include <iostream>
+#include <vector>
+#include <cassert>
+#include <cmath>
+#include <algorithm>
+
+#include "config.h"
+
+
+#include <boost/shared_ptr.hpp>
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+#include "sentence_metadata.h"
+#include "scorer.h"
+#include "verbose.h"
+#include "viterbi.h"
+#include "hg.h"
+#include "prob.h"
+#include "kbest.h"
+#include "ff_register.h"
+#include "decoder.h"
+#include "filelib.h"
+#include "fdict.h"
+#include "time.h"
+#include "sampler.h"
+
+#include "weights.h"
+#include "sparse_vector.h"
+
+using namespace std;
+using boost::shared_ptr;
+namespace po = boost::program_options;
+
+bool invert_score;
+boost::shared_ptr<MT19937> rng;
+bool approx_score;
+bool no_reweight;
+bool no_select;
+bool unique_kbest;
+int update_list_size;
+vector<weight_t> dense_weights_g;
+double mt_metric_scale;
+int optimizer;
+int fear_select;
+int hope_select;
+
+bool pseudo_doc;
+
+void SanityCheck(const vector<double>& w) {
+  for (int i = 0; i < w.size(); ++i) {
+    assert(!isnan(w[i]));
+    assert(!isinf(w[i]));
+  }
+}
+
+struct FComp {
+  const vector<double>& w_;
+  FComp(const vector<double>& w) : w_(w) {}
+  bool operator()(int a, int b) const {
+    return fabs(w_[a]) > fabs(w_[b]);
+  }
+};
+
+void ShowLargestFeatures(const vector<double>& w) {
+  vector<int> fnums(w.size());
+  for (int i = 0; i < w.size(); ++i)
+    fnums[i] = i;
+  vector<int>::iterator mid = fnums.begin();
+  mid += (w.size() > 10 ? 10 : w.size());
+  partial_sort(fnums.begin(), mid, fnums.end(), FComp(w));
+  cerr << "TOP FEATURES:";
+  for (vector<int>::iterator i = fnums.begin(); i != mid; ++i) {
+    cerr << ' ' << FD::Convert(*i) << '=' << w[*i];
+  }
+  cerr << endl;
+}
+
+bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description opts("Configuration options");
+  opts.add_options()
+    ("input_weights,w",po::value<string>(),"Input feature weights file")
+    ("source,i",po::value<string>(),"Source file for development set")
+    ("passes,p", po::value<int>()->default_value(15), "Number of passes through the training data")
+    ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation(s) (tokenized text file)")
+    ("mt_metric,m",po::value<string>()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)")
+    ("optimizer,o",po::value<int>()->default_value(1), "Optimizer (sgd=1, mira 1-fear=2, full mira w/ cutting plane=3, full mira w/ nbest list=5, local update=4)")
+    ("fear,f",po::value<int>()->default_value(1), "Fear selection (model-cost=1, max-cost=2, pred-base=3)")
+    ("hope,h",po::value<int>()->default_value(1), "Hope selection (model+cost=1, max-cost=2, local-cost=3)")
+    ("max_step_size,C", po::value<double>()->default_value(0.01), "regularization strength (C)")
+    ("random_seed,S", po::value<uint32_t>(), "Random seed (if not specified, /dev/random will be used)")
+    ("mt_metric_scale,s", po::value<double>()->default_value(1.0), "Amount to scale MT loss function by")
+    ("approx_score,a", "Use smoothed sentence-level BLEU score for approximate scoring")
+    ("no_reweight,d","Do not reweight forest for cutting plane")
+    ("no_select,n", "Do not use selection heuristic")
+    ("k_best_size,k", po::value<int>()->default_value(250), "Size of hypothesis list to search for oracles")
+    ("update_k_best,b", po::value<int>()->default_value(1), "Size of good, bad lists to perform update with")
+    ("unique_k_best,u", "Unique k-best translation list")
+    ("weights_output,O",po::value<string>(),"Directory to write weights to")
+    ("output_dir,D",po::value<string>(),"Directory to place output in")
+    ("decoder_config,c",po::value<string>(),"Decoder configuration file");
+  po::options_description clo("Command line options");
+  clo.add_options()
+    ("config", po::value<string>(), "Configuration file")
+    ("help,H", "Print this help message and exit");
+  po::options_description dconfig_options, dcmdline_options;
+  dconfig_options.add(opts);
+  dcmdline_options.add(opts).add(clo);
+
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  if (conf->count("config")) {
+    ifstream config((*conf)["config"].as<string>().c_str());
+    po::store(po::parse_config_file(config, dconfig_options), *conf);
+  }
+  po::notify(*conf);
+
+  if (conf->count("help") || !conf->count("input_weights") || !conf->count("decoder_config") || !conf->count("reference")) {
+    cerr << dcmdline_options << endl;
+    return false;
+  }
+  return true;
+}
+
+//load previous translation, store array of each sentence's score, subtract it from current sentence and replace with new translation score
+
+
+static const double kMINUS_EPSILON = -1e-6;
+static const double EPSILON = 0.000001;
+static const double SMO_EPSILON = 0.0001;
+static const double PSEUDO_SCALE = 0.95;
+static const int MAX_SMO = 10;
+int cur_pass;
+
+struct HypothesisInfo {
+  SparseVector<double> features;
+  vector<WordID> hyp;
+  double mt_metric;
+  double hope;
+  double fear;
+  double alpha;
+  double oracle_loss;
+  SparseVector<double> oracle_feat_diff;
+  shared_ptr<HypothesisInfo> oracleN;
+};
+
+bool ApproxEqual(double a, double b) {
+  if (a == b) return true;
+  return (fabs(a-b)/fabs(b)) < EPSILON;
+}
+
+typedef shared_ptr<HypothesisInfo> HI;
+bool HypothesisCompareB(const HI& h1, const HI& h2 )
+{
+  return h1->mt_metric > h2->mt_metric;
+};
+
+
+bool HopeCompareB(const HI& h1, const HI& h2 )
+{
+  return h1->hope > h2->hope;
+};
+
+bool FearCompareB(const HI& h1, const HI& h2 )
+{
+  return h1->fear > h2->fear;
+};
+
+bool FearComparePred(const HI& h1, const HI& h2 )
+{
+  return h1->features.dot(dense_weights_g) > h2->features.dot(dense_weights_g);
+};
+
+bool HypothesisCompareG(const HI& h1, const HI& h2 )
+{
+  return h1->mt_metric < h2->mt_metric;
+};
+
+
+void CuttingPlane(vector<shared_ptr<HypothesisInfo> >* cur_c, bool* again, vector<shared_ptr<HypothesisInfo> >& all_hyp, vector<weight_t> dense_weights)
+{
+  bool DEBUG_CUT = false;
+  shared_ptr<HypothesisInfo> max_fear, max_fear_in_set;
+  vector<shared_ptr<HypothesisInfo> >& cur_constraint = *cur_c;
+
+  if(no_reweight)
+    {
+      //find new hope hypothesis
+      for(int u=0;u!=all_hyp.size();u++)
+	{
+	  double t_score = all_hyp[u]->features.dot(dense_weights);
+	  all_hyp[u]->hope = 1 * all_hyp[u]->mt_metric + t_score;
+	  //if (PRINT_LIST) cerr << all_hyp[u]->mt_metric << " H:" << all_hyp[u]->hope << " S:" << t_score << endl;
+
+	}
+
+      //sort hyps by hope score
+      sort(all_hyp.begin(),all_hyp.end(),HopeCompareB);
+
+      double hope_score = all_hyp[0]->features.dot(dense_weights);
+      if(DEBUG_CUT) cerr << "New hope derivation score " << hope_score << endl;
+
+      for(int u=0;u!=all_hyp.size();u++)
+	{
+	  double t_score = all_hyp[u]->features.dot(dense_weights);
+	  //all_hyp[u]->fear = -1*all_hyp[u]->mt_metric - hope_score + t_score;
+
+	  all_hyp[u]->fear = -1*all_hyp[u]->mt_metric + 1*all_hyp[0]->mt_metric - hope_score + t_score; //relative loss
+	  //	  all_hyp[u]->oracle_loss = -1*all_hyp[u]->mt_metric - -1*all_hyp[0]->mt_metric;
+	  //all_hyp[u]->oracle_feat_diff = all_hyp[0]->features - all_hyp[u]->features;
+	  //	  all_hyp[u]->fear = -1 * all_hyp[u]->mt_metric + t_score;
+	  //if (PRINT_LIST) cerr << all_hyp[u]->mt_metric << " H:" << all_hyp[u]->hope << " F:" << all_hyp[u]->fear << endl;
+
+	}
+
+      sort(all_hyp.begin(),all_hyp.end(),FearCompareB);
+
+    }
+  //assign maximum fear derivation from all derivations
+  max_fear = all_hyp[0];
+
+  if(DEBUG_CUT) cerr <<"Cutting Plane Max Fear "<< max_fear->fear ;
+  for(int i=0; i < cur_constraint.size();i++) //select maximal violator already in constraint set
+    {
+      if (!max_fear_in_set || cur_constraint[i]->fear > max_fear_in_set->fear)
+	max_fear_in_set = cur_constraint[i];
+    }
+  if(DEBUG_CUT) cerr << "Max Fear in constraint set " << max_fear_in_set->fear << endl;
+
+  if(max_fear->fear > max_fear_in_set->fear + SMO_EPSILON)
+    {
+      cur_constraint.push_back(max_fear);
+      *again = true;
+      if(DEBUG_CUT) cerr << "Optimize Again " << *again << endl;
+    }
+}
+
+
+double ComputeDelta(vector<shared_ptr<HypothesisInfo> >* cur_p, double max_step_size, vector<weight_t> dense_weights )
+{
+  vector<shared_ptr<HypothesisInfo> >& cur_pair = *cur_p;
+  double loss = cur_pair[0]->oracle_loss - cur_pair[1]->oracle_loss;
+  //double margin = -cur_pair[0]->oracle_feat_diff.dot(dense_weights) + cur_pair[1]->oracle_feat_diff.dot(dense_weights); //TODO: is it a problem that new oracle is used in diff?
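// [Illustrative aside, not part of the patch] ComputeDelta below performs one SMO-style
// pairwise step: the raw step is (loss + margin) / (||f(y1) - f(y2)||^2 * C), and it is
// then clipped so both dual variables (the alpha fields) stay non-negative. A minimal
// sketch of that step (names are hypothetical):

double SmoDelta(double num, const SparseVector<double>& feat_diff,
                double max_step, double alpha0, double alpha1) {
  const double sqnorm = feat_diff.l2norm_sq();
  double delta = (sqnorm > 0) ? num / (sqnorm * max_step) : 0.0;
  // clipping: delta shifts dual mass from alpha0 to alpha1; neither may go below zero
  return std::max(-alpha0, std::min(delta, alpha1));
}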
+  //double num = loss - margin;
+
+
+  double margin = -(cur_pair[0]->oracleN->features.dot(dense_weights) - cur_pair[0]->features.dot(dense_weights)) + (cur_pair[1]->oracleN->features.dot(dense_weights) - cur_pair[1]->features.dot(dense_weights));
+  const double num = margin + loss;
+  cerr << "LOSS: " << num << " Margin:" << margin << " BLEUL:" << loss << " " << cur_pair[1]->features.dot(dense_weights) << " " << cur_pair[0]->features.dot(dense_weights) << endl;
+  //double margin = cur_pair[1]->features.dot(dense_weights) - cur_pair[0]->features.dot(dense_weights);
+  //  double loss = cur_pair[1]->oracle_loss; //good.mt_metric - cur_bad.mt_metric);
+  //const double num = margin + loss;
+
+  //cerr << "Compute Delta " << loss << " " << margin << " ";
+
+  //  double margin = cur_pair[0]->features.dot(dense_weights) - cur_pair[1]->features.dot(dense_weights); //TODO: is it a problem that new oracle is used in diff?
+/*  double num =
+    (cur_pair[0]->oracle_loss - cur_pair[0]->oracle_feat_diff.dot(dense_weights))
+    - (cur_pair[1]->oracle_loss - cur_pair[1]->oracle_feat_diff.dot(dense_weights));
+  */
+
+  SparseVector<double> diff = cur_pair[0]->features;
+  diff -= cur_pair[1]->features;
+  /*  SparseVector<double> diff = cur_pair[0]->oracle_feat_diff;
+  diff -= cur_pair[1]->oracle_feat_diff;*/
+  double diffsqnorm = diff.l2norm_sq();
+  double delta;
+  if (diffsqnorm > 0)
+    delta = num / (diffsqnorm * max_step_size);
+  else
+    delta = 0;
+  cerr << " D1:" << delta;
+  //clip delta (enforce margin constraints)
+
+  delta = max(-cur_pair[0]->alpha, min(delta, cur_pair[1]->alpha));
+  cerr << " D2:" << delta;
+  return delta;
+}
+
+
+vector<shared_ptr<HypothesisInfo> > SelectPair(vector<shared_ptr<HypothesisInfo> >* cur_c)
+{
+  bool DEBUG_SELECT= false;
+  vector<shared_ptr<HypothesisInfo> >& cur_constraint = *cur_c;
+
+  vector<shared_ptr<HypothesisInfo> > pair;
+
+  if (no_select || optimizer == 2){ //skip heuristic search and return oracle and fear for 1-mira
+    //    if(optimizer == 2)      {
+    pair.push_back(cur_constraint[0]);
+    pair.push_back(cur_constraint[1]);
+    return pair;
+    //   }
+  }
+
+  for(int u=0;u != cur_constraint.size();u++)
+    {
+      shared_ptr<HypothesisInfo> max_fear;
+
+      if(DEBUG_SELECT) cerr<< "cur alpha " << u  << " " << cur_constraint[u]->alpha;
+      for(int i=0; i < cur_constraint.size();i++) //select maximal violator
+	{
+	  if(i != u)
+	    if (!max_fear || cur_constraint[i]->fear > max_fear->fear)
+	      max_fear = cur_constraint[i];
+	}
+      if(!max_fear) return pair; //
+
+      if(DEBUG_SELECT) cerr << " F" << max_fear->fear << endl;
+
+
+      if ((cur_constraint[u]->alpha == 0) && (cur_constraint[u]->fear > max_fear->fear + SMO_EPSILON))
+	{
+	  for(int i=0; i < cur_constraint.size();i++) //select maximal violator
+	    {
+	      if(i != u)
+		if (cur_constraint[i]->alpha > 0)
+		  {
+		    pair.push_back(cur_constraint[u]);
+		    pair.push_back(cur_constraint[i]);
+		    cerr << "RETURN from 1" << endl;
+		    return pair;
+		  }
+	    }
+	}
+      if ((cur_constraint[u]->alpha > 0) && (cur_constraint[u]->fear < max_fear->fear - SMO_EPSILON))
+	{
+	  for(int i=0; i < cur_constraint.size();i++) //select maximal violator
+	    {
+	      if(i != u)
+		if (cur_constraint[i]->fear > cur_constraint[u]->fear)
+		  {
+		    pair.push_back(cur_constraint[u]);
+		    pair.push_back(cur_constraint[i]);
+		    return pair;
+		  }
+	    }
+	}
+
+    }
+  return pair; //no more constraints to optimize, we're done here
+
+}
+
+struct GoodBadOracle {
+  vector<shared_ptr<HypothesisInfo> > good;
+  vector<shared_ptr<HypothesisInfo> > bad;
+};
+
+struct TrainingObserver : public DecoderObserver {
+  TrainingObserver(const int k, const DocScorer& d, vector<GoodBadOracle>* o, vector<ScoreP>* cbs) : ds(d), oracles(*o), corpus_bleu_sent_stats(*cbs), kbest_size(k) {
+    //  TrainingObserver(const int k, const DocScorer& d, vector<GoodBadOracle>* o) : ds(d), oracles(*o), kbest_size(k) {
+
+    //calculate corpus bleu score from previous iterations 1-best for BLEU gain
+    if(!pseudo_doc)
+      if(cur_pass > 0)
+	{
+	  ScoreP acc;
+	  for (int ii = 0; ii < corpus_bleu_sent_stats.size(); ii++) {
+	    if (!acc) { acc = corpus_bleu_sent_stats[ii]->GetZero(); }
+	    acc->PlusEquals(*corpus_bleu_sent_stats[ii]);
+
+	  }
+	  corpus_bleu_stats = acc;
+	  corpus_bleu_score = acc->ComputeScore();
+	}
+    //corpus_src_length = 0;
+  }
+  const DocScorer& ds;
+  vector<ScoreP>& corpus_bleu_sent_stats;
+  vector<GoodBadOracle>& oracles;
+  vector<shared_ptr<HypothesisInfo> > cur_best;
+  shared_ptr<HypothesisInfo> cur_oracle;
+  const int kbest_size;
+  Hypergraph forest;
+  int cur_sent;
+  ScoreP corpus_bleu_stats;
+  float corpus_bleu_score;
+
+  float corpus_src_length;
+  float curr_src_length;
+
+  const int GetCurrentSent() const {
+    return cur_sent;
+  }
+
+  const HypothesisInfo& GetCurrentBestHypothesis() const {
+    return *cur_best[0];
+  }
+
+  const vector<shared_ptr<HypothesisInfo> > GetCurrentBest() const {
+    return cur_best;
+  }
+
+  const HypothesisInfo& GetCurrentOracle() const {
+    return *cur_oracle;
+  }
+
+  const Hypergraph& GetCurrentForest() const {
+    return forest;
+  }
+
+
+  virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) {
+    cur_sent = smeta.GetSentenceID();
+    //cerr << "SOURCE " << smeta.GetSourceLength() << endl;
+    curr_src_length = (float) smeta.GetSourceLength();
+    //UpdateOracles(smeta.GetSentenceID(), *hg);
+    if(unique_kbest)
+      UpdateOracles<KBest::FilterUnique>(smeta.GetSentenceID(), *hg);
+    else
+      UpdateOracles<KBest::NoFilter<std::vector<WordID> > >(smeta.GetSentenceID(), *hg);
+    forest = *hg;
+
+  }
+
+  shared_ptr<HypothesisInfo> MakeHypothesisInfo(const SparseVector<double>& feats, const double score, const vector<WordID>& hyp) {
+    shared_ptr<HypothesisInfo> h(new HypothesisInfo);
+    h->features = feats;
+    h->mt_metric = score;
+    h->hyp = hyp;
+    return h;
+  }
+
+  template <typename Filter>
+  void UpdateOracles(int sent_id, const Hypergraph& forest) {
+
+    bool PRINT_LIST= false;
+    vector<shared_ptr<HypothesisInfo> >& cur_good = oracles[sent_id].good;
+    vector<shared_ptr<HypothesisInfo> >& cur_bad = oracles[sent_id].bad;
+    //TODO: look at keeping previous iterations hypothesis lists around
+    cur_best.clear();
+    cur_good.clear();
+    cur_bad.clear();
+
+    vector<shared_ptr<HypothesisInfo> > all_hyp;
+
+    typedef KBest::KBestDerivations<vector<WordID>, ESentenceTraversal, Filter> K;
+    K kbest(forest,kbest_size);
+
+    //KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(forest, kbest_size);
+    for (int i = 0; i < kbest_size; ++i) {
+      //const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d =
+      typename K::Derivation *d =
+	kbest.LazyKthBest(forest.nodes_.size() - 1, i);
+      if (!d) break;
+
+      float sentscore;
+      if(approx_score)
+	{
+
+	  if(cur_pass > 0 && !pseudo_doc)
+	    {
+	      ScoreP sent_stats = ds[sent_id]->ScoreCandidate(d->yield);
+	      ScoreP corpus_no_best = corpus_bleu_stats->GetZero();
+
+	      corpus_bleu_stats->Subtract(*corpus_bleu_sent_stats[sent_id], &*corpus_no_best);
+	      sent_stats->PlusEquals(*corpus_no_best, 0.5);
+
+	      //compute gain from new sentence in 1-best corpus
+	      sentscore = mt_metric_scale * (sent_stats->ComputeScore() - corpus_no_best->ComputeScore());// - corpus_bleu_score);
+	    }
+	  else if(pseudo_doc)
+	    {
+	      //cerr << "CORP:" << corpus_bleu_score << " NEW:" << sent_stats->ComputeScore() << " sentscore:" << sentscore << endl;
+
+	      //-----pseudo-corpus approach
+	      float src_scale = corpus_src_length + curr_src_length;
+	      ScoreP sent_stats = ds[sent_id]->ScoreCandidate(d->yield);
+	      if(!corpus_bleu_stats){ corpus_bleu_stats = sent_stats->GetZero();}
+
+	      sent_stats->PlusEquals(*corpus_bleu_stats);
+	      sentscore = mt_metric_scale * src_scale * sent_stats->ComputeScore();
+
+	    }
+	  else
+	    {
+	      //cerr << "Using sentence-level approximation - PASS - " << boost::lexical_cast<string>(cur_pass) << endl;
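// [Illustrative aside, not part of the patch] The pseudo_doc branch above implements an
// exponentially-decayed pseudo-corpus: each candidate is scored as part of a background
// document of BLEU sufficient statistics, and after every sentence the background is
// scaled by PSEUDO_SCALE (0.95) so recent 1-bests dominate while BLEU stays corpus-like.
// A minimal sketch using this file's Score interface (helper name is hypothetical):

float PseudoDocScore(ScoreP background, ScoreP candidate_stats, float scale) {
  // score the candidate together with the decayed background document's statistics
  candidate_stats->PlusEquals(*background);
  return scale * candidate_stats->ComputeScore();
}
// after selecting the 1-best: add its stats to the background, then decay it by PSEUDO_SCALE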
//approx style of computation, used for 0th iteration + sentscore = mt_metric_scale * (ds[sent_id]->ScoreCandidate(d->yield)->ComputeSentScore()); + + //use pseudo-doc + } + + + } + else + { + sentscore = mt_metric_scale * (ds[sent_id]->ScoreCandidate(d->yield)->ComputeScore()); + } + + if (invert_score) sentscore *= -1.0; + //cerr << TD::GetString(d->yield) << " ||| " << d->score << " ||| " << sentscore << " " << approx_sentscore << endl; + + if (i < update_list_size){ + if (i == 0) //take cur best and add its bleu statistics counts to the pseudo-doc + { } + if(PRINT_LIST)cerr << TD::GetString(d->yield) << " ||| " << d->score << " ||| " << sentscore << endl; + cur_best.push_back( MakeHypothesisInfo(d->feature_values, sentscore, d->yield)); + } + + all_hyp.push_back(MakeHypothesisInfo(d->feature_values, sentscore,d->yield)); //store all hyp to extract oracle best and worst + + } + + if(pseudo_doc){ + //update psuedo-doc stats + string details, details2; + corpus_bleu_stats->ScoreDetails(&details2); + ScoreP sent_stats = ds[sent_id]->ScoreCandidate(cur_best[0]->hyp); + corpus_bleu_stats->PlusEquals(*sent_stats); + + + sent_stats->ScoreDetails(&details); + + + sent_stats = corpus_bleu_stats; + corpus_bleu_stats = sent_stats->GetZero(); + corpus_bleu_stats->PlusEquals(*sent_stats, PSEUDO_SCALE); + + + corpus_src_length = PSEUDO_SCALE * (corpus_src_length + curr_src_length); + cerr << "CORP S " << corpus_src_length << " " << curr_src_length << "\n" << details << "\n " << details2 << endl; + + + } + + + //figure out how many hyps we can keep maximum + int temp_update_size = update_list_size; + if (all_hyp.size() < update_list_size){ temp_update_size = all_hyp.size();} + + //sort all hyps by sentscore (bleu) + sort(all_hyp.begin(),all_hyp.end(),HypothesisCompareB); + + if(PRINT_LIST){ cerr << "Sorting " << endl; for(int u=0;u!=all_hyp.size();u++) cerr << all_hyp[u]->mt_metric << " " << all_hyp[u]->features.dot(dense_weights_g) << endl; } + + //if(optimizer != 4 ) + if(hope_select == 1) + { + //find hope hypothesis using model + bleu + if (PRINT_LIST) cerr << "HOPE " << endl; + for(int u=0;u!=all_hyp.size();u++) + { + double t_score = all_hyp[u]->features.dot(dense_weights_g); + all_hyp[u]->hope = all_hyp[u]->mt_metric + t_score; + if (PRINT_LIST) cerr << all_hyp[u]->mt_metric << " H:" << all_hyp[u]->hope << " S:" << t_score << endl; + + } + + //sort hyps by hope score + sort(all_hyp.begin(),all_hyp.end(),HopeCompareB); + } + + + //assign cur_good the sorted list + cur_good.insert(cur_good.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size); + if(PRINT_LIST) { cerr << "GOOD" << endl; for(int u=0;u!=cur_good.size();u++) cerr << cur_good[u]->mt_metric << " " << cur_good[u]->hope << endl;} + /* if (!cur_oracle) { cur_oracle = cur_good[0]; + cerr << "Set oracle " << cur_oracle->hope << " " << cur_oracle->fear << " " << cur_oracle->mt_metric << endl; } + else { + cerr << "Stay oracle " << cur_oracle->hope << " " << cur_oracle->fear << " " << cur_oracle->mt_metric << endl; } */ + + shared_ptr& oracleN = cur_good[0]; + //if(optimizer != 4){ + if(fear_select == 1){ + //compute fear hyps + if (PRINT_LIST) cerr << "FEAR " << endl; + double hope_score = oracleN->features.dot(dense_weights_g); + //double hope_score = cur_oracle->features.dot(dense_weights); + if (PRINT_LIST) cerr << "hope score " << hope_score << endl; + for(int u=0;u!=all_hyp.size();u++) + { + double t_score = all_hyp[u]->features.dot(dense_weights_g); + //all_hyp[u]->fear = -1*all_hyp[u]->mt_metric - hope_score + t_score; + + /* 
all_hyp[u]->fear = -1*all_hyp[u]->mt_metric - -1*cur_oracle->mt_metric - hope_score + t_score; //relative loss + all_hyp[u]->oracle_loss = -1*all_hyp[u]->mt_metric - -1*cur_oracle->mt_metric; + all_hyp[u]->oracle_feat_diff = cur_oracle->features - all_hyp[u]->features;*/ + + all_hyp[u]->fear = -1*all_hyp[u]->mt_metric + 1*oracleN->mt_metric - hope_score + t_score; //relative loss + all_hyp[u]->oracle_loss = -1*all_hyp[u]->mt_metric + 1*oracleN->mt_metric; + all_hyp[u]->oracle_feat_diff = oracleN->features - all_hyp[u]->features; + all_hyp[u]->oracleN=oracleN; + // all_hyp[u]->fear = -1 * all_hyp[u]->mt_metric + t_score; + if (PRINT_LIST) cerr << all_hyp[u]->mt_metric << " H:" << all_hyp[u]->hope << " F:" << all_hyp[u]->fear << endl; + + } + + sort(all_hyp.begin(),all_hyp.end(),FearCompareB); + + cur_bad.insert(cur_bad.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size); + } + else if(fear_select == 2) //select fear based on cost + { + cur_bad.insert(cur_bad.begin(), all_hyp.end()-temp_update_size, all_hyp.end()); + reverse(cur_bad.begin(),cur_bad.end()); + } + else //pred-based, fear_select = 3 + { + sort(all_hyp.begin(),all_hyp.end(),FearComparePred); + cur_bad.insert(cur_bad.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size); + } + + + if(PRINT_LIST){ cerr<< "BAD"<mt_metric << " H:" << cur_bad[u]->hope << " F:" << cur_bad[u]->fear << endl;} + + cerr << "GOOD (BEST): " << cur_good[0]->mt_metric << endl; + cerr << " CUR: " << cur_best[0]->mt_metric << endl; + cerr << " BAD (WORST): " << cur_bad[0]->mt_metric << endl; + } +}; + +void ReadTrainingCorpus(const string& fname, vector* c) { + + + ReadFile rf(fname); + istream& in = *rf.stream(); + string line; + while(in) { + getline(in, line); + if (!in) break; + c->push_back(line); + } +} + +void ReadPastTranslationForScore(const int cur_pass, vector* c, DocScorer& ds, const string& od) +{ + cerr << "Reading BLEU gain file "; + string fname; + if(cur_pass == 0) + { + fname = od + "/run.raw.init"; + } + else + { + int last_pass = cur_pass - 1; + fname = od + "/run.raw." 
+ boost::lexical_cast(last_pass) + ".B"; + } + cerr << fname << "\n"; + ReadFile rf(fname); + istream& in = *rf.stream(); + ScoreP acc; + string line; + int lc = 0; + while(in) { + getline(in, line); + if (line.empty() && !in) break; + vector sent; + TD::ConvertSentence(line, &sent); + ScoreP sentscore = ds[lc]->ScoreCandidate(sent); + c->push_back(sentscore); + if (!acc) { acc = sentscore->GetZero(); } + acc->PlusEquals(*sentscore); + ++lc; + + } + + + assert(lc > 0); + float score = acc->ComputeScore(); + string details; + acc->ScoreDetails(&details); + cerr << "INIT RUN " << details << score << endl; + +} + + +int main(int argc, char** argv) { + register_feature_functions(); + SetSilent(true); // turn off verbose decoder output + + po::variables_map conf; + if (!InitCommandLine(argc, argv, &conf)) return 1; + + if (conf.count("random_seed")) + rng.reset(new MT19937(conf["random_seed"].as())); + else + rng.reset(new MT19937); + + vector corpus; + //ReadTrainingCorpus(conf["source"].as(), &corpus); + + const string metric_name = conf["mt_metric"].as(); + optimizer = conf["optimizer"].as(); + fear_select = conf["fear"].as(); + hope_select = conf["hope"].as(); + mt_metric_scale = conf["mt_metric_scale"].as(); + approx_score = conf.count("approx_score"); + no_reweight = conf.count("no_reweight"); + no_select = conf.count("no_select"); + update_list_size = conf["update_k_best"].as(); + unique_kbest = conf.count("unique_k_best"); + pseudo_doc = true; + + const string weights_dir = conf["weights_output"].as(); + const string output_dir = conf["output_dir"].as(); + ScoreType type = ScoreTypeFromString(metric_name); + + //establish metric used for tuning + if (type == TER) { + invert_score = true; + // approx_score = false; + } else { + invert_score = false; + } + + //load references + DocScorer ds(type, conf["reference"].as >(), ""); + cerr << "Loaded " << ds.size() << " references for scoring with " << metric_name << endl; + vector corpus_bleu_sent_stats; + + //check training pass,if >0, then use previous iterations corpus bleu stats + cur_pass = conf["passes"].as(); + if(cur_pass > 0) + { + ReadPastTranslationForScore(cur_pass, &corpus_bleu_sent_stats, ds, output_dir); + } + /* if (ds.size() != corpus.size()) { + cerr << "Mismatched number of references (" << ds.size() << ") and sources (" << corpus.size() << ")\n"; + return 1; + }*/ + cerr << "Optimizing with " << optimizer << endl; + // load initial weights + /*Weights weights; + weights.InitFromFile(conf["input_weights"].as()); + SparseVector lambdas; + weights.InitSparseVector(&lambdas); + */ + + + + ReadFile ini_rf(conf["decoder_config"].as()); + Decoder decoder(ini_rf.stream()); + + vector& dense_weights = decoder.CurrentWeightVector(); + + SparseVector lambdas; + Weights::InitFromFile(conf["input_weights"].as(), &dense_weights); + Weights::InitSparseVector(dense_weights, &lambdas); + + const string input = decoder.GetConf()["input"].as(); + //const bool show_feature_dictionary = decoder.GetConf().count("show_feature_dictionary"); + if (!SILENT) cerr << "Reading input from " << ((input == "-") ? 
"STDIN" : input.c_str()) << endl; + ReadFile in_read(input); + istream *in = in_read.stream(); + assert(*in); + string buf; + + const double max_step_size = conf["max_step_size"].as(); + + + // assert(corpus.size() > 0); + vector oracles(ds.size()); + + TrainingObserver observer(conf["k_best_size"].as(), ds, &oracles, &corpus_bleu_sent_stats); + + int cur_sent = 0; + int lcount = 0; + double objective=0; + double tot_loss = 0; + int dots = 0; + // int cur_pass = 1; + // vector dense_weights; + SparseVector tot; + SparseVector final_tot; + // tot += lambdas; // initial weights + // lcount++; // count for initial weights + + //string msg = "# MIRA tuned weights"; + // while (cur_pass <= max_iteration) { + SparseVector old_lambdas = lambdas; + tot.clear(); + tot += lambdas; + cerr << "PASS " << cur_pass << " " << endl << lambdas << endl; + ScoreP acc, acc_h, acc_f; + + while(*in) { + getline(*in, buf); + if (buf.empty()) continue; + //for (cur_sent = 0; cur_sent < corpus.size(); cur_sent++) { + + cerr << "SENT: " << cur_sent << endl; + //TODO: allow batch updating + //dense_weights.clear(); + //weights.InitFromVector(lambdas); + //weights.InitVector(&dense_weights); + //decoder.SetWeights(dense_weights); + lambdas.init_vector(&dense_weights); + dense_weights_g = dense_weights; + decoder.SetId(cur_sent); + decoder.Decode(buf, &observer); // decode the sentence, calling Notify to get the hope,fear, and model best hyps. + + cur_sent = observer.GetCurrentSent(); + const HypothesisInfo& cur_hyp = observer.GetCurrentBestHypothesis(); + const HypothesisInfo& cur_good = *oracles[cur_sent].good[0]; + const HypothesisInfo& cur_bad = *oracles[cur_sent].bad[0]; + + vector >& cur_good_v = oracles[cur_sent].good; + vector >& cur_bad_v = oracles[cur_sent].bad; + vector > cur_best_v = observer.GetCurrentBest(); + + tot_loss += cur_hyp.mt_metric; + + //score hyps to be able to compute corpus level bleu after we finish this iteration through the corpus + ScoreP sentscore = ds[cur_sent]->ScoreCandidate(cur_hyp.hyp); + if (!acc) { acc = sentscore->GetZero(); } + acc->PlusEquals(*sentscore); + + ScoreP hope_sentscore = ds[cur_sent]->ScoreCandidate(cur_good.hyp); + if (!acc_h) { acc_h = hope_sentscore->GetZero(); } + acc_h->PlusEquals(*hope_sentscore); + + ScoreP fear_sentscore = ds[cur_sent]->ScoreCandidate(cur_bad.hyp); + if (!acc_f) { acc_f = fear_sentscore->GetZero(); } + acc_f->PlusEquals(*fear_sentscore); + + if(optimizer == 4) { //single dual coordinate update, cur_good selected on BLEU score only (not model+BLEU) + // if (!ApproxEqual(cur_hyp.mt_metric, cur_good.mt_metric)) { + + double margin = cur_bad.features.dot(dense_weights) - cur_good.features.dot(dense_weights); + double mt_loss = (cur_good.mt_metric - cur_bad.mt_metric); + const double loss = margin + mt_loss; + cerr << "LOSS: " << loss << " Margin:" << margin << " BLEUL:" << mt_loss << " " << cur_bad.features.dot(dense_weights) << " " << cur_good.features.dot(dense_weights) < 0.0) { + SparseVector diff = cur_good.features; + diff -= cur_bad.features; + + double diffsqnorm = diff.l2norm_sq(); + double delta; + if (diffsqnorm > 0) + delta = loss / (diffsqnorm); + else + delta = 0; + + //double step_size = loss / diff.l2norm_sq(); + cerr << loss << " " << delta << " " << diff << endl; + if (delta > max_step_size) delta = max_step_size; + lambdas += (cur_good.features * delta); + lambdas -= (cur_bad.features * delta); + //cerr << "L: " << lambdas << endl; + // } + // } + } + else if(optimizer == 1) //sgd - nonadapted step size + { + + lambdas += 
(cur_good.features) * max_step_size; + lambdas -= (cur_bad.features) * max_step_size; + } + //cerr << "L: " << lambdas << endl; + else if(optimizer == 5) //full mira with n-best list of constraints from oracle, fear, best + { + vector > cur_constraint; + cur_constraint.insert(cur_constraint.begin(), cur_bad_v.begin(), cur_bad_v.end()); + cur_constraint.insert(cur_constraint.begin(), cur_best_v.begin(), cur_best_v.end()); + cur_constraint.insert(cur_constraint.begin(), cur_good_v.begin(), cur_good_v.end()); + + bool optimize_again; + vector > cur_pair; + //SMO + for(int u=0;u!=cur_constraint.size();u++) + cur_constraint[u]->alpha =0; + + cur_constraint[0]->alpha =1; //set oracle to alpha=1 + + cerr <<"Optimizing with " << cur_constraint.size() << " constraints" << endl; + int smo_iter = 10, smo_iter2 = 10; + int iter, iter2 =0; + bool DEBUG_SMO = false; + while (iter2 < smo_iter2) + { + iter =0; + while (iter < smo_iter) + { + optimize_again = true; + for (int i = 0; i< cur_constraint.size(); i++) + for (int j = i+1; j< cur_constraint.size(); j++) + { + if(DEBUG_SMO) cerr << "start " << i << " " << j << endl; + cur_pair.clear(); + cur_pair.push_back(cur_constraint[j]); + cur_pair.push_back(cur_constraint[i]); + double delta = ComputeDelta(&cur_pair,max_step_size, dense_weights); + + if (delta == 0) optimize_again = false; + // cur_pair[0]->alpha += delta; + // cur_pair[1]->alpha -= delta; + cur_constraint[j]->alpha += delta; + cur_constraint[i]->alpha -= delta; + double step_size = delta * max_step_size; + /*lambdas += (cur_pair[1]->features) * step_size; + lambdas -= (cur_pair[0]->features) * step_size;*/ + lambdas += (cur_constraint[i]->features) * step_size; + lambdas -= (cur_constraint[j]->features) * step_size; + if(DEBUG_SMO) cerr << "SMO opt " << iter << " " << i << " " << j << " " << delta << " " << cur_pair[0]->alpha << " " << cur_pair[1]->alpha << endl; + + //reload weights based on update + /*dense_weights.clear(); + weights.InitFromVector(lambdas); + weights.InitVector(&dense_weights);*/ + } + iter++; + + if(!optimize_again) + { + iter = 100; + cerr << "Optimization stopped, delta =0" << endl; + } + + + } + iter2++; + } + + + } + else if(optimizer == 2 || optimizer == 3) //1-fear and cutting plane mira + { + bool DEBUG_SMO= true; + vector > cur_constraint; + cur_constraint.push_back(cur_good_v[0]); //add oracle to constraint set + bool optimize_again = true; + int cut_plane_calls = 0; + while (optimize_again) + { + if(DEBUG_SMO) cerr<< "optimize again: " << optimize_again << endl; + if(optimizer == 2){ //1-fear + cur_constraint.push_back(cur_bad_v[0]); + + //check if we have a violation + if(!(cur_constraint[1]->fear > cur_constraint[0]->fear + SMO_EPSILON)) + { + optimize_again = false; + cerr << "Constraint not violated" << endl; + } + } + else + { //cutting plane to add constraints + if(DEBUG_SMO) cerr<< "Cutting Plane " << cut_plane_calls << " with " << lambdas << endl; + optimize_again = false; + cut_plane_calls++; + CuttingPlane(&cur_constraint, &optimize_again, oracles[cur_sent].bad, dense_weights); + if (cut_plane_calls >= MAX_SMO) optimize_again = false; + } + + if(optimize_again) + { + //SMO + for(int u=0;u!=cur_constraint.size();u++) + { + cur_constraint[u]->alpha =0; + //cur_good_v[0]->alpha = 1; cur_bad_v[0]->alpha = 0; + } + cur_constraint[0]->alpha = 1; + cerr <<"Optimizing with " << cur_constraint.size() << " constraints" << endl; + int smo_iter = MAX_SMO; + int iter =0; + while (iter < smo_iter) + { + //select pair to optimize from constraint set + vector > 
cur_pair = SelectPair(&cur_constraint); + + if(cur_pair.empty()){iter=MAX_SMO; cerr << "Undefined pair " << endl; continue;} //pair is undefined so we are done with this smo + + //double num = cur_good_v[0]->fear - cur_bad_v[0]->fear; + /*double loss = cur_good_v[0]->oracle_loss - cur_bad_v[0]->oracle_loss; + double margin = cur_good_v[0]->oracle_feat_diff.dot(dense_weights) - cur_bad_v[0]->oracle_feat_diff.dot(dense_weights); + double num = loss - margin; + SparseVector diff = cur_good_v[0]->features; + diff -= cur_bad_v[0]->features; + double delta = num / (diff.l2norm_sq() * max_step_size); + delta = max(-cur_good_v[0]->alpha, min(delta, cur_bad_v[0]->alpha)); + cur_good_v[0]->alpha += delta; + cur_bad_v[0]->alpha -= delta; + double step_size = delta * max_step_size; + lambdas += (cur_bad_v[0]->features) * step_size; + lambdas -= (cur_good_v[0]->features) * step_size; + */ + + double delta = ComputeDelta(&cur_pair,max_step_size, dense_weights); + + cur_pair[0]->alpha += delta; + cur_pair[1]->alpha -= delta; + double step_size = delta * max_step_size; + /* lambdas += (cur_pair[1]->oracle_feat_diff) * step_size; + lambdas -= (cur_pair[0]->oracle_feat_diff) * step_size;*/ + + cerr << "step " << step_size << endl; + double alpha_sum=0; + SparseVector temp_lambdas = lambdas; + + for(int u=0;u!=cur_constraint.size();u++) + { + cerr << cur_constraint[u]->alpha << " " << cur_constraint[u]->hope << endl; + temp_lambdas += (cur_constraint[u]->oracleN->features-cur_constraint[u]->features) * cur_constraint[u]->alpha * step_size; + alpha_sum += cur_constraint[u]->alpha; + } + cerr << "Alpha sum " << alpha_sum << " " << temp_lambdas << endl; + + lambdas += (cur_pair[1]->features) * step_size; + lambdas -= (cur_pair[0]->features) * step_size; + cerr << " Lambdas " << lambdas << endl; + //reload weights based on update + dense_weights.clear(); + //weights.InitFromVector(lambdas); + //weights.InitVector(&dense_weights); + lambdas.init_vector(&dense_weights); + dense_weights_g = dense_weights; + iter++; + + if(DEBUG_SMO) cerr << "SMO opt " << iter << " " << delta << " " << cur_pair[0]->alpha << " " << cur_pair[1]->alpha << endl; + // cerr << "SMO opt " << iter << " " << delta << " " << cur_good_v[0]->alpha << " " << cur_bad_v[0]->alpha << endl; + if(no_select) //don't use selection heuristic to determine when to stop SMO, rather just when delta =0 + if (delta == 0) iter = MAX_SMO; + + //only perform one dual coordinate ascent step + if(optimizer == 2) + { + optimize_again = false; + iter = MAX_SMO; + } + + } + if(optimizer == 3) + { + if(!no_reweight) + { + if(DEBUG_SMO) cerr<< "Decoding with new weights -- now orac are " << oracles[cur_sent].good.size() << endl; + Hypergraph hg = observer.GetCurrentForest(); + hg.Reweight(dense_weights); + //observer.UpdateOracles(cur_sent, hg); + if(unique_kbest) + observer.UpdateOracles(cur_sent, hg); + else + observer.UpdateOracles > >(cur_sent, hg); + + + } + } + } + + + } + + //print objective after this sentence + double lambda_change = (lambdas - old_lambdas).l2norm_sq(); + double max_fear = cur_constraint[cur_constraint.size()-1]->fear; + double temp_objective = 0.5 * lambda_change;// + max_step_size * max_fear; + + for(int u=0;u!=cur_constraint.size();u++) + { + cerr << cur_constraint[u]->alpha << " " << cur_constraint[u]->hope << " " << cur_constraint[u]->fear << endl; + temp_objective += cur_constraint[u]->alpha * cur_constraint[u]->fear; + } + objective += temp_objective; + + cerr << "SENT OBJ: " << temp_objective << " NEW OBJ: " << objective << endl; + } + 
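The SMO loops above (optimizer 5's all-pairs sweep, and the 1-fear/cutting-plane loop of optimizers 2 and 3) all perform the same clipped dual-coordinate step: ComputeDelta returns delta = (margin + loss) / (||f(a) - f(b)||^2 * C), the delta is clipped so both alphas stay non-negative (they start at 1 on the oracle and always sum to one over the constraint set), and the weights move by delta * C along the feature difference, so the two factors of C cancel in the effective weight step. A minimal sketch on dense vectors, with a hypothetical PairStep helper rather than the patch's API:

#include <algorithm>
#include <cstddef>
#include <vector>

// One clipped SMO step between constraints a and b. loss_b_minus_a is
// BLEU(b) - BLEU(a); alpha_a/alpha_b are the dual variables; C is the
// regularization constant (max_step_size in the patch).
double PairStep(std::vector<double>& w,
                const std::vector<double>& fa, const std::vector<double>& fb,
                double loss_b_minus_a, double C,
                double& alpha_a, double& alpha_b) {
  double margin = 0.0, sqnorm = 0.0;
  for (std::size_t k = 0; k < w.size(); ++k) {
    const double d = fa[k] - fb[k];
    margin += w[k] * d;                  // how much the model prefers a to b
    sqnorm += d * d;
  }
  double delta = (sqnorm > 0.0) ? (margin + loss_b_minus_a) / (sqnorm * C) : 0.0;
  delta = std::max(-alpha_a, std::min(delta, alpha_b));  // keep duals feasible
  alpha_a += delta;
  alpha_b -= delta;
  const double step = delta * C;         // the two C's cancel in the update
  for (std::size_t k = 0; k < w.size(); ++k)
    w[k] += step * (fb[k] - fa[k]);      // move toward b, away from a
  return delta;
}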
+ + if ((cur_sent * 40 / ds.size()) > dots) { ++dots; cerr << '.'; } + tot += lambdas; + ++lcount; + cur_sent++; + + cout << TD::GetString(cur_good_v[0]->hyp) << " ||| " << TD::GetString(cur_best_v[0]->hyp) << " ||| " << TD::GetString(cur_bad_v[0]->hyp) << endl; + + //clear good/bad lists from oracles for this sentences - you want to keep them around for things + + // oracles[cur_sent].good.clear(); + //oracles[cur_sent].bad.clear(); + } + + cerr << "FINAL OBJECTIVE: "<< objective << endl; + final_tot += tot; + cerr << "Translated " << lcount << " sentences " << endl; + cerr << " [AVG METRIC LAST PASS=" << (tot_loss / lcount) << "]\n"; + tot_loss = 0; + /* + float corpus_score = acc->ComputeScore(); + string corpus_details; + acc->ScoreDetails(&corpus_details); + cerr << "MODEL " << corpus_details << endl; + cout << corpus_score << endl; + + corpus_score = acc_h->ComputeScore(); + acc_h->ScoreDetails(&corpus_details); + cerr << "HOPE " << corpus_details << endl; + cout << corpus_score << endl; + + corpus_score = acc_f->ComputeScore(); + acc_f->ScoreDetails(&corpus_details); + cerr << "FEAR " << corpus_details << endl; + cout << corpus_score << endl; + */ + int node_id = rng->next() * 100000; + cerr << " Writing weights to " << node_id << endl; + Weights::ShowLargestFeatures(dense_weights); + dots = 0; + ostringstream os; + os << weights_dir << "/weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << "." << node_id << ".gz"; + string msg = "# MIRA tuned weights ||| " + boost::lexical_cast(node_id) + " ||| " + boost::lexical_cast(lcount); + //Weights.InitFromVector(lambdas); + lambdas.init_vector(&dense_weights); + Weights::WriteToFile(os.str(), dense_weights, true, &msg); + + SparseVector x = tot; + x /= lcount; + ostringstream sa; + string msga = "# MIRA tuned weights AVERAGED ||| " + boost::lexical_cast(node_id) + " ||| " + boost::lexical_cast(lcount); + sa << weights_dir << "/weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << "." 
<< node_id << "-avg.gz"; + //Weights ww; + //ww.InitFromVector(x); + x.init_vector(&dense_weights); + Weights::WriteToFile(sa.str(), dense_weights, true, &msga); + + //assign averaged lambdas to initialize next iteration + //lambdas = x; + + /* double lambda_change = (old_lambdas - lambdas).l2norm_sq(); + cerr << "Change in lambda " << lambda_change << endl; + + if ( lambda_change < EPSILON) + { + cur_pass = max_iteration; + cerr << "Weights converged - breaking" << endl; + } + + ++cur_pass; + */ + + //} iteration while loop + + /* cerr << endl; + weights.WriteToFile("weights.mira-final.gz", true, &msg); + final_tot /= (lcount + 1);//max_iteration); + tot /= (corpus.size() + 1); + weights.InitFromVector(final_tot); + cerr << tot << "||||" << final_tot << endl; + msg = "# MIRA tuned weights (averaged vector)"; + weights.WriteToFile("weights.mira-final-avg.gz", true, &msg); + */ + cerr << "Optimization complete.\\AVERAGED WEIGHTS: weights.mira-final-avg.gz\n"; + return 0; +} + diff --git a/training/mira/run_mira.pl b/training/mira/run_mira.pl new file mode 100755 index 00000000..f4d61407 --- /dev/null +++ b/training/mira/run_mira.pl @@ -0,0 +1,548 @@ +#!/usr/bin/env perl +use strict; +my @ORIG_ARGV=@ARGV; +use Cwd qw(getcwd); +my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); +push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; } + +# Skip local config (used for distributing jobs) if we're running in local-only mode +use LocalConfig; +use Getopt::Long; +use IPC::Open2; +use POSIX ":sys_wait_h"; +my $QSUB_CMD = qsub_args(mert_memory()); + +require "libcall.pl"; + + +my $srcFile; +my $refFiles; +my $bin_dir = $SCRIPT_DIR; +die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir; +my $FAST_SCORE="$bin_dir/../mteval/fast_score"; +die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE; + +my $iteration = 0.0; +my $max_iterations = 6; +my $metric = "ibm_bleu"; +my $iniFile; +my $weights; +my $initialWeights; +my $decode_nodes = 1; # number of decode nodes +my $pmem = "1g"; +my $dir; + +my $SCORER = $FAST_SCORE; +my $local_server = "$bin_dir/local_parallelize.pl"; +my $parallelize = "$bin_dir/../dpmert/parallelize.pl"; +my $libcall = "$bin_dir/../dpmert/libcall.pl"; +my $sentserver = "$bin_dir/../dpmert/sentserver"; +my $sentclient = "$bin_dir/../dpmert/sentclient"; +my $run_local_server = 0; +my $run_local = 0; +my $usefork; +my $pass_suffix = ''; + +my $cdec ="$bin_dir/kbest_mirav5"; #"$bin_dir/kbest_mira_rmmv2"; #"$bin_dir/kbest_mira_lv"; + +#my $cdec ="$bin_dir/kbest_mira_rmmv2"; #"$bin_dir/kbest_mirav5"; #"$bin_dir/kbest_mira_rmmv2"; #"$bin_dir/kbest_mira_lv"; +die "Can't find decoder in $cdec" unless -x $cdec; +my $decoder = $cdec; +my $decoderOpt; +my $update_size=250; +my $approx_score; +my $kbest_size=250; +my $metric_scale=1; +my $optimizer=2; +my $disable_clean = 0; +my $use_make; # use make to parallelize line search +my $density_prune; +my $cpbin=1; +my $help = 0; +my $epsilon = 0.0001; +my $step_size = 0.01; +my $gpref; +my $unique_kbest; +my $freeze; +my $latent; +my $sample_max; +my $hopes=1; +my $fears=1; + +my $range = 35000; +my $minimum = 15000; +my $portn = int(rand($range)) + $minimum; + + +# Process command-line options +Getopt::Long::Configure("no_auto_abbrev"); +if (GetOptions( + "decoder=s" => \$decoderOpt, + "decode-nodes=i" => \$decode_nodes, + "density-prune=f" => \$density_prune, + "dont-clean" => \$disable_clean, + "pass-suffix=s" => \$pass_suffix, + "use-fork" => \$usefork, + "epsilon=s" => 
\$epsilon, + "help" => \$help, + "local" => \$run_local, + "local_server" => \$run_local_server, + "use-make=i" => \$use_make, + "max-iterations=i" => \$max_iterations, + "pmem=s" => \$pmem, + "cpbin!" => \$cpbin, + "ref-files=s" => \$refFiles, + "metric=s" => \$metric, + "source-file=s" => \$srcFile, + "weights=s" => \$initialWeights, + "optimizer=i" => \$optimizer, + "metric-scale=i" => \$metric_scale, + "kbest-size=i" => \$kbest_size, + "update-size=i" => \$update_size, + "step-size=f" => \$step_size, + "hope-select=i" => \$hopes, + "fear-select=i" => \$fears, + "approx-score" => \$approx_score, + "unique-kbest" => \$unique_kbest, + "latent" => \$latent, + "sample-max=i" => \$sample_max, + "grammar-prefix=s" => \$gpref, + "freeze" => \$freeze, + "workdir=s" => \$dir, + ) == 0 || @ARGV!=1 || $help) { + print_help(); + exit; +} + +($iniFile) = @ARGV; + + +sub write_config; +sub enseg; +sub print_help; + +my $nodelist; +my $host =check_output("hostname"); chomp $host; +my $bleu; +my $interval_count = 0; +my $logfile; +my $projected_score; + + +#my $refs_comma_sep = get_comma_sep_refs($refFiles); +my $refs_comma_sep = get_comma_sep_refs('r',$refFiles); + +#my $refs_comma_sep_4cdec = get_comma_sep_refs_4cdec($refFiles); + +unless ($dir){ + $dir = "mira"; +} +unless ($dir =~ /^\//){ # convert relative path to absolute path + my $basedir = check_output("pwd"); + chomp $basedir; + $dir = "$basedir/$dir"; +} + +if ($decoderOpt){ $decoder = $decoderOpt; } + +# Initializations and helper functions +srand; + +my @childpids = (); +my @cleanupcmds = (); + +sub cleanup { + print STDERR "Cleanup...\n"; + for my $pid (@childpids){ unchecked_call("kill $pid"); } + for my $cmd (@cleanupcmds){ unchecked_call("$cmd"); } + exit 1; +}; + +# Always call cleanup, no matter how we exit +*CORE::GLOBAL::exit = + sub{ cleanup(); }; +$SIG{INT} = "cleanup"; +$SIG{TERM} = "cleanup"; +$SIG{HUP} = "cleanup"; + + +my $decoderBase = check_output("basename $decoder"); chomp $decoderBase; +my $newIniFile = "$dir/$decoderBase.ini"; +my $inputFileName = "$dir/input"; +my $user = $ENV{"USER"}; + + +# process ini file +-e $iniFile || die "Error: could not open $iniFile for reading\n"; +open(INI, $iniFile); + +use File::Basename qw(basename); +#pass bindir, refs to vars holding bin +sub modbin { + local $_; + my $bindir=shift; + check_call("mkdir -p $bindir"); + -d $bindir || die "couldn't make bindir $bindir"; + for (@_) { + my $src=$$_; + $$_="$bindir/".basename($src); + check_call("cp -p $src $$_"); + } +} +sub dirsize { + opendir ISEMPTY,$_[0]; + return scalar(readdir(ISEMPTY))-1; +} + + + + +if (-e $dir && dirsize($dir)>1 && -e "$dir/weights" ){ # allow preexisting logfile, binaries, but not dist-vest.pl outputs + die "ERROR: working dir $dir already exists\n\n"; +} else { + -e $dir || mkdir $dir; + mkdir "$dir/scripts"; + my $cmdfile="$dir/rerun-mira.sh"; + open CMD,'>',$cmdfile; + print CMD "cd ",&getcwd,"\n"; + my $cline=&cmdline."\n"; + print CMD $cline; + close CMD; + print STDERR $cline; + chmod(0755,$cmdfile); + unless (-e $initialWeights) { + print STDERR "Please specify an initial weights file with --initial-weights\n"; + print_help(); + exit; + } + check_call("cp $initialWeights $dir/weights.0"); + die "Can't find weights.0" unless (-e "$dir/weights.0"); +} +write_config(*STDERR); + +# Generate initial files and values +check_call("cp $iniFile $newIniFile"); +$iniFile = $newIniFile; + +my $newsrc = "$dir/dev.input"; +enseg($srcFile, $newsrc, $gpref); + +$srcFile = $newsrc; +my $devSize = 0; +open F, "<$srcFile" or 
die "Can't read $srcFile: $!"; +while() { $devSize++; } +close F; + +my $lastPScore = 0; +my $lastWeightsFile; + +# main optimization loop +#while (1){ +for (my $opt_iter=0; $opt_iter<$max_iterations; $opt_iter++) { + + print STDERR "\n\nITERATION $opt_iter\n==========\n"; + print STDERR "Using port $portn\n"; + + # iteration-specific files + my $runFile="$dir/run.raw.$opt_iter"; + my $onebestFile="$dir/1best.$opt_iter"; + my $logdir="$dir/logs.$opt_iter"; + my $decoderLog="$logdir/decoder.sentserver.log.$opt_iter"; + my $scorerLog="$logdir/scorer.log.$opt_iter"; + my $weightdir="$dir/weights.pass$opt_iter/"; + check_call("mkdir -p $logdir"); + check_call("mkdir -p $weightdir"); + + #decode + print STDERR "RUNNING DECODER AT "; + print STDERR unchecked_output("date"); +# my $im1 = $opt_iter - 1; + my $weightsFile="$dir/weights.$opt_iter"; + print "ITER $iteration " ; + my $cur_pass = "-p 0$opt_iter"; + my $decoder_cmd = "$decoder -c $iniFile -w $weightsFile $refs_comma_sep -m $metric -s $metric_scale -a -b $update_size -k $kbest_size -o $optimizer $cur_pass -O $weightdir -D $dir -h $hopes -f $fears -C $step_size"; + if($unique_kbest){ + $decoder_cmd .= " -u"; + } + if($latent){ + $decoder_cmd .= " -l"; + } + if($sample_max){ + $decoder_cmd .= " -t $sample_max"; + } + if ($density_prune) { + $decoder_cmd .= " --density_prune $density_prune"; + } + my $pcmd; + if ($run_local) { + $pcmd = "cat $srcFile |"; + } elsif ($use_make) { + # TODO: Throw error when decode_nodes is specified along with use_make + $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $use_make --"; + } elsif ($run_local_server){ + $pcmd = "cat $srcFile | $local_server $usefork -p $pmem -e $logdir -n $decode_nodes --"; + } + else { + $pcmd = "cat $srcFile | $parallelize $usefork -p $pmem -e $logdir -j $decode_nodes --baseport $portn --"; + } + my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile"; + print STDERR "COMMAND:\n$cmd\n"; + check_bash_call($cmd); + + my $retries = 0; + my $num_topbest; + while($retries < 5) { + $num_topbest = check_output("wc -l < $runFile"); + print STDERR "NUMBER OF TOP-BEST HYPs: $num_topbest\n"; + if($devSize == $num_topbest) { + last; + } else { + print STDERR "Incorrect number of topbest. Waiting for distributed filesystem and retrying...\n"; + sleep(3); + } + $retries++; + } + die "Dev set contains $devSize sentences, but we don't have topbest for all these! Decoder failure? 
Check $decoderLog\n" if ($devSize != $num_topbest);
+
+
+  #score the output from this iteration
+  open RUN, "<$runFile" or die "Can't read $runFile: $!";
+  open H, ">$runFile.H" or die;
+  open F, ">$runFile.F" or die;
+  open B, ">$runFile.B" or die;
+  while(<RUN>) {
+    chomp();
+    (my $hope,my $best,my $fear) = split(/ \|\|\| /);
+    print H "$hope \n";
+    print B "$best \n";
+    print F "$fear \n";
+  }
+  close RUN;
+  close F; close B; close H;
+
+  my $dec_score = check_output("cat $runFile.B | $SCORER $refs_comma_sep -l $metric");
+  my $dec_score_h = check_output("cat $runFile.H | $SCORER $refs_comma_sep -l $metric");
+  my $dec_score_f = check_output("cat $runFile.F | $SCORER $refs_comma_sep -l $metric");
+  chomp $dec_score; chomp $dec_score_h; chomp $dec_score_f;
+  print STDERR "DECODER SCORE: $dec_score HOPE: $dec_score_h FEAR: $dec_score_f\n";
+
+  # save space
+  check_call("gzip -f $runFile");
+  check_call("gzip -f $decoderLog");
+  my $iter_filler="";
+  if($opt_iter < 10)
+    {$iter_filler="0";}
+
+  my $nextIter = $opt_iter + 1;
+  my $newWeightsFile = "$dir/weights.$nextIter";
+  $lastWeightsFile = "$dir/weights.$opt_iter";
+
+  average_weights("$weightdir/weights.mira-pass*.*[0-9].gz", $newWeightsFile, $logdir);
+#  check_call("cp $lastW $newWeightsFile");
+#  if ($icc < 2) {
+#    print STDERR "\nREACHED STOPPING CRITERION: score change too little\n";
+#    last;
+#  }
+  system("gzip -f $logdir/kbes*");
+  print STDERR "\n==========\n";
+  $iteration++;
+}
+#find
+#my $cmd = `grep SCORE /fs/clip-galep5/lexical_tm/log.runmira.nist.20 | cat -n | sort -k +2 | tail -1`;
+#$cmd =~ m/([0-9]+)/;
+#$lastWeightsFile = "$dir/weights.$1";
+#check_call("ln -s $lastWeightsFile $dir/weights.tuned");
+print STDERR "\nFINAL WEIGHTS: $lastWeightsFile\n(Use -w <this file> with the decoder)\n\n";
+
+print STDOUT "$lastWeightsFile\n";
+
+sub get_lines {
+  my $fn = shift @_;
+  open FL, "<$fn" or die "Couldn't read $fn: $!";
+  my $lc = 0;
+  while(<FL>) { $lc++; }
+  return $lc;
+}
+
+sub get_comma_sep_refs {
+  my ($r,$p) = @_;
+  my $o = check_output("echo $p");
+  chomp $o;
+  my @files = split /\s+/, $o;
+  return "-$r " . join(" -$r ", @files);
+}
+
+
+sub read_weights_file {
+  my ($file) = @_;
+  open F, "<$file" or die "Couldn't read $file: $!";
+  my @r = ();
+  my $pm = -1;
+  while(<F>) {
+    next if /^#/;
+    next if /^\s*$/;
+    chomp;
+    if (/^(.+)\s+(.+)$/) {
+      my $m = $1;
+      my $w = $2;
+      die "Weights out of order: $m <= $pm" unless $m > $pm;
+      push @r, $w;
+    } else {
+      warn "Unexpected feature name in weight file: $_";
+    }
+  }
+  close F;
+  return join ' ', @r;
+}
+
+sub write_config {
+  my $fh = shift;
+  my $cleanup = "yes";
+  if ($disable_clean) {$cleanup = "no";}
+
+  print $fh "\n";
+  print $fh "DECODER:          $decoder\n";
+  print $fh "INI FILE:         $iniFile\n";
+  print $fh "WORKING DIR:      $dir\n";
+  print $fh "SOURCE (DEV):     $srcFile\n";
+  print $fh "REFS (DEV):       $refFiles\n";
+  print $fh "EVAL METRIC:      $metric\n";
+  print $fh "START ITERATION:  $iteration\n";
+  print $fh "MAX ITERATIONS:   $max_iterations\n";
+  print $fh "DECODE NODES:     $decode_nodes\n";
+  print $fh "HEAD NODE:        $host\n";
+  print $fh "PMEM (DECODING):  $pmem\n";
+  print $fh "CLEANUP:          $cleanup\n";
+  print $fh "INITIAL WEIGHTS:  $initialWeights\n";
+  print $fh "GRAMMAR PREFIX:   $gpref\n";
+}
+
+sub update_weights_file {
+  my ($neww, $rfn, $rpts) = @_;
+  my @feats = @$rfn;
+  my @pts = @$rpts;
+  my $num_feats = scalar @feats;
+  my $num_pts = scalar @pts;
+  die "$num_feats (num_feats) != $num_pts (num_pts)" unless $num_feats == $num_pts;
+  open G, ">$neww" or die;
+  for (my $i = 0; $i < $num_feats; $i++) {
+    my $f = $feats[$i];
+    my $lambda = $pts[$i];
+    print G "$f $lambda\n";
+  }
+  close G;
+}
+
+sub enseg {
+  my $src = shift;
+  my $newsrc = shift;
+  my $grammarpref = shift;
+
+  open(SRC, $src);
+  open(NEWSRC, ">$newsrc");
+  my $i=0;
+  while (my $line=<SRC>){
+    chomp $line;
+    if ($line =~ /^\s*<seg/i) {
+      if($line =~ /id="[0-9]+"/) {
+        print NEWSRC "$line\n";
+      } else {
+        die "When using segments with pre-generated <seg> tags, you must include a zero-based id attribute";
+      }
+    }
+    elsif (defined $grammarpref) {
+      print NEWSRC "<seg id=\"$i\" grammar=\"$grammarpref.$i.gz\">$line</seg>\n";}
+    else {
+      print NEWSRC "<seg id=\"$i\">$line</seg>\n";
+    }
+    $i++;
+  }
+  close SRC;
+  close NEWSRC;
+}
+
+sub print_help {
+  print "Something wrong\n";
+}
+
+sub cmdline {
+  return join ' ',($0,@ORIG_ARGV);
+}
+
+#buggy: last arg gets quoted sometimes?
+my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]}; +my $shell_escape_in_quote=qr{[\\"\$`!]}; + +sub escape_shell { + my ($arg)=@_; + return undef unless defined $arg; + if ($arg =~ /$is_shell_special/) { + $arg =~ s/($shell_escape_in_quote)/\\$1/g; + return "\"$arg\""; + } + return $arg; +} + +sub escaped_shell_args { + return map {local $_=$_;chomp;escape_shell($_)} @_; +} + +sub escaped_shell_args_str { + return join ' ',&escaped_shell_args(@_); +} + +sub escaped_cmdline { + return "$0 ".&escaped_shell_args_str(@ORIG_ARGV); +} + +sub average_weights { + + my $path = shift; + my $out = shift; + my $logpath = shift; + print "AVERAGE $path $out\n"; + my %feature_weights= (); + my $total =0; + my $total_mult =0; + sleep(10); + foreach my $file (glob "$path") + { + $file =~ /\/([^\/]+).gz$/; + my $fname = $1; + my $cmd = "gzip -d $file"; + $file =~ s/\.gz//; + check_bash_call($cmd); + my $mult = 0; + print "FILE $file \n"; + open SCORE, "< $file" or next; + $total++; + while( ) { + my $line = $_; + if ($line !~ m/^\#/) + { + my @s = split(" ",$line); + $feature_weights{$s[0]}+= $mult * $s[1]; + } + else + { + (my $msg,my $ran,$mult) = split(/ \|\|\| /); + print "RAN $ran $mult\n"; + } + } + $total_mult += $mult; + + close SCORE; + $cmd = "gzip $file"; check_bash_call($cmd); + } + +#print out new averaged weights + open OUT, "> $out" or next; + for my $f ( keys %feature_weights ) { + print "$f $feature_weights{$f} $total_mult\n"; + my $ave = $feature_weights{$f} / $total_mult; + + print "Printing $f $ave ||| "; + print OUT "$f $ave\n"; + } + +} -- cgit v1.2.3 From 2d58182ec6c961fe2f08f4a88886f3e128fb0113 Mon Sep 17 00:00:00 2001 From: Vladimir Eidelman Date: Sat, 13 Apr 2013 21:57:37 -0400 Subject: mira run script --- environment/LocalConfig.pm | 2 +- training/mira/Makefile.am | 7 +- training/mira/kbest_cut_mira.cc | 1010 ++++++++++++++++++++++++++++++++++ training/mira/kbest_mirav5.cc | 1148 --------------------------------------- training/mira/run_mira.pl | 181 ++++-- 5 files changed, 1141 insertions(+), 1207 deletions(-) create mode 100644 training/mira/kbest_cut_mira.cc delete mode 100644 training/mira/kbest_mirav5.cc (limited to 'training') diff --git a/environment/LocalConfig.pm b/environment/LocalConfig.pm index 627f7f8c..f7c3b1c7 100644 --- a/environment/LocalConfig.pm +++ b/environment/LocalConfig.pm @@ -34,7 +34,7 @@ my $CCONFIG = { #'QSubQueue' => '-q long', }, 'UMIACS' => { - 'HOST_REGEXP' => qr/^d.*\.umiacs\.umd\.edu$/, + 'HOST_REGEXP' => qr/^(n|s|d).*\.umiacs\.umd\.edu$/, 'JobControl' => 'qsub', 'QSubMemFlag' => '-l pmem=', 'QSubQueue' => '-q batch', diff --git a/training/mira/Makefile.am b/training/mira/Makefile.am index fa4fb22d..8cddc2d7 100644 --- a/training/mira/Makefile.am +++ b/training/mira/Makefile.am @@ -1,6 +1,11 @@ -bin_PROGRAMS = kbest_mira +bin_PROGRAMS = kbest_mira \ + kbest_cut_mira kbest_mira_SOURCES = kbest_mira.cc kbest_mira_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a + +kbest_cut_mira_SOURCES = kbest_cut_mira.cc +kbest_cut_mira_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a + AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git 
a/training/mira/kbest_cut_mira.cc b/training/mira/kbest_cut_mira.cc new file mode 100644 index 00000000..34eb00dc --- /dev/null +++ b/training/mira/kbest_cut_mira.cc @@ -0,0 +1,1010 @@ +#include +#include +#include +#include +#include +#include + +#include "config.h" + + +#include +#include +#include + +#include "sentence_metadata.h" +#include "scorer.h" +#include "verbose.h" +#include "viterbi.h" +#include "hg.h" +#include "prob.h" +#include "kbest.h" +#include "ff_register.h" +#include "decoder.h" +#include "filelib.h" +#include "fdict.h" +#include "time.h" +#include "sampler.h" + +#include "weights.h" +#include "sparse_vector.h" + +using namespace std; +using boost::shared_ptr; +namespace po = boost::program_options; + +bool invert_score; +boost::shared_ptr rng; +bool approx_score; +bool no_reweight; +bool no_select; +bool unique_kbest; +int update_list_size; +vector dense_weights_g; +double mt_metric_scale; +int optimizer; +int fear_select; +int hope_select; +bool pseudo_doc; +bool sent_approx; +bool checkloss; + +void SanityCheck(const vector& w) { + for (int i = 0; i < w.size(); ++i) { + assert(!isnan(w[i])); + assert(!isinf(w[i])); + } +} + +struct FComp { + const vector& w_; + FComp(const vector& w) : w_(w) {} + bool operator()(int a, int b) const { + return fabs(w_[a]) > fabs(w_[b]); + } +}; + +void ShowLargestFeatures(const vector& w) { + vector fnums(w.size()); + for (int i = 0; i < w.size(); ++i) + fnums[i] = i; + vector::iterator mid = fnums.begin(); + mid += (w.size() > 10 ? 10 : w.size()); + partial_sort(fnums.begin(), mid, fnums.end(), FComp(w)); + cerr << "TOP FEATURES:"; + for (vector::iterator i = fnums.begin(); i != mid; ++i) { + cerr << ' ' << FD::Convert(*i) << '=' << w[*i]; + } + cerr << endl; +} + +bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("input_weights,w",po::value(),"Input feature weights file") + ("source,i",po::value(),"Source file for development set") + ("pass,p", po::value()->default_value(15), "Current pass through the training data") + ("reference,r",po::value >(), "[REQD] Reference translation(s) (tokenized text file)") + ("mt_metric,m",po::value()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)") + ("optimizer,o",po::value()->default_value(1), "Optimizer (SGD=1, PA MIRA w/Delta=2, Cutting Plane MIRA=3, PA MIRA=4, Triple nbest list MIRA=5)") + ("fear,f",po::value()->default_value(1), "Fear selection (model-cost=1, maxcost=2, maxscore=3)") + ("hope,h",po::value()->default_value(1), "Hope selection (model+cost=1, mincost=2)") + ("max_step_size,C", po::value()->default_value(0.01), "regularization strength (C)") + ("random_seed,S", po::value(), "Random seed (if not specified, /dev/random will be used)") + ("mt_metric_scale,s", po::value()->default_value(1.0), "Amount to scale MT loss function by") + ("sent_approx,a", "Use smoothed sentence-level BLEU score for approximate scoring") + ("pseudo_doc,e", "Use pseudo-document BLEU score for approximate scoring") + ("no_reweight,d","Do not reweight forest for cutting plane") + ("no_select,n", "Do not use selection heuristic") + ("k_best_size,k", po::value()->default_value(250), "Size of hypothesis list to search for oracles") + ("update_k_best,b", po::value()->default_value(1), "Size of good, bad lists to perform update with") + ("unique_k_best,u", "Unique k-best translation list") + ("weights_output,O",po::value(),"Directory to write weights to") + 
("output_dir,D",po::value(),"Directory to place output in") + ("decoder_config,c",po::value(),"Decoder configuration file"); + po::options_description clo("Command line options"); + clo.add_options() + ("config", po::value(), "Configuration file") + ("help,H", "Print this help message and exit"); + po::options_description dconfig_options, dcmdline_options; + dconfig_options.add(opts); + dcmdline_options.add(opts).add(clo); + + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + if (conf->count("config")) { + ifstream config((*conf)["config"].as().c_str()); + po::store(po::parse_config_file(config, dconfig_options), *conf); + } + po::notify(*conf); + + if (conf->count("help") || !conf->count("input_weights") || !conf->count("decoder_config") || !conf->count("reference")) { + cerr << dcmdline_options << endl; + return false; + } + return true; +} + +//load previous translation, store array of each sentences score, subtract it from current sentence and replace with new translation score + + +static const double kMINUS_EPSILON = -1e-6; +static const double EPSILON = 0.000001; +static const double SMO_EPSILON = 0.0001; +static const double PSEUDO_SCALE = 0.95; +static const int MAX_SMO = 10; +int cur_pass; + +struct HypothesisInfo { + SparseVector features; + vector hyp; + double mt_metric; + double hope; + double fear; + double alpha; + double oracle_loss; + SparseVector oracle_feat_diff; + shared_ptr oracleN; +}; + +bool ApproxEqual(double a, double b) { + if (a == b) return true; + return (fabs(a-b)/fabs(b)) < EPSILON; +} + +typedef shared_ptr HI; +bool HypothesisCompareB(const HI& h1, const HI& h2 ) +{ + return h1->mt_metric > h2->mt_metric; +}; + + +bool HopeCompareB(const HI& h1, const HI& h2 ) +{ + return h1->hope > h2->hope; +}; + +bool FearCompareB(const HI& h1, const HI& h2 ) +{ + return h1->fear > h2->fear; +}; + +bool FearComparePred(const HI& h1, const HI& h2 ) +{ + return h1->features.dot(dense_weights_g) > h2->features.dot(dense_weights_g); +}; + +bool HypothesisCompareG(const HI& h1, const HI& h2 ) +{ + return h1->mt_metric < h2->mt_metric; +}; + + +void CuttingPlane(vector >* cur_c, bool* again, vector >& all_hyp, vector dense_weights) +{ + bool DEBUG_CUT = false; + shared_ptr max_fear, max_fear_in_set; + vector >& cur_constraint = *cur_c; + + if(no_reweight) + { + //find new hope hypothesis + for(int u=0;u!=all_hyp.size();u++) + { + double t_score = all_hyp[u]->features.dot(dense_weights); + all_hyp[u]->hope = 1 * all_hyp[u]->mt_metric + t_score; + } + + //sort hyps by hope score + sort(all_hyp.begin(),all_hyp.end(),HopeCompareB); + + double hope_score = all_hyp[0]->features.dot(dense_weights); + if(DEBUG_CUT) cerr << "New hope derivation score " << hope_score << endl; + + for(int u=0;u!=all_hyp.size();u++) + { + double t_score = all_hyp[u]->features.dot(dense_weights); + //all_hyp[u]->fear = -1*all_hyp[u]->mt_metric - hope_score + t_score; + + all_hyp[u]->fear = -1*all_hyp[u]->mt_metric + 1*all_hyp[0]->mt_metric - hope_score + t_score; //relative loss + // all_hyp[u]->oracle_loss = -1*all_hyp[u]->mt_metric - -1*all_hyp[0]->mt_metric; + //all_hyp[u]->oracle_feat_diff = all_hyp[0]->features - all_hyp[u]->features; + // all_hyp[u]->fear = -1 * all_hyp[u]->mt_metric + t_score; + } + + sort(all_hyp.begin(),all_hyp.end(),FearCompareB); + + } + //assign maximum fear derivation from all derivations + max_fear = all_hyp[0]; + + if(DEBUG_CUT) cerr <<"Cutting Plane Max Fear "<fear ; + for(int i=0; i < cur_constraint.size();i++) //select maximal violator already in 
constraint set + { + if (!max_fear_in_set || cur_constraint[i]->fear > max_fear_in_set->fear) + max_fear_in_set = cur_constraint[i]; + } + if(DEBUG_CUT) cerr << "Max Fear in constraint set " << max_fear_in_set->fear << endl; + + if(max_fear->fear > max_fear_in_set->fear + SMO_EPSILON) + { + cur_constraint.push_back(max_fear); + *again = true; + if(DEBUG_CUT) cerr << "Optimize Again " << *again << endl; + } +} + + +double ComputeDelta(vector >* cur_p, double max_step_size,vector dense_weights ) +{ + vector >& cur_pair = *cur_p; + double loss = cur_pair[0]->oracle_loss - cur_pair[1]->oracle_loss; + //double margin = -cur_pair[0]->oracle_feat_diff.dot(dense_weights) + cur_pair[1]->oracle_feat_diff.dot(dense_weights); //TODO: is it a problem that new oracle is used in diff? + //double num = loss - margin; + + + double margin = -(cur_pair[0]->oracleN->features.dot(dense_weights)- cur_pair[0]->features.dot(dense_weights)) + (cur_pair[1]->oracleN->features.dot(dense_weights) - cur_pair[1]->features.dot(dense_weights)); + const double num = margin + loss; + cerr << "LOSS: " << num << " Margin:" << margin << " BLEUL:" << loss << " " << cur_pair[1]->features.dot(dense_weights) << " " << cur_pair[0]->features.dot(dense_weights) <oracle_loss - cur_pair[0]->oracle_feat_diff.dot(dense_weights)) + - (cur_pair[1]->oracle_loss - cur_pair[1]->oracle_feat_diff.dot(dense_weights)); + */ + + SparseVector diff = cur_pair[0]->features; + diff -= cur_pair[1]->features; + /* SparseVector diff = cur_pair[0]->oracle_feat_diff; + diff -= cur_pair[1]->oracle_feat_diff;*/ + double diffsqnorm = diff.l2norm_sq(); + double delta; + if (diffsqnorm > 0) + delta = num / (diffsqnorm * max_step_size); + else + delta = 0; + cerr << " D1:" << delta; + //clip delta (enforce margin constraints) + + delta = max(-cur_pair[0]->alpha, min(delta, cur_pair[1]->alpha)); + cerr << " D2:" << delta; + return delta; +} + + +vector > SelectPair(vector >* cur_c) +{ + bool DEBUG_SELECT= false; + vector >& cur_constraint = *cur_c; + + vector > pair; + + if (no_select || optimizer == 2){ //skip heuristic search and return oracle and fear for 1-mira + // if(optimizer == 2) { + pair.push_back(cur_constraint[0]); + pair.push_back(cur_constraint[1]); + return pair; + // } + } + + for(int u=0;u != cur_constraint.size();u++) + { + shared_ptr max_fear; + + if(DEBUG_SELECT) cerr<< "cur alpha " << u << " " << cur_constraint[u]->alpha; + for(int i=0; i < cur_constraint.size();i++) //select maximal violator + { + if(i != u) + if (!max_fear || cur_constraint[i]->fear > max_fear->fear) + max_fear = cur_constraint[i]; + } + if(!max_fear) return pair; // + + if(DEBUG_SELECT) cerr << " F" << max_fear->fear << endl; + + + if ((cur_constraint[u]->alpha == 0) && (cur_constraint[u]->fear > max_fear->fear + SMO_EPSILON)) + { + for(int i=0; i < cur_constraint.size();i++) //select maximal violator + { + if(i != u) + if (cur_constraint[i]->alpha > 0) + { + pair.push_back(cur_constraint[u]); + pair.push_back(cur_constraint[i]); + cerr << "RETJURN from 1" << endl; + return pair; + } + } + } + if ((cur_constraint[u]->alpha > 0) && (cur_constraint[u]->fear < max_fear->fear - SMO_EPSILON)) + { + for(int i=0; i < cur_constraint.size();i++) //select maximal violator + { + if(i != u) + if (cur_constraint[i]->fear > cur_constraint[u]->fear) + { + pair.push_back(cur_constraint[u]); + pair.push_back(cur_constraint[i]); + return pair; + } + } + } + + } + return pair; //no more constraints to optimize, we're done here + +} + +struct GoodBadOracle { + vector > good; + vector > bad; 
+}; + +struct TrainingObserver : public DecoderObserver { + TrainingObserver(const int k, const DocScorer& d, vector* o, vector* cbs) : ds(d), oracles(*o), corpus_bleu_sent_stats(*cbs), kbest_size(k) { + // TrainingObserver(const int k, const DocScorer& d, vector* o) : ds(d), oracles(*o), kbest_size(k) { + + //calculate corpus bleu score from previous iterations 1-best for BLEU gain + if(!pseudo_doc && !sent_approx) + if(cur_pass > 0) + { + ScoreP acc; + for (int ii = 0; ii < corpus_bleu_sent_stats.size(); ii++) { + if (!acc) { acc = corpus_bleu_sent_stats[ii]->GetZero(); } + acc->PlusEquals(*corpus_bleu_sent_stats[ii]); + + } + corpus_bleu_stats = acc; + corpus_bleu_score = acc->ComputeScore(); + } + //corpus_src_length = 0; +} + const DocScorer& ds; + vector& corpus_bleu_sent_stats; + vector& oracles; + vector > cur_best; + shared_ptr cur_oracle; + const int kbest_size; + Hypergraph forest; + int cur_sent; + ScoreP corpus_bleu_stats; + float corpus_bleu_score; + + float corpus_src_length; + float curr_src_length; + + const int GetCurrentSent() const { + return cur_sent; + } + + const HypothesisInfo& GetCurrentBestHypothesis() const { + return *cur_best[0]; + } + + const vector > GetCurrentBest() const { + return cur_best; + } + + const HypothesisInfo& GetCurrentOracle() const { + return *cur_oracle; + } + + const Hypergraph& GetCurrentForest() const { + return forest; + } + + + virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) { + cur_sent = smeta.GetSentenceID(); + //cerr << "SOURCE " << smeta.GetSourceLength() << endl; + curr_src_length = (float) smeta.GetSourceLength(); + //UpdateOracles(smeta.GetSentenceID(), *hg); + if(unique_kbest) + UpdateOracles(smeta.GetSentenceID(), *hg); + else + UpdateOracles > >(smeta.GetSentenceID(), *hg); + forest = *hg; + + } + + shared_ptr MakeHypothesisInfo(const SparseVector& feats, const double score, const vector& hyp) { + shared_ptr h(new HypothesisInfo); + h->features = feats; + h->mt_metric = score; + h->hyp = hyp; + return h; + } + + template + void UpdateOracles(int sent_id, const Hypergraph& forest) { + + bool PRINT_LIST= false; + vector >& cur_good = oracles[sent_id].good; + vector >& cur_bad = oracles[sent_id].bad; + //TODO: look at keeping previous iterations hypothesis lists around + cur_best.clear(); + cur_good.clear(); + cur_bad.clear(); + + vector > all_hyp; + + typedef KBest::KBestDerivations, ESentenceTraversal,Filter> K; + K kbest(forest,kbest_size); + + //KBest::KBestDerivations, ESentenceTraversal> kbest(forest, kbest_size); + for (int i = 0; i < kbest_size; ++i) { + //const KBest::KBestDerivations, ESentenceTraversal>::Derivation* d = + typename K::Derivation *d = + kbest.LazyKthBest(forest.nodes_.size() - 1, i); + if (!d) break; + + float sentscore; + if(cur_pass > 0 && !pseudo_doc && !sent_approx) + { + ScoreP sent_stats = ds[sent_id]->ScoreCandidate(d->yield); + ScoreP corpus_no_best = corpus_bleu_stats->GetZero(); + + corpus_bleu_stats->Subtract(*corpus_bleu_sent_stats[sent_id], &*corpus_no_best); + sent_stats->PlusEquals(*corpus_no_best, 0.5); + + //compute gain from new sentence in 1-best corpus + sentscore = mt_metric_scale * (sent_stats->ComputeScore() - corpus_no_best->ComputeScore());// - corpus_bleu_score); + } + else if(pseudo_doc) //pseudo-corpus smoothing + { + float src_scale = corpus_src_length + curr_src_length; + ScoreP sent_stats = ds[sent_id]->ScoreCandidate(d->yield); + if(!corpus_bleu_stats){ corpus_bleu_stats = sent_stats->GetZero();} + + 
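+      // sent_stats at this point holds only the candidate's n-gram counts;
+      // adding the decayed corpus stats before ComputeScore() yields the
+      // pseudo-document BLEU, and the src_scale factor (decayed corpus source
+      // length plus the current sentence's) keeps the resulting loss roughly
+      // proportional to the amount of text it is computed over.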
sent_stats->PlusEquals(*corpus_bleu_stats); + sentscore = mt_metric_scale * src_scale * sent_stats->ComputeScore(); + + } + else //use sentence-level smoothing ( used when cur_pass=0 if not pseudo_doc) + { + + sentscore = mt_metric_scale * (ds[sent_id]->ScoreCandidate(d->yield)->ComputeScore()); + } + + if (invert_score) sentscore *= -1.0; + + if (i < update_list_size){ + if(PRINT_LIST)cerr << TD::GetString(d->yield) << " ||| " << d->score << " ||| " << sentscore << endl; + cur_best.push_back( MakeHypothesisInfo(d->feature_values, sentscore, d->yield)); + } + + all_hyp.push_back(MakeHypothesisInfo(d->feature_values, sentscore,d->yield)); //store all hyp to extract hope and fear + } + + if(pseudo_doc){ + //update psuedo-doc stats + string details, details2; + corpus_bleu_stats->ScoreDetails(&details2); + ScoreP sent_stats = ds[sent_id]->ScoreCandidate(cur_best[0]->hyp); + corpus_bleu_stats->PlusEquals(*sent_stats); + + sent_stats->ScoreDetails(&details); + sent_stats = corpus_bleu_stats; + corpus_bleu_stats = sent_stats->GetZero(); + corpus_bleu_stats->PlusEquals(*sent_stats, PSEUDO_SCALE); + + corpus_src_length = PSEUDO_SCALE * (corpus_src_length + curr_src_length); + cerr << "CORP S " << corpus_src_length << " " << curr_src_length << "\n" << details << "\n" << details2 << endl; + } + + + //figure out how many hyps we can keep maximum + int temp_update_size = update_list_size; + if (all_hyp.size() < update_list_size){ temp_update_size = all_hyp.size();} + + //sort all hyps by sentscore (eg. bleu) + sort(all_hyp.begin(),all_hyp.end(),HypothesisCompareB); + + if(PRINT_LIST){ cerr << "Sorting " << endl; for(int u=0;u!=all_hyp.size();u++) cerr << all_hyp[u]->mt_metric << " " << all_hyp[u]->features.dot(dense_weights_g) << endl; } + + if(hope_select == 1) + { + //find hope hypothesis using model + bleu + if (PRINT_LIST) cerr << "HOPE " << endl; + for(int u=0;u!=all_hyp.size();u++) + { + double t_score = all_hyp[u]->features.dot(dense_weights_g); + all_hyp[u]->hope = all_hyp[u]->mt_metric + t_score; + if (PRINT_LIST) cerr << all_hyp[u]->mt_metric << " H:" << all_hyp[u]->hope << " S:" << t_score << endl; + + } + + //sort hyps by hope score + sort(all_hyp.begin(),all_hyp.end(),HopeCompareB); + } + + //assign cur_good the sorted list + cur_good.insert(cur_good.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size); + if(PRINT_LIST) { cerr << "GOOD" << endl; for(int u=0;u!=cur_good.size();u++) cerr << cur_good[u]->mt_metric << " " << cur_good[u]->hope << endl;} + + shared_ptr& oracleN = cur_good[0]; + + + if(fear_select == 1){ //compute fear hyps with model - bleu + if (PRINT_LIST) cerr << "FEAR " << endl; + double hope_score = oracleN->features.dot(dense_weights_g); + + if (PRINT_LIST) cerr << "hope score " << hope_score << endl; + for(int u=0;u!=all_hyp.size();u++) + { + double t_score = all_hyp[u]->features.dot(dense_weights_g); + //all_hyp[u]->fear = -1*all_hyp[u]->mt_metric - hope_score + t_score; + + /* all_hyp[u]->fear = -1*all_hyp[u]->mt_metric - -1*cur_oracle->mt_metric - hope_score + t_score; //relative loss + all_hyp[u]->oracle_loss = -1*all_hyp[u]->mt_metric - -1*cur_oracle->mt_metric; + all_hyp[u]->oracle_feat_diff = cur_oracle->features - all_hyp[u]->features;*/ + + all_hyp[u]->fear = -1*all_hyp[u]->mt_metric + 1*oracleN->mt_metric - hope_score + t_score; //relative loss + all_hyp[u]->oracle_loss = -1*all_hyp[u]->mt_metric + 1*oracleN->mt_metric; + all_hyp[u]->oracle_feat_diff = oracleN->features - all_hyp[u]->features; + all_hyp[u]->oracleN=oracleN; + // all_hyp[u]->fear = -1 * 
all_hyp[u]->mt_metric + t_score; + if (PRINT_LIST) cerr << all_hyp[u]->mt_metric << " H:" << all_hyp[u]->hope << " F:" << all_hyp[u]->fear << endl; + + } + + sort(all_hyp.begin(),all_hyp.end(),FearCompareB); + + cur_bad.insert(cur_bad.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size); + } + else if(fear_select == 2) //select fear based on cost + { + cur_bad.insert(cur_bad.begin(), all_hyp.end()-temp_update_size, all_hyp.end()); + reverse(cur_bad.begin(),cur_bad.end()); + } + else //pred-based, fear_select = 3 + { + sort(all_hyp.begin(),all_hyp.end(),FearComparePred); + cur_bad.insert(cur_bad.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size); + } + + + if(PRINT_LIST){ cerr<< "BAD"<mt_metric << " H:" << cur_bad[u]->hope << " F:" << cur_bad[u]->fear << endl;} + + cerr << "GOOD (BEST): " << cur_good[0]->mt_metric << endl; + cerr << " CUR: " << cur_best[0]->mt_metric << endl; + cerr << " BAD (WORST): " << cur_bad[0]->mt_metric << endl; + } +}; + +void ReadTrainingCorpus(const string& fname, vector* c) { + + + ReadFile rf(fname); + istream& in = *rf.stream(); + string line; + while(in) { + getline(in, line); + if (!in) break; + c->push_back(line); + } +} + +void ReadPastTranslationForScore(const int cur_pass, vector* c, DocScorer& ds, const string& od) +{ + cerr << "Reading BLEU gain file "; + string fname; + if(cur_pass == 0) + { + fname = od + "/run.raw.init"; + } + else + { + int last_pass = cur_pass - 1; + fname = od + "/run.raw." + boost::lexical_cast(last_pass) + ".B"; + } + cerr << fname << "\n"; + ReadFile rf(fname); + istream& in = *rf.stream(); + ScoreP acc; + string line; + int lc = 0; + while(in) { + getline(in, line); + if (line.empty() && !in) break; + vector sent; + TD::ConvertSentence(line, &sent); + ScoreP sentscore = ds[lc]->ScoreCandidate(sent); + c->push_back(sentscore); + if (!acc) { acc = sentscore->GetZero(); } + acc->PlusEquals(*sentscore); + ++lc; + + } + + + assert(lc > 0); + float score = acc->ComputeScore(); + string details; + acc->ScoreDetails(&details); + cerr << "INIT RUN " << details << score << endl; + +} + + +int main(int argc, char** argv) { + register_feature_functions(); + SetSilent(true); // turn off verbose decoder output + + po::variables_map conf; + if (!InitCommandLine(argc, argv, &conf)) return 1; + + if (conf.count("random_seed")) + rng.reset(new MT19937(conf["random_seed"].as())); + else + rng.reset(new MT19937); + + vector corpus; + //ReadTrainingCorpus(conf["source"].as(), &corpus); + + const string metric_name = conf["mt_metric"].as(); + optimizer = conf["optimizer"].as(); + fear_select = conf["fear"].as(); + hope_select = conf["hope"].as(); + mt_metric_scale = conf["mt_metric_scale"].as(); + approx_score = conf.count("approx_score"); + no_reweight = conf.count("no_reweight"); + no_select = conf.count("no_select"); + update_list_size = conf["update_k_best"].as(); + unique_kbest = conf.count("unique_k_best"); + pseudo_doc = conf.count("pseudo_doc"); + sent_approx = conf.count("sent_approx"); + cerr << "PSEUDO " << pseudo_doc << " SENT " << sent_approx << endl; + if(pseudo_doc) + mt_metric_scale=1; + + const string weights_dir = conf["weights_output"].as(); + const string output_dir = conf["output_dir"].as(); + ScoreType type = ScoreTypeFromString(metric_name); + + //establish metric used for tuning + if (type == TER) { + invert_score = true; + // approx_score = false; + } else { + invert_score = false; + } + + //load references + DocScorer ds(type, conf["reference"].as >(), ""); + cerr << "Loaded " << ds.size() << " references 
for scoring with " << metric_name << endl; + vector corpus_bleu_sent_stats; + + //check training pass,if >0, then use previous iterations corpus bleu stats + cur_pass = conf["pass"].as(); + if(cur_pass > 0) + { + ReadPastTranslationForScore(cur_pass, &corpus_bleu_sent_stats, ds, output_dir); + } + /* if (ds.size() != corpus.size()) { + cerr << "Mismatched number of references (" << ds.size() << ") and sources (" << corpus.size() << ")\n"; + return 1; + }*/ + cerr << "Optimizing with " << optimizer << endl; + // load initial weights + /*Weights weights; + weights.InitFromFile(conf["input_weights"].as()); + SparseVector lambdas; + weights.InitSparseVector(&lambdas); + */ + + + + ReadFile ini_rf(conf["decoder_config"].as()); + Decoder decoder(ini_rf.stream()); + + vector& dense_weights = decoder.CurrentWeightVector(); + + SparseVector lambdas; + Weights::InitFromFile(conf["input_weights"].as(), &dense_weights); + Weights::InitSparseVector(dense_weights, &lambdas); + + const string input = decoder.GetConf()["input"].as(); + //const bool show_feature_dictionary = decoder.GetConf().count("show_feature_dictionary"); + if (!SILENT) cerr << "Reading input from " << ((input == "-") ? "STDIN" : input.c_str()) << endl; + ReadFile in_read(input); + istream *in = in_read.stream(); + assert(*in); + string buf; + + const double max_step_size = conf["max_step_size"].as(); + + + // assert(corpus.size() > 0); + vector oracles(ds.size()); + + TrainingObserver observer(conf["k_best_size"].as(), ds, &oracles, &corpus_bleu_sent_stats); + + int cur_sent = 0; + int lcount = 0; + double objective=0; + double tot_loss = 0; + int dots = 0; + // int cur_pass = 1; + // vector dense_weights; + SparseVector tot; + SparseVector final_tot; + // tot += lambdas; // initial weights + // lcount++; // count for initial weights + + //string msg = "# MIRA tuned weights"; + // while (cur_pass <= max_iteration) { + SparseVector old_lambdas = lambdas; + tot.clear(); + tot += lambdas; + cerr << "PASS " << cur_pass << " " << endl << lambdas << endl; + ScoreP acc, acc_h, acc_f; + + while(*in) { + getline(*in, buf); + if (buf.empty()) continue; + //TODO: allow batch updating + lambdas.init_vector(&dense_weights); + dense_weights_g = dense_weights; + decoder.SetId(cur_sent); + decoder.Decode(buf, &observer); // decode the sentence, calling Notify to get the hope,fear, and model best hyps. 
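The hope/fear bookkeeping added above boils down to two scoring rules: hope = metric + model score, and fear = (model score - oracle model score) + (oracle metric - metric), i.e. margin plus loss relative to the current oracle (hope) hypothesis. A minimal sketch of just those rules, using a hypothetical Hyp struct in place of cdec's HypothesisInfo (field and function names here are illustrative, not from the patch):

    #include <algorithm>
    #include <vector>

    struct Hyp { double model, bleu, hope, fear; };  // stand-in for HypothesisInfo

    // hope = metric + model score; the best hope hypothesis becomes the oracle.
    // fear = margin over the oracle under the model, plus metric loss vs. the oracle.
    void ScoreHopeFear(std::vector<Hyp>& kbest) {
      for (Hyp& h : kbest) h.hope = h.bleu + h.model;
      std::sort(kbest.begin(), kbest.end(),
                [](const Hyp& a, const Hyp& b) { return a.hope > b.hope; });
      const Hyp oracle = kbest[0];  // copy, so later writes don't alias it
      for (Hyp& h : kbest)
        h.fear = (h.model - oracle.model) + (oracle.bleu - h.bleu);
    }

Sorting by fear in decreasing order then yields the most violated hypotheses, which is exactly what the FearCompareB sort in the hunk above does.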
+
+ cur_sent = observer.GetCurrentSent();
+ cerr << "SENT: " << cur_sent << endl;
+ const HypothesisInfo& cur_hyp = observer.GetCurrentBestHypothesis();
+ const HypothesisInfo& cur_good = *oracles[cur_sent].good[0];
+ const HypothesisInfo& cur_bad = *oracles[cur_sent].bad[0];
+
+ vector >& cur_good_v = oracles[cur_sent].good;
+ vector >& cur_bad_v = oracles[cur_sent].bad;
+ vector > cur_best_v = observer.GetCurrentBest();
+
+ tot_loss += cur_hyp.mt_metric;
+
+ //score hyps to be able to compute corpus level bleu after we finish this iteration through the corpus
+ ScoreP sentscore = ds[cur_sent]->ScoreCandidate(cur_hyp.hyp);
+ if (!acc) { acc = sentscore->GetZero(); }
+ acc->PlusEquals(*sentscore);
+
+ ScoreP hope_sentscore = ds[cur_sent]->ScoreCandidate(cur_good.hyp);
+ if (!acc_h) { acc_h = hope_sentscore->GetZero(); }
+ acc_h->PlusEquals(*hope_sentscore);
+
+ ScoreP fear_sentscore = ds[cur_sent]->ScoreCandidate(cur_bad.hyp);
+ if (!acc_f) { acc_f = fear_sentscore->GetZero(); }
+ acc_f->PlusEquals(*fear_sentscore);
+
+ if(optimizer == 4) { //passive-aggressive update (single dual coordinate step)
+
+ double margin = cur_bad.features.dot(dense_weights) - cur_good.features.dot(dense_weights);
+ double mt_loss = (cur_good.mt_metric - cur_bad.mt_metric);
+ const double loss = margin + mt_loss;
+ cerr << "LOSS: " << loss << " Margin:" << margin << " BLEUL:" << mt_loss << " " << cur_bad.features.dot(dense_weights) << " " << cur_good.features.dot(dense_weights) << endl;
+ if (loss > 0.0 || !checkloss) {
+ SparseVector diff = cur_good.features;
+ diff -= cur_bad.features;
+
+ double diffsqnorm = diff.l2norm_sq();
+ double delta;
+ if (diffsqnorm > 0)
+ delta = loss / (diffsqnorm);
+ else
+ delta = 0;
+
+ if (delta > max_step_size) delta = max_step_size;
+ lambdas += (cur_good.features * delta);
+ lambdas -= (cur_bad.features * delta);
+
+ }
+ }
+ else if(optimizer == 1) //sgd - nonadapted step size
+ {
+
+ lambdas += (cur_good.features) * max_step_size;
+ lambdas -= (cur_bad.features) * max_step_size;
+ }
+ else if(optimizer == 5) //full mira with n-best list of constraints from hope, fear, model best
+ {
+ vector > cur_constraint;
+ cur_constraint.insert(cur_constraint.begin(), cur_bad_v.begin(), cur_bad_v.end());
+ cur_constraint.insert(cur_constraint.begin(), cur_best_v.begin(), cur_best_v.end());
+ cur_constraint.insert(cur_constraint.begin(), cur_good_v.begin(), cur_good_v.end());
+
+ bool optimize_again;
+ vector > cur_pair;
+ //SMO
+ for(int u=0;u!=cur_constraint.size();u++)
+ cur_constraint[u]->alpha =0;
+
+ cur_constraint[0]->alpha =1; //set oracle to alpha=1
+
+ cerr <<"Optimizing with " << cur_constraint.size() << " constraints" << endl;
+ int smo_iter = MAX_SMO, smo_iter2 = MAX_SMO;
+ int iter, iter2 =0;
+ bool DEBUG_SMO = false;
+ while (iter2 < smo_iter2)
+ {
+ iter =0;
+ while (iter < smo_iter)
+ {
+ optimize_again = true;
+ for (int i = 0; i< cur_constraint.size(); i++)
+ for (int j = i+1; j< cur_constraint.size(); j++)
+ {
+ if(DEBUG_SMO) cerr << "start " << i << " " << j << endl;
+ cur_pair.clear();
+ cur_pair.push_back(cur_constraint[j]);
+ cur_pair.push_back(cur_constraint[i]);
+ double delta = ComputeDelta(&cur_pair,max_step_size, dense_weights);
+
+ if (delta == 0) optimize_again = false;
+ cur_constraint[j]->alpha += delta;
+ cur_constraint[i]->alpha -= delta;
+ double step_size = delta * max_step_size;
+
+ lambdas += (cur_constraint[i]->features) * step_size;
+ lambdas -= (cur_constraint[j]->features) * step_size;
+ if(DEBUG_SMO) cerr << "SMO opt " << iter << " " << i << " " << j << " " <<
delta << " " << cur_pair[0]->alpha << " " << cur_pair[1]->alpha << endl; + } + iter++; + + if(!optimize_again) + { + iter = MAX_SMO; + cerr << "Optimization stopped, delta =0" << endl; + } + } + iter2++; + } + } + else if(optimizer == 2 || optimizer == 3) //PA and Cutting Plane MIRA update + { + bool DEBUG_SMO= true; + vector > cur_constraint; + cur_constraint.push_back(cur_good_v[0]); //add oracle to constraint set + bool optimize_again = true; + int cut_plane_calls = 0; + while (optimize_again) + { + if(DEBUG_SMO) cerr<< "optimize again: " << optimize_again << endl; + if(optimizer == 2){ //PA + cur_constraint.push_back(cur_bad_v[0]); + + //check if we have a violation + if(!(cur_constraint[1]->fear > cur_constraint[0]->fear + SMO_EPSILON)) + { + optimize_again = false; + cerr << "Constraint not violated" << endl; + } + } + else + { //cutting plane to add constraints + if(DEBUG_SMO) cerr<< "Cutting Plane " << cut_plane_calls << " with " << lambdas << endl; + optimize_again = false; + cut_plane_calls++; + CuttingPlane(&cur_constraint, &optimize_again, oracles[cur_sent].bad, dense_weights); + if (cut_plane_calls >= MAX_SMO) optimize_again = false; + } + + if(optimize_again) + { + //SMO + for(int u=0;u!=cur_constraint.size();u++) + { + cur_constraint[u]->alpha =0; + } + cur_constraint[0]->alpha = 1; + cerr <<" Optimizing with " << cur_constraint.size() << " constraints" << endl; + int smo_iter = MAX_SMO; + int iter =0; + while (iter < smo_iter) + { + //select pair to optimize from constraint set + vector > cur_pair = SelectPair(&cur_constraint); + + if(cur_pair.empty()){ + iter=MAX_SMO; + cerr << "Undefined pair " << endl; + continue; + } //pair is undefined so we are done with this smo + + double delta = ComputeDelta(&cur_pair,max_step_size, dense_weights); + + cur_pair[0]->alpha += delta; + cur_pair[1]->alpha -= delta; + double step_size = delta * max_step_size; + cerr << "step " << step_size << endl; + + lambdas += (cur_pair[1]->features) * step_size; + lambdas -= (cur_pair[0]->features) * step_size; + cerr << " Lambdas " << lambdas << endl; + //reload weights based on update + + dense_weights.clear(); + lambdas.init_vector(&dense_weights); + dense_weights_g = dense_weights; + iter++; + + if(DEBUG_SMO) cerr << "SMO opt " << iter << " " << delta << " " << cur_pair[0]->alpha << " " << cur_pair[1]->alpha << endl; + if(no_select) //don't use selection heuristic to determine when to stop SMO, rather just when delta =0 + if (delta == 0) iter = MAX_SMO; + + //only perform one dual coordinate ascent step + if(optimizer == 2) + { + optimize_again = false; + iter = MAX_SMO; + } + } + if(optimizer == 3) + { + if(!no_reweight) //reweight the forest and select a new k-best + { + if(DEBUG_SMO) cerr<< "Decoding with new weights -- now orac are " << oracles[cur_sent].good.size() << endl; + Hypergraph hg = observer.GetCurrentForest(); + hg.Reweight(dense_weights); + if(unique_kbest) + observer.UpdateOracles(cur_sent, hg); + else + observer.UpdateOracles > >(cur_sent, hg); + } + } + } + + } + + //print objective after this sentence + double lambda_change = (lambdas - old_lambdas).l2norm_sq(); + double max_fear = cur_constraint[cur_constraint.size()-1]->fear; + double temp_objective = 0.5 * lambda_change;// + max_step_size * max_fear; + + for(int u=0;u!=cur_constraint.size();u++) + { + cerr << cur_constraint[u]->alpha << " " << cur_constraint[u]->hope << " " << cur_constraint[u]->fear << endl; + temp_objective += cur_constraint[u]->alpha * cur_constraint[u]->fear; + } + objective += temp_objective; + + cerr 
<< "SENT OBJ: " << temp_objective << " NEW OBJ: " << objective << endl; + } + + + if ((cur_sent * 40 / ds.size()) > dots) { ++dots; cerr << '.'; } + tot += lambdas; + ++lcount; + cur_sent++; + + cout << TD::GetString(cur_good_v[0]->hyp) << " ||| " << TD::GetString(cur_best_v[0]->hyp) << " ||| " << TD::GetString(cur_bad_v[0]->hyp) << endl; + + } + + cerr << "FINAL OBJECTIVE: "<< objective << endl; + final_tot += tot; + cerr << "Translated " << lcount << " sentences " << endl; + cerr << " [AVG METRIC LAST PASS=" << (tot_loss / lcount) << "]\n"; + tot_loss = 0; + + int node_id = rng->next() * 100000; + cerr << " Writing weights to " << node_id << endl; + Weights::ShowLargestFeatures(dense_weights); + dots = 0; + ostringstream os; + os << weights_dir << "/weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << "." << node_id << ".gz"; + string msg = "# MIRA tuned weights ||| " + boost::lexical_cast(node_id) + " ||| " + boost::lexical_cast(lcount); + //Weights.InitFromVector(lambdas); + lambdas.init_vector(&dense_weights); + Weights::WriteToFile(os.str(), dense_weights, true, &msg); + + SparseVector x = tot; + x /= lcount; + ostringstream sa; + string msga = "# MIRA tuned weights AVERAGED ||| " + boost::lexical_cast(node_id) + " ||| " + boost::lexical_cast(lcount); + sa << weights_dir << "/weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << "." << node_id << "-avg.gz"; + x.init_vector(&dense_weights); + Weights::WriteToFile(sa.str(), dense_weights, true, &msga); + + + cerr << "Optimization complete.\n"; + return 0; +} + diff --git a/training/mira/kbest_mirav5.cc b/training/mira/kbest_mirav5.cc deleted file mode 100644 index cea5cf67..00000000 --- a/training/mira/kbest_mirav5.cc +++ /dev/null @@ -1,1148 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include "config.h" - - -#include -#include -#include - -#include "sentence_metadata.h" -#include "scorer.h" -#include "verbose.h" -#include "viterbi.h" -#include "hg.h" -#include "prob.h" -#include "kbest.h" -#include "ff_register.h" -#include "decoder.h" -#include "filelib.h" -#include "fdict.h" -#include "time.h" -#include "sampler.h" - -#include "weights.h" -#include "sparse_vector.h" - -using namespace std; -using boost::shared_ptr; -namespace po = boost::program_options; - -bool invert_score; -boost::shared_ptr rng; -bool approx_score; -bool no_reweight; -bool no_select; -bool unique_kbest; -int update_list_size; -vector dense_weights_g; -double mt_metric_scale; -int optimizer; -int fear_select; -int hope_select; - -bool pseudo_doc; - -void SanityCheck(const vector& w) { - for (int i = 0; i < w.size(); ++i) { - assert(!isnan(w[i])); - assert(!isinf(w[i])); - } -} - -struct FComp { - const vector& w_; - FComp(const vector& w) : w_(w) {} - bool operator()(int a, int b) const { - return fabs(w_[a]) > fabs(w_[b]); - } -}; - -void ShowLargestFeatures(const vector& w) { - vector fnums(w.size()); - for (int i = 0; i < w.size(); ++i) - fnums[i] = i; - vector::iterator mid = fnums.begin(); - mid += (w.size() > 10 ? 
10 : w.size()); - partial_sort(fnums.begin(), mid, fnums.end(), FComp(w)); - cerr << "TOP FEATURES:"; - for (vector::iterator i = fnums.begin(); i != mid; ++i) { - cerr << ' ' << FD::Convert(*i) << '=' << w[*i]; - } - cerr << endl; -} - -bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("input_weights,w",po::value(),"Input feature weights file") - ("source,i",po::value(),"Source file for development set") - ("passes,p", po::value()->default_value(15), "Number of passes through the training data") - ("reference,r",po::value >(), "[REQD] Reference translation(s) (tokenized text file)") - ("mt_metric,m",po::value()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)") - ("optimizer,o",po::value()->default_value(1), "Optimizer (sgd=1, mira 1-fear=2, full mira w/ cutting plane=3, full mira w/ nbest list=5, local update=4)") - ("fear,f",po::value()->default_value(1), "Fear selection (model-cost=1, max-cost=2, pred-base=3)") - ("hope,h",po::value()->default_value(1), "Hope selection (model+cost=1, max-cost=2, local-cost=3)") - ("max_step_size,C", po::value()->default_value(0.01), "regularization strength (C)") - ("random_seed,S", po::value(), "Random seed (if not specified, /dev/random will be used)") - ("mt_metric_scale,s", po::value()->default_value(1.0), "Amount to scale MT loss function by") - ("approx_score,a", "Use smoothed sentence-level BLEU score for approximate scoring") - ("no_reweight,d","Do not reweight forest for cutting plane") - ("no_select,n", "Do not use selection heuristic") - ("k_best_size,k", po::value()->default_value(250), "Size of hypothesis list to search for oracles") - ("update_k_best,b", po::value()->default_value(1), "Size of good, bad lists to perform update with") - ("unique_k_best,u", "Unique k-best translation list") - ("weights_output,O",po::value(),"Directory to write weights to") - ("output_dir,D",po::value(),"Directory to place output in") - ("decoder_config,c",po::value(),"Decoder configuration file"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,H", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || !conf->count("input_weights") || !conf->count("decoder_config") || !conf->count("reference")) { - cerr << dcmdline_options << endl; - return false; - } - return true; -} - -//load previous translation, store array of each sentences score, subtract it from current sentence and replace with new translation score - - -static const double kMINUS_EPSILON = -1e-6; -static const double EPSILON = 0.000001; -static const double SMO_EPSILON = 0.0001; -static const double PSEUDO_SCALE = 0.95; -static const int MAX_SMO = 10; -int cur_pass; - -struct HypothesisInfo { - SparseVector features; - vector hyp; - double mt_metric; - double hope; - double fear; - double alpha; - double oracle_loss; - SparseVector oracle_feat_diff; - shared_ptr oracleN; -}; - -bool ApproxEqual(double a, double b) { - if (a == b) return true; - return (fabs(a-b)/fabs(b)) < EPSILON; -} - 
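The deleted ShowLargestFeatures above (its replacement in the new code is Weights::ShowLargestFeatures) relies on std::partial_sort so that only the ten largest-magnitude weights are ordered rather than the whole feature vector. The same idiom in isolation, as a standalone sketch without cdec's FD dictionary:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <numeric>
    #include <vector>

    // Print the k largest-magnitude entries of w; O(n log k) instead of a full sort.
    void ShowTopK(const std::vector<double>& w, std::size_t k) {
      std::vector<std::size_t> idx(w.size());
      std::iota(idx.begin(), idx.end(), 0);          // 0, 1, ..., n-1
      const std::size_t n = std::min(k, idx.size());
      std::partial_sort(idx.begin(), idx.begin() + n, idx.end(),
                        [&](std::size_t a, std::size_t b) {
                          return std::fabs(w[a]) > std::fabs(w[b]);
                        });
      for (std::size_t i = 0; i < n; ++i)
        std::printf(" feat%zu=%g", idx[i], w[idx[i]]);
      std::printf("\n");
    }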
-typedef shared_ptr HI; -bool HypothesisCompareB(const HI& h1, const HI& h2 ) -{ - return h1->mt_metric > h2->mt_metric; -}; - - -bool HopeCompareB(const HI& h1, const HI& h2 ) -{ - return h1->hope > h2->hope; -}; - -bool FearCompareB(const HI& h1, const HI& h2 ) -{ - return h1->fear > h2->fear; -}; - -bool FearComparePred(const HI& h1, const HI& h2 ) -{ - return h1->features.dot(dense_weights_g) > h2->features.dot(dense_weights_g); -}; - -bool HypothesisCompareG(const HI& h1, const HI& h2 ) -{ - return h1->mt_metric < h2->mt_metric; -}; - - -void CuttingPlane(vector >* cur_c, bool* again, vector >& all_hyp, vector dense_weights) -{ - bool DEBUG_CUT = false; - shared_ptr max_fear, max_fear_in_set; - vector >& cur_constraint = *cur_c; - - if(no_reweight) - { - //find new hope hypothesis - for(int u=0;u!=all_hyp.size();u++) - { - double t_score = all_hyp[u]->features.dot(dense_weights); - all_hyp[u]->hope = 1 * all_hyp[u]->mt_metric + t_score; - //if (PRINT_LIST) cerr << all_hyp[u]->mt_metric << " H:" << all_hyp[u]->hope << " S:" << t_score << endl; - - } - - //sort hyps by hope score - sort(all_hyp.begin(),all_hyp.end(),HopeCompareB); - - double hope_score = all_hyp[0]->features.dot(dense_weights); - if(DEBUG_CUT) cerr << "New hope derivation score " << hope_score << endl; - - for(int u=0;u!=all_hyp.size();u++) - { - double t_score = all_hyp[u]->features.dot(dense_weights); - //all_hyp[u]->fear = -1*all_hyp[u]->mt_metric - hope_score + t_score; - - all_hyp[u]->fear = -1*all_hyp[u]->mt_metric + 1*all_hyp[0]->mt_metric - hope_score + t_score; //relative loss - // all_hyp[u]->oracle_loss = -1*all_hyp[u]->mt_metric - -1*all_hyp[0]->mt_metric; - //all_hyp[u]->oracle_feat_diff = all_hyp[0]->features - all_hyp[u]->features; - // all_hyp[u]->fear = -1 * all_hyp[u]->mt_metric + t_score; - //if (PRINT_LIST) cerr << all_hyp[u]->mt_metric << " H:" << all_hyp[u]->hope << " F:" << all_hyp[u]->fear << endl; - - } - - sort(all_hyp.begin(),all_hyp.end(),FearCompareB); - - } - //assign maximum fear derivation from all derivations - max_fear = all_hyp[0]; - - if(DEBUG_CUT) cerr <<"Cutting Plane Max Fear "<fear ; - for(int i=0; i < cur_constraint.size();i++) //select maximal violator already in constraint set - { - if (!max_fear_in_set || cur_constraint[i]->fear > max_fear_in_set->fear) - max_fear_in_set = cur_constraint[i]; - } - if(DEBUG_CUT) cerr << "Max Fear in constraint set " << max_fear_in_set->fear << endl; - - if(max_fear->fear > max_fear_in_set->fear + SMO_EPSILON) - { - cur_constraint.push_back(max_fear); - *again = true; - if(DEBUG_CUT) cerr << "Optimize Again " << *again << endl; - } -} - - -double ComputeDelta(vector >* cur_p, double max_step_size,vector dense_weights ) -{ - vector >& cur_pair = *cur_p; - double loss = cur_pair[0]->oracle_loss - cur_pair[1]->oracle_loss; - //double margin = -cur_pair[0]->oracle_feat_diff.dot(dense_weights) + cur_pair[1]->oracle_feat_diff.dot(dense_weights); //TODO: is it a problem that new oracle is used in diff? 
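CuttingPlane above implements the standard constraint-generation test: the highest-fear derivation in the k-best list joins the constraint set only if its fear beats the worst violator already in the set by more than SMO_EPSILON, which then triggers another round of SMO. A compact sketch of just that test, with fear values as plain doubles instead of HypothesisInfo pointers (function and argument names are mine):

    #include <vector>

    // all_fear holds the fear of every derivation, sorted in decreasing order,
    // so the most violated one is all_fear.front(). Returns true if a new
    // constraint was added and SMO should be run again.
    bool MaybeAddConstraint(const std::vector<double>& all_fear,
                            std::vector<double>* constraint_fear,
                            double smo_epsilon) {
      const double max_fear = all_fear.front();
      double max_in_set = constraint_fear->front();
      for (double f : *constraint_fear)
        if (f > max_in_set) max_in_set = f;
      if (max_fear > max_in_set + smo_epsilon) {   // violated by more than epsilon
        constraint_fear->push_back(max_fear);
        return true;
      }
      return false;
    }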
- //double num = loss - margin; - - - double margin = -(cur_pair[0]->oracleN->features.dot(dense_weights)- cur_pair[0]->features.dot(dense_weights)) + (cur_pair[1]->oracleN->features.dot(dense_weights) - cur_pair[1]->features.dot(dense_weights)); - const double num = margin + loss; - cerr << "LOSS: " << num << " Margin:" << margin << " BLEUL:" << loss << " " << cur_pair[1]->features.dot(dense_weights) << " " << cur_pair[0]->features.dot(dense_weights) <features.dot(dense_weights) - cur_pair[0]->features.dot(dense_weights); - // double loss = cur_pair[1]->oracle_loss; //good.mt_metric - cur_bad.mt_metric); - //const double num = margin + loss; - - //cerr << "Compute Delta " << loss << " " << margin << " "; - - // double margin = cur_pair[0]->features.dot(dense_weights) - cur_pair[1]->features.dot(dense_weights); //TODO: is it a problem that new oracle is used in diff? -/* double num = - (cur_pair[0]->oracle_loss - cur_pair[0]->oracle_feat_diff.dot(dense_weights)) - - (cur_pair[1]->oracle_loss - cur_pair[1]->oracle_feat_diff.dot(dense_weights)); - */ - - SparseVector diff = cur_pair[0]->features; - diff -= cur_pair[1]->features; - /* SparseVector diff = cur_pair[0]->oracle_feat_diff; - diff -= cur_pair[1]->oracle_feat_diff;*/ - double diffsqnorm = diff.l2norm_sq(); - double delta; - if (diffsqnorm > 0) - delta = num / (diffsqnorm * max_step_size); - else - delta = 0; - cerr << " D1:" << delta; - //clip delta (enforce margin constraints) - - delta = max(-cur_pair[0]->alpha, min(delta, cur_pair[1]->alpha)); - cerr << " D2:" << delta; - return delta; -} - - -vector > SelectPair(vector >* cur_c) -{ - bool DEBUG_SELECT= false; - vector >& cur_constraint = *cur_c; - - vector > pair; - - if (no_select || optimizer == 2){ //skip heuristic search and return oracle and fear for 1-mira - // if(optimizer == 2) { - pair.push_back(cur_constraint[0]); - pair.push_back(cur_constraint[1]); - return pair; - // } - } - - for(int u=0;u != cur_constraint.size();u++) - { - shared_ptr max_fear; - - if(DEBUG_SELECT) cerr<< "cur alpha " << u << " " << cur_constraint[u]->alpha; - for(int i=0; i < cur_constraint.size();i++) //select maximal violator - { - if(i != u) - if (!max_fear || cur_constraint[i]->fear > max_fear->fear) - max_fear = cur_constraint[i]; - } - if(!max_fear) return pair; // - - if(DEBUG_SELECT) cerr << " F" << max_fear->fear << endl; - - - if ((cur_constraint[u]->alpha == 0) && (cur_constraint[u]->fear > max_fear->fear + SMO_EPSILON)) - { - for(int i=0; i < cur_constraint.size();i++) //select maximal violator - { - if(i != u) - if (cur_constraint[i]->alpha > 0) - { - pair.push_back(cur_constraint[u]); - pair.push_back(cur_constraint[i]); - cerr << "RETJURN from 1" << endl; - return pair; - } - } - } - if ((cur_constraint[u]->alpha > 0) && (cur_constraint[u]->fear < max_fear->fear - SMO_EPSILON)) - { - for(int i=0; i < cur_constraint.size();i++) //select maximal violator - { - if(i != u) - if (cur_constraint[i]->fear > cur_constraint[u]->fear) - { - pair.push_back(cur_constraint[u]); - pair.push_back(cur_constraint[i]); - return pair; - } - } - } - - } - return pair; //no more constraints to optimize, we're done here - -} - -struct GoodBadOracle { - vector > good; - vector > bad; -}; - -struct TrainingObserver : public DecoderObserver { - TrainingObserver(const int k, const DocScorer& d, vector* o, vector* cbs) : ds(d), oracles(*o), corpus_bleu_sent_stats(*cbs), kbest_size(k) { - // TrainingObserver(const int k, const DocScorer& d, vector* o) : ds(d), oracles(*o), kbest_size(k) { - - //calculate 
corpus bleu score from previous iterations 1-best for BLEU gain - if(!pseudo_doc) - if(cur_pass > 0) - { - ScoreP acc; - for (int ii = 0; ii < corpus_bleu_sent_stats.size(); ii++) { - if (!acc) { acc = corpus_bleu_sent_stats[ii]->GetZero(); } - acc->PlusEquals(*corpus_bleu_sent_stats[ii]); - - } - corpus_bleu_stats = acc; - corpus_bleu_score = acc->ComputeScore(); - } - //corpus_src_length = 0; -} - const DocScorer& ds; - vector& corpus_bleu_sent_stats; - vector& oracles; - vector > cur_best; - shared_ptr cur_oracle; - const int kbest_size; - Hypergraph forest; - int cur_sent; - ScoreP corpus_bleu_stats; - float corpus_bleu_score; - - float corpus_src_length; - float curr_src_length; - - const int GetCurrentSent() const { - return cur_sent; - } - - const HypothesisInfo& GetCurrentBestHypothesis() const { - return *cur_best[0]; - } - - const vector > GetCurrentBest() const { - return cur_best; - } - - const HypothesisInfo& GetCurrentOracle() const { - return *cur_oracle; - } - - const Hypergraph& GetCurrentForest() const { - return forest; - } - - - virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) { - cur_sent = smeta.GetSentenceID(); - //cerr << "SOURCE " << smeta.GetSourceLength() << endl; - curr_src_length = (float) smeta.GetSourceLength(); - //UpdateOracles(smeta.GetSentenceID(), *hg); - if(unique_kbest) - UpdateOracles(smeta.GetSentenceID(), *hg); - else - UpdateOracles > >(smeta.GetSentenceID(), *hg); - forest = *hg; - - } - - shared_ptr MakeHypothesisInfo(const SparseVector& feats, const double score, const vector& hyp) { - shared_ptr h(new HypothesisInfo); - h->features = feats; - h->mt_metric = score; - h->hyp = hyp; - return h; - } - - template - void UpdateOracles(int sent_id, const Hypergraph& forest) { - - bool PRINT_LIST= false; - vector >& cur_good = oracles[sent_id].good; - vector >& cur_bad = oracles[sent_id].bad; - //TODO: look at keeping previous iterations hypothesis lists around - cur_best.clear(); - cur_good.clear(); - cur_bad.clear(); - - vector > all_hyp; - - typedef KBest::KBestDerivations, ESentenceTraversal,Filter> K; - K kbest(forest,kbest_size); - - //KBest::KBestDerivations, ESentenceTraversal> kbest(forest, kbest_size); - for (int i = 0; i < kbest_size; ++i) { - //const KBest::KBestDerivations, ESentenceTraversal>::Derivation* d = - typename K::Derivation *d = - kbest.LazyKthBest(forest.nodes_.size() - 1, i); - if (!d) break; - - float sentscore; - if(approx_score) - { - - if(cur_pass > 0 && !pseudo_doc) - { - ScoreP sent_stats = ds[sent_id]->ScoreCandidate(d->yield); - ScoreP corpus_no_best = corpus_bleu_stats->GetZero(); - - corpus_bleu_stats->Subtract(*corpus_bleu_sent_stats[sent_id], &*corpus_no_best); - sent_stats->PlusEquals(*corpus_no_best, 0.5); - - //compute gain from new sentence in 1-best corpus - sentscore = mt_metric_scale * (sent_stats->ComputeScore() - corpus_no_best->ComputeScore());// - corpus_bleu_score); - } - else if(pseudo_doc) - { - //cerr << "CORP:" << corpus_bleu_score << " NEW:" << sent_stats->ComputeScore() << " sentscore:" << sentscore << endl; - - //-----pseudo-corpus approach - float src_scale = corpus_src_length + curr_src_length; - ScoreP sent_stats = ds[sent_id]->ScoreCandidate(d->yield); - if(!corpus_bleu_stats){ corpus_bleu_stats = sent_stats->GetZero();} - - sent_stats->PlusEquals(*corpus_bleu_stats); - sentscore = mt_metric_scale * src_scale * sent_stats->ComputeScore(); - - } - else - { - //cerr << "Using sentence-level approximation - PASS - " << boost::lexical_cast(cur_pass) << endl; - 
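Both versions of UpdateOracles share the pseudo-document idea seen here: BLEU sufficient statistics are kept for an exponentially decayed running "document", so after each sentence the 1-best's statistics are folded in and the accumulator is damped by PSEUDO_SCALE (0.95 in this file), letting recent output dominate the smoothing. A toy sketch of that accumulator, with a made-up Stats type standing in for cdec's Score objects:

    // Toy BLEU sufficient statistics; cdec keeps the real ones behind ScoreP.
    struct Stats {
      double ngram_hits, ngram_total;
      void PlusEquals(const Stats& o, double scale) {
        ngram_hits += scale * o.ngram_hits;
        ngram_total += scale * o.ngram_total;
      }
    };

    // Mirrors the GetZero()/PlusEquals(*sent_stats, PSEUDO_SCALE) sequence above:
    // corpus <- PSEUDO_SCALE * (corpus + stats of this sentence's 1-best).
    // The corpus source-length counter is decayed in exactly the same way.
    void UpdatePseudoDoc(Stats* corpus, const Stats& one_best, double pseudo_scale) {
      corpus->PlusEquals(one_best, 1.0);
      Stats decayed = {0.0, 0.0};
      decayed.PlusEquals(*corpus, pseudo_scale);
      *corpus = decayed;
    }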
//approx style of computation, used for 0th iteration - sentscore = mt_metric_scale * (ds[sent_id]->ScoreCandidate(d->yield)->ComputeSentScore()); - - //use pseudo-doc - } - - - } - else - { - sentscore = mt_metric_scale * (ds[sent_id]->ScoreCandidate(d->yield)->ComputeScore()); - } - - if (invert_score) sentscore *= -1.0; - //cerr << TD::GetString(d->yield) << " ||| " << d->score << " ||| " << sentscore << " " << approx_sentscore << endl; - - if (i < update_list_size){ - if (i == 0) //take cur best and add its bleu statistics counts to the pseudo-doc - { } - if(PRINT_LIST)cerr << TD::GetString(d->yield) << " ||| " << d->score << " ||| " << sentscore << endl; - cur_best.push_back( MakeHypothesisInfo(d->feature_values, sentscore, d->yield)); - } - - all_hyp.push_back(MakeHypothesisInfo(d->feature_values, sentscore,d->yield)); //store all hyp to extract oracle best and worst - - } - - if(pseudo_doc){ - //update psuedo-doc stats - string details, details2; - corpus_bleu_stats->ScoreDetails(&details2); - ScoreP sent_stats = ds[sent_id]->ScoreCandidate(cur_best[0]->hyp); - corpus_bleu_stats->PlusEquals(*sent_stats); - - - sent_stats->ScoreDetails(&details); - - - sent_stats = corpus_bleu_stats; - corpus_bleu_stats = sent_stats->GetZero(); - corpus_bleu_stats->PlusEquals(*sent_stats, PSEUDO_SCALE); - - - corpus_src_length = PSEUDO_SCALE * (corpus_src_length + curr_src_length); - cerr << "CORP S " << corpus_src_length << " " << curr_src_length << "\n" << details << "\n " << details2 << endl; - - - } - - - //figure out how many hyps we can keep maximum - int temp_update_size = update_list_size; - if (all_hyp.size() < update_list_size){ temp_update_size = all_hyp.size();} - - //sort all hyps by sentscore (bleu) - sort(all_hyp.begin(),all_hyp.end(),HypothesisCompareB); - - if(PRINT_LIST){ cerr << "Sorting " << endl; for(int u=0;u!=all_hyp.size();u++) cerr << all_hyp[u]->mt_metric << " " << all_hyp[u]->features.dot(dense_weights_g) << endl; } - - //if(optimizer != 4 ) - if(hope_select == 1) - { - //find hope hypothesis using model + bleu - if (PRINT_LIST) cerr << "HOPE " << endl; - for(int u=0;u!=all_hyp.size();u++) - { - double t_score = all_hyp[u]->features.dot(dense_weights_g); - all_hyp[u]->hope = all_hyp[u]->mt_metric + t_score; - if (PRINT_LIST) cerr << all_hyp[u]->mt_metric << " H:" << all_hyp[u]->hope << " S:" << t_score << endl; - - } - - //sort hyps by hope score - sort(all_hyp.begin(),all_hyp.end(),HopeCompareB); - } - - - //assign cur_good the sorted list - cur_good.insert(cur_good.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size); - if(PRINT_LIST) { cerr << "GOOD" << endl; for(int u=0;u!=cur_good.size();u++) cerr << cur_good[u]->mt_metric << " " << cur_good[u]->hope << endl;} - /* if (!cur_oracle) { cur_oracle = cur_good[0]; - cerr << "Set oracle " << cur_oracle->hope << " " << cur_oracle->fear << " " << cur_oracle->mt_metric << endl; } - else { - cerr << "Stay oracle " << cur_oracle->hope << " " << cur_oracle->fear << " " << cur_oracle->mt_metric << endl; } */ - - shared_ptr& oracleN = cur_good[0]; - //if(optimizer != 4){ - if(fear_select == 1){ - //compute fear hyps - if (PRINT_LIST) cerr << "FEAR " << endl; - double hope_score = oracleN->features.dot(dense_weights_g); - //double hope_score = cur_oracle->features.dot(dense_weights); - if (PRINT_LIST) cerr << "hope score " << hope_score << endl; - for(int u=0;u!=all_hyp.size();u++) - { - double t_score = all_hyp[u]->features.dot(dense_weights_g); - //all_hyp[u]->fear = -1*all_hyp[u]->mt_metric - hope_score + t_score; - - /* 
all_hyp[u]->fear = -1*all_hyp[u]->mt_metric - -1*cur_oracle->mt_metric - hope_score + t_score; //relative loss - all_hyp[u]->oracle_loss = -1*all_hyp[u]->mt_metric - -1*cur_oracle->mt_metric; - all_hyp[u]->oracle_feat_diff = cur_oracle->features - all_hyp[u]->features;*/ - - all_hyp[u]->fear = -1*all_hyp[u]->mt_metric + 1*oracleN->mt_metric - hope_score + t_score; //relative loss - all_hyp[u]->oracle_loss = -1*all_hyp[u]->mt_metric + 1*oracleN->mt_metric; - all_hyp[u]->oracle_feat_diff = oracleN->features - all_hyp[u]->features; - all_hyp[u]->oracleN=oracleN; - // all_hyp[u]->fear = -1 * all_hyp[u]->mt_metric + t_score; - if (PRINT_LIST) cerr << all_hyp[u]->mt_metric << " H:" << all_hyp[u]->hope << " F:" << all_hyp[u]->fear << endl; - - } - - sort(all_hyp.begin(),all_hyp.end(),FearCompareB); - - cur_bad.insert(cur_bad.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size); - } - else if(fear_select == 2) //select fear based on cost - { - cur_bad.insert(cur_bad.begin(), all_hyp.end()-temp_update_size, all_hyp.end()); - reverse(cur_bad.begin(),cur_bad.end()); - } - else //pred-based, fear_select = 3 - { - sort(all_hyp.begin(),all_hyp.end(),FearComparePred); - cur_bad.insert(cur_bad.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size); - } - - - if(PRINT_LIST){ cerr<< "BAD"<mt_metric << " H:" << cur_bad[u]->hope << " F:" << cur_bad[u]->fear << endl;} - - cerr << "GOOD (BEST): " << cur_good[0]->mt_metric << endl; - cerr << " CUR: " << cur_best[0]->mt_metric << endl; - cerr << " BAD (WORST): " << cur_bad[0]->mt_metric << endl; - } -}; - -void ReadTrainingCorpus(const string& fname, vector* c) { - - - ReadFile rf(fname); - istream& in = *rf.stream(); - string line; - while(in) { - getline(in, line); - if (!in) break; - c->push_back(line); - } -} - -void ReadPastTranslationForScore(const int cur_pass, vector* c, DocScorer& ds, const string& od) -{ - cerr << "Reading BLEU gain file "; - string fname; - if(cur_pass == 0) - { - fname = od + "/run.raw.init"; - } - else - { - int last_pass = cur_pass - 1; - fname = od + "/run.raw." 
+ boost::lexical_cast(last_pass) + ".B"; - } - cerr << fname << "\n"; - ReadFile rf(fname); - istream& in = *rf.stream(); - ScoreP acc; - string line; - int lc = 0; - while(in) { - getline(in, line); - if (line.empty() && !in) break; - vector sent; - TD::ConvertSentence(line, &sent); - ScoreP sentscore = ds[lc]->ScoreCandidate(sent); - c->push_back(sentscore); - if (!acc) { acc = sentscore->GetZero(); } - acc->PlusEquals(*sentscore); - ++lc; - - } - - - assert(lc > 0); - float score = acc->ComputeScore(); - string details; - acc->ScoreDetails(&details); - cerr << "INIT RUN " << details << score << endl; - -} - - -int main(int argc, char** argv) { - register_feature_functions(); - SetSilent(true); // turn off verbose decoder output - - po::variables_map conf; - if (!InitCommandLine(argc, argv, &conf)) return 1; - - if (conf.count("random_seed")) - rng.reset(new MT19937(conf["random_seed"].as())); - else - rng.reset(new MT19937); - - vector corpus; - //ReadTrainingCorpus(conf["source"].as(), &corpus); - - const string metric_name = conf["mt_metric"].as(); - optimizer = conf["optimizer"].as(); - fear_select = conf["fear"].as(); - hope_select = conf["hope"].as(); - mt_metric_scale = conf["mt_metric_scale"].as(); - approx_score = conf.count("approx_score"); - no_reweight = conf.count("no_reweight"); - no_select = conf.count("no_select"); - update_list_size = conf["update_k_best"].as(); - unique_kbest = conf.count("unique_k_best"); - pseudo_doc = true; - - const string weights_dir = conf["weights_output"].as(); - const string output_dir = conf["output_dir"].as(); - ScoreType type = ScoreTypeFromString(metric_name); - - //establish metric used for tuning - if (type == TER) { - invert_score = true; - // approx_score = false; - } else { - invert_score = false; - } - - //load references - DocScorer ds(type, conf["reference"].as >(), ""); - cerr << "Loaded " << ds.size() << " references for scoring with " << metric_name << endl; - vector corpus_bleu_sent_stats; - - //check training pass,if >0, then use previous iterations corpus bleu stats - cur_pass = conf["passes"].as(); - if(cur_pass > 0) - { - ReadPastTranslationForScore(cur_pass, &corpus_bleu_sent_stats, ds, output_dir); - } - /* if (ds.size() != corpus.size()) { - cerr << "Mismatched number of references (" << ds.size() << ") and sources (" << corpus.size() << ")\n"; - return 1; - }*/ - cerr << "Optimizing with " << optimizer << endl; - // load initial weights - /*Weights weights; - weights.InitFromFile(conf["input_weights"].as()); - SparseVector lambdas; - weights.InitSparseVector(&lambdas); - */ - - - - ReadFile ini_rf(conf["decoder_config"].as()); - Decoder decoder(ini_rf.stream()); - - vector& dense_weights = decoder.CurrentWeightVector(); - - SparseVector lambdas; - Weights::InitFromFile(conf["input_weights"].as(), &dense_weights); - Weights::InitSparseVector(dense_weights, &lambdas); - - const string input = decoder.GetConf()["input"].as(); - //const bool show_feature_dictionary = decoder.GetConf().count("show_feature_dictionary"); - if (!SILENT) cerr << "Reading input from " << ((input == "-") ? 
"STDIN" : input.c_str()) << endl; - ReadFile in_read(input); - istream *in = in_read.stream(); - assert(*in); - string buf; - - const double max_step_size = conf["max_step_size"].as(); - - - // assert(corpus.size() > 0); - vector oracles(ds.size()); - - TrainingObserver observer(conf["k_best_size"].as(), ds, &oracles, &corpus_bleu_sent_stats); - - int cur_sent = 0; - int lcount = 0; - double objective=0; - double tot_loss = 0; - int dots = 0; - // int cur_pass = 1; - // vector dense_weights; - SparseVector tot; - SparseVector final_tot; - // tot += lambdas; // initial weights - // lcount++; // count for initial weights - - //string msg = "# MIRA tuned weights"; - // while (cur_pass <= max_iteration) { - SparseVector old_lambdas = lambdas; - tot.clear(); - tot += lambdas; - cerr << "PASS " << cur_pass << " " << endl << lambdas << endl; - ScoreP acc, acc_h, acc_f; - - while(*in) { - getline(*in, buf); - if (buf.empty()) continue; - //for (cur_sent = 0; cur_sent < corpus.size(); cur_sent++) { - - cerr << "SENT: " << cur_sent << endl; - //TODO: allow batch updating - //dense_weights.clear(); - //weights.InitFromVector(lambdas); - //weights.InitVector(&dense_weights); - //decoder.SetWeights(dense_weights); - lambdas.init_vector(&dense_weights); - dense_weights_g = dense_weights; - decoder.SetId(cur_sent); - decoder.Decode(buf, &observer); // decode the sentence, calling Notify to get the hope,fear, and model best hyps. - - cur_sent = observer.GetCurrentSent(); - const HypothesisInfo& cur_hyp = observer.GetCurrentBestHypothesis(); - const HypothesisInfo& cur_good = *oracles[cur_sent].good[0]; - const HypothesisInfo& cur_bad = *oracles[cur_sent].bad[0]; - - vector >& cur_good_v = oracles[cur_sent].good; - vector >& cur_bad_v = oracles[cur_sent].bad; - vector > cur_best_v = observer.GetCurrentBest(); - - tot_loss += cur_hyp.mt_metric; - - //score hyps to be able to compute corpus level bleu after we finish this iteration through the corpus - ScoreP sentscore = ds[cur_sent]->ScoreCandidate(cur_hyp.hyp); - if (!acc) { acc = sentscore->GetZero(); } - acc->PlusEquals(*sentscore); - - ScoreP hope_sentscore = ds[cur_sent]->ScoreCandidate(cur_good.hyp); - if (!acc_h) { acc_h = hope_sentscore->GetZero(); } - acc_h->PlusEquals(*hope_sentscore); - - ScoreP fear_sentscore = ds[cur_sent]->ScoreCandidate(cur_bad.hyp); - if (!acc_f) { acc_f = fear_sentscore->GetZero(); } - acc_f->PlusEquals(*fear_sentscore); - - if(optimizer == 4) { //single dual coordinate update, cur_good selected on BLEU score only (not model+BLEU) - // if (!ApproxEqual(cur_hyp.mt_metric, cur_good.mt_metric)) { - - double margin = cur_bad.features.dot(dense_weights) - cur_good.features.dot(dense_weights); - double mt_loss = (cur_good.mt_metric - cur_bad.mt_metric); - const double loss = margin + mt_loss; - cerr << "LOSS: " << loss << " Margin:" << margin << " BLEUL:" << mt_loss << " " << cur_bad.features.dot(dense_weights) << " " << cur_good.features.dot(dense_weights) < 0.0) { - SparseVector diff = cur_good.features; - diff -= cur_bad.features; - - double diffsqnorm = diff.l2norm_sq(); - double delta; - if (diffsqnorm > 0) - delta = loss / (diffsqnorm); - else - delta = 0; - - //double step_size = loss / diff.l2norm_sq(); - cerr << loss << " " << delta << " " << diff << endl; - if (delta > max_step_size) delta = max_step_size; - lambdas += (cur_good.features * delta); - lambdas -= (cur_bad.features * delta); - //cerr << "L: " << lambdas << endl; - // } - // } - } - else if(optimizer == 1) //sgd - nonadapted step size - { - - lambdas += 
(cur_good.features) * max_step_size; - lambdas -= (cur_bad.features) * max_step_size; - } - //cerr << "L: " << lambdas << endl; - else if(optimizer == 5) //full mira with n-best list of constraints from oracle, fear, best - { - vector > cur_constraint; - cur_constraint.insert(cur_constraint.begin(), cur_bad_v.begin(), cur_bad_v.end()); - cur_constraint.insert(cur_constraint.begin(), cur_best_v.begin(), cur_best_v.end()); - cur_constraint.insert(cur_constraint.begin(), cur_good_v.begin(), cur_good_v.end()); - - bool optimize_again; - vector > cur_pair; - //SMO - for(int u=0;u!=cur_constraint.size();u++) - cur_constraint[u]->alpha =0; - - cur_constraint[0]->alpha =1; //set oracle to alpha=1 - - cerr <<"Optimizing with " << cur_constraint.size() << " constraints" << endl; - int smo_iter = 10, smo_iter2 = 10; - int iter, iter2 =0; - bool DEBUG_SMO = false; - while (iter2 < smo_iter2) - { - iter =0; - while (iter < smo_iter) - { - optimize_again = true; - for (int i = 0; i< cur_constraint.size(); i++) - for (int j = i+1; j< cur_constraint.size(); j++) - { - if(DEBUG_SMO) cerr << "start " << i << " " << j << endl; - cur_pair.clear(); - cur_pair.push_back(cur_constraint[j]); - cur_pair.push_back(cur_constraint[i]); - double delta = ComputeDelta(&cur_pair,max_step_size, dense_weights); - - if (delta == 0) optimize_again = false; - // cur_pair[0]->alpha += delta; - // cur_pair[1]->alpha -= delta; - cur_constraint[j]->alpha += delta; - cur_constraint[i]->alpha -= delta; - double step_size = delta * max_step_size; - /*lambdas += (cur_pair[1]->features) * step_size; - lambdas -= (cur_pair[0]->features) * step_size;*/ - lambdas += (cur_constraint[i]->features) * step_size; - lambdas -= (cur_constraint[j]->features) * step_size; - if(DEBUG_SMO) cerr << "SMO opt " << iter << " " << i << " " << j << " " << delta << " " << cur_pair[0]->alpha << " " << cur_pair[1]->alpha << endl; - - //reload weights based on update - /*dense_weights.clear(); - weights.InitFromVector(lambdas); - weights.InitVector(&dense_weights);*/ - } - iter++; - - if(!optimize_again) - { - iter = 100; - cerr << "Optimization stopped, delta =0" << endl; - } - - - } - iter2++; - } - - - } - else if(optimizer == 2 || optimizer == 3) //1-fear and cutting plane mira - { - bool DEBUG_SMO= true; - vector > cur_constraint; - cur_constraint.push_back(cur_good_v[0]); //add oracle to constraint set - bool optimize_again = true; - int cut_plane_calls = 0; - while (optimize_again) - { - if(DEBUG_SMO) cerr<< "optimize again: " << optimize_again << endl; - if(optimizer == 2){ //1-fear - cur_constraint.push_back(cur_bad_v[0]); - - //check if we have a violation - if(!(cur_constraint[1]->fear > cur_constraint[0]->fear + SMO_EPSILON)) - { - optimize_again = false; - cerr << "Constraint not violated" << endl; - } - } - else - { //cutting plane to add constraints - if(DEBUG_SMO) cerr<< "Cutting Plane " << cut_plane_calls << " with " << lambdas << endl; - optimize_again = false; - cut_plane_calls++; - CuttingPlane(&cur_constraint, &optimize_again, oracles[cur_sent].bad, dense_weights); - if (cut_plane_calls >= MAX_SMO) optimize_again = false; - } - - if(optimize_again) - { - //SMO - for(int u=0;u!=cur_constraint.size();u++) - { - cur_constraint[u]->alpha =0; - //cur_good_v[0]->alpha = 1; cur_bad_v[0]->alpha = 0; - } - cur_constraint[0]->alpha = 1; - cerr <<"Optimizing with " << cur_constraint.size() << " constraints" << endl; - int smo_iter = MAX_SMO; - int iter =0; - while (iter < smo_iter) - { - //select pair to optimize from constraint set - vector > 
cur_pair = SelectPair(&cur_constraint); - - if(cur_pair.empty()){iter=MAX_SMO; cerr << "Undefined pair " << endl; continue;} //pair is undefined so we are done with this smo - - //double num = cur_good_v[0]->fear - cur_bad_v[0]->fear; - /*double loss = cur_good_v[0]->oracle_loss - cur_bad_v[0]->oracle_loss; - double margin = cur_good_v[0]->oracle_feat_diff.dot(dense_weights) - cur_bad_v[0]->oracle_feat_diff.dot(dense_weights); - double num = loss - margin; - SparseVector diff = cur_good_v[0]->features; - diff -= cur_bad_v[0]->features; - double delta = num / (diff.l2norm_sq() * max_step_size); - delta = max(-cur_good_v[0]->alpha, min(delta, cur_bad_v[0]->alpha)); - cur_good_v[0]->alpha += delta; - cur_bad_v[0]->alpha -= delta; - double step_size = delta * max_step_size; - lambdas += (cur_bad_v[0]->features) * step_size; - lambdas -= (cur_good_v[0]->features) * step_size; - */ - - double delta = ComputeDelta(&cur_pair,max_step_size, dense_weights); - - cur_pair[0]->alpha += delta; - cur_pair[1]->alpha -= delta; - double step_size = delta * max_step_size; - /* lambdas += (cur_pair[1]->oracle_feat_diff) * step_size; - lambdas -= (cur_pair[0]->oracle_feat_diff) * step_size;*/ - - cerr << "step " << step_size << endl; - double alpha_sum=0; - SparseVector temp_lambdas = lambdas; - - for(int u=0;u!=cur_constraint.size();u++) - { - cerr << cur_constraint[u]->alpha << " " << cur_constraint[u]->hope << endl; - temp_lambdas += (cur_constraint[u]->oracleN->features-cur_constraint[u]->features) * cur_constraint[u]->alpha * step_size; - alpha_sum += cur_constraint[u]->alpha; - } - cerr << "Alpha sum " << alpha_sum << " " << temp_lambdas << endl; - - lambdas += (cur_pair[1]->features) * step_size; - lambdas -= (cur_pair[0]->features) * step_size; - cerr << " Lambdas " << lambdas << endl; - //reload weights based on update - dense_weights.clear(); - //weights.InitFromVector(lambdas); - //weights.InitVector(&dense_weights); - lambdas.init_vector(&dense_weights); - dense_weights_g = dense_weights; - iter++; - - if(DEBUG_SMO) cerr << "SMO opt " << iter << " " << delta << " " << cur_pair[0]->alpha << " " << cur_pair[1]->alpha << endl; - // cerr << "SMO opt " << iter << " " << delta << " " << cur_good_v[0]->alpha << " " << cur_bad_v[0]->alpha << endl; - if(no_select) //don't use selection heuristic to determine when to stop SMO, rather just when delta =0 - if (delta == 0) iter = MAX_SMO; - - //only perform one dual coordinate ascent step - if(optimizer == 2) - { - optimize_again = false; - iter = MAX_SMO; - } - - } - if(optimizer == 3) - { - if(!no_reweight) - { - if(DEBUG_SMO) cerr<< "Decoding with new weights -- now orac are " << oracles[cur_sent].good.size() << endl; - Hypergraph hg = observer.GetCurrentForest(); - hg.Reweight(dense_weights); - //observer.UpdateOracles(cur_sent, hg); - if(unique_kbest) - observer.UpdateOracles(cur_sent, hg); - else - observer.UpdateOracles > >(cur_sent, hg); - - - } - } - } - - - } - - //print objective after this sentence - double lambda_change = (lambdas - old_lambdas).l2norm_sq(); - double max_fear = cur_constraint[cur_constraint.size()-1]->fear; - double temp_objective = 0.5 * lambda_change;// + max_step_size * max_fear; - - for(int u=0;u!=cur_constraint.size();u++) - { - cerr << cur_constraint[u]->alpha << " " << cur_constraint[u]->hope << " " << cur_constraint[u]->fear << endl; - temp_objective += cur_constraint[u]->alpha * cur_constraint[u]->fear; - } - objective += temp_objective; - - cerr << "SENT OBJ: " << temp_objective << " NEW OBJ: " << objective << endl; - } - 
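Every SMO step in these branches, whether over the full n-best constraint set (optimizer 5) or the cutting-plane set (optimizer 3), reduces to the clipped dual update computed by ComputeDelta: the pair's violation (margin plus loss) divided by ||f_i - f_j||^2 * C, then clipped so both alphas stay non-negative. As a formula-sized sketch (argument names are mine, not cdec's):

    #include <algorithm>

    // Clipped SMO step for a constraint pair: constraint i receives alpha,
    // constraint j gives it up. violation = margin + loss for the pair,
    // diff_sqnorm = ||f_i - f_j||^2, C = max_step_size.
    double ClippedDualStep(double violation, double diff_sqnorm, double C,
                           double alpha_i, double alpha_j) {
      double delta = (diff_sqnorm > 0.0) ? violation / (diff_sqnorm * C) : 0.0;
      return std::max(-alpha_i, std::min(delta, alpha_j));  // keep alphas >= 0
    }

The weight vector is then shifted by delta * C times the feature difference, with the sign convention used in the loops above: the features of the constraint that gained alpha are subtracted.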
- - if ((cur_sent * 40 / ds.size()) > dots) { ++dots; cerr << '.'; } - tot += lambdas; - ++lcount; - cur_sent++; - - cout << TD::GetString(cur_good_v[0]->hyp) << " ||| " << TD::GetString(cur_best_v[0]->hyp) << " ||| " << TD::GetString(cur_bad_v[0]->hyp) << endl; - - //clear good/bad lists from oracles for this sentences - you want to keep them around for things - - // oracles[cur_sent].good.clear(); - //oracles[cur_sent].bad.clear(); - } - - cerr << "FINAL OBJECTIVE: "<< objective << endl; - final_tot += tot; - cerr << "Translated " << lcount << " sentences " << endl; - cerr << " [AVG METRIC LAST PASS=" << (tot_loss / lcount) << "]\n"; - tot_loss = 0; - /* - float corpus_score = acc->ComputeScore(); - string corpus_details; - acc->ScoreDetails(&corpus_details); - cerr << "MODEL " << corpus_details << endl; - cout << corpus_score << endl; - - corpus_score = acc_h->ComputeScore(); - acc_h->ScoreDetails(&corpus_details); - cerr << "HOPE " << corpus_details << endl; - cout << corpus_score << endl; - - corpus_score = acc_f->ComputeScore(); - acc_f->ScoreDetails(&corpus_details); - cerr << "FEAR " << corpus_details << endl; - cout << corpus_score << endl; - */ - int node_id = rng->next() * 100000; - cerr << " Writing weights to " << node_id << endl; - Weights::ShowLargestFeatures(dense_weights); - dots = 0; - ostringstream os; - os << weights_dir << "/weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << "." << node_id << ".gz"; - string msg = "# MIRA tuned weights ||| " + boost::lexical_cast(node_id) + " ||| " + boost::lexical_cast(lcount); - //Weights.InitFromVector(lambdas); - lambdas.init_vector(&dense_weights); - Weights::WriteToFile(os.str(), dense_weights, true, &msg); - - SparseVector x = tot; - x /= lcount; - ostringstream sa; - string msga = "# MIRA tuned weights AVERAGED ||| " + boost::lexical_cast(node_id) + " ||| " + boost::lexical_cast(lcount); - sa << weights_dir << "/weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << "." 
<< node_id << "-avg.gz"; - //Weights ww; - //ww.InitFromVector(x); - x.init_vector(&dense_weights); - Weights::WriteToFile(sa.str(), dense_weights, true, &msga); - - //assign averaged lambdas to initialize next iteration - //lambdas = x; - - /* double lambda_change = (old_lambdas - lambdas).l2norm_sq(); - cerr << "Change in lambda " << lambda_change << endl; - - if ( lambda_change < EPSILON) - { - cur_pass = max_iteration; - cerr << "Weights converged - breaking" << endl; - } - - ++cur_pass; - */ - - //} iteration while loop - - /* cerr << endl; - weights.WriteToFile("weights.mira-final.gz", true, &msg); - final_tot /= (lcount + 1);//max_iteration); - tot /= (corpus.size() + 1); - weights.InitFromVector(final_tot); - cerr << tot << "||||" << final_tot << endl; - msg = "# MIRA tuned weights (averaged vector)"; - weights.WriteToFile("weights.mira-final-avg.gz", true, &msg); - */ - cerr << "Optimization complete.\\AVERAGED WEIGHTS: weights.mira-final-avg.gz\n"; - return 0; -} - diff --git a/training/mira/run_mira.pl b/training/mira/run_mira.pl index f4d61407..90a4da0e 100755 --- a/training/mira/run_mira.pl +++ b/training/mira/run_mira.pl @@ -3,7 +3,7 @@ use strict; my @ORIG_ARGV=@ARGV; use Cwd qw(getcwd); my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); -push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; } +push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment"; } # Skip local config (used for distributing jobs) if we're running in local-only mode use LocalConfig; @@ -11,51 +11,50 @@ use Getopt::Long; use IPC::Open2; use POSIX ":sys_wait_h"; my $QSUB_CMD = qsub_args(mert_memory()); - -require "libcall.pl"; - +my $default_jobs = env_default_jobs(); my $srcFile; my $refFiles; my $bin_dir = $SCRIPT_DIR; die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir; -my $FAST_SCORE="$bin_dir/../mteval/fast_score"; +my $FAST_SCORE="$bin_dir/../../mteval/fast_score"; die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE; my $iteration = 0.0; -my $max_iterations = 6; +my $max_iterations = 10; my $metric = "ibm_bleu"; my $iniFile; my $weights; my $initialWeights; -my $decode_nodes = 1; # number of decode nodes +my $jobs = $default_jobs; # number of decode nodes my $pmem = "1g"; my $dir; my $SCORER = $FAST_SCORE; -my $local_server = "$bin_dir/local_parallelize.pl"; -my $parallelize = "$bin_dir/../dpmert/parallelize.pl"; -my $libcall = "$bin_dir/../dpmert/libcall.pl"; -my $sentserver = "$bin_dir/../dpmert/sentserver"; -my $sentclient = "$bin_dir/../dpmert/sentclient"; -my $run_local_server = 0; + +my $UTILS_DIR="$SCRIPT_DIR/../utils"; +require "$UTILS_DIR/libcall.pl"; + +my $parallelize = "$UTILS_DIR/parallelize.pl"; +my $libcall = "$UTILS_DIR/libcall.pl"; +my $sentserver = "$UTILS_DIR/sentserver"; +my $sentclient = "$UTILS_DIR/sentclient"; + my $run_local = 0; -my $usefork; my $pass_suffix = ''; -my $cdec ="$bin_dir/kbest_mirav5"; #"$bin_dir/kbest_mira_rmmv2"; #"$bin_dir/kbest_mira_lv"; +my $cdec ="$bin_dir/kbest_cut_mira"; -#my $cdec ="$bin_dir/kbest_mira_rmmv2"; #"$bin_dir/kbest_mirav5"; #"$bin_dir/kbest_mira_rmmv2"; #"$bin_dir/kbest_mira_lv"; die "Can't find decoder in $cdec" unless -x $cdec; my $decoder = $cdec; my $decoderOpt; -my $update_size=250; +my $update_size; my $approx_score; my $kbest_size=250; my $metric_scale=1; my $optimizer=2; my $disable_clean = 0; -my $use_make; # use make to parallelize line search +my $use_make=0; my $density_prune; my $cpbin=1; my $help = 0; @@ -64,10 +63,10 @@ my $step_size = 0.01; my $gpref; my 
$unique_kbest;
my $freeze;
-my $latent;
-my $sample_max;
my $hopes=1;
my $fears=1;
+my $sent_approx=0;
+my $pseudo_doc=0;
my $range = 35000;
my $minimum = 15000;
@@ -78,15 +77,13 @@ my $portn = int(rand($range)) + $minimum;
Getopt::Long::Configure("no_auto_abbrev");
if (GetOptions(
"decoder=s" => \$decoderOpt,
- "decode-nodes=i" => \$decode_nodes,
+ "jobs=i" => \$jobs,
"density-prune=f" => \$density_prune,
"dont-clean" => \$disable_clean,
"pass-suffix=s" => \$pass_suffix,
- "use-fork" => \$usefork,
"epsilon=s" => \$epsilon,
"help" => \$help,
"local" => \$run_local,
- "local_server" => \$run_local_server,
"use-make=i" => \$use_make,
"max-iterations=i" => \$max_iterations,
"pmem=s" => \$pmem,
@@ -102,10 +99,9 @@ if (GetOptions(
"step-size=f" => \$step_size,
"hope-select=i" => \$hopes,
"fear-select=i" => \$fears,
- "approx-score" => \$approx_score,
+ "sent-approx" => \$sent_approx,
+ "pseudo-doc" => \$pseudo_doc,
"unique-kbest" => \$unique_kbest,
- "latent" => \$latent,
- "sample-max=i" => \$sample_max,
"grammar-prefix=s" => \$gpref,
"freeze" => \$freeze,
"workdir=s" => \$dir,
@@ -235,7 +231,9 @@ close F;
my $lastPScore = 0;
my $lastWeightsFile;
-
+my $bestScoreIter=-1;
+my $bestScore=-1;
+unless ($update_size){$update_size = $kbest_size;}
# main optimization loop
#while (1){
for (my $opt_iter=0; $opt_iter<$max_iterations; $opt_iter++) {
@@ -260,16 +258,16 @@ for (my $opt_iter=0; $opt_iter<$max_iterations; $opt_iter++) {
my $weightsFile="$dir/weights.$opt_iter";
print "ITER $iteration " ;
my $cur_pass = "-p 0$opt_iter";
- my $decoder_cmd = "$decoder -c $iniFile -w $weightsFile $refs_comma_sep -m $metric -s $metric_scale -a -b $update_size -k $kbest_size -o $optimizer $cur_pass -O $weightdir -D $dir -h $hopes -f $fears -C $step_size";
+ my $decoder_cmd = "$decoder -c $iniFile -w $weightsFile $refs_comma_sep -m $metric -s $metric_scale -b $update_size -k $kbest_size -o $optimizer $cur_pass -O $weightdir -D $dir -h $hopes -f $fears -C $step_size";
if($unique_kbest){
$decoder_cmd .= " -u";
}
- if($latent){
- $decoder_cmd .= " -l";
- }
- if($sample_max){
- $decoder_cmd .= " -t $sample_max";
+ if($sent_approx){
+ $decoder_cmd .= " -a";
}
+ if($pseudo_doc){
+ $decoder_cmd .= " -e";
+ }
if ($density_prune) {
$decoder_cmd .= " --density_prune $density_prune";
}
@@ -277,13 +275,11 @@ for (my $opt_iter=0; $opt_iter<$max_iterations; $opt_iter++) {
if ($run_local) {
$pcmd = "cat $srcFile |";
} elsif ($use_make) {
- # TODO: Throw error when decode_nodes is specified along with use_make
+ # TODO: Throw error when jobs is specified along with use_make
$pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $use_make --";
- } elsif ($run_local_server){
- $pcmd = "cat $srcFile | $local_server $usefork -p $pmem -e $logdir -n $decode_nodes --";
- }
+ }
else {
- $pcmd = "cat $srcFile | $parallelize $usefork -p $pmem -e $logdir -j $decode_nodes --baseport $portn --";
+ $pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $jobs --baseport $portn --";
}
my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile";
print STDERR "COMMAND:\n$cmd\n";
@@ -291,14 +287,14 @@ for (my $opt_iter=0; $opt_iter<$max_iterations; $opt_iter++) {
my $retries = 0;
my $num_topbest;
- while($retries < 5) {
+ while($retries < 6) {
$num_topbest = check_output("wc -l < $runFile");
print STDERR "NUMBER OF TOP-BEST HYPs: $num_topbest\n";
if($devSize == $num_topbest) {
last;
} else {
print STDERR "Incorrect number of topbest.
Waiting for distributed filesystem and retrying...\n"; - sleep(3); + sleep(10); } $retries++; } @@ -320,12 +316,15 @@ for (my $opt_iter=0; $opt_iter<$max_iterations; $opt_iter++) { close RUN; close F; close B; close H; - my $dec_score = check_output("cat $runFile.B | $SCORER $refs_comma_sep -l $metric"); - my $dec_score_h = check_output("cat $runFile.H | $SCORER $refs_comma_sep -l $metric"); - my $dec_score_f = check_output("cat $runFile.F | $SCORER $refs_comma_sep -l $metric"); + my $dec_score = check_output("cat $runFile.B | $SCORER $refs_comma_sep -m $metric"); + my $dec_score_h = check_output("cat $runFile.H | $SCORER $refs_comma_sep -m $metric"); + my $dec_score_f = check_output("cat $runFile.F | $SCORER $refs_comma_sep -m $metric"); chomp $dec_score; chomp $dec_score_h; chomp $dec_score_f; print STDERR "DECODER SCORE: $dec_score HOPE: $dec_score_h FEAR: $dec_score_f\n"; - + if ($dec_score> $bestScore){ + $bestScoreIter=$opt_iter; + $bestScore=$dec_score; + } # save space check_call("gzip -f $runFile"); check_call("gzip -f $decoderLog"); @@ -338,21 +337,11 @@ for (my $opt_iter=0; $opt_iter<$max_iterations; $opt_iter++) { $lastWeightsFile = "$dir/weights.$opt_iter"; average_weights("$weightdir/weights.mira-pass*.*[0-9].gz", $newWeightsFile, $logdir); -# check_call("cp $lastW $newWeightsFile"); -# if ($icc < 2) { -# print STDERR "\nREACHED STOPPING CRITERION: score change too little\n"; -# last; -# } system("gzip -f $logdir/kbes*"); print STDERR "\n==========\n"; $iteration++; } -#find -#my $cmd = `grep SCORE /fs/clip-galep5/lexical_tm/log.runmira.nist.20 | cat -n | sort -k +2 | tail -1`; -#$cmd =~ m/([0-9]+)/; -#$lastWeightsFile = "$dir/weights.$1"; -#check_call("ln -s $lastWeightsFile $dir/weights.tuned"); -print STDERR "\nFINAL WEIGHTS: $lastWeightsFile\n(Use -w with the decoder)\n\n"; +print STDERR "\nBEST ITER: $bestScoreIter :: $bestScore\n\n\n"; print STDOUT "$lastWeightsFile\n"; @@ -409,7 +398,7 @@ sub write_config { print $fh "EVAL METRIC: $metric\n"; print $fh "START ITERATION: $iteration\n"; print $fh "MAX ITERATIONS: $max_iterations\n"; - print $fh "DECODE NODES: $decode_nodes\n"; + print $fh "DECODE NODES: $jobs\n"; print $fh "HEAD NODE: $host\n"; print $fh "PMEM (DECODING): $pmem\n"; print $fh "CLEANUP: $cleanup\n"; @@ -462,9 +451,87 @@ sub enseg { } sub print_help { - print "Something wrong\n"; + my $executable = check_output("basename $0"); chomp $executable; + print << "Help"; + +Usage: $executable [options] + + $executable [options] + Runs a complete MIRA optimization using the ini file specified. + +Required: + + --ref-files + Dev set ref files. This option takes only a single string argument. + To use multiple files (including file globbing), this argument should + be quoted. + --source-file + Dev set source file. + --weights + Initial weights file + +General options: + + --help + Print this message and exit. + + --max-iterations + Maximum number of iterations to run. If not specified, defaults + to $max_iterations. + + --metric + Metric to optimize. + Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi + + --workdir + Directory for intermediate and output files. If not specified, the + name is derived from the ini filename. Assuming that the ini + filename begins with the decoder name and ends with ini, the default + name of the working directory is inferred from the middle part of + the filename. E.g. an ini file named decoder.foo.ini would have + a default working directory name foo. + --optimizer + Learning method to use for weight update. 
Choices are 1) SGD, 2) PA MIRA with Selection from Cutting Plane, 3) Cutting Plane MIRA, 4) PA MIRA, 5) nbest MIRA with hope, fear, and model constraints + --metric-scale + Scale MT loss by this amount when computing hope/fear candidates + --kbest-size + Size of k-best list to extract from forest + --update-size + Size of k-best list to use for update (applies to optimizer 5) + --step-size + Controls aggressiveness of update (C) + --hope-select + How to select hope candidate. Choices are 1) model score - cost, 2) min cost + --fear-select + How to select fear candidate. Choices are 1) model score + cost, 2) max cost, 3) max score + --sent-approx + Use smoothed sentence-level MT metric + --pseudo-doc + Use pseudo-document to approximate MT metric + --unique-kbest + Extract unique k-best from forest + --grammar-prefix + Path to sentence-specific grammar files + +Job control options: + + --jobs + Number of decoder processes to run in parallel. [default=$default_jobs] + + --pmem + Amount of physical memory requested for parallel decoding jobs + (used with qsub requests only) + + --local + Run single learner + --use-make + Run parallel learners on a single machine through fork. + + +Help } + sub cmdline { return join ' ',($0,@ORIG_ARGV); } -- cgit v1.2.3 From 14a82a5c9116d5e30dbfa33561851fdee28a0925 Mon Sep 17 00:00:00 2001 From: Vladimir Eidelman Date: Sat, 13 Apr 2013 22:21:04 -0400 Subject: cleanup mira --- training/mira/kbest_cut_mira.cc | 128 +++++++++++----------------------------- 1 file changed, 36 insertions(+), 92 deletions(-) (limited to 'training') diff --git a/training/mira/kbest_cut_mira.cc b/training/mira/kbest_cut_mira.cc index 34eb00dc..7df9a18f 100644 --- a/training/mira/kbest_cut_mira.cc +++ b/training/mira/kbest_cut_mira.cc @@ -40,7 +40,7 @@ bool no_reweight; bool no_select; bool unique_kbest; int update_list_size; -vector dense_weights_g; +vector dense_w_local; double mt_metric_scale; int optimizer; int fear_select; @@ -170,7 +170,7 @@ bool FearCompareB(const HI& h1, const HI& h2 ) bool FearComparePred(const HI& h1, const HI& h2 ) { - return h1->features.dot(dense_weights_g) > h2->features.dot(dense_weights_g); + return h1->features.dot(dense_w_local) > h2->features.dot(dense_w_local); }; bool HypothesisCompareG(const HI& h1, const HI& h2 ) @@ -203,12 +203,7 @@ void CuttingPlane(vector >* cur_c, bool* again, vecto for(int u=0;u!=all_hyp.size();u++) { double t_score = all_hyp[u]->features.dot(dense_weights); - //all_hyp[u]->fear = -1*all_hyp[u]->mt_metric - hope_score + t_score; all_hyp[u]->fear = -1*all_hyp[u]->mt_metric + 1*all_hyp[0]->mt_metric - hope_score + t_score; //relative loss - // all_hyp[u]->oracle_loss = -1*all_hyp[u]->mt_metric - -1*all_hyp[0]->mt_metric; - //all_hyp[u]->oracle_feat_diff = all_hyp[0]->features - all_hyp[u]->features; - // all_hyp[u]->fear = -1 * all_hyp[u]->mt_metric + t_score; } sort(all_hyp.begin(),all_hyp.end(),FearCompareB); @@ -238,24 +233,14 @@ double ComputeDelta(vector >* cur_p, double max_step_ { vector >& cur_pair = *cur_p; double loss = cur_pair[0]->oracle_loss - cur_pair[1]->oracle_loss; - //double margin = -cur_pair[0]->oracle_feat_diff.dot(dense_weights) + cur_pair[1]->oracle_feat_diff.dot(dense_weights); //TODO: is it a problem that new oracle is used in diff?
- //double num = loss - margin; - double margin = -(cur_pair[0]->oracleN->features.dot(dense_weights)- cur_pair[0]->features.dot(dense_weights)) + (cur_pair[1]->oracleN->features.dot(dense_weights) - cur_pair[1]->features.dot(dense_weights)); const double num = margin + loss; cerr << "LOSS: " << num << " Margin:" << margin << " BLEUL:" << loss << " " << cur_pair[1]->features.dot(dense_weights) << " " << cur_pair[0]->features.dot(dense_weights) <oracle_loss - cur_pair[0]->oracle_feat_diff.dot(dense_weights)) - - (cur_pair[1]->oracle_loss - cur_pair[1]->oracle_feat_diff.dot(dense_weights)); - */ - SparseVector diff = cur_pair[0]->features; diff -= cur_pair[1]->features; - /* SparseVector diff = cur_pair[0]->oracle_feat_diff; - diff -= cur_pair[1]->oracle_feat_diff;*/ double diffsqnorm = diff.l2norm_sq(); double delta; if (diffsqnorm > 0) @@ -264,7 +249,6 @@ double ComputeDelta(vector >* cur_p, double max_step_ delta = 0; cerr << " D1:" << delta; //clip delta (enforce margin constraints) - delta = max(-cur_pair[0]->alpha, min(delta, cur_pair[1]->alpha)); cerr << " D2:" << delta; return delta; @@ -278,12 +262,12 @@ vector > SelectPair(vector vector > pair; - if (no_select || optimizer == 2){ //skip heuristic search and return oracle and fear for 1-mira - // if(optimizer == 2) { + if (no_select || optimizer == 2){ //skip heuristic search and return oracle and fear for pa-mira + pair.push_back(cur_constraint[0]); pair.push_back(cur_constraint[1]); return pair; - // } + } for(int u=0;u != cur_constraint.size();u++) @@ -299,8 +283,6 @@ vector > SelectPair(vector } if(!max_fear) return pair; // - if(DEBUG_SELECT) cerr << " F" << max_fear->fear << endl; - if ((cur_constraint[u]->alpha == 0) && (cur_constraint[u]->fear > max_fear->fear + SMO_EPSILON)) { @@ -310,8 +292,7 @@ vector > SelectPair(vector if (cur_constraint[i]->alpha > 0) { pair.push_back(cur_constraint[u]); - pair.push_back(cur_constraint[i]); - cerr << "RETJURN from 1" << endl; + pair.push_back(cur_constraint[i]); return pair; } } @@ -342,11 +323,10 @@ struct GoodBadOracle { struct TrainingObserver : public DecoderObserver { TrainingObserver(const int k, const DocScorer& d, vector* o, vector* cbs) : ds(d), oracles(*o), corpus_bleu_sent_stats(*cbs), kbest_size(k) { - // TrainingObserver(const int k, const DocScorer& d, vector* o) : ds(d), oracles(*o), kbest_size(k) { - //calculate corpus bleu score from previous iterations 1-best for BLEU gain + if(!pseudo_doc && !sent_approx) - if(cur_pass > 0) + if(cur_pass > 0) //calculate corpus bleu score from previous iterations 1-best for BLEU gain { ScoreP acc; for (int ii = 0; ii < corpus_bleu_sent_stats.size(); ii++) { @@ -357,7 +337,7 @@ struct TrainingObserver : public DecoderObserver { corpus_bleu_stats = acc; corpus_bleu_score = acc->ComputeScore(); } - //corpus_src_length = 0; + } const DocScorer& ds; vector& corpus_bleu_sent_stats; @@ -396,9 +376,8 @@ struct TrainingObserver : public DecoderObserver { virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) { cur_sent = smeta.GetSentenceID(); - //cerr << "SOURCE " << smeta.GetSourceLength() << endl; curr_src_length = (float) smeta.GetSourceLength(); - //UpdateOracles(smeta.GetSentenceID(), *hg); + if(unique_kbest) UpdateOracles(smeta.GetSentenceID(), *hg); else @@ -431,9 +410,8 @@ struct TrainingObserver : public DecoderObserver { typedef KBest::KBestDerivations, ESentenceTraversal,Filter> K; K kbest(forest,kbest_size); - //KBest::KBestDerivations, ESentenceTraversal> kbest(forest, kbest_size); for (int i = 0; i 
< kbest_size; ++i) { - //const KBest::KBestDerivations, ESentenceTraversal>::Derivation* d = + typename K::Derivation *d = kbest.LazyKthBest(forest.nodes_.size() - 1, i); if (!d) break; @@ -489,10 +467,9 @@ struct TrainingObserver : public DecoderObserver { corpus_bleu_stats->PlusEquals(*sent_stats, PSEUDO_SCALE); corpus_src_length = PSEUDO_SCALE * (corpus_src_length + curr_src_length); - cerr << "CORP S " << corpus_src_length << " " << curr_src_length << "\n" << details << "\n" << details2 << endl; + cerr << "ps corpus size: " << corpus_src_length << " " << curr_src_length << "\n" << details << "\n" << details2 << endl; } - //figure out how many hyps we can keep maximum int temp_update_size = update_list_size; if (all_hyp.size() < update_list_size){ temp_update_size = all_hyp.size();} @@ -500,7 +477,8 @@ struct TrainingObserver : public DecoderObserver { //sort all hyps by sentscore (eg. bleu) sort(all_hyp.begin(),all_hyp.end(),HypothesisCompareB); - if(PRINT_LIST){ cerr << "Sorting " << endl; for(int u=0;u!=all_hyp.size();u++) cerr << all_hyp[u]->mt_metric << " " << all_hyp[u]->features.dot(dense_weights_g) << endl; } + if(PRINT_LIST){ cerr << "Sorting " << endl; for(int u=0;u!=all_hyp.size();u++) + cerr << all_hyp[u]->mt_metric << " " << all_hyp[u]->features.dot(dense_w_local) << endl; } if(hope_select == 1) { @@ -508,7 +486,7 @@ struct TrainingObserver : public DecoderObserver { if (PRINT_LIST) cerr << "HOPE " << endl; for(int u=0;u!=all_hyp.size();u++) { - double t_score = all_hyp[u]->features.dot(dense_weights_g); + double t_score = all_hyp[u]->features.dot(dense_w_local); all_hyp[u]->hope = all_hyp[u]->mt_metric + t_score; if (PRINT_LIST) cerr << all_hyp[u]->mt_metric << " H:" << all_hyp[u]->hope << " S:" << t_score << endl; @@ -522,47 +500,38 @@ struct TrainingObserver : public DecoderObserver { cur_good.insert(cur_good.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size); if(PRINT_LIST) { cerr << "GOOD" << endl; for(int u=0;u!=cur_good.size();u++) cerr << cur_good[u]->mt_metric << " " << cur_good[u]->hope << endl;} + //use hope for fear selection shared_ptr& oracleN = cur_good[0]; - if(fear_select == 1){ //compute fear hyps with model - bleu if (PRINT_LIST) cerr << "FEAR " << endl; - double hope_score = oracleN->features.dot(dense_weights_g); + double hope_score = oracleN->features.dot(dense_w_local); if (PRINT_LIST) cerr << "hope score " << hope_score << endl; for(int u=0;u!=all_hyp.size();u++) { - double t_score = all_hyp[u]->features.dot(dense_weights_g); - //all_hyp[u]->fear = -1*all_hyp[u]->mt_metric - hope_score + t_score; - - /* all_hyp[u]->fear = -1*all_hyp[u]->mt_metric - -1*cur_oracle->mt_metric - hope_score + t_score; //relative loss - all_hyp[u]->oracle_loss = -1*all_hyp[u]->mt_metric - -1*cur_oracle->mt_metric; - all_hyp[u]->oracle_feat_diff = cur_oracle->features - all_hyp[u]->features;*/ + double t_score = all_hyp[u]->features.dot(dense_w_local); all_hyp[u]->fear = -1*all_hyp[u]->mt_metric + 1*oracleN->mt_metric - hope_score + t_score; //relative loss all_hyp[u]->oracle_loss = -1*all_hyp[u]->mt_metric + 1*oracleN->mt_metric; all_hyp[u]->oracle_feat_diff = oracleN->features - all_hyp[u]->features; all_hyp[u]->oracleN=oracleN; - // all_hyp[u]->fear = -1 * all_hyp[u]->mt_metric + t_score; if (PRINT_LIST) cerr << all_hyp[u]->mt_metric << " H:" << all_hyp[u]->hope << " F:" << all_hyp[u]->fear << endl; } sort(all_hyp.begin(),all_hyp.end(),FearCompareB); - cur_bad.insert(cur_bad.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size); } else if(fear_select == 2) 
//select fear based on cost { - cur_bad.insert(cur_bad.begin(), all_hyp.end()-temp_update_size, all_hyp.end()); - reverse(cur_bad.begin(),cur_bad.end()); + sort(all_hyp.begin(),all_hyp.end(),HypothesisCompareG); } - else //pred-based, fear_select = 3 + else //max model score, also known as prediction-based { sort(all_hyp.begin(),all_hyp.end(),FearComparePred); - cur_bad.insert(cur_bad.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size); } - + cur_bad.insert(cur_bad.begin(), all_hyp.begin(), all_hyp.begin()+temp_update_size); if(PRINT_LIST){ cerr<< "BAD"<mt_metric << " H:" << cur_bad[u]->hope << " F:" << cur_bad[u]->fear << endl;} @@ -616,13 +585,12 @@ void ReadPastTranslationForScore(const int cur_pass, vector* c, DocScore ++lc; } - assert(lc > 0); float score = acc->ComputeScore(); string details; acc->ScoreDetails(&details); - cerr << "INIT RUN " << details << score << endl; + cerr << "Previous run: " << details << score << endl; } @@ -640,7 +608,6 @@ int main(int argc, char** argv) { rng.reset(new MT19937); vector corpus; - //ReadTrainingCorpus(conf["source"].as(), &corpus); const string metric_name = conf["mt_metric"].as(); optimizer = conf["optimizer"].as(); @@ -654,7 +621,7 @@ int main(int argc, char** argv) { unique_kbest = conf.count("unique_k_best"); pseudo_doc = conf.count("pseudo_doc"); sent_approx = conf.count("sent_approx"); - cerr << "PSEUDO " << pseudo_doc << " SENT " << sent_approx << endl; + cerr << "Using pseudo-doc:" << pseudo_doc << " Sent:" << sent_approx << endl; if(pseudo_doc) mt_metric_scale=1; @@ -665,7 +632,6 @@ int main(int argc, char** argv) { //establish metric used for tuning if (type == TER) { invert_score = true; - // approx_score = false; } else { invert_score = false; } @@ -681,20 +647,9 @@ int main(int argc, char** argv) { { ReadPastTranslationForScore(cur_pass, &corpus_bleu_sent_stats, ds, output_dir); } - /* if (ds.size() != corpus.size()) { - cerr << "Mismatched number of references (" << ds.size() << ") and sources (" << corpus.size() << ")\n"; - return 1; - }*/ - cerr << "Optimizing with " << optimizer << endl; - // load initial weights - /*Weights weights; - weights.InitFromFile(conf["input_weights"].as()); - SparseVector lambdas; - weights.InitSparseVector(&lambdas); - */ - - + cerr << "Using optimizer:" << optimizer << endl; + ReadFile ini_rf(conf["decoder_config"].as()); Decoder decoder(ini_rf.stream()); @@ -705,7 +660,6 @@ int main(int argc, char** argv) { Weights::InitSparseVector(dense_weights, &lambdas); const string input = decoder.GetConf()["input"].as(); - //const bool show_feature_dictionary = decoder.GetConf().count("show_feature_dictionary"); if (!SILENT) cerr << "Reading input from " << ((input == "-") ? 
"STDIN" : input.c_str()) << endl; ReadFile in_read(input); istream *in = in_read.stream(); @@ -714,8 +668,6 @@ int main(int argc, char** argv) { const double max_step_size = conf["max_step_size"].as(); - - // assert(corpus.size() > 0); vector oracles(ds.size()); TrainingObserver observer(conf["k_best_size"].as(), ds, &oracles, &corpus_bleu_sent_stats); @@ -725,27 +677,21 @@ int main(int argc, char** argv) { double objective=0; double tot_loss = 0; int dots = 0; - // int cur_pass = 1; - // vector dense_weights; SparseVector tot; SparseVector final_tot; - // tot += lambdas; // initial weights - // lcount++; // count for initial weights - - //string msg = "# MIRA tuned weights"; - // while (cur_pass <= max_iteration) { - SparseVector old_lambdas = lambdas; - tot.clear(); - tot += lambdas; - cerr << "PASS " << cur_pass << " " << endl << lambdas << endl; - ScoreP acc, acc_h, acc_f; - - while(*in) { + + SparseVector old_lambdas = lambdas; + tot.clear(); + tot += lambdas; + cerr << "PASS " << cur_pass << " " << endl << lambdas << endl; + ScoreP acc, acc_h, acc_f; + + while(*in) { getline(*in, buf); if (buf.empty()) continue; //TODO: allow batch updating lambdas.init_vector(&dense_weights); - dense_weights_g = dense_weights; + dense_w_local = dense_weights; decoder.SetId(cur_sent); decoder.Decode(buf, &observer); // decode the sentence, calling Notify to get the hope,fear, and model best hyps. @@ -922,7 +868,7 @@ int main(int argc, char** argv) { dense_weights.clear(); lambdas.init_vector(&dense_weights); - dense_weights_g = dense_weights; + dense_w_local = dense_weights; iter++; if(DEBUG_SMO) cerr << "SMO opt " << iter << " " << delta << " " << cur_pair[0]->alpha << " " << cur_pair[1]->alpha << endl; @@ -991,19 +937,17 @@ int main(int argc, char** argv) { ostringstream os; os << weights_dir << "/weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << "." << node_id << ".gz"; string msg = "# MIRA tuned weights ||| " + boost::lexical_cast(node_id) + " ||| " + boost::lexical_cast(lcount); - //Weights.InitFromVector(lambdas); lambdas.init_vector(&dense_weights); Weights::WriteToFile(os.str(), dense_weights, true, &msg); SparseVector x = tot; - x /= lcount; + x /= lcount+1; ostringstream sa; string msga = "# MIRA tuned weights AVERAGED ||| " + boost::lexical_cast(node_id) + " ||| " + boost::lexical_cast(lcount); sa << weights_dir << "/weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << "." 
<< node_id << "-avg.gz"; x.init_vector(&dense_weights); Weights::WriteToFile(sa.str(), dense_weights, true, &msga); - cerr << "Optimization complete.\n"; return 0; } -- cgit v1.2.3 From 5daf7c9c53bf842721f7bbcbeb235279aa950bcf Mon Sep 17 00:00:00 2001 From: Vladimir Eidelman Date: Sun, 14 Apr 2013 00:00:43 -0400 Subject: cleanup script --- training/mira/run_mira.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'training') diff --git a/training/mira/run_mira.pl b/training/mira/run_mira.pl index 90a4da0e..e72c02e0 100755 --- a/training/mira/run_mira.pl +++ b/training/mira/run_mira.pl @@ -593,7 +593,7 @@ sub average_weights { else { (my $msg,my $ran,$mult) = split(/ \|\|\| /); - print "RAN $ran $mult\n"; + print "Processing $ran $mult\n"; } } $total_mult += $mult; -- cgit v1.2.3 From 2613a9673263a4442b4a8f7fc28a820f8d071157 Mon Sep 17 00:00:00 2001 From: Vladimir Eidelman Date: Sun, 14 Apr 2013 00:02:00 -0400 Subject: add example to run script --- training/mira/run_mira.pl | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'training') diff --git a/training/mira/run_mira.pl b/training/mira/run_mira.pl index e72c02e0..d71590ba 100755 --- a/training/mira/run_mira.pl +++ b/training/mira/run_mira.pl @@ -455,9 +455,24 @@ sub print_help { print << "Help"; Usage: $executable [options] - - $executable [options] - Runs a complete MIRA optimization using the ini file specified. + Runs a complete MIRA optimization using the ini file specified. + Example invocation: + run_mira.pl \ + --pmem 3g \ + --max-iterations 20 \ + --optimizer 2 \ + --unique-kbest \ + --jobs 15 \ + --kbest-size 500 \ + --hope-select 1 \ + --fear-select 1 \ + --ref-files "ref.0.soseos ref.1.soseos" \ + --source-file src.soseos \ + --weights weights.init \ + --workdir workdir \ + --grammar-prefix grammars/grammar \ + --step-size 0.01 \ + --metric-scale 10000 Required: -- cgit v1.2.3
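The pairwise update that survives the cleanup of kbest_cut_mira.cc above is easy to miss inside the diff, so here is a minimal, self-contained C++ sketch of the SMO-style step in ComputeDelta. HypInfo, FeatVec, Dot, and ComputeDeltaSketch are simplified stand-ins invented for this illustration (the real code operates on cdec's HypothesisInfo and SparseVector types), and the division by max_step_size mirrors the source above rather than a textbook MIRA formulation. Because both members of the pair share the same oracle hypothesis (oracleN), the oracle feature terms cancel and the margin reduces to the difference of the two model scores.

    #include <algorithm>
    #include <map>
    #include <string>

    // Simplified stand-ins for cdec's SparseVector<double> / HypothesisInfo.
    typedef std::map<std::string, double> FeatVec;

    struct HypInfo {
      FeatVec features;   // feature vector of the hypothesis
      double oracle_loss; // metric gap to the shared oracle hypothesis
      double alpha;       // dual variable attached to this constraint
    };

    static double Dot(const FeatVec& f, const FeatVec& w) {
      double s = 0;
      for (FeatVec::const_iterator it = f.begin(); it != f.end(); ++it) {
        FeatVec::const_iterator wi = w.find(it->first);
        if (wi != w.end()) s += it->second * wi->second;
      }
      return s;
    }

    // SMO-style step for one constraint pair, following ComputeDelta above.
    // Since both hypotheses share the same oracle, the oracle feature terms
    // cancel and the margin is just the model-score difference.
    double ComputeDeltaSketch(const HypInfo& h0, const HypInfo& h1,
                              const FeatVec& w, double max_step_size) {
      double loss = h0.oracle_loss - h1.oracle_loss;
      double margin = Dot(h0.features, w) - Dot(h1.features, w);
      double num = margin + loss;

      // squared l2 norm of the feature difference, as in diff.l2norm_sq()
      FeatVec diff = h0.features;
      for (FeatVec::const_iterator it = h1.features.begin();
           it != h1.features.end(); ++it)
        diff[it->first] -= it->second;
      double diffsqnorm = 0;
      for (FeatVec::const_iterator it = diff.begin(); it != diff.end(); ++it)
        diffsqnorm += it->second * it->second;

      double delta = (diffsqnorm > 0) ? num / (diffsqnorm * max_step_size) : 0;
      // clip delta so both updated alphas stay non-negative
      return std::max(-h0.alpha, std::min(delta, h1.alpha));
    }

As the diff shows, the caller then uses the returned delta to adjust the two alphas (the clipping interval [-alpha0, alpha1] keeps both non-negative) and refreshes dense_w_local from lambdas after every SMO iteration.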