From 4cd076acd1760035df693ff4f93a79b5d5c3d29d Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sun, 25 Sep 2011 21:59:39 +0200 Subject: removed old stuff --- dtrain/dtrain.cc | 2 +- dtrain/ksampler.h | 6 +----- dtrain/test/cdec_toy/cdec.ini | 4 ---- dtrain/test/cdec_toy/grammar | 12 ------------ dtrain/test/cdec_toy/in | 1 - dtrain/test/cdec_toy/weights | 2 -- dtrain/test/toy/cdec.ini | 2 ++ dtrain/test/toy/dtrain.ini | 8 ++++++++ dtrain/test/toy/in | 2 ++ dtrain/test/toy_example/cdec.ini | 2 -- dtrain/test/toy_example/dtrain.ini | 8 -------- dtrain/test/toy_example/in | 2 -- dtrain/test/wc_pipes/bible.txt.gz | Bin 1193106 -> 0 bytes dtrain/test/wc_pipes/jobconf.xml | 16 ---------------- dtrain/test/wc_pipes/run.sh | 11 ----------- dtrain/test/wc_pipes/wordcount.cc | 38 ------------------------------------- dtrain/test/wc_pipes/wordcount.h | 34 --------------------------------- 17 files changed, 14 insertions(+), 136 deletions(-) delete mode 100644 dtrain/test/cdec_toy/cdec.ini delete mode 100644 dtrain/test/cdec_toy/grammar delete mode 100644 dtrain/test/cdec_toy/in delete mode 100644 dtrain/test/cdec_toy/weights create mode 100644 dtrain/test/toy/cdec.ini create mode 100644 dtrain/test/toy/dtrain.ini create mode 100644 dtrain/test/toy/in delete mode 100644 dtrain/test/toy_example/cdec.ini delete mode 100644 dtrain/test/toy_example/dtrain.ini delete mode 100644 dtrain/test/toy_example/in delete mode 100644 dtrain/test/wc_pipes/bible.txt.gz delete mode 100644 dtrain/test/wc_pipes/jobconf.xml delete mode 100755 dtrain/test/wc_pipes/run.sh delete mode 100644 dtrain/test/wc_pipes/wordcount.cc delete mode 100644 dtrain/test/wc_pipes/wordcount.h diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc index ad1ab7b7..b1ea1e46 100644 --- a/dtrain/dtrain.cc +++ b/dtrain/dtrain.cc @@ -253,7 +253,7 @@ main(int argc, char** argv) } } if (broken_grammar) continue; - boost::replace_all(in_split[3], " __NEXT__RULE__ ", "\n"); + boost::replace_all(in_split[3], " __NEXT__RULE__ ", "\n"); // TODO in_split[3] += "\n"; grammar_buf_out << in_split[3] << DTRAIN_GRAMMAR_DELIM << " " << in_split[0] << endl; decoder.SetSentenceGrammarFromString(in_split[3]); diff --git a/dtrain/ksampler.h b/dtrain/ksampler.h index bbe2b402..08bf1498 100644 --- a/dtrain/ksampler.h +++ b/dtrain/ksampler.h @@ -9,10 +9,6 @@ namespace dtrain { -/* - * KSampler - * - */ struct KSampler : public HypSampler { const size_t k_; @@ -34,7 +30,7 @@ struct KSampler : public HypSampler s_.clear(); std::vector samples; HypergraphSampler::sample_hypotheses(forest, k_, prng_, &samples); - for ( size_t i = 0; i < k_; ++i ) { + for (size_t i = 0; i < k_; ++i) { ScoredHyp h; h.w = samples[i].words; h.f = samples[i].fmap; diff --git a/dtrain/test/cdec_toy/cdec.ini b/dtrain/test/cdec_toy/cdec.ini deleted file mode 100644 index 9eb34512..00000000 --- a/dtrain/test/cdec_toy/cdec.ini +++ /dev/null @@ -1,4 +0,0 @@ -formalism=scfg -grammar=../dtrain/test/toy_cdec/grammar -add_pass_through_rules=true -weights=../dtrain/test/toy_cdec/weights diff --git a/dtrain/test/cdec_toy/grammar b/dtrain/test/cdec_toy/grammar deleted file mode 100644 index aeed75ef..00000000 --- a/dtrain/test/cdec_toy/grammar +++ /dev/null @@ -1,12 +0,0 @@ -[S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 -[NP] ||| ich ||| i ||| logp=0 -[NP] ||| ein [NN,1] ||| a [1] ||| logp=0 -[NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 -[NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 -[JJ] ||| kleines ||| small ||| logp=0 -[JJ] ||| kleines ||| little ||| logp=0 -[JJ] ||| grosses ||| big ||| logp=0 -[JJ] ||| grosses ||| large ||| logp=0 -[VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 -[V] ||| sah ||| saw ||| logp=0 -[V] ||| fand ||| found ||| logp=0 diff --git a/dtrain/test/cdec_toy/in b/dtrain/test/cdec_toy/in deleted file mode 100644 index e6df9275..00000000 --- a/dtrain/test/cdec_toy/in +++ /dev/null @@ -1 +0,0 @@ -ich sah ein kleines haus diff --git a/dtrain/test/cdec_toy/weights b/dtrain/test/cdec_toy/weights deleted file mode 100644 index 10d7ed83..00000000 --- a/dtrain/test/cdec_toy/weights +++ /dev/null @@ -1,2 +0,0 @@ -logp 1 -use_shell 1 diff --git a/dtrain/test/toy/cdec.ini b/dtrain/test/toy/cdec.ini new file mode 100644 index 00000000..98b02d44 --- /dev/null +++ b/dtrain/test/toy/cdec.ini @@ -0,0 +1,2 @@ +formalism=scfg +add_pass_through_rules=true diff --git a/dtrain/test/toy/dtrain.ini b/dtrain/test/toy/dtrain.ini new file mode 100644 index 00000000..3ab4f8d4 --- /dev/null +++ b/dtrain/test/toy/dtrain.ini @@ -0,0 +1,8 @@ +decoder_config=test/toy_example/cdec.ini +ksamples=4 +ngrams=3 +epochs=2 +input=test/toy_example/toy.in +scorer=bleu +output=- +wprint=logp use_shell use_house PassThrough diff --git a/dtrain/test/toy/in b/dtrain/test/toy/in new file mode 100644 index 00000000..63f97158 --- /dev/null +++ b/dtrain/test/toy/in @@ -0,0 +1,2 @@ +0 ich sah ein kleines haus i saw a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [NP] ||| ich ||| i ||| logp=0 __NEXT_RULE__ [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 __NEXT_RULE__ [JJ] ||| kleines ||| small ||| logp=0 __NEXT_RULE__ [JJ] ||| kleines ||| little ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| big ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| large ||| logp=0 __NEXT_RULE__ [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [V] ||| sah ||| saw ||| logp=0 __NEXT_RULE__ [V] ||| fand ||| found ||| logp=0 +1 ich fand ein grosses haus i found a large house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [NP] ||| ich ||| i ||| logp=0 __NEXT_RULE__ [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 __NEXT_RULE__ [JJ] ||| kleines ||| small ||| logp=0 __NEXT_RULE__ [JJ] ||| kleines ||| little ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| big ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| large ||| logp=0 __NEXT_RULE__ [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [V] ||| sah ||| saw ||| logp=0 __NEXT_RULE__ [V] ||| fand ||| found ||| logp=0 diff --git a/dtrain/test/toy_example/cdec.ini b/dtrain/test/toy_example/cdec.ini deleted file mode 100644 index 98b02d44..00000000 --- a/dtrain/test/toy_example/cdec.ini +++ /dev/null @@ -1,2 +0,0 @@ -formalism=scfg -add_pass_through_rules=true diff --git a/dtrain/test/toy_example/dtrain.ini b/dtrain/test/toy_example/dtrain.ini deleted file mode 100644 index 3ab4f8d4..00000000 --- a/dtrain/test/toy_example/dtrain.ini +++ /dev/null @@ -1,8 +0,0 @@ -decoder_config=test/toy_example/cdec.ini -ksamples=4 -ngrams=3 -epochs=2 -input=test/toy_example/toy.in -scorer=bleu -output=- -wprint=logp use_shell use_house PassThrough diff --git a/dtrain/test/toy_example/in b/dtrain/test/toy_example/in deleted file mode 100644 index 63f97158..00000000 --- a/dtrain/test/toy_example/in +++ /dev/null @@ -1,2 +0,0 @@ -0 ich sah ein kleines haus i saw a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [NP] ||| ich ||| i ||| logp=0 __NEXT_RULE__ [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 __NEXT_RULE__ [JJ] ||| kleines ||| small ||| logp=0 __NEXT_RULE__ [JJ] ||| kleines ||| little ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| big ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| large ||| logp=0 __NEXT_RULE__ [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [V] ||| sah ||| saw ||| logp=0 __NEXT_RULE__ [V] ||| fand ||| found ||| logp=0 -1 ich fand ein grosses haus i found a large house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [NP] ||| ich ||| i ||| logp=0 __NEXT_RULE__ [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 __NEXT_RULE__ [JJ] ||| kleines ||| small ||| logp=0 __NEXT_RULE__ [JJ] ||| kleines ||| little ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| big ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| large ||| logp=0 __NEXT_RULE__ [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [V] ||| sah ||| saw ||| logp=0 __NEXT_RULE__ [V] ||| fand ||| found ||| logp=0 diff --git a/dtrain/test/wc_pipes/bible.txt.gz b/dtrain/test/wc_pipes/bible.txt.gz deleted file mode 100644 index a25a65d9..00000000 Binary files a/dtrain/test/wc_pipes/bible.txt.gz and /dev/null differ diff --git a/dtrain/test/wc_pipes/jobconf.xml b/dtrain/test/wc_pipes/jobconf.xml deleted file mode 100644 index facdbce6..00000000 --- a/dtrain/test/wc_pipes/jobconf.xml +++ /dev/null @@ -1,16 +0,0 @@ - - - - hadoop.pipes.executable - path/to/dp_hadoop_pipes_test - - - hadoop.pipes.java.recordreader - true - - - hadoop.pipes.java.recordwriter - true - - - diff --git a/dtrain/test/wc_pipes/run.sh b/dtrain/test/wc_pipes/run.sh deleted file mode 100755 index de7d8aef..00000000 --- a/dtrain/test/wc_pipes/run.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh - -HADOOP=~/z/hadoop-0.20.2/ - -$HADOOP/bin/hadoop dfs -put wordcount wordcount - -$HADOOP/bin/hadoop pipes -D hadoop.pipes.java.recordreader=true \ - -D hadoop.pipes.java.recordwriter=true \ - -input in/bible.txt -output out/bible_out \ - -program ./wordcount - diff --git a/dtrain/test/wc_pipes/wordcount.cc b/dtrain/test/wc_pipes/wordcount.cc deleted file mode 100644 index 39560a31..00000000 --- a/dtrain/test/wc_pipes/wordcount.cc +++ /dev/null @@ -1,38 +0,0 @@ -#include "wordcount.hh" - - -void -WordcountMapper::map(HadoopPipes::MapContext & context) -{ - typedef boost::tokenizer<> tokenizer_t; - tokenizer_t tokenizer(context.getInputValue()); - - for( tokenizer_t::const_iterator i = tokenizer.begin(); - tokenizer.end() != i; ++i ) { - context.emit(boost::to_lower_copy(*i), "1"); - } -} - -void -WordcountReducer::reduce(HadoopPipes::ReduceContext & context) -{ - uint32_t count( 0 ); - - do { - ++count; - } while( context.nextValue() ); - - std::cout << context.getInputKey() << endl; - context.emit( context.getInputKey(), - boost::lexical_cast(count) ); -} - - -int -main( int argc, char * argv[] ) -{ - HadoopPipes::TemplateFactory2 factory; - return HadoopPipes::runTask( factory ); -} - diff --git a/dtrain/test/wc_pipes/wordcount.h b/dtrain/test/wc_pipes/wordcount.h deleted file mode 100644 index c8fc8a29..00000000 --- a/dtrain/test/wc_pipes/wordcount.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef __WORDCOUNT_HH__ -#define __WORDCOUNT_HH__ - - -#include -#include - -#include "hadoop/Pipes.hh" -#include "hadoop/TemplateFactory.hh" - -#include -#include -#include - -using namespace std; - - -class WordcountMapper : public HadoopPipes::Mapper -{ - public: - WordcountMapper( const HadoopPipes::TaskContext & ) {}; - void map( HadoopPipes::MapContext &context ); -}; - -class WordcountReducer : public HadoopPipes::Reducer -{ - public: - WordcountReducer( const HadoopPipes::TaskContext & ) {}; - void reduce( HadoopPipes::ReduceContext & context ); -}; - - -#endif - -- cgit v1.2.3