-rw-r--r--  dtrain/dtrain.cc                   |   2
-rw-r--r--  dtrain/ksampler.h                  |   6
-rw-r--r--  dtrain/test/cdec_toy/cdec.ini      |   4
-rw-r--r--  dtrain/test/cdec_toy/grammar       |  12
-rw-r--r--  dtrain/test/cdec_toy/in            |   1
-rw-r--r--  dtrain/test/cdec_toy/weights       |   2
-rw-r--r--  dtrain/test/toy/cdec.ini (renamed from dtrain/test/toy_example/cdec.ini) |   0
-rw-r--r--  dtrain/test/toy/dtrain.ini (renamed from dtrain/test/toy_example/dtrain.ini) |   0
-rw-r--r--  dtrain/test/toy/in (renamed from dtrain/test/toy_example/in) |   0
-rw-r--r--  dtrain/test/wc_pipes/bible.txt.gz  | bin 1193106 -> 0 bytes
-rw-r--r--  dtrain/test/wc_pipes/jobconf.xml   |  16
-rwxr-xr-x  dtrain/test/wc_pipes/run.sh        |  11
-rw-r--r--  dtrain/test/wc_pipes/wordcount.cc  |  38
-rw-r--r--  dtrain/test/wc_pipes/wordcount.h   |  34
14 files changed, 2 insertions, 124 deletions
diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc
index ad1ab7b7..b1ea1e46 100644
--- a/dtrain/dtrain.cc
+++ b/dtrain/dtrain.cc
@@ -253,7 +253,7 @@ main(int argc, char** argv)
       }
     }
     if (broken_grammar) continue;
-    boost::replace_all(in_split[3], " __NEXT__RULE__ ", "\n");
+    boost::replace_all(in_split[3], " __NEXT__RULE__ ", "\n"); // TODO
     in_split[3] += "\n";
     grammar_buf_out << in_split[3] << DTRAIN_GRAMMAR_DELIM << " " << in_split[0] << endl;
     decoder.SetSentenceGrammarFromString(in_split[3]);
diff --git a/dtrain/ksampler.h b/dtrain/ksampler.h
index bbe2b402..08bf1498 100644
--- a/dtrain/ksampler.h
+++ b/dtrain/ksampler.h
@@ -9,10 +9,6 @@ namespace dtrain
 {
 
 
-/*
- * KSampler
- *
- */
 struct KSampler : public HypSampler
 {
   const size_t k_;
@@ -34,7 +30,7 @@ struct KSampler : public HypSampler
     s_.clear();
     std::vector<HypergraphSampler::Hypothesis> samples;
     HypergraphSampler::sample_hypotheses(forest, k_, prng_, &samples);
-    for ( size_t i = 0; i < k_; ++i ) {
+    for (size_t i = 0; i < k_; ++i) {
       ScoredHyp h;
       h.w = samples[i].words;
       h.f = samples[i].fmap;
diff --git a/dtrain/test/cdec_toy/cdec.ini b/dtrain/test/cdec_toy/cdec.ini
deleted file mode 100644
index 9eb34512..00000000
--- a/dtrain/test/cdec_toy/cdec.ini
+++ /dev/null
@@ -1,4 +0,0 @@
-formalism=scfg
-grammar=../dtrain/test/toy_cdec/grammar
-add_pass_through_rules=true
-weights=../dtrain/test/toy_cdec/weights
diff --git a/dtrain/test/cdec_toy/grammar b/dtrain/test/cdec_toy/grammar
deleted file mode 100644
index aeed75ef..00000000
--- a/dtrain/test/cdec_toy/grammar
+++ /dev/null
@@ -1,12 +0,0 @@
-[S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0
-[NP] ||| ich ||| i ||| logp=0
-[NP] ||| ein [NN,1] ||| a [1] ||| logp=0
-[NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1
-[NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1
-[JJ] ||| kleines ||| small ||| logp=0
-[JJ] ||| kleines ||| little ||| logp=0
-[JJ] ||| grosses ||| big ||| logp=0
-[JJ] ||| grosses ||| large ||| logp=0
-[VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0
-[V] ||| sah ||| saw ||| logp=0
-[V] ||| fand ||| found ||| logp=0
diff --git a/dtrain/test/cdec_toy/in b/dtrain/test/cdec_toy/in
deleted file mode 100644
index e6df9275..00000000
--- a/dtrain/test/cdec_toy/in
+++ /dev/null
@@ -1 +0,0 @@
-ich sah ein kleines haus
diff --git a/dtrain/test/cdec_toy/weights b/dtrain/test/cdec_toy/weights
deleted file mode 100644
index 10d7ed83..00000000
--- a/dtrain/test/cdec_toy/weights
+++ /dev/null
@@ -1,2 +0,0 @@
-logp 1
-use_shell 1
diff --git a/dtrain/test/toy_example/cdec.ini b/dtrain/test/toy/cdec.ini
index 98b02d44..98b02d44 100644
--- a/dtrain/test/toy_example/cdec.ini
+++ b/dtrain/test/toy/cdec.ini
diff --git a/dtrain/test/toy_example/dtrain.ini b/dtrain/test/toy/dtrain.ini
index 3ab4f8d4..3ab4f8d4 100644
--- a/dtrain/test/toy_example/dtrain.ini
+++ b/dtrain/test/toy/dtrain.ini
diff --git a/dtrain/test/toy_example/in b/dtrain/test/toy/in
index 63f97158..63f97158 100644
--- a/dtrain/test/toy_example/in
+++ b/dtrain/test/toy/in
diff --git a/dtrain/test/wc_pipes/bible.txt.gz b/dtrain/test/wc_pipes/bible.txt.gz
deleted file mode 100644
index a25a65d9..00000000
--- a/dtrain/test/wc_pipes/bible.txt.gz
+++ /dev/null
Binary files differ
diff --git a/dtrain/test/wc_pipes/jobconf.xml b/dtrain/test/wc_pipes/jobconf.xml
deleted file mode 100644
index facdbce6..00000000
--- a/dtrain/test/wc_pipes/jobconf.xml
+++ /dev/null
@@ -1,16 +0,0 @@
-<?xml version="1.0"?>
-<configuration>
-  <property>
-    <name>hadoop.pipes.executable</name>
-    <value>path/to/dp_hadoop_pipes_test</value>
-  </property>
-  <property>
-    <name>hadoop.pipes.java.recordreader</name>
-    <value>true</value>
-  </property>
-  <property>
-    <name>hadoop.pipes.java.recordwriter</name>
-    <value>true</value>
-  </property>
-</configuration>
-
diff --git a/dtrain/test/wc_pipes/run.sh b/dtrain/test/wc_pipes/run.sh
deleted file mode 100755
index de7d8aef..00000000
--- a/dtrain/test/wc_pipes/run.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/sh
-
-HADOOP=~/z/hadoop-0.20.2/
-
-$HADOOP/bin/hadoop dfs -put wordcount wordcount
-
-$HADOOP/bin/hadoop pipes -D hadoop.pipes.java.recordreader=true \
-  -D hadoop.pipes.java.recordwriter=true \
-  -input in/bible.txt -output out/bible_out \
-  -program ./wordcount
-
diff --git a/dtrain/test/wc_pipes/wordcount.cc b/dtrain/test/wc_pipes/wordcount.cc
deleted file mode 100644
index 39560a31..00000000
--- a/dtrain/test/wc_pipes/wordcount.cc
+++ /dev/null
@@ -1,38 +0,0 @@
-#include "wordcount.hh"
-
-
-void
-WordcountMapper::map(HadoopPipes::MapContext & context)
-{
-  typedef boost::tokenizer<> tokenizer_t;
-  tokenizer_t tokenizer(context.getInputValue());
-
-  for( tokenizer_t::const_iterator i = tokenizer.begin();
-       tokenizer.end() != i; ++i ) {
-    context.emit(boost::to_lower_copy(*i), "1");
-  }
-}
-
-void
-WordcountReducer::reduce(HadoopPipes::ReduceContext & context)
-{
-  uint32_t count( 0 );
-
-  do {
-    ++count;
-  } while( context.nextValue() );
-
-  std::cout << context.getInputKey() << endl;
-  context.emit( context.getInputKey(),
-                boost::lexical_cast<std::string>(count) );
-}
-
-
-int
-main( int argc, char * argv[] )
-{
-  HadoopPipes::TemplateFactory2<WordcountMapper,
-                                WordcountReducer> factory;
-  return HadoopPipes::runTask( factory );
-}
-
diff --git a/dtrain/test/wc_pipes/wordcount.h b/dtrain/test/wc_pipes/wordcount.h
deleted file mode 100644
index c8fc8a29..00000000
--- a/dtrain/test/wc_pipes/wordcount.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#ifndef __WORDCOUNT_HH__
-#define __WORDCOUNT_HH__
-
-
-#include <iostream>
-#include <string>
-
-#include "hadoop/Pipes.hh"
-#include "hadoop/TemplateFactory.hh"
-
-#include <boost/algorithm/string.hpp>
-#include <boost/tokenizer.hpp>
-#include <boost/lexical_cast.hpp>
-
-using namespace std;
-
-
-class WordcountMapper : public HadoopPipes::Mapper
-{
-  public:
-    WordcountMapper( const HadoopPipes::TaskContext & ) {};
-    void map( HadoopPipes::MapContext &context );
-};
-
-class WordcountReducer : public HadoopPipes::Reducer
-{
-  public:
-    WordcountReducer( const HadoopPipes::TaskContext & ) {};
-    void reduce( HadoopPipes::ReduceContext & context );
-};
-
-
-#endif
-
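For context on the line that picked up the new // TODO marker in dtrain.cc: as the hunk suggests, the per-sentence grammar arrives as one field of the input line (in_split[3]) with the individual SCFG rules joined by the " __NEXT__RULE__ " separator, and boost::replace_all turns that separator back into newlines before the grammar is buffered and handed to the decoder. Below is a minimal standalone sketch of just that unpacking step; it is not the dtrain driver itself, and the sample rules are taken from the toy grammar deleted above.

    // Sketch: unpack a flattened per-sentence grammar the way dtrain.cc does.
    #include <iostream>
    #include <string>
    #include <boost/algorithm/string.hpp>  // boost::replace_all

    int main() {
      // One input field holding two toy rules, joined by dtrain's separator.
      std::string grammar =
          "[NP] ||| ich ||| i ||| logp=0"
          " __NEXT__RULE__ "
          "[V] ||| sah ||| saw ||| logp=0";

      // Same call as in the hunk above: restore one rule per line, then make
      // sure the buffer ends with a newline before it reaches the decoder.
      boost::replace_all(grammar, " __NEXT__RULE__ ", "\n");
      grammar += "\n";

      std::cout << grammar;  // prints the two rules on separate lines
      return 0;
    }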