summary refs log tree commit diff
path: root/dtrain
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2011-09-25 21:59:39 +0200
committer Patrick Simianer <p@simianer.de> 2011-09-25 21:59:39 +0200
commit 98f12432cf2dc4a906d13d17e122b457882df308 (patch)
tree c454f2ad97704dadd336bd973c5799b7671c8df8 /dtrain
parent 4d8c300734c441821141f4bff044c439e004ff84 (diff)
removed old stuff
Diffstat (limited to 'dtrain')
-rw-r--r-- dtrain/dtrain.cc 2
-rw-r--r-- dtrain/ksampler.h 6
-rw-r--r-- dtrain/test/cdec_toy/cdec.ini 4
-rw-r--r-- dtrain/test/cdec_toy/grammar 12
-rw-r--r-- dtrain/test/cdec_toy/in 1
-rw-r--r-- dtrain/test/cdec_toy/weights 2
-rw-r--r-- dtrain/test/toy/cdec.ini (renamed from dtrain/test/toy_example/cdec.ini) 0
-rw-r--r-- dtrain/test/toy/dtrain.ini (renamed from dtrain/test/toy_example/dtrain.ini) 0
-rw-r--r-- dtrain/test/toy/in (renamed from dtrain/test/toy_example/in) 0
-rw-r--r-- dtrain/test/wc_pipes/bible.txt.gz bin 1193106 -> 0 bytes
-rw-r--r-- dtrain/test/wc_pipes/jobconf.xml 16
-rwxr-xr-x dtrain/test/wc_pipes/run.sh 11
-rw-r--r-- dtrain/test/wc_pipes/wordcount.cc 38
-rw-r--r-- dtrain/test/wc_pipes/wordcount.h 34
14 files changed, 2 insertions, 124 deletions
diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc
index ad1ab7b7..b1ea1e46 100644
--- a/dtrain/dtrain.cc
+++ b/dtrain/dtrain.cc
@@ -253,7 +253,7 @@ main(int argc, char** argv)
}
}
if (broken_grammar) continue;
- boost::replace_all(in_split[3], " __NEXT__RULE__ ", "\n");
+ boost::replace_all(in_split[3], " __NEXT__RULE__ ", "\n"); // TODO
in_split[3] += "\n";
grammar_buf_out << in_split[3] << DTRAIN_GRAMMAR_DELIM << " " << in_split[0] << endl;
decoder.SetSentenceGrammarFromString(in_split[3]);
diff --git a/dtrain/ksampler.h b/dtrain/ksampler.h
index bbe2b402..08bf1498 100644
--- a/dtrain/ksampler.h
+++ b/dtrain/ksampler.h
@@ -9,10 +9,6 @@ namespace dtrain
{
-/*
- * KSampler
- *
- */
struct KSampler : public HypSampler
{
const size_t k_;
@@ -34,7 +30,7 @@ struct KSampler : public HypSampler
s_.clear();
std::vector<HypergraphSampler::Hypothesis> samples;
HypergraphSampler::sample_hypotheses(forest, k_, prng_, &samples);
- for ( size_t i = 0; i < k_; ++i ) {
+ for (size_t i = 0; i < k_; ++i) {
ScoredHyp h;
h.w = samples[i].words;
h.f = samples[i].fmap;
diff --git a/dtrain/test/cdec_toy/cdec.ini b/dtrain/test/cdec_toy/cdec.ini
deleted file mode 100644
index 9eb34512..00000000
--- a/dtrain/test/cdec_toy/cdec.ini
+++ /dev/null
@@ -1,4 +0,0 @@
-formalism=scfg
-grammar=../dtrain/test/toy_cdec/grammar
-add_pass_through_rules=true
-weights=../dtrain/test/toy_cdec/weights
diff --git a/dtrain/test/cdec_toy/grammar b/dtrain/test/cdec_toy/grammar
deleted file mode 100644
index aeed75ef..00000000
--- a/dtrain/test/cdec_toy/grammar
+++ /dev/null
@@ -1,12 +0,0 @@
-[S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0
-[NP] ||| ich ||| i ||| logp=0
-[NP] ||| ein [NN,1] ||| a [1] ||| logp=0
-[NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1
-[NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1
-[JJ] ||| kleines ||| small ||| logp=0
-[JJ] ||| kleines ||| little ||| logp=0
-[JJ] ||| grosses ||| big ||| logp=0
-[JJ] ||| grosses ||| large ||| logp=0
-[VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0
-[V] ||| sah ||| saw ||| logp=0
-[V] ||| fand ||| found ||| logp=0
diff --git a/dtrain/test/cdec_toy/in b/dtrain/test/cdec_toy/in
deleted file mode 100644
index e6df9275..00000000
--- a/dtrain/test/cdec_toy/in
+++ /dev/null
@@ -1 +0,0 @@
-ich sah ein kleines haus
diff --git a/dtrain/test/cdec_toy/weights b/dtrain/test/cdec_toy/weights
deleted file mode 100644
index 10d7ed83..00000000
--- a/dtrain/test/cdec_toy/weights
+++ /dev/null
@@ -1,2 +0,0 @@
-logp 1
-use_shell 1
diff --git a/dtrain/test/toy_example/cdec.ini b/dtrain/test/toy/cdec.ini
index 98b02d44..98b02d44 100644
--- a/dtrain/test/toy_example/cdec.ini
+++ b/dtrain/test/toy/cdec.ini
diff --git a/dtrain/test/toy_example/dtrain.ini b/dtrain/test/toy/dtrain.ini
index 3ab4f8d4..3ab4f8d4 100644
--- a/dtrain/test/toy_example/dtrain.ini
+++ b/dtrain/test/toy/dtrain.ini
diff --git a/dtrain/test/toy_example/in b/dtrain/test/toy/in
index 63f97158..63f97158 100644
--- a/dtrain/test/toy_example/in
+++ b/dtrain/test/toy/in
diff --git a/dtrain/test/wc_pipes/bible.txt.gz b/dtrain/test/wc_pipes/bible.txt.gz
deleted file mode 100644
index a25a65d9..00000000
--- a/dtrain/test/wc_pipes/bible.txt.gz
+++ /dev/null
Binary files differ
diff --git a/dtrain/test/wc_pipes/jobconf.xml b/dtrain/test/wc_pipes/jobconf.xml
deleted file mode 100644
index facdbce6..00000000
--- a/dtrain/test/wc_pipes/jobconf.xml
+++ /dev/null
@@ -1,16 +0,0 @@
-<?xml version="1.0"?>
-<configuration>
- <property>
- <name>hadoop.pipes.executable</name>
- <value>path/to/dp_hadoop_pipes_test</value>
- </property>
- <property>
- <name>hadoop.pipes.java.recordreader</name>
- <value>true</value>
- </property>
- <property>
- <name>hadoop.pipes.java.recordwriter</name>
- <value>true</value>
- </property>
-</configuration>
-
diff --git a/dtrain/test/wc_pipes/run.sh b/dtrain/test/wc_pipes/run.sh
deleted file mode 100755
index de7d8aef..00000000
--- a/dtrain/test/wc_pipes/run.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/sh
-
-HADOOP=~/z/hadoop-0.20.2/
-
-$HADOOP/bin/hadoop dfs -put wordcount wordcount
-
-$HADOOP/bin/hadoop pipes -D hadoop.pipes.java.recordreader=true \
- -D hadoop.pipes.java.recordwriter=true \
- -input in/bible.txt -output out/bible_out \
- -program ./wordcount
-
diff --git a/dtrain/test/wc_pipes/wordcount.cc b/dtrain/test/wc_pipes/wordcount.cc
deleted file mode 100644
index 39560a31..00000000
--- a/dtrain/test/wc_pipes/wordcount.cc
+++ /dev/null
@@ -1,38 +0,0 @@
-#include "wordcount.hh"
-
-
-void
-WordcountMapper::map(HadoopPipes::MapContext & context)
-{
- typedef boost::tokenizer<> tokenizer_t;
- tokenizer_t tokenizer(context.getInputValue());
-
- for( tokenizer_t::const_iterator i = tokenizer.begin();
- tokenizer.end() != i; ++i ) {
- context.emit(boost::to_lower_copy(*i), "1");
- }
-}
-
-void
-WordcountReducer::reduce(HadoopPipes::ReduceContext & context)
-{
- uint32_t count( 0 );
-
- do {
- ++count;
- } while( context.nextValue() );
-
- std::cout << context.getInputKey() << endl;
- context.emit( context.getInputKey(),
- boost::lexical_cast<std::string>(count) );
-}
-
-
-int
-main( int argc, char * argv[] )
-{
- HadoopPipes::TemplateFactory2<WordcountMapper,
- WordcountReducer> factory;
- return HadoopPipes::runTask( factory );
-}
-
diff --git a/dtrain/test/wc_pipes/wordcount.h b/dtrain/test/wc_pipes/wordcount.h
deleted file mode 100644
index c8fc8a29..00000000
--- a/dtrain/test/wc_pipes/wordcount.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#ifndef __WORDCOUNT_HH__
-#define __WORDCOUNT_HH__
-
-
-#include <iostream>
-#include <string>
-
-#include "hadoop/Pipes.hh"
-#include "hadoop/TemplateFactory.hh"
-
-#include <boost/algorithm/string.hpp>
-#include <boost/tokenizer.hpp>
-#include <boost/lexical_cast.hpp>
-
-using namespace std;
-
-
-class WordcountMapper : public HadoopPipes::Mapper
-{
- public:
- WordcountMapper( const HadoopPipes::TaskContext & ) {};
- void map( HadoopPipes::MapContext &context );
-};
-
-class WordcountReducer : public HadoopPipes::Reducer
-{
- public:
- WordcountReducer( const HadoopPipes::TaskContext & ) {};
- void reduce( HadoopPipes::ReduceContext & context );
-};
-
-
-#endif
-