Diffstat (limited to 'dtrain/test/wc_pipes')
-rw-r--r-- | dtrain/test/wc_pipes/bible.txt.gz | bin | 1193106 -> 0 bytes
-rw-r--r-- | dtrain/test/wc_pipes/jobconf.xml  | 16
-rwxr-xr-x | dtrain/test/wc_pipes/run.sh       | 11
-rw-r--r-- | dtrain/test/wc_pipes/wordcount.cc | 38
-rw-r--r-- | dtrain/test/wc_pipes/wordcount.h  | 34
5 files changed, 0 insertions, 99 deletions
diff --git a/dtrain/test/wc_pipes/bible.txt.gz b/dtrain/test/wc_pipes/bible.txt.gz
deleted file mode 100644
index a25a65d9..00000000
--- a/dtrain/test/wc_pipes/bible.txt.gz
+++ /dev/null
Binary files differ
diff --git a/dtrain/test/wc_pipes/jobconf.xml b/dtrain/test/wc_pipes/jobconf.xml
deleted file mode 100644
index facdbce6..00000000
--- a/dtrain/test/wc_pipes/jobconf.xml
+++ /dev/null
@@ -1,16 +0,0 @@
-<?xml version="1.0"?>
-<configuration>
-  <property>
-    <name>hadoop.pipes.executable</name>
-    <value>path/to/dp_hadoop_pipes_test</value>
-  </property>
-  <property>
-    <name>hadoop.pipes.java.recordreader</name>
-    <value>true</value>
-  </property>
-  <property>
-    <name>hadoop.pipes.java.recordwriter</name>
-    <value>true</value>
-  </property>
-</configuration>
-
diff --git a/dtrain/test/wc_pipes/run.sh b/dtrain/test/wc_pipes/run.sh
deleted file mode 100755
index de7d8aef..00000000
--- a/dtrain/test/wc_pipes/run.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/sh
-
-HADOOP=~/z/hadoop-0.20.2/
-
-$HADOOP/bin/hadoop dfs -put wordcount wordcount
-
-$HADOOP/bin/hadoop pipes -D hadoop.pipes.java.recordreader=true \
-  -D hadoop.pipes.java.recordwriter=true \
-  -input in/bible.txt -output out/bible_out \
-  -program ./wordcount
-
diff --git a/dtrain/test/wc_pipes/wordcount.cc b/dtrain/test/wc_pipes/wordcount.cc
deleted file mode 100644
index 39560a31..00000000
--- a/dtrain/test/wc_pipes/wordcount.cc
+++ /dev/null
@@ -1,38 +0,0 @@
-#include "wordcount.hh"
-
-
-void
-WordcountMapper::map(HadoopPipes::MapContext & context)
-{
-  typedef boost::tokenizer<> tokenizer_t;
-  tokenizer_t tokenizer(context.getInputValue());
-
-  for( tokenizer_t::const_iterator i = tokenizer.begin();
-       tokenizer.end() != i; ++i ) {
-    context.emit(boost::to_lower_copy(*i), "1");
-  }
-}
-
-void
-WordcountReducer::reduce(HadoopPipes::ReduceContext & context)
-{
-  uint32_t count( 0 );
-
-  do {
-    ++count;
-  } while( context.nextValue() );
-
-  std::cout << context.getInputKey() << endl;
-  context.emit( context.getInputKey(),
-                boost::lexical_cast<std::string>(count) );
-}
-
-
-int
-main( int argc, char * argv[] )
-{
-  HadoopPipes::TemplateFactory2<WordcountMapper,
-                                WordcountReducer> factory;
-  return HadoopPipes::runTask( factory );
-}
-
diff --git a/dtrain/test/wc_pipes/wordcount.h b/dtrain/test/wc_pipes/wordcount.h
deleted file mode 100644
index c8fc8a29..00000000
--- a/dtrain/test/wc_pipes/wordcount.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#ifndef __WORDCOUNT_HH__
-#define __WORDCOUNT_HH__
-
-
-#include <iostream>
-#include <string>
-
-#include "hadoop/Pipes.hh"
-#include "hadoop/TemplateFactory.hh"
-
-#include <boost/algorithm/string.hpp>
-#include <boost/tokenizer.hpp>
-#include <boost/lexical_cast.hpp>
-
-using namespace std;
-
-
-class WordcountMapper : public HadoopPipes::Mapper
-{
-  public:
-    WordcountMapper( const HadoopPipes::TaskContext & ) {};
-    void map( HadoopPipes::MapContext &context );
-};
-
-class WordcountReducer : public HadoopPipes::Reducer
-{
-  public:
-    WordcountReducer( const HadoopPipes::TaskContext & ) {};
-    void reduce( HadoopPipes::ReduceContext & context );
-};
-
-
-#endif
-