diff options
Diffstat (limited to 'hadoop/wordcount/pipes/wordcount.cc')
-rw-r--r-- | hadoop/wordcount/pipes/wordcount.cc | 38 |
1 files changed, 38 insertions, 0 deletions
diff --git a/hadoop/wordcount/pipes/wordcount.cc b/hadoop/wordcount/pipes/wordcount.cc new file mode 100644 index 0000000..c9394d5 --- /dev/null +++ b/hadoop/wordcount/pipes/wordcount.cc @@ -0,0 +1,38 @@ +#include "wordcount.hh" + + +void +WordcountMapper::map(HadoopPipes::MapContext &context) +{ + typedef boost::tokenizer<> tokenizer_t; + tokenizer_t tokenizer(context.getInputValue()); + + for(tokenizer_t::const_iterator i = tokenizer.begin(); + tokenizer.end() != i; ++i) { + context.emit(boost::to_lower_copy(*i), "1"); + } +} + +void +WordcountReducer::reduce(HadoopPipes::ReduceContext &context) +{ + uint32_t count(0); + + do { + ++count; + } while(context.nextValue()); + + //std::cout << context.getInputKey() << endl; + context.emit(context.getInputKey(), + boost::lexical_cast<std::string>(count)); +} + + +int +main(int argc, char *argv[]) +{ + HadoopPipes::TemplateFactory2<WordcountMapper, + WordcountReducer> factory; + return HadoopPipes::runTask(factory); +} + |