big update: working iterating, pretty output, test scripts and more

author: Patrick Simianer <p@simianer.de> 2011-08-29 22:02:45 +0200
committer: Patrick Simianer <p@simianer.de> 2011-09-23 19:13:58 +0200
commit: aceb387526478e34e41db6c046f707234953e0b5 (patch)
tree: 3cb19b9f1c3390d52c4a732e22a3b31b52e4f483 /dtrain/test/wc_pipes/wordcount.cc
parent: 2001f2c1c96049b78f9aa5aaa05aeca26e3fc55a (diff)
1 files changed, 38 insertions, 0 deletions
diff --git a/dtrain/test/wc_pipes/wordcount.cc b/dtrain/test/wc_pipes/wordcount.cc
new file mode 100644
index 00000000..39560a31
--- /dev/null
+++ b/dtrain/test/wc_pipes/wordcount.cc
@@ -0,0 +1,38 @@
+#include "wordcount.hh"
+
+
+// Emit a (lowercased token, "1") pair for every token of the input value.
+void
+WordcountMapper::map(HadoopPipes::MapContext & context)
+{
+  typedef boost::tokenizer<> words_t;
+  words_t words(context.getInputValue());
+  // Token boundaries are whatever boost::tokenizer's default function yields.
+  for( words_t::const_iterator w = words.begin(); w != words.end(); ++w ) {
+    context.emit(boost::to_lower_copy(*w), "1");
+  }
+}
+
+// Emit the reduce key paired with the number of values received for it.
+void
+WordcountReducer::reduce(HadoopPipes::ReduceContext & context)
+{
+  uint32_t count( 0 );
+  // Hadoop Pipes makes every value -- the first included -- available only
+  // after nextValue() returns true, so test before counting; the former
+  // do/while reported one occurrence too many for each key.
+  while( context.nextValue() ) { ++count; }
+  std::cout << context.getInputKey() << std::endl;  // key trace on stdout
+  context.emit( context.getInputKey(),
+                boost::lexical_cast<std::string>(count) );
+}
+
+
+int
+main( int argc, char * argv[] )
+{
+  // argc/argv are not used; the factory names this job's mapper and reducer.
+  HadoopPipes::TemplateFactory2<WordcountMapper, WordcountReducer> task_factory;
+  return HadoopPipes::runTask( task_factory );
+}
+
author	Patrick Simianer <p@simianer.de>	2011-08-29 22:02:45 +0200
committer	Patrick Simianer <p@simianer.de>	2011-09-23 19:13:58 +0200
commit	aceb387526478e34e41db6c046f707234953e0b5 (patch)
tree	3cb19b9f1c3390d52c4a732e22a3b31b52e4f483 /dtrain/test/wc_pipes/wordcount.cc
parent	2001f2c1c96049b78f9aa5aaa05aeca26e3fc55a (diff)