From 0f9024d49f7622d1c135aa2e3f9ddc6bc4349fb9 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sat, 3 Dec 2011 21:38:59 +0100 Subject: new rule count --- dtrain/scfg/features/count/featurecount.cc | 49 ------------------------------ 1 file changed, 49 deletions(-) delete mode 100644 dtrain/scfg/features/count/featurecount.cc (limited to 'dtrain/scfg/features/count/featurecount.cc') diff --git a/dtrain/scfg/features/count/featurecount.cc b/dtrain/scfg/features/count/featurecount.cc deleted file mode 100644 index db31885c..00000000 --- a/dtrain/scfg/features/count/featurecount.cc +++ /dev/null @@ -1,49 +0,0 @@ -#include "featurecount.hh" - - -void -FeatureCountMapper::map( HadoopPipes::MapContext &context ) -{ - string line = context.getInputValue(); - - // get features substr - size_t i = 0, c = 0, beg = 0, end = 0; - string::iterator it = line.begin(); - string s; - while ( c != 12 ) { - s = *it; - if ( s == "|" ) c += 1; - if ( beg == 0 && c == 9 ) beg = i+2; - if ( c == 12 ) end = i-beg-3; - it++; - i++; - } - string sub = line.substr( beg, end ); - - // emit feature:1 - vector f_tok; - boost::split( f_tok, sub, boost::is_any_of(" ") ); - vector::iterator f; - for ( f = f_tok.begin(); f != f_tok.end(); f++ ) { - if ( f->find("=1") != string::npos ) context.emit(*f, "1"); - } -} - -void -FeatureCountReducer::reduce( HadoopPipes::ReduceContext &context ) -{ - size_t sum = 0; - while ( context.nextValue() ) sum += HadoopUtils::toInt( context.getInputValue() ); - context.emit( context.getInputKey(), HadoopUtils::toString(sum) ); -} - - -int -main( int argc, char * argv[] ) -{ - HadoopPipes::TemplateFactory2 factory; - - return HadoopPipes::runTask(factory); -} - -- cgit v1.2.3