diff options
author | Patrick Simianer <p@simianer.de> | 2011-12-03 21:38:59 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2011-12-03 21:38:59 +0100 |
commit | 578ed26bf818c1f18d5a319cde9b13ef9ff59f46 (patch) | |
tree | b315bbaa2d44e8f54b1d70b1b1bb21704d89a266 /dtrain/scfg/features/count/featurecount.cc | |
parent | 4d1679b517ff46a1d6a152796f1033a4c380492c (diff) |
new rule count
Diffstat (limited to 'dtrain/scfg/features/count/featurecount.cc')
-rw-r--r-- | dtrain/scfg/features/count/featurecount.cc | 49 |
1 files changed, 0 insertions, 49 deletions
```diff
diff --git a/dtrain/scfg/features/count/featurecount.cc b/dtrain/scfg/features/count/featurecount.cc
deleted file mode 100644
index db31885c..00000000
--- a/dtrain/scfg/features/count/featurecount.cc
+++ /dev/null
@@ -1,49 +0,0 @@
-#include "featurecount.hh"
-
-
-void
-FeatureCountMapper::map( HadoopPipes::MapContext &context )
-{
-  string line = context.getInputValue();
-
-  // get features substr
-  size_t i = 0, c = 0, beg = 0, end = 0;
-  string::iterator it = line.begin();
-  string s;
-  while ( c != 12 ) {
-    s = *it;
-    if ( s == "|" ) c += 1;
-    if ( beg == 0 && c == 9 ) beg = i+2;
-    if ( c == 12 ) end = i-beg-3;
-    it++;
-    i++;
-  }
-  string sub = line.substr( beg, end );
-
-  // emit feature:1
-  vector<string> f_tok;
-  boost::split( f_tok, sub, boost::is_any_of(" ") );
-  vector<string>::iterator f;
-  for ( f = f_tok.begin(); f != f_tok.end(); f++ ) {
-    if ( f->find("=1") != string::npos ) context.emit(*f, "1");
-  }
-}
-
-void
-FeatureCountReducer::reduce( HadoopPipes::ReduceContext &context )
-{
-  size_t sum = 0;
-  while ( context.nextValue() ) sum += HadoopUtils::toInt( context.getInputValue() );
-  context.emit( context.getInputKey(), HadoopUtils::toString(sum) );
-}
-
-
-int
-main( int argc, char * argv[] )
-{
-  HadoopPipes::TemplateFactory2<FeatureCountMapper,
-                                FeatureCountReducer> factory;
-
-  return HadoopPipes::runTask(factory);
-}
-
```