diff options
author | Patrick Simianer <p@simianer.de> | 2011-12-03 21:38:59 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2011-12-03 21:38:59 +0100 |
commit | 0f9024d49f7622d1c135aa2e3f9ddc6bc4349fb9 (patch) | |
tree | fc087cb9a222d00fc8b2f6d4484a62c581b84e54 /dtrain/scfg/features/count/featurecount.cc | |
parent | 68fd129f5f69162fc2385bd3e57335968dfc74c2 (diff) |
new rule count
Diffstat (limited to 'dtrain/scfg/features/count/featurecount.cc')
-rw-r--r-- | dtrain/scfg/features/count/featurecount.cc | 49 |
1 files changed, 0 insertions, 49 deletions
diff --git a/dtrain/scfg/features/count/featurecount.cc b/dtrain/scfg/features/count/featurecount.cc deleted file mode 100644 index db31885c..00000000 --- a/dtrain/scfg/features/count/featurecount.cc +++ /dev/null @@ -1,49 +0,0 @@ -#include "featurecount.hh" - - -void -FeatureCountMapper::map( HadoopPipes::MapContext &context ) -{ - string line = context.getInputValue(); - - // get features substr - size_t i = 0, c = 0, beg = 0, end = 0; - string::iterator it = line.begin(); - string s; - while ( c != 12 ) { - s = *it; - if ( s == "|" ) c += 1; - if ( beg == 0 && c == 9 ) beg = i+2; - if ( c == 12 ) end = i-beg-3; - it++; - i++; - } - string sub = line.substr( beg, end ); - - // emit feature:1 - vector<string> f_tok; - boost::split( f_tok, sub, boost::is_any_of(" ") ); - vector<string>::iterator f; - for ( f = f_tok.begin(); f != f_tok.end(); f++ ) { - if ( f->find("=1") != string::npos ) context.emit(*f, "1"); - } -} - -void -FeatureCountReducer::reduce( HadoopPipes::ReduceContext &context ) -{ - size_t sum = 0; - while ( context.nextValue() ) sum += HadoopUtils::toInt( context.getInputValue() ); - context.emit( context.getInputKey(), HadoopUtils::toString(sum) ); -} - - -int -main( int argc, char * argv[] ) -{ - HadoopPipes::TemplateFactory2<FeatureCountMapper, - FeatureCountReducer> factory; - - return HadoopPipes::runTask(factory); -} - |