diff options
author | trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-23 16:59:07 +0000 |
---|---|---|
committer | trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-23 16:59:07 +0000 |
commit | 0533638ace6681fd6bfc95722a0efe4a6a6cf630 (patch) | |
tree | 9f32585099baf56816e8c46ed167adf9a81ab550 /gi | |
parent | 18b6947b83708f0d63c054243245e6678b1c979e (diff) |
Cleaned up extractor
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@379 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi')
-rwxr-xr-x | gi/pipeline/local-gi-pipeline.pl | 11 |
1 files changed, 4 insertions, 7 deletions
diff --git a/gi/pipeline/local-gi-pipeline.pl b/gi/pipeline/local-gi-pipeline.pl index 37051721..13a2b421 100755 --- a/gi/pipeline/local-gi-pipeline.pl +++ b/gi/pipeline/local-gi-pipeline.pl @@ -266,18 +266,15 @@ sub extract_context { print STDERR "$OUT_CONTEXTS exists, reusing...\n"; } else { my $ccopt = "-c $ITEMS_IN_MEMORY"; - my $pipe = "| $REDUCER "; + my $postsort = "| $REDUCER "; if ($COMPLETE_CACHE) { print STDERR "COMPLETE_CACHE is set: removing memory limits on cache.\n"; $ccopt = "-c 0"; - $pipe = ""; + $postsort = "" unless ($PRESERVE_PHRASES); } + my $presort = ($PRESERVE_PHRASES ? "| $REMOVE_TAGS_CONTEXT --phrase=tok --context=tag " : ""); - if ($PRESERVE_PHRASES) { - $pipe = "| $REMOVE_TAGS_CONTEXT --phrase=tok --context=tag " . $pipe; - } - - my $cmd = "$EXTRACTOR -i $CORPUS_CLUSTER $ccopt -L $BASE_PHRASE_MAX_SIZE -C -S $CONTEXT_SIZE --phrase_language $LANGUAGE --context_language $LANGUAGE | $SORT_KEYS $pipe | $GZIP > $OUT_CONTEXTS"; + my $cmd = "$EXTRACTOR -i $CORPUS_CLUSTER $ccopt -L $BASE_PHRASE_MAX_SIZE -C -S $CONTEXT_SIZE --phrase_language $LANGUAGE --context_language $LANGUAGE $presort | $SORT_KEYS $postsort | $GZIP > $OUT_CONTEXTS"; safesystem($cmd) or die "Failed to extract contexts."; } } |