diff options
author | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-10-28 00:22:42 +0000 |
---|---|---|
committer | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-10-28 00:22:42 +0000 |
commit | ad5a1a959648483f6d0d049af7ce54346c28728f (patch) | |
tree | 6d1085d28dfcee2a407bdca64ecf11ae178b068c /word-aligner/makefiles/makefile.grammars | |
parent | e474b6a282e00e4a48e0938ceaecc7ea8e682ef4 (diff) |
change stem handling
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@693 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'word-aligner/makefiles/makefile.grammars')
-rw-r--r-- | word-aligner/makefiles/makefile.grammars | 34 |
1 files changed, 27 insertions, 7 deletions
```diff
diff --git a/word-aligner/makefiles/makefile.grammars b/word-aligner/makefiles/makefile.grammars
index f4b956bc..8a10cb19 100644
--- a/word-aligner/makefiles/makefile.grammars
+++ b/word-aligner/makefiles/makefile.grammars
@@ -1,8 +1,7 @@
-all: corpus.f-e.lex-grammar.gz corpus.e-f.lex-grammar.gz corpus.class.e corpus.class.f
+all: corpus.f-e.lex-grammar.gz corpus.e-f.lex-grammar.gz corpus.class.e corpus.class.f corpus.stemmed.f fstem.map corpus.stemmed.e estem.map
 
 clean:
-	$(RM) orthonorm-dict.* voc2class* corpus.class.* corpus.e-f corpus.f-e corpus.f-e.lex-grammar* *.model1 *voc corpus.e-f.lex-grammar*
-
+	$(RM) orthonorm-dict.* voc2class* corpus.class.* corpus.e-f corpus.f-e corpus.f-e.lex-grammar* *.model1 *voc corpus.e-f.lex-grammar* *stem*
 SUPPORT_DIR = $(SCRIPT_DIR)/support
 GZIP = /usr/bin/gzip
 ZCAT = zcat
@@ -12,21 +11,42 @@ SUPPLEMENT_WEIGHTS = $(SUPPORT_DIR)/supplement_weights_file.pl
 EXTRACT_VOCAB = $(SUPPORT_DIR)/extract_vocab.pl
 ORTHONORM_E = $(SCRIPT_DIR)/ortho-norm/$(E_LANG).pl
 ORTHONORM_F = $(SCRIPT_DIR)/ortho-norm/$(F_LANG).pl
+STEM_F = $(SCRIPT_DIR)/stemmers/$(F_LANG).pl
+STEM_E = $(SCRIPT_DIR)/stemmers/$(E_LANG).pl
+
 CLASSIFY = $(SUPPORT_DIR)/classify.pl
 MAKE_LEX_GRAMMAR = $(SUPPORT_DIR)/make_lex_grammar.pl
 MODEL1 = $(TRAINING_DIR)/model1
 MERGE_CORPUS = $(SUPPORT_DIR)/merge_corpus.pl
 
-orthonorm-dict.e: corpus.e
-	$(EXTRACT_VOCAB) corpus.e > e.voc
+e.voc: corpus.e
+	$(EXTRACT_VOCAB) < corpus.e > $@
+
+f.voc: corpus.f
+	$(EXTRACT_VOCAB) < corpus.f > $@
+
+orthonorm-dict.e: corpus.e e.voc
 	$(ORTHONORM_E) < e.voc > e.ortho-voc
 	$(MERGE_CORPUS) e.voc e.ortho-voc > $@
 
-orthonorm-dict.f: corpus.f
-	$(EXTRACT_VOCAB) corpus.f > f.voc
+orthonorm-dict.f: corpus.f f.voc
 	$(ORTHONORM_F) < f.voc > f.ortho-voc
 	$(MERGE_CORPUS) f.voc f.ortho-voc > $@
 
+# this is just a "stem" map
+estem.map: e.voc
+	$(STEM_E) --vocab < e.voc > $@
+
+fstem.map: f.voc
+	$(STEM_F) --vocab < f.voc > $@
+
+# corpus.stemmed.f can use context to do "stemming"
+corpus.stemmed.f: corpus.f
+	$(STEM_F) < corpus.f > $@
+
+corpus.stemmed.e: corpus.e
+	$(STEM_E) < corpus.e > $@
+
 voc2class.e: corpus.e $(MKCLS)
 	$(MKCLS) -c$(NCLASSES) -n10 -pcorpus.e -Vvoc2class.e opt
 
```