summary | refs | log | tree | commit | diff
path: root/gi/morf-segmentation/morfsegment.py
diff options
context:
space:
mode:
author Chris Dyer <cdyer@cs.cmu.edu> 2012-10-11 14:06:32 -0400
committer Chris Dyer <cdyer@cs.cmu.edu> 2012-10-11 14:06:32 -0400
commit 9339c80d465545aec5a6dccfef7c83ca715bf11f (patch)
tree 64c56d558331edad1db3832018c80e799551c39a /gi/morf-segmentation/morfsegment.py
parent 438dac41810b7c69fa10203ac5130d20efa2da9f (diff)
parent afd7da3b2338661657ad0c4e9eec681e014d37bf (diff)
Merge branch 'master' of https://github.com/redpony/cdec
Diffstat (limited to 'gi/morf-segmentation/morfsegment.py')
-rwxr-xr-x gi/morf-segmentation/morfsegment.py 50
1 files changed, 0 insertions, 50 deletions
diff --git a/gi/morf-segmentation/morfsegment.py b/gi/morf-segmentation/morfsegment.py
deleted file mode 100755
index 85b9d4fb..00000000
--- a/gi/morf-segmentation/morfsegment.py
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/python
-
-import sys
-import gzip
-
-#usage: morfsegment.py inputvocab.gz segmentation.ready
-# stdin: the data to segment
-# stdout: the segmented data
-
-if len(sys.argv) < 3:
- print "usage: morfsegment.py inputvocab.gz segmentation.ready [marker]"
- print " stdin: the data to segment"
- print " stdout: the segmented data"
- sys.exit()
-
-#read index:
-split_index={}
-
-marker="##"
-
-if len(sys.argv) > 3:
- marker=sys.argv[3]
-
-word_vocab=gzip.open(sys.argv[1], 'rb') #inputvocab.gz
-seg_vocab=open(sys.argv[2], 'r') #segm.ready..
-
-for seg in seg_vocab:
- #seg = ver# #wonder\n
- #wordline = 1 verwonder\n
- word = word_vocab.readline().strip().split(' ')
- assert(len(word) == 2)
- word = word[1]
- seg=seg.strip()
-
- if seg != word:
- split_index[word] = seg
-
-word_vocab.close()
-seg_vocab.close()
-
-for line in sys.stdin:
- words = line.strip().split()
-
- newsent = []
- for word in words:
- splitword = split_index.get(word, word)
- newsent.append(splitword)
-
- print ' '.join(newsent)
-