summaryrefslogtreecommitdiff
path: root/gi/morf-segmentation/morfsegment.py
diff options
context:
space:
mode:
authorPatrick Simianer <simianer@cl.uni-heidelberg.de>2012-11-05 15:29:46 +0100
committerPatrick Simianer <simianer@cl.uni-heidelberg.de>2012-11-05 15:29:46 +0100
commit6f29f345dc06c1a1033475eac1d1340781d1d603 (patch)
tree6fa4cdd7aefd7d54c9585c2c6274db61bb8b159a /gi/morf-segmentation/morfsegment.py
parentb510da2e562c695c90d565eb295c749569c59be8 (diff)
parentc615c37501fa8576584a510a9d2bfe2fdd5bace7 (diff)
merge upstream/master
Diffstat (limited to 'gi/morf-segmentation/morfsegment.py')
-rwxr-xr-xgi/morf-segmentation/morfsegment.py50
1 files changed, 0 insertions, 50 deletions
diff --git a/gi/morf-segmentation/morfsegment.py b/gi/morf-segmentation/morfsegment.py
deleted file mode 100755
index 85b9d4fb..00000000
--- a/gi/morf-segmentation/morfsegment.py
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/python
-
-import sys
-import gzip
-
-#usage: morfsegment.py inputvocab.gz segmentation.ready
-# stdin: the data to segment
-# stdout: the segmented data
-
-if len(sys.argv) < 3:
- print "usage: morfsegment.py inputvocab.gz segmentation.ready [marker]"
- print " stdin: the data to segment"
- print " stdout: the segmented data"
- sys.exit()
-
-#read index:
-split_index={}
-
-marker="##"
-
-if len(sys.argv) > 3:
- marker=sys.argv[3]
-
-word_vocab=gzip.open(sys.argv[1], 'rb') #inputvocab.gz
-seg_vocab=open(sys.argv[2], 'r') #segm.ready..
-
-for seg in seg_vocab:
- #seg = ver# #wonder\n
- #wordline = 1 verwonder\n
- word = word_vocab.readline().strip().split(' ')
- assert(len(word) == 2)
- word = word[1]
- seg=seg.strip()
-
- if seg != word:
- split_index[word] = seg
-
-word_vocab.close()
-seg_vocab.close()
-
-for line in sys.stdin:
- words = line.strip().split()
-
- newsent = []
- for word in words:
- splitword = split_index.get(word, word)
- newsent.append(splitword)
-
- print ' '.join(newsent)
-