summaryrefslogtreecommitdiff
path: root/gi/morf-segmentation/morfsegment.py
diff options
context:
space:
mode:
authorAvneesh Saluja <asaluja@gmail.com>2013-03-28 18:28:16 -0700
committerAvneesh Saluja <asaluja@gmail.com>2013-03-28 18:28:16 -0700
commit3d8d656fa7911524e0e6885647173474524e0784 (patch)
tree81b1ee2fcb67980376d03f0aa48e42e53abff222 /gi/morf-segmentation/morfsegment.py
parentbe7f57fdd484e063775d7abf083b9fa4c403b610 (diff)
parent96fedabebafe7a38a6d5928be8fff767e411d705 (diff)
fixed conflicts
Diffstat (limited to 'gi/morf-segmentation/morfsegment.py')
-rwxr-xr-xgi/morf-segmentation/morfsegment.py50
1 files changed, 0 insertions, 50 deletions
diff --git a/gi/morf-segmentation/morfsegment.py b/gi/morf-segmentation/morfsegment.py
deleted file mode 100755
index 85b9d4fb..00000000
--- a/gi/morf-segmentation/morfsegment.py
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/python
-
-import sys
-import gzip
-
-#usage: morfsegment.py inputvocab.gz segmentation.ready
-# stdin: the data to segment
-# stdout: the segmented data
-
-if len(sys.argv) < 3:
- print "usage: morfsegment.py inputvocab.gz segmentation.ready [marker]"
- print " stdin: the data to segment"
- print " stdout: the segmented data"
- sys.exit()
-
-#read index:
-split_index={}
-
-marker="##"
-
-if len(sys.argv) > 3:
- marker=sys.argv[3]
-
-word_vocab=gzip.open(sys.argv[1], 'rb') #inputvocab.gz
-seg_vocab=open(sys.argv[2], 'r') #segm.ready..
-
-for seg in seg_vocab:
- #seg = ver# #wonder\n
- #wordline = 1 verwonder\n
- word = word_vocab.readline().strip().split(' ')
- assert(len(word) == 2)
- word = word[1]
- seg=seg.strip()
-
- if seg != word:
- split_index[word] = seg
-
-word_vocab.close()
-seg_vocab.close()
-
-for line in sys.stdin:
- words = line.strip().split()
-
- newsent = []
- for word in words:
- splitword = split_index.get(word, word)
- newsent.append(splitword)
-
- print ' '.join(newsent)
-