From e26434979adc33bd949566ba7bf02dff64e80a3e Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cab.ark.cs.cmu.edu>
Date: Tue, 2 Oct 2012 00:19:43 -0400
Subject: cdec cleanup, remove bayesian stuff, parsing stuff

---
 gi/morf-segmentation/morfsegment.py | 50 -------------------------------------
 1 file changed, 50 deletions(-)
 delete mode 100755 gi/morf-segmentation/morfsegment.py

(limited to 'gi/morf-segmentation/morfsegment.py')

diff --git a/gi/morf-segmentation/morfsegment.py b/gi/morf-segmentation/morfsegment.py
deleted file mode 100755
index 85b9d4fb..00000000
--- a/gi/morf-segmentation/morfsegment.py
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/python
-
-import sys
-import gzip
-
-#usage: morfsegment.py inputvocab.gz segmentation.ready
-#  stdin: the data to segment
-#  stdout: the segmented data
-
-if len(sys.argv) < 3:
-  print "usage: morfsegment.py inputvocab.gz segmentation.ready [marker]"
-  print "  stdin: the data to segment"
-  print "  stdout: the segmented data"
-  sys.exit()
-
-#read index:
-split_index={}
-
-marker="##"
-
-if len(sys.argv) > 3:
-  marker=sys.argv[3]
-
-word_vocab=gzip.open(sys.argv[1], 'rb') #inputvocab.gz
-seg_vocab=open(sys.argv[2], 'r') #segm.ready..
-
-for seg in seg_vocab:
-  #seg = ver# #wonder\n
-  #wordline = 1 verwonder\n
-  word = word_vocab.readline().strip().split(' ')
-  assert(len(word) == 2)
-  word = word[1]
-  seg=seg.strip()
-
-  if seg != word:
-    split_index[word] = seg
-
-word_vocab.close()
-seg_vocab.close()
-
-for line in sys.stdin:
-  words = line.strip().split()
-
-  newsent = []
-  for word in words:
-    splitword = split_index.get(word, word)
-    newsent.append(splitword)
-
-  print ' '.join(newsent)
-
-- 
cgit v1.2.3