summary | refs | log | tree | commit | diff
path: root/gi/posterior-regularisation/prjava
diff options
context:
space:
mode:
author trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-18 20:40:27 +0000
committer trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-18 20:40:27 +0000
commit bd1019e31893ecd4799f4cb1c3000582d291c7a5 (patch)
tree 4659f171ab3f2d66578c906640f4f84f1a6bbbb2 /gi/posterior-regularisation/prjava
parent fe77bf221cc95c410e20d81786a63f6dfcd715eb (diff)
Changed to UTF8
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@311 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi/posterior-regularisation/prjava')
-rw-r--r-- gi/posterior-regularisation/prjava/src/io/FileUtil.java | 18
-rw-r--r-- gi/posterior-regularisation/prjava/src/phrase/Trainer.java | 8
2 files changed, 14 insertions, 12 deletions
diff --git a/gi/posterior-regularisation/prjava/src/io/FileUtil.java b/gi/posterior-regularisation/prjava/src/io/FileUtil.java
index 81e7747b..6720d087 100644
--- a/gi/posterior-regularisation/prjava/src/io/FileUtil.java
+++ b/gi/posterior-regularisation/prjava/src/io/FileUtil.java
@@ -8,24 +8,25 @@ public class FileUtil
public static BufferedReader reader(File file) throws FileNotFoundException, IOException
{
if (file.getName().endsWith(".gz"))
- return new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(file))));
+ return new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(file)), "UTF8"));
else
- return new BufferedReader(new FileReader(file));
+ return new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF8"));
}
public static PrintStream printstream(File file) throws FileNotFoundException, IOException
{
if (file.getName().endsWith(".gz"))
- return new PrintStream(new GZIPOutputStream(new FileOutputStream(file)));
+ return new PrintStream(new GZIPOutputStream(new FileOutputStream(file)), true, "UTF8");
else
- return new PrintStream(new FileOutputStream(file));
+ return new PrintStream(new FileOutputStream(file), true, "UTF8");
}
- public static Scanner openInFile(String filename){
+ public static Scanner openInFile(String filename)
+ {
Scanner localsc=null;
try
{
- localsc=new Scanner (new FileInputStream(filename));
+ localsc=new Scanner(new FileInputStream(filename), "UTF8");
}catch(IOException ioe){
System.out.println(ioe.getMessage());
@@ -33,10 +34,11 @@ public class FileUtil
return localsc;
}
- public static FileInputStream openInputStream(String infilename){
+ public static FileInputStream openInputStream(String infilename)
+ {
FileInputStream fis=null;
try {
- fis =(new FileInputStream(infilename));
+ fis = new FileInputStream(infilename);
} catch (IOException ioe) {
System.out.println(ioe.getMessage());
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Trainer.java b/gi/posterior-regularisation/prjava/src/phrase/Trainer.java
index d1322c26..7f0b1970 100644
--- a/gi/posterior-regularisation/prjava/src/phrase/Trainer.java
+++ b/gi/posterior-regularisation/prjava/src/phrase/Trainer.java
@@ -34,10 +34,10 @@ public class Trainer
parser.accepts("agree");
parser.accepts("no-parameter-cache");
parser.accepts("skip-large-phrases").withRequiredArg().ofType(Integer.class).defaultsTo(5);
- parser.accepts("rare-word").withRequiredArg().ofType(Integer.class).defaultsTo(0);
- parser.accepts("rare-edge").withRequiredArg().ofType(Integer.class).defaultsTo(0);
- parser.accepts("rare-phrase").withRequiredArg().ofType(Integer.class).defaultsTo(0);
- parser.accepts("rare-context").withRequiredArg().ofType(Integer.class).defaultsTo(0);
+ parser.accepts("rare-word").withRequiredArg().ofType(Integer.class).defaultsTo(10);
+ parser.accepts("rare-edge").withRequiredArg().ofType(Integer.class).defaultsTo(1);
+ parser.accepts("rare-phrase").withRequiredArg().ofType(Integer.class).defaultsTo(2);
+ parser.accepts("rare-context").withRequiredArg().ofType(Integer.class).defaultsTo(2);
OptionSet options = parser.parse(args);
if (options.has("help") || !options.has("in"))