summary refs log tree commit diff
path: root/inference.py
diff options
context:
space:
mode:
Diffstat (limited to 'inference.py')
-rwxr-xr-x inference.py 11
1 files changed, 6 insertions, 5 deletions
diff --git a/inference.py b/inference.py
index 9f17c5f..7b49ed9 100755
--- a/inference.py
+++ b/inference.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python3
import argparse
+import codecs
import datasets
import json
import os
@@ -13,7 +14,7 @@ from transformers import AutoProcessor, AutoModelForImageTextToText
def clean_str(s):
- return s.removeprefix("```json").removesuffix("```").replace("\n", "").strip()
+ return codecs.decode(s.removeprefix("```json").removesuffix("```").replace("\n", "").strip(), "unicode_escape")
def captioning_prompt(image):
@@ -34,7 +35,7 @@ def captioning_prompt(image):
def captioning_prompt_with_source(image, source):
prompt = captioning_prompt(image)
- prefix = json.dumps({"English": source}).removesuffix("}") + ', "German": "'
+ prefix = json.dumps({"English": source}, ensure_ascii=False).removesuffix("}") + ', "German": "'
prompt.append({"role": "assistant", "content": [{"type": "text", "text": prefix}]})
return prompt
@@ -164,7 +165,7 @@ def main():
args,
example_id=x["id"],
)
- print(f"{x['id']}\t{json.dumps(output)}")
+ print(f"{x['id']}\t{json.dumps(output, ensure_ascii=False)}")
elif args.mode == "translate": # Generate German translation given English source
for x in dataset:
@@ -177,7 +178,7 @@ def main():
example_id=x["id"],
)
output = {"English": input_data["English"], "German": output["Translation"]}
- print(f"{x['id']}\t{json.dumps(output)}")
+ print(f"{x['id']}\t{json.dumps(output, ensure_ascii=False)}")
elif args.mode == "with_prefix": # Generate German translation given English caption and image
for x in dataset:
@@ -189,7 +190,7 @@ def main():
args,
example_id=x["id"],
)
- print(f"{x['id']}\t{json.dumps(output)}")
+ print(f"{x['id']}\t{json.dumps(output, ensure_ascii=False)}")
else:
sys.stderr.write(f"Unkown mode '{args.mode}'")