diff options
| author | pks <pks@pks.rocks> | 2025-12-07 22:07:33 +0100 |
|---|---|---|
| committer | pks <pks@pks.rocks> | 2025-12-07 22:07:33 +0100 |
| commit | 4d668e16b69db3ef1dea2138a080d69214a9f1c1 (patch) | |
| tree | b1083ad1092c08d62c84fa0a8e89c2f7931b307e | |
| parent | 099531c77b998bcfd14d641ebdb37fbc25fa9c5f (diff) | |
| -rwxr-xr-x | inference.py | 11 |
1 files changed, 6 insertions, 5 deletions
~~~diff
diff --git a/inference.py b/inference.py
index 9f17c5f..7b49ed9 100755
--- a/inference.py
+++ b/inference.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 
 import argparse
+import codecs
 import datasets
 import json
 import os
@@ -13,7 +14,7 @@ from transformers import AutoProcessor, AutoModelForImageTextToText
 
 
 def clean_str(s):
-    return s.removeprefix("```json").removesuffix("```").replace("\n", "").strip()
+    return codecs.decode(s.removeprefix("```json").removesuffix("```").replace("\n", "").strip(), "unicode_escape")
 
 
 def captioning_prompt(image):
@@ -34,7 +35,7 @@ def captioning_prompt(image):
 
 def captioning_prompt_with_source(image, source):
     prompt = captioning_prompt(image)
-    prefix = json.dumps({"English": source}).removesuffix("}") + ', "German": "'
+    prefix = json.dumps({"English": source, ensure_ascii=False)}).removesuffix("}") + ', "German": "'
     prompt.append({"role": "assistant", "content": [{"type": "text", "text": prefix}]})
     return prompt
 
@@ -164,7 +165,7 @@ def main():
                 args,
                 example_id=x["id"],
             )
-            print(f"{x['id']}\t{json.dumps(output)}")
+            print(f"{x['id']}\t{json.dumps(output, ensure_ascii=False)}")
     elif args.mode == "translate":
         # Generate German translation given English source
         for x in dataset:
@@ -177,7 +178,7 @@ def main():
                 example_id=x["id"],
             )
             output = {"English": input_data["English"], "German": output["Translation"]}
-            print(f"{x['id']}\t{json.dumps(output)}")
+            print(f"{x['id']}\t{json.dumps(output, ensure_ascii=False)}")
     elif args.mode == "with_prefix":
         # Generate German translation given English caption and image
         for x in dataset:
@@ -189,7 +190,7 @@ def main():
                 args,
                 example_id=x["id"],
             )
-            print(f"{x['id']}\t{json.dumps(output)}")
+            print(f"{x['id']}\t{json.dumps(output, ensure_ascii=False)}")
     else:
         sys.stderr.write(f"Unkown mode '{args.mode}'")
 
~~~
