From 4d668e16b69db3ef1dea2138a080d69214a9f1c1 Mon Sep 17 00:00:00 2001 From: pks Date: Sun, 7 Dec 2025 22:07:33 +0100 Subject: WIP --- inference.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/inference.py b/inference.py index 9f17c5f..7b49ed9 100755 --- a/inference.py +++ b/inference.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import argparse +import codecs import datasets import json import os @@ -13,7 +14,7 @@ from transformers import AutoProcessor, AutoModelForImageTextToText def clean_str(s): - return s.removeprefix("```json").removesuffix("```").replace("\n", "").strip() + return codecs.decode(s.removeprefix("```json").removesuffix("```").replace("\n", "").strip(), "unicode_escape") def captioning_prompt(image): @@ -34,7 +35,7 @@ def captioning_prompt(image): def captioning_prompt_with_source(image, source): prompt = captioning_prompt(image) - prefix = json.dumps({"English": source}).removesuffix("}") + ', "German": "' + prefix = json.dumps({"English": source}, ensure_ascii=False).removesuffix("}") + ', "German": "' prompt.append({"role": "assistant", "content": [{"type": "text", "text": prefix}]}) return prompt @@ -164,7 +165,7 @@ def main(): args, example_id=x["id"], ) - print(f"{x['id']}\t{json.dumps(output)}") + print(f"{x['id']}\t{json.dumps(output, ensure_ascii=False)}") elif args.mode == "translate": # Generate German translation given English source for x in dataset: @@ -177,7 +178,7 @@ def main(): example_id=x["id"], ) output = {"English": input_data["English"], "German": output["Translation"]} - print(f"{x['id']}\t{json.dumps(output)}") + print(f"{x['id']}\t{json.dumps(output, ensure_ascii=False)}") elif args.mode == "with_prefix": # Generate German translation given English caption and image for x in dataset: @@ -189,7 +190,7 @@ def main(): args, example_id=x["id"], ) - print(f"{x['id']}\t{json.dumps(output)}") + print(f"{x['id']}\t{json.dumps(output, ensure_ascii=False)}") else: sys.stderr.write(f"Unkown mode 
'{args.mode}'") -- cgit v1.2.3