diff options
| -rwxr-xr-x | make_dataset.py | 8 |
1 file changed, 4 insertions, 4 deletions
diff --git a/make_dataset.py b/make_dataset.py index b7695b3..b2f2cee 100755 --- a/make_dataset.py +++ b/make_dataset.py @@ -6,7 +6,7 @@ import os from datasets import Dataset, DatasetDict from glob import glob -from PIL.Image import Image +from PIL import Image def make_dataset(base="./baseline"): @@ -14,13 +14,13 @@ def make_dataset(base="./baseline"): user_prompts = [] images = [] - file_ids = [] + ids = [] assistant_replies = [] for filename in glob(f"{base}/*.jsonl"): with open(filename, "r") as f: data = json.loads(f.read()) - id_ = os.path.basename(filename) + id_ = os.path.basename(filename).removesuffix(".jsonl") image_path = f"../d/Images/{id_.removesuffix(".jsonl")}.jpg" user_prompts.append(prompt) @@ -29,7 +29,7 @@ def make_dataset(base="./baseline"): "German": data["Translation"], }, ensure_ascii=False, indent=0)) ids.append(id_) - images.append(Image(image_path).convert("RGB")) + images.append(Image.open(image_path).convert("RGB")) return Dataset.from_dict({"id": ids, "image": images, "user": user_prompts, "assistant": assistant_replies}) |
