diff options
| author | pks <pks@pks.rocks> | 2025-12-02 13:20:43 +0100 |
|---|---|---|
| committer | pks <pks@pks.rocks> | 2025-12-02 13:20:43 +0100 |
| commit | c0ed7b3ada7f41faaad9a2a64697d6a0e385ed86 (patch) | |
| tree | 5a5af88d08d1392f6d22fda6614efb15826447ab /make_dataset.py | |
| parent | d19bd3fbf54f08db5e563b17c64991ef9b9706a6 (diff) | |
WIP
Diffstat (limited to 'make_dataset.py')
| -rwxr-xr-x | make_dataset.py | 8 |
1 file changed, 4 insertions, 4 deletions
diff --git a/make_dataset.py b/make_dataset.py index b7695b3..b2f2cee 100755 --- a/make_dataset.py +++ b/make_dataset.py @@ -6,7 +6,7 @@ import os from datasets import Dataset, DatasetDict from glob import glob -from PIL.Image import Image +from PIL import Image def make_dataset(base="./baseline"): @@ -14,13 +14,13 @@ def make_dataset(base="./baseline"): user_prompts = [] images = [] - file_ids = [] + ids = [] assistant_replies = [] for filename in glob(f"{base}/*.jsonl"): with open(filename, "r") as f: data = json.loads(f.read()) - id_ = os.path.basename(filename) + id_ = os.path.basename(filename).removesuffix(".jsonl") image_path = f"../d/Images/{id_.removesuffix(".jsonl")}.jpg" user_prompts.append(prompt) @@ -29,7 +29,7 @@ def make_dataset(base="./baseline"): "German": data["Translation"], }, ensure_ascii=False, indent=0)) ids.append(id_) - images.append(Image(image_path).convert("RGB")) + images.append(Image.open(image_path).convert("RGB")) return Dataset.from_dict({"id": ids, "image": images, "user": user_prompts, "assistant": assistant_replies}) |
