diff options
Diffstat (limited to 'make_dataset.py')
| -rwxr-xr-x | make_dataset.py | 4 |
1 file changed, 2 insertions, 2 deletions
diff --git a/make_dataset.py b/make_dataset.py index b2f2cee..157f97a 100755 --- a/make_dataset.py +++ b/make_dataset.py @@ -9,7 +9,7 @@ from glob import glob from PIL import Image -def make_dataset(base="./baseline"): +def make_dataset(base="./d/baseline/jsonl"): prompt = "You are a professional English-German translator and also a renowned photography critic.\n\nWrite a detailed caption for this image in a single sentence. Translate the caption into German. The output needs to be JSON, the keys being 'English' and 'German' for the respective captions. Only output the JSON, nothing else." + "<start_of_image>" user_prompts = [] @@ -21,7 +21,7 @@ def make_dataset(base="./baseline"): with open(filename, "r") as f: data = json.loads(f.read()) id_ = os.path.basename(filename).removesuffix(".jsonl") - image_path = f"../d/Images/{id_.removesuffix(".jsonl")}.jpg" + image_path = f"d/Images/{id_.removesuffix(".jsonl")}.jpg" user_prompts.append(prompt) assistant_replies.append(json.dumps({ |
