1 files changed, 0 insertions, 46 deletions
diff --git a/format-data.py b/format-data.py
deleted file mode 100644
index 60c13d7..0000000
--- a/format-data.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import json
-
-from glob import glob
-
-from PIL import Image
-
-
-def format_example_for_gemma3_preferences(data, target_score, translation_score):
-    prompt = """<bos><start_of_turn>user
-You are a professional English-German translator and also a renowned photography critic.
-
-<start_of_image>
-Write a detailed caption for this image in a single sentence. Translate the caption into German. The output needs to be JSON, the keys being 'English' and 'German' for the respective captions. Only output the JSON, nothing else.<end_of_turn>"""
-    translation = f"""<start_of_turn>model
-{data['Translation']}<end_of_turn>"""
-    target = f"""<start_of_turn>model
-{data['German']}<end_of_turn>"""
-
-    if target_score == translation_score:
-        return None, None, None
-    elif target_score > translation_score:
-        return prompt, target, translation
-    else:
-        return prompt, translation, target
-
-
-def main():
-    with open("baseline/target.gemba-gpt4.1.scores", "r") as f:
-        scores_target = [int(line.strip()) for line in f.readlines()]
-    with open("baseline/translation.gemba-gpt4.1.scores", "r") as f:
-        scores_translation = [int(line.strip()) for line in f.readlines()]
-
-    for index, filename in enumerate(sorted(glob("baseline/*.jsonl"))):
-        with open(filename, "r") as f:
-            data = json.loads(f.read())
-        prompt, c, r = format_example_for_gemma3_preferences(data, scores_target[index], scores_translation[index])
-        print(f"{c=} {scores_target[index] > scores_translation[index]}")
-
-
-    from transformers import AutoTokenizer
-    model_id = "google/gemma-3-4b-it"
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-        
-
-if __name__ == "__main__":
-    main()