diff options
| author | pks <pks@pks.rocks> | 2025-12-05 22:28:02 +0100 |
|---|---|---|
| committer | pks <pks@pks.rocks> | 2025-12-05 22:28:02 +0100 |
| commit | 406c46ce1cfaf56b3b7334152dedd3101d50207e (patch) | |
| tree | 118603c0aa40509f695f6b14c90909ff8787e154 | |
| parent | 64f714542dbe8ee015afa94b3418d8f51c558070 (diff) | |
WIP
| -rw-r--r-- | format-data.py | 46 | ||||
| -rw-r--r-- | inference.py (renamed from inference2.py) | 0 | ||||
| -rw-r--r-- | test.py | 75 |
3 files changed, 0 insertions, 121 deletions
diff --git a/format-data.py b/format-data.py deleted file mode 100644 index 60c13d7..0000000 --- a/format-data.py +++ /dev/null @@ -1,46 +0,0 @@ -import json - -from glob import glob - -from PIL import Image - - -def format_example_for_gemma3_preferences(data, target_score, translation_score): - prompt = """<bos><start_of_turn>user -You are a professional English-German translator and also a renowned photography critic. - -<start_of_image> -Write a detailed caption for this image in a single sentence. Translate the caption into German. The output needs to be JSON, the keys being 'English' and 'German' for the respective captions. Only output the JSON, nothing else.<end_of_turn>""" - translation = f"""<start_of_turn>model -{data['Translation']}<end_of_turn>""" - target = f"""<start_of_turn>model -{data['German']}<end_of_turn>""" - - if target_score == translation_score: - return None, None, None - elif target_score > translation_score: - return prompt, target, translation - else: - return prompt, translation, target - - -def main(): - with open("baseline/target.gemba-gpt4.1.scores", "r") as f: - scores_target = [int(line.strip()) for line in f.readlines()] - with open("baseline/translation.gemba-gpt4.1.scores", "r") as f: - scores_translation = [int(line.strip()) for line in f.readlines()] - - for index, filename in enumerate(sorted(glob("baseline/*.jsonl"))): - with open(filename, "r") as f: - data = json.loads(f.read()) - prompt, c, r = format_example_for_gemma3_preferences(data, scores_target[index], scores_translation[index]) - print(f"{c=} {scores_target[index] > scores_translation[index]}") - - - from transformers import AutoTokenizer - model_id = "google/gemma-3-4b-it" - tokenizer = AutoTokenizer.from_pretrained(model_id) - - -if __name__ == "__main__": - main() diff --git a/inference2.py b/inference.py index 67e633a..67e633a 100644 --- a/inference2.py +++ b/inference.py diff --git a/test.py b/test.py deleted file mode 100644 index 35ceb5f..0000000 --- a/test.py +++ /dev/null @@ -1,75 +0,0 @@ -import torch -import requests -from PIL import Image -from transformers import AutoProcessor, AutoModelForCausalLM - -from glob import glob -import json -import os -from datasets import Dataset - - -def make_chat_data(base="./baseline"): - dataset = [] - for filename in sorted(glob(f"{base}/*.jsonl"))[0:1]: - with open(filename, "r") as f: - data = json.loads(f.read()) - image_path = f"../Images/{os.path.basename(filename).removesuffix(".jsonl")}.jpg" - image = Image.open(image_path).convert("RGB") - chat = [{ - "role": "user", - "content": [ - {"type": "text", "text": "You are a professional English-German translator and also a renowned photography critic.\n\nWrite a detailed caption for this image in a single sentence. Translate the caption into German. The output needs to be JSON, the keys being 'English' and 'German' for the respective captions. Only output the JSON, nothing else."}, - {"type": "image"} - ] - }, - #{ "role": "assistant", - # "content": [{"type": "text", "text": '{"English": ' + json.dumps(data["English"]) + ', "German": ' + json.dumps(data["Translation"]) + '}'}] - #} - ] - item = {"image": image, "chat": chat} - dataset.append(item) - - return Dataset.from_list(dataset) - - -model_id = "google/gemma-3-4b-it" -processor = AutoProcessor.from_pretrained(model_id, use_fast=True) -model = AutoModelForCausalLM.from_pretrained( - model_id, - dtype=torch.bfloat16, - device_map="auto", - attn_implementation="eager", -) -device = model.device -dataset = make_chat_data() -chat_prompt = processor.tokenizer.apply_chat_template( - [item["chat"] for item in dataset], - tokenize=False, - add_generation_prompt=True, -) - -print(dataset[0]) - -inputs = processor( - text=chat_prompt, - images=[item["image"] for item in dataset], - return_tensors="pt" -).to(device) - -print("Keys in the output:", inputs.keys()) - -input_ids = inputs["input_ids"] -print("\nShape of input_ids:", input_ids.shape) -print("input_ids:", input_ids) -decoded_text = processor.decode(input_ids[0], skip_special_tokens=False) -print("\nDecoded input_ids (showing special tokens):") -print(decoded_text) -pixel_values = inputs["pixel_values"] -print("\n--- Generating a Response ---") -output = model.generate( - **inputs, - max_new_tokens=100 -) -generated_text = processor.decode(output[0], skip_special_tokens=True) -print("\nModel's response:\n", generated_text) |
