summary | refs | log | tree | commit | diff
path: root/inference.py
diff options
context:
space:
mode:
author: pks <pks@pks.rocks> 2025-12-05 22:23:16 +0100
committer: pks <pks@pks.rocks> 2025-12-05 22:23:16 +0100
commit: f243793b76a8ace9b8a690cb02afb0e91a5b0531 (patch)
tree: 7ec30fdd8642a3edfe41ed09ced8271200150909 /inference.py
parent: c0ed7b3ada7f41faaad9a2a64697d6a0e385ed86 (diff)
WIP
Diffstat (limited to 'inference.py')
-rwxr-xr-x inference.py 110
1 files changed, 53 insertions, 57 deletions
diff --git a/inference.py b/inference.py
index 69ab33b..c8adb16 100755
--- a/inference.py
+++ b/inference.py
@@ -10,7 +10,7 @@ import torch
from glob import glob
from PIL import Image
-from transformers import AutoProcessor, Gemma3ForConditionalGeneration
+from transformers import AutoProcessor, AutoModelForImageTextToText
def clean_str(s):
@@ -71,18 +71,19 @@ def make_inputs(processor,
def main():
parser = argparse.ArgumentParser()
- parser.add_argument("--model", default="google/gemma-3-4b-it")
- parser.add_argument("--lora-adapter", default=None)
- parser.add_argument("--mode", choices=["from_scratch", "with_prefix", "translate"])
+ parser.add_argument("--model", default="google/gemma-3-4b-it", type=str)
+ parser.add_argument("--attention-implementation", default="eager", type=str)
+ parser.add_argument("--lora-adapter", default=None, type=str)
+ parser.add_argument("--mode", choices=["from_scratch", "with_prefix", "translate"], type=str, required=True)
parser.add_argument("--dataset", default="asdf2k/caption_translation", type=str)
- parser.add_argument("--data-subset", choices=["train", "dev", "test"], default="test")
+ parser.add_argument("--data-subset", choices=["train", "dev", "test"], default="test", type=str)
args = parser.parse_args()
- model = Gemma3ForConditionalGeneration.from_pretrained(
+ model = AutoModelForImageTextToText.from_pretrained(
args.model,
device_map="cuda",
dtype=torch.bfloat16,
- attn_implementation="eager",
+ attn_implementation=args.attention_implementation,
).eval()
processor = AutoProcessor.from_pretrained(args.model, use_fast=True)
@@ -94,7 +95,7 @@ def main():
if args.mode == "translate": # Generate German translation given English source
for x in dataset:
- sys.stderr.write(f"Processing id={x['id']=}\n")
+ sys.stderr.write(f"Processing id={x['id']}\n")
data = json.loads(x["assistant"])
@@ -104,21 +105,23 @@ def main():
input_len = inputs["input_ids"].shape[-1]
with torch.inference_mode():
- generation = model.generate(**inputs,
- max_new_tokens=300,
- do_sample=True,
- top_p=1.0,
- top_k=50)
- generation = generation[0][input_len:]
-
- decoded = clean_str(processor.decode(generation, skip_special_tokens=True))
+ output = model.generate(**inputs,
+ max_new_tokens=args.max_new_tokens,
+ do_sample=args.do_sample,
+ temperature=args.temperature,
+ top_p=args.top_p,
+ top_k=args.top_k,
+ disable_compile=True)
+ output = output[0][input_len:]  # first sequence only; keep the tokens after the first input_len positions
+
+ output = clean_str(processor.decode(output, skip_special_tokens=True))
+
try:
- new_data = json.loads(decoded)
+ output = json.loads(output)
except:
- sys.stderr.write(f"Error loading JSON from string '{decoded}' for {filename=}\n")
+ sys.stderr.write(f"Error loading JSON from string '{output}' for id{x['id']}\n")
- data.update(new_data)
- print(json.dumps(data))
+ print(json.dumps(output))
elif args.mode == "from_scratch": # Generate caption & translation from scratch
for x in dataset:
@@ -129,57 +132,50 @@ def main():
input_len = inputs["input_ids"].shape[-1]
with torch.inference_mode():
- generation = model.generate(**inputs,
- max_new_tokens=300,
- do_sample=True,
- temperature=0.8,
- top_p=1.0,
- top_k=50,
- eos_token_id=stop_token_ids,
- disable_compile=True)
- generation = generation[0][input_len:]
-
- decoded = clean_str(processor.decode(generation, skip_special_tokens=True))
+ output = model.generate(**inputs,
+ max_new_tokens=args.max_new_tokens,
+ do_sample=args.do_sample,
+ temperature=args.temperature,
+ top_p=args.top_p,
+ top_k=args.top_k,
+ eos_token_id=stop_token_ids,
+ disable_compile=True)
+ output = output[0][input_len:]
+
+ output = clean_str(processor.decode(output, skip_special_tokens=True))
try:
- _ = json.loads(decoded)
+ output = json.loads(output)
except:
- sys.stderr.write(f"Error loading JSON from string '{decoded}' for {filename=}\n")
+ sys.stderr.write(f"Error loading JSON from string '{output}' for id{x['id']}\n")
- sys.stderr.write(f"{decoded=}\n")
- with open(f"{os.path.basename(filename).removesuffix('.jpg')}.jsonl", "w") as f:
- f.write(f"{decoded}\n")
+ print(json.dumps(output))
elif args.mode == "with_prefix": # Generate German translation given English caption and image
- for filename in glob("./baseline/files_test/*.jsonl"):
- image = "../d/Images/" + os.path.basename(filename).removesuffix(".jsonl") + ".jpg"
- sys.stderr.write(f"Processing {filename=}\n")
- with open(filename, "r+") as f:
- data = json.loads(f.read())
+ for x in dataset:
+ sys.stderr.write(f"Processing id={x['id']}\n")
+ data = json.loads(x['assistant_reply'])
prompt = captioning_prompt_with_source(Image.open(image), data["English"])
inputs = make_inputs(processor,
prompt,
model.device)
-
input_len = inputs["input_ids"].shape[-1] # Will not cut off assistant prefix
with torch.inference_mode():
- generation = model.generate(**inputs,
- max_new_tokens=300,
- do_sample=True,
- top_p=1.0,
- top_k=50)
- generation = generation[0] # batch size 1
- truncated_generation = generation[input_len:]
-
- decoded = processor.decode(truncated_generation, skip_special_tokens=True).removeprefix("```json").removesuffix("```").replace("\n", "").strip()
+ output = model.generate(**inputs,
+ max_new_tokens=args.max_new_tokens,
+ do_sample=args.do_sample,
+ temperature=args.temperature,
+ top_p=args.top_p,
+ top_k=args.top_k)
+ output = output[0][input_len:]  # first sequence only; keep the tokens after the first input_len positions
+
+ output = clean_str(processor.decode(output, skip_special_tokens=True))
try:
- _ = json.loads(decoded)
+ output = json.loads(output)
except:
- sys.stderr.write(f"Error loading JSON from string '{decoded}' for {filename=}\n")
-
- sys.stderr.write(f"{decoded=}\n")
- with open(f"{os.path.basename(filename)}", "w") as f:
- f.write(f"{decoded}\n")
+ sys.stderr.write(f"Error loading JSON from string '{output}' for id{x['id']}\n")
+
+ print(json.dumps(output))
else:
sys.stderr.write(f"Unkown mode '{args.mode}'")