From 3645232a2e350b37a20deb67f88c654f15efb635 Mon Sep 17 00:00:00 2001 From: pks Date: Mon, 1 Dec 2025 11:24:14 +0100 Subject: WIP --- inference.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) mode change 100644 => 100755 inference.py (limited to 'inference.py') diff --git a/inference.py b/inference.py old mode 100644 new mode 100755 index d20dc55..297f423 --- a/inference.py +++ b/inference.py @@ -74,6 +74,7 @@ def main(): args.model_id, device_map="cuda", dtype=torch.bfloat16, + attn_implementation="eager", ).eval() processor = AutoProcessor.from_pretrained(args.model_id, use_fast=True) @@ -128,8 +129,11 @@ def main(): generation = model.generate(**inputs, max_new_tokens=300, do_sample=True, + temperature=0.8, top_p=1.0, - top_k=50) + top_k=50, + eos_token_id=stop_token_ids, + disable_compile=True) generation = generation[0][input_len:] decoded = processor.decode(generation, skip_special_tokens=True).removeprefix("```json").removesuffix("```").replace("\n", "").strip() -- cgit v1.2.3