Eureka: LLM-coded reward functions.
Eureka
Bases: MightyRunner
Eureka: LLM-coded reward functions.
Source code in mighty/mighty_runners/mighty_eureka_runner.py
def __init__(self, cfg: DictConfig) -> None:
    """Set up the Eureka runner: load the LLM and the base prompts.

    Reads the following keys from ``cfg``:

    * ``iterations`` -- stored unchanged as ``self.iterations``.
    * ``local_llm`` -- must be truthy; only local models are supported.
    * ``model_name`` -- passed to ``get_llm``; the returned pair is stored
      as ``self.model`` and ``self.prompting_function``.
    * ``generation_kwargs`` -- converted to a plain container and stored
      as ``self.generation_kwargs``.
    * ``prompt_dir`` (optional) -- directory containing ``user_prompt.txt``
      and ``system_prompt.txt``. When absent or ``None``, the
      ``eureka_prompts`` directory next to this file is used.

    Args:
        cfg: Run configuration; also forwarded to the parent constructor.

    Raises:
        NotImplementedError: If ``cfg.local_llm`` is falsy.
        OSError: If one of the prompt files cannot be read.
    """
    super().__init__(cfg)
    self.iterations = cfg.iterations

    # Guard clause: bail out before any model loading is attempted.
    if not cfg.local_llm:
        raise NotImplementedError("Currently only local models supported.")
    self.model, self.prompting_function = get_llm(cfg.model_name)
    self.generation_kwargs = OmegaConf.to_container(cfg.generation_kwargs)

    # A configured prompt_dir usually arrives as a plain string, which does
    # not support the `/` operator used below, so always normalise to Path.
    configured_dir = cfg.prompt_dir if "prompt_dir" in cfg else None
    if configured_dir is None:
        self.prompt_dir = Path(__file__).parent.absolute() / "eureka_prompts"
    else:
        self.prompt_dir = Path(configured_dir)

    # Explicit encoding keeps prompt loading independent of the host locale.
    self.user_prompt = (self.prompt_dir / "user_prompt.txt").read_text(
        encoding="utf-8"
    )
    self.system_prompt = (self.prompt_dir / "system_prompt.txt").read_text(
        encoding="utf-8"
    )

    # TODO: adapt these to task
    self.prompt = [
        {"role": "system", "content": self.system_prompt},
        {"role": "user", "content": self.user_prompt},
    ]