Source code for factly.llms.openai_model

from __future__ import annotations

from typing import Union

from deepeval.models.llms import GPTModel
from deepeval.models.llms.openai_model import (
    json_mode_models,
    log_retry_error,
    retryable_exceptions,
    structured_outputs_models,
)
from deepeval.models.llms.utils import trim_and_load_json
from openai import AsyncOpenAI, OpenAI
from pydantic import BaseModel
from tenacity import (
    retry,
    retry_if_exception_type,
    wait_exponential_jitter,
)

from factly.llms.base_model import FactlyModelMixin


[docs] class FactlyGptModel(FactlyModelMixin, GPTModel): """Factly GPT model."""
[docs] def __init__( self, model: str, system_prompt: str, prompt_name: str, temperature: float = 0.0, top_p: float = 1.0, max_tokens: int = 1, *args, **kwargs, ): """Initialize the Factly GPT model. Args: model: Model identifier, can be "<provider>/<model>" for LiteLLM or "<model>" for direct provider models system_prompt: System prompt to use for generating responses prompt_name: Display name for this model configuration in reports temperature: Sampling temperature between 0.0 and 2.0 top_p: Nucleus sampling parameter between 0.0 and 1.0 max_tokens: Maximum number of tokens to generate """ actual_model_name = self.get_actual_model_name(model) super().__init__(actual_model_name, *args, **kwargs) self.model_name = model # Redefine the model name self.actual_model_name = actual_model_name self.system_prompt = system_prompt self.prompt_name = prompt_name self.temperature = temperature self.top_p = top_p self.max_tokens = max_tokens
[docs] @retry( wait=wait_exponential_jitter(initial=1, exp_base=2, jitter=2, max=10), retry=retry_if_exception_type(retryable_exceptions), after=log_retry_error, ) async def ainvoke( self, prompt: str, schema: BaseModel | None = None ) -> Union[str, dict, BaseModel]: """Generate a response from the model asynchronously.""" messages = self.create_messages(prompt) client = self.load_model(async_mode=True) if schema: if self.actual_model_name in structured_outputs_models: completion = await client.beta.chat.completions.parse( # type: ignore model=self.model_name, messages=messages, response_format=schema, temperature=self.temperature, top_p=self.top_p, max_tokens=self.max_tokens, ) return completion.choices[0].message.parsed if self.actual_model_name in json_mode_models: completion = await client.beta.chat.completions.parse( # type: ignore model=self.model_name, messages=messages, response_format={"type": "json_object"}, temperature=self.temperature, top_p=self.top_p, max_tokens=self.max_tokens, ) json_output = trim_and_load_json(completion.choices[0].message.content) return schema.model_validate(json_output) completion = await client.chat.completions.create( # type: ignore model=self.model_name, messages=messages, temperature=self.temperature, top_p=self.top_p, max_tokens=self.max_tokens, ) output = completion.choices[0].message.content or "" if schema: json_output = trim_and_load_json(output) return schema.model_validate(json_output) return str(completion.choices[0].message.content)
[docs] def load_model(self, async_mode: bool = False) -> Union[OpenAI, AsyncOpenAI]: """Load the OpenAI client in sync or async mode. Args: async_mode: Whether to load the async client Returns: OpenAI client instance """ if not async_mode: return OpenAI(api_key=self._openai_api_key, base_url=self.base_url) return AsyncOpenAI(api_key=self._openai_api_key, base_url=self.base_url)