Source code for factly.cli

"""Factly CLI entrypoint."""

import logging
import os
import sys
from pathlib import Path

import click
import openai
from dotenv import load_dotenv

from . import __copyright__, __version__
from .tasks import list_available_tasks, resolve_tasks

logger = logging.getLogger(__name__)

BASE_DIR = Path(__file__).resolve().parent.parent
if BASE_DIR not in sys.path:
    sys.path.append(str(BASE_DIR))

BANNER = r"""

        ██████                      █████    ████
       ███░░███                    ░░███    ░░███
      ░███ ░░░   ██████    ██████  ███████   ░███  █████ ████
     ███████    ░░░░░███  ███░░███░░░███░    ░███ ░░███ ░███
    ░░░███░      ███████ ░███ ░░░   ░███     ░███  ░███ ░███
      ░███      ███░░███ ░███  ███  ░███ ███ ░███  ░███ ░███
      █████    ░░████████░░██████   ░░█████  █████ ░░███████
     ░░░░░      ░░░░░░░░  ░░░░░░     ░░░░░  ░░░░░   ░░░░░███
                                                    ███ ░███
                                                   ░░██████
                                                    ░░░░░░



"""


load_dotenv(BASE_DIR / ".env")

openai.api_key = os.getenv("OPENAI_API_KEY")
openai.base_url = os.getenv("OPENAI_API_BASE")


[docs] class RichGroup(click.Group): """Custom Click group that displays a banner before the help text."""
[docs] def format_help(self, ctx, formatter): """Writes the help into the formatter if it exists. This method is called by Click when the help text is requested. """ click.secho(BANNER, nl=False) super().format_help(ctx, formatter)
@click.group( cls=RichGroup, help="CLI tool to evaluate ChatGPT factuality on MMLU benchmark.", ) @click.version_option( version=__version__, prog_name="factly", message=f"""%(prog)s %(version)s {__copyright__} This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.""", ) def cli(): """Entrypoint for factly CLI.""" pass @cli.command() @click.option( "--instructions", type=click.Path(exists=True, dir_okay=False, path_type=Path), default=Path.cwd() / "instructions.yaml", help=( "Path to YAML file with system instruction variants. " "Default is `instructions.yaml` in the current working directory." ), ) @click.option( "--model", type=str, default=os.getenv("OPENAI_MODEL"), help="OpenAI model to use for evaluation.", ) @click.option( "--n-shots", type=int, default=0, help="Number of shots for few-shot learning (default: 0).", ) @click.option( "--tasks", type=str, default=None, multiple=True, help=( "List of tasks or categories to evaluate. " "Use 'factly list-tasks' to see available options." ), ) @click.option( "--verbose", is_flag=True, help="Show detailed progress information during evaluation.", ) @click.option( "-j", "--workers", type=int, default=None, help=( "Maximum number of concurrent question evaluations. " "(default: auto-determined by system resources)." ), ) @click.option( "--plot", is_flag=True, help="Generate a plot of the results after evaluation.", ) @click.option( "--plot-path", type=click.Path(dir_okay=False, path_type=Path), default=None, help="Path to save the plot (default: ./outputs/factuality-<model>-t<count>.png).", ) def evaluate( instructions: Path, model: str, n_shots: int, verbose: bool, tasks: list[str] | None, workers: int | None = None, plot: bool = False, plot_path: Path | None = None, ): """Evaluate the model on the MMLU benchmark.""" from factly.benchmarks import evaluate as do_evaluate openai.api_key = os.getenv("OPENAI_API_KEY") openai.base_url = os.getenv("OPENAI_API_BASE") try: # Convert None to empty list to satisfy type checking task_names = tasks if tasks is not None else [] # Resolve task names to actual MMLUTask objects mmlu_tasks = resolve_tasks(task_names) logger.info( "Evaluating %d tasks: %s", len(mmlu_tasks), ", ".join([t.name for t in mmlu_tasks]), ) do_evaluate( instructions=instructions, model=model or "gpt-4o", tasks=mmlu_tasks, n_shots=n_shots, workers=workers, verbose=verbose, plot=plot, plot_path=plot_path, ) except ValueError as e: logger.error("Error resolving tasks: %s", e, exc_info=True) logger.info("Use 'factly list-tasks' to see available tasks") sys.exit(1) @cli.command("list-tasks") def list_tasks(): """List all available MMLU tasks for evaluation.""" click.echo(list_available_tasks()) def main(args: list[str] | None = None) -> int: load_dotenv(BASE_DIR / ".env") try: # Invoke the Click command cli.main(args=args, standalone_mode=False) return 0 except click.exceptions.NoSuchOption: # Handle case where no option is provided click.echo("No such option. Use --help for more information.", err=True) return 2 except click.exceptions.Abort: # Handle keyboard interrupts gracefully click.echo("Operation aborted by user") return 130 # Standard exit code for SIGINT except click.exceptions.Exit as e: # Handle normal exit return e.exit_code except Exception as exc: # pylint: disable=broad-exception-caught # Handle unexpected errors logger.error(exc, exc_info=True) return 1