# Source code for factly.tasks

"""MMLU task registry and management for Factly."""

from __future__ import annotations

import logging
from enum import Enum
from typing import Optional

from deepeval.benchmarks.mmlu.task import MMLUTask

logger = logging.getLogger(__name__)


# Task categories for organization
class TaskCategory(str, Enum):
    """Categories for organizing MMLU tasks.

    Each member's value is the human-readable label shown to users. Because
    the enum mixes in ``str``, a member compares equal to its label
    (``TaskCategory.STEM == "STEM"``).

    Lookup by value is case-insensitive and ignores surrounding whitespace,
    so ``TaskCategory("stem")`` and ``TaskCategory(" Medical ")`` both
    succeed. An unknown label still raises ``ValueError``.
    """

    STEM = "STEM"
    HUMANITIES = "Humanities"
    SOCIAL_SCIENCES = "Social Sciences"
    MEDICAL = "Medical"
    BUSINESS = "Business & Law"
    COMPUTER_SCIENCE = "Computer Science"
    OTHER = "Other"

    @classmethod
    def _missing_(cls, value: object) -> Optional["TaskCategory"]:
        """Resolve labels that differ from a member value only by case/whitespace.

        Enum calls this hook only after the exact value lookup has failed.

        Args:
            value: The value passed to ``TaskCategory(...)``.

        Returns:
            The matching member, or None so that Enum raises ``ValueError``.
        """
        if not isinstance(value, str):
            return None
        wanted = value.strip().casefold()
        for member in cls:
            if member.value.casefold() == wanted:
                return member
        return None


# Task metadata for better discoverability and documentation
#
# Maps every supported MMLUTask to a dict with two keys:
#   "category":    the TaskCategory the task is grouped under
#   "description": a one-line human-readable summary of the task
#
# Insertion order is significant: get_all_tasks() returns the keys in this
# order, and get_task_by_name() returns the first entry whose name contains
# the query, so reordering entries changes which task a partial name
# resolves to.
TASK_METADATA: dict[MMLUTask, dict] = {
    # STEM
    MMLUTask.ELEMENTARY_MATHEMATICS: {
        "category": TaskCategory.STEM,
        "description": "Basic arithmetic, fractions, and elementary algebra",
    },
    MMLUTask.HIGH_SCHOOL_MATHEMATICS: {
        "category": TaskCategory.STEM,
        "description": (
            "High school mathematics problems covering algebra, "
            "geometry, and basic calculus"
        ),
    },
    MMLUTask.COLLEGE_MATHEMATICS: {
        "category": TaskCategory.STEM,
        "description": (
            "College-level mathematics including calculus, "
            "linear algebra, and numerical methods"
        ),
    },
    MMLUTask.ABSTRACT_ALGEBRA: {
        "category": TaskCategory.STEM,
        "description": (
            "Advanced mathematical concepts including groups, rings, and fields"
        ),
    },
    MMLUTask.HIGH_SCHOOL_PHYSICS: {
        "category": TaskCategory.STEM,
        "description": (
            "High school physics problems covering mechanics, "
            "thermodynamics, and basic electricity"
        ),
    },
    MMLUTask.COLLEGE_PHYSICS: {
        "category": TaskCategory.STEM,
        "description": (
            "College-level physics including advanced mechanics, "
            "electromagnetism, and quantum physics"
        ),
    },
    MMLUTask.CONCEPTUAL_PHYSICS: {
        "category": TaskCategory.STEM,
        "description": (
            "Conceptual understanding of physics phenomena without heavy mathematics"
        ),
    },
    MMLUTask.HIGH_SCHOOL_CHEMISTRY: {
        "category": TaskCategory.STEM,
        "description": (
            "High school chemistry covering atomic structure, "
            "reactions, and basic organic chemistry"
        ),
    },
    MMLUTask.COLLEGE_CHEMISTRY: {
        "category": TaskCategory.STEM,
        "description": (
            "College-level chemistry including thermodynamics, "
            "kinetics, and spectroscopy"
        ),
    },
    MMLUTask.HIGH_SCHOOL_BIOLOGY: {
        "category": TaskCategory.STEM,
        "description": (
            "High school biology covering cells, genetics, ecology, and evolution"
        ),
    },
    MMLUTask.COLLEGE_BIOLOGY: {
        "category": TaskCategory.STEM,
        "description": (
            "College-level biology including molecular biology, "
            "physiology, and biotechnology"
        ),
    },
    MMLUTask.ASTRONOMY: {
        "category": TaskCategory.STEM,
        "description": (
            "Astronomy concepts including celestial bodies, "
            "cosmology, and space exploration"
        ),
    },
    MMLUTask.HIGH_SCHOOL_STATISTICS: {
        "category": TaskCategory.STEM,
        "description": (
            "High school statistics covering probability, "
            "distributions, and hypothesis testing"
        ),
    },
    MMLUTask.ELECTRICAL_ENGINEERING: {
        "category": TaskCategory.STEM,
        "description": "Electrical engineering principles, circuits, and systems",
    },
    # Computer Science
    MMLUTask.HIGH_SCHOOL_COMPUTER_SCIENCE: {
        "category": TaskCategory.COMPUTER_SCIENCE,
        "description": "High school computer science concepts and algorithms",
    },
    MMLUTask.COLLEGE_COMPUTER_SCIENCE: {
        "category": TaskCategory.COMPUTER_SCIENCE,
        "description": (
            "College-level computer science including data structures, "
            "algorithms, and theory"
        ),
    },
    MMLUTask.MACHINE_LEARNING: {
        "category": TaskCategory.COMPUTER_SCIENCE,
        "description": "Machine learning concepts, algorithms, and applications",
    },
    MMLUTask.COMPUTER_SECURITY: {
        "category": TaskCategory.COMPUTER_SCIENCE,
        "description": "Computer security principles, threats, and protections",
    },
    # Social Sciences
    MMLUTask.HIGH_SCHOOL_MICROECONOMICS: {
        "category": TaskCategory.SOCIAL_SCIENCES,
        "description": (
            "High school microeconomics concepts including supply/demand "
            "and market structures"
        ),
    },
    MMLUTask.HIGH_SCHOOL_MACROECONOMICS: {
        "category": TaskCategory.SOCIAL_SCIENCES,
        "description": (
            "High school macroeconomics covering GDP, inflation, "
            "and fiscal/monetary policy"
        ),
    },
    MMLUTask.ECONOMETRICS: {
        "category": TaskCategory.SOCIAL_SCIENCES,
        "description": "Statistical methods applied to economic data",
    },
    MMLUTask.HIGH_SCHOOL_PSYCHOLOGY: {
        "category": TaskCategory.SOCIAL_SCIENCES,
        "description": (
            "High school psychology concepts covering cognition, "
            "development, and behavior"
        ),
    },
    MMLUTask.PROFESSIONAL_PSYCHOLOGY: {
        "category": TaskCategory.SOCIAL_SCIENCES,
        "description": (
            "Professional-level psychology theories, methods, and applications"
        ),
    },
    MMLUTask.SOCIOLOGY: {
        "category": TaskCategory.SOCIAL_SCIENCES,
        "description": "Sociological theories, methods, and social phenomena",
    },
    MMLUTask.HUMAN_SEXUALITY: {
        "category": TaskCategory.SOCIAL_SCIENCES,
        "description": "Human sexuality, sexual development, and related topics",
    },
    MMLUTask.HIGH_SCHOOL_GOVERNMENT_AND_POLITICS: {
        "category": TaskCategory.SOCIAL_SCIENCES,
        "description": "High school government and politics concepts",
    },
    MMLUTask.SECURITY_STUDIES: {
        "category": TaskCategory.SOCIAL_SCIENCES,
        "description": "International security studies, conflicts, and strategy",
    },
    MMLUTask.US_FOREIGN_POLICY: {
        "category": TaskCategory.SOCIAL_SCIENCES,
        "description": "US foreign policy history, principles, and applications",
    },
    # Humanities
    MMLUTask.HIGH_SCHOOL_EUROPEAN_HISTORY: {
        "category": TaskCategory.HUMANITIES,
        "description": (
            "High school European history covering major events, figures, and movements"
        ),
    },
    MMLUTask.HIGH_SCHOOL_WORLD_HISTORY: {
        "category": TaskCategory.HUMANITIES,
        "description": (
            "High school world history covering global historical events "
            "and civilizations"
        ),
    },
    MMLUTask.HIGH_SCHOOL_US_HISTORY: {
        "category": TaskCategory.HUMANITIES,
        "description": (
            "High school US history covering American historical events and figures"
        ),
    },
    MMLUTask.PREHISTORY: {
        "category": TaskCategory.HUMANITIES,
        "description": "Human prehistory before written records",
    },
    MMLUTask.HIGH_SCHOOL_GEOGRAPHY: {
        "category": TaskCategory.HUMANITIES,
        "description": "High school geography covering physical and human geography",
    },
    MMLUTask.WORLD_RELIGIONS: {
        "category": TaskCategory.HUMANITIES,
        "description": "Major world religions, beliefs, practices, and history",
    },
    MMLUTask.PHILOSOPHY: {
        "category": TaskCategory.HUMANITIES,
        "description": "Philosophical theories, concepts, and historical figures",
    },
    MMLUTask.MORAL_SCENARIOS: {
        "category": TaskCategory.HUMANITIES,
        "description": "Ethical decision-making in hypothetical scenarios",
    },
    MMLUTask.MORAL_DISPUTES: {
        "category": TaskCategory.HUMANITIES,
        "description": "Arguments on both sides of contentious moral issues",
    },
    MMLUTask.FORMAL_LOGIC: {
        "category": TaskCategory.HUMANITIES,
        "description": "Formal logic principles, rules, and proofs",
    },
    MMLUTask.LOGICAL_FALLACIES: {
        "category": TaskCategory.HUMANITIES,
        "description": "Common logical fallacies and errors in reasoning",
    },
    # Medical
    MMLUTask.CLINICAL_KNOWLEDGE: {
        "category": TaskCategory.MEDICAL,
        "description": "Clinical medical knowledge and patient care",
    },
    MMLUTask.MEDICAL_GENETICS: {
        "category": TaskCategory.MEDICAL,
        "description": "Medical applications of genetics and genomics",
    },
    MMLUTask.PROFESSIONAL_MEDICINE: {
        "category": TaskCategory.MEDICAL,
        "description": "Professional medical practice, diagnosis, and treatment",
    },
    MMLUTask.COLLEGE_MEDICINE: {
        "category": TaskCategory.MEDICAL,
        "description": "College-level medicine and healthcare concepts",
    },
    MMLUTask.ANATOMY: {
        "category": TaskCategory.MEDICAL,
        "description": "Human anatomical structures and systems",
    },
    MMLUTask.VIROLOGY: {
        "category": TaskCategory.MEDICAL,
        "description": "Viruses, viral diseases, and treatments",
    },
    MMLUTask.NUTRITION: {
        "category": TaskCategory.MEDICAL,
        "description": "Nutritional science, diet, and health",
    },
    MMLUTask.HUMAN_AGING: {
        "category": TaskCategory.MEDICAL,
        "description": "Biological and psychological aspects of human aging",
    },
    # Business & Law
    MMLUTask.BUSINESS_ETHICS: {
        "category": TaskCategory.BUSINESS,
        "description": "Ethical principles and dilemmas in business contexts",
    },
    MMLUTask.PROFESSIONAL_ACCOUNTING: {
        "category": TaskCategory.BUSINESS,
        "description": "Professional accounting principles and practices",
    },
    MMLUTask.PROFESSIONAL_LAW: {
        "category": TaskCategory.BUSINESS,
        "description": "Professional legal concepts, cases, and principles",
    },
    MMLUTask.INTERNATIONAL_LAW: {
        "category": TaskCategory.BUSINESS,
        "description": "International legal frameworks, treaties, and cases",
    },
    MMLUTask.JURISPRUDENCE: {
        "category": TaskCategory.BUSINESS,
        "description": "Philosophy and theory of law",
    },
    MMLUTask.MARKETING: {
        "category": TaskCategory.BUSINESS,
        "description": "Marketing principles, strategies, and analysis",
    },
    MMLUTask.MANAGEMENT: {
        "category": TaskCategory.BUSINESS,
        "description": "Management principles, leadership, and organizational behavior",
    },
    MMLUTask.PUBLIC_RELATIONS: {
        "category": TaskCategory.BUSINESS,
        "description": "Public relations practices and strategies",
    },
    # Other
    MMLUTask.GLOBAL_FACTS: {
        "category": TaskCategory.OTHER,
        "description": "Facts about countries, populations, and global phenomena",
    },
    MMLUTask.MISCELLANEOUS: {
        "category": TaskCategory.OTHER,
        "description": "Various topics that don't fit into other categories",
    },
}


def get_all_tasks() -> list[MMLUTask]:
    """Return every MMLU task registered in ``TASK_METADATA``.

    Returns:
        A new list holding all supported tasks, in registry order.
    """
    # Unpacking a dict yields its keys; the result is a fresh list, so
    # callers may mutate it without touching the registry.
    return [*TASK_METADATA]


def get_tasks_by_category(category: TaskCategory) -> list[MMLUTask]:
    """Collect the tasks registered under one category.

    Args:
        category: The category to filter by.

    Returns:
        The tasks whose metadata lists ``category``, in registry order.
        The list is empty when no task belongs to the category.
    """
    selected = []
    for task, info in TASK_METADATA.items():
        if info["category"] == category:
            selected.append(task)
    return selected


def get_task_by_name(name: str) -> Optional[MMLUTask]:
    """Get an MMLU task by its name (case-insensitive).

    The query is normalized before matching: it is lower-cased, hyphens
    become underscores, and runs of whitespace become a single underscore
    (leading/trailing whitespace is dropped). An exact match on the task
    name wins; otherwise the first task, in ``TASK_METADATA`` order, whose
    name contains the query is returned.

    Args:
        name: The name of the task, can be partial match

    Returns:
        The matching MMLU task or None if not found. An empty or
        whitespace-only name never matches.
    """
    normalized = "_".join(name.lower().replace("-", "_").split())
    if not normalized:
        # "" is a substring of every string, so without this guard an empty
        # query would "match" the first registered task.
        return None

    task_names = {task: task.name.lower() for task in TASK_METADATA}

    # First try exact match
    for task, task_name in task_names.items():
        if task_name == normalized:
            return task

    # Then try partial match
    candidates = [
        task for task, task_name in task_names.items() if normalized in task_name
    ]
    if not candidates:
        return None
    if len(candidates) > 1:
        # Keep the historical "first match wins" result, but make the
        # ambiguity visible instead of silently dropping the other tasks.
        logger.warning(
            "Task name %r is ambiguous (matches %s); using %s",
            name,
            ", ".join(task.name for task in candidates),
            candidates[0].name,
        )
    return candidates[0]


def resolve_tasks(task_names: list[str]) -> list[MMLUTask]:
    """Resolve a list of task names to actual MMLU tasks.

    Each entry is first tried as a category label (compared
    case-insensitively, ignoring surrounding whitespace), in which case it
    expands to every task in that category. Otherwise it is looked up as a
    task name via ``get_task_by_name``.

    Args:
        task_names: List of task names provided by the user. An empty list
            selects all tasks.

    Returns:
        List of resolved MMLU tasks without duplicates, in the order they
        were first requested.

    Raises:
        ValueError: If any task name cannot be resolved
    """
    if not task_names:
        return get_all_tasks()

    categories = {category.value.casefold(): category for category in TaskCategory}
    resolved_tasks: list[MMLUTask] = []
    unresolved: list[str] = []

    for name in task_names:
        # Check if it's a category name
        category = categories.get(name.strip().casefold())
        if category is not None:
            resolved_tasks.extend(get_tasks_by_category(category))
            continue

        # Try to resolve as a task name
        task = get_task_by_name(name)
        if task is not None:
            resolved_tasks.append(task)
        else:
            unresolved.append(name)

    if unresolved:
        raise ValueError(f"Could not resolve task names: {', '.join(unresolved)}")

    # dict.fromkeys deduplicates while keeping first-seen order; a set would
    # yield an order that can differ from run to run.
    return list(dict.fromkeys(resolved_tasks))


def list_available_tasks() -> str:
    """Build a human-readable listing of every available task.

    Tasks are grouped under their category heading, each followed by its
    description; categories with no tasks are omitted. Usage examples for
    the ``factly evaluate`` command are appended at the end.

    Returns:
        The listing as a single newline-joined string.
    """
    lines = ["Available MMLU Tasks:"]

    for category in TaskCategory:
        members = get_tasks_by_category(category)
        if members:
            lines.append(f"\n{category.value}:")
            for task in members:
                lines.extend(
                    (
                        f" - {task.name}",
                        f" {TASK_METADATA[task]['description']}",
                    )
                )

    lines.extend(
        [
            "\n\nUsage examples:",
            " factly evaluate --tasks STEM # Run all STEM tasks",
            " factly evaluate --tasks mathematics --tasks physics # Run specific tasks",
        ]
    )
    return "\n".join(lines)