Source code for src.llm.schemas

from pydantic import BaseModel, Field
from dataclasses import dataclass, field
from typing import List, Optional, Dict, Any
from datetime import datetime

@dataclass
class RubricItem:
    """One criterion from the evaluation rubric, with its score and explanation.

    Attributes:
        criterion: The rubric criterion this item refers to.
        score: Score given for the criterion.
        explanation: Explanation accompanying the score, or ``None`` when
            no explanation was supplied.
    """

    criterion: str
    score: float
    # The default is None, so the annotation must be Optional[str] rather
    # than plain str; otherwise type checkers reject the default value.
    explanation: Optional[str] = None
@dataclass
class EvaluatorOutput:
    """Structured output from DescriptionEvaluator.

    Attributes:
        total_score: Overall score reported by the evaluator.
        rubric_breakdown: Per-criterion results, one ``RubricItem`` each.
        comments: Optional free-form comments; ``None`` when absent.
    """

    # Required fields come first; dataclasses forbid non-default fields
    # after a field that has a default.
    total_score: float
    rubric_breakdown: List[RubricItem]

    comments: Optional[str] = None
class Citation(BaseModel):
    """Citation schema for each statement in the improved description."""

    # NOTE: pydantic uses the class docstring above as the model's JSON-schema
    # description, so it is kept to a single short sentence.

    # The claim being cited, as it appears in the enhanced description.
    statement: str = Field(description="The statement from the enhanced description.")

    # Where the information came from: original description, uploaded
    # document, or user feedback (free-form string, not validated here).
    source: str = Field(
        description="Source of the added information (original_description, uploaded_document, or user_feedback)"
    )

    # Verbatim supporting text taken from the source.
    quote: str = Field(description="The exact quote or statement from the source.")

    # The remaining fields only apply to uploaded_document sources and
    # default to None otherwise.
    doc_title: Optional[str] = Field(
        default=None,
        description="Document title (filename without extension) for uploaded_document sources",
    )
    page: Optional[int] = Field(
        default=None,
        description="Page number in source document for uploaded_document sources",
    )
    chunk_index: Optional[int] = Field(
        default=None,
        description="Chunk index for uploaded_document sources",
    )
class EditorOutput(BaseModel):
    """Output from the description editor"""

    # --- Required text fields -------------------------------------------
    original_text: str = Field(
        description="Original description text",
    )
    suggested_text: str = Field(
        description="Improved description text",
    )
    rationale: str = Field(
        description="Explanation of changes made",
    )

    # --- Optional collections (each instance gets its own empty list) ----
    # Despite the singular name, this holds a list of Citation objects.
    citation: List[Citation] = Field(
        default_factory=list,
        description="Citations for added information",
    )
    # Loosely typed dicts; the expected keys are named in the description.
    context_used: List[Dict[str, Any]] = Field(
        default_factory=list,
        description="Metadata for retrieved context chunks (doc_title, page, chunk_index, snippet)",
    )
class EditingSession(BaseModel):
    """Schema for saving/loading editing sessions"""

    # --- Required fields --------------------------------------------------
    metadata: Dict[str, Any] = Field(
        description="Session metadata",
    )
    # Stored as a string, not a datetime object.
    created_at: str = Field(
        description="ISO format timestamp of session creation",
    )
    rubric: Dict[str, Any] = Field(
        description="Evaluation rubric used in this session",
    )

    # --- Optional description snapshots ------------------------------------
    original_description: Optional[str] = Field(
        default=None,
        description="The original description being edited",
    )
    current_description: Optional[str] = Field(
        default=None,
        description="The current version of the description",
    )

    # --- Collections with per-instance empty defaults ----------------------
    conversation_history: List[Dict[str, str]] = Field(
        default_factory=list,
        description="History of user feedback and assistant responses",
    )
    config: Dict[str, Any] = Field(
        default_factory=dict,
        description="Configuration settings like use_rag and top_k_context",
    )

    def get_summary(self) -> str:
        """Build a multi-line, human-readable overview of this session.

        Returns:
            A string of newline-terminated lines: creation timestamp, number
            of conversation turns, the length of each non-empty description,
            and whether ``config['use_rag']`` is set (``False`` when missing).
        """
        parts = [
            f"Session created: {self.created_at}\n",
            f"Conversation turns: {len(self.conversation_history)}\n",
        ]

        # Description lengths are reported only when the text is non-empty
        # (None and "" are both skipped).
        if self.original_description:
            parts.append(
                f"Original description length: {len(self.original_description)} chars\n"
            )
        if self.current_description:
            parts.append(
                f"Current description length: {len(self.current_description)} chars\n"
            )

        parts.append(f"RAG enabled: {self.config.get('use_rag', False)}\n")
        return "".join(parts)
@dataclass
class PDFChunk:
    """A single text chunk extracted from a PDF, optionally with its embedding vector.

    Attributes:
        chunk_id: Identifier of the chunk.
        text: Text content of the chunk.
        embedding: Embedding vector as a list of floats, or ``None`` if no
            embedding has been attached.
        source_pdf: Source PDF for the chunk, or ``None`` if not recorded.
    """

    # Required fields.
    chunk_id: str
    text: str

    # Optional fields; both default to None.
    embedding: Optional[List[float]] = None
    source_pdf: Optional[str] = None