Source code for src.evaluator.evaluator

from src.llm.client import RoccoClient
from src.llm.schemas import EvaluatorOutput, RubricItem
from src.common.utils import _build_rubric, _build_few_shot_examples
from src.prompts.loader import load_prompt, render
import json
import re
import logging
from typing import List, Dict, Any

logger = logging.getLogger(__name__)


class DescriptionEvaluator:
    """Evaluates dataset descriptions against a rubric.

    The evaluator renders a prompt from the rubric, the few-shot examples and
    the draft description, sends it to the LLM client, and converts the reply
    into an ``EvaluatorOutput``. Because model replies are not always clean
    JSON, parsing goes through a chain of progressively looser fallbacks (see
    ``evaluate``).
    """

    # First "{" through last "}" in the reply: JSON embedded in surrounding
    # prose, e.g. "Here is my evaluation:\n{...}".
    _JSON_BLOCK_RE = re.compile(r'(\{[\s\S]*\})')
    # JSON object wrapped in a markdown code fence, optionally tagged "json".
    _FENCED_JSON_RE = re.compile(r'```(?:json)?\s*(\{[\s\S]*?\})\s*```')
    # A single serialized rubric item; last resort when no JSON document in
    # the reply can be decoded as a whole.
    _ITEM_RE = re.compile(
        r'\{"criterion":\s*"([^"]+)",\s*"score":\s*([\d.]+),\s*"explanation":\s*"([^"]*)"\s*\}',
        re.MULTILINE,
    )

    def __init__(
        self,
        model: RoccoClient,
        rubric: List[Dict[str, Any]],
        examples: List[Dict[str, Any]],
    ):
        """Store the LLM client, the rubric and the few-shot examples.

        Args:
            model: Client whose ``send_prompt`` method is used to query the LLM.
            rubric: Rubric entries, formatted into the prompt by ``_build_rubric``.
            examples: Few-shot examples, formatted into the prompt by
                ``_build_few_shot_examples``.
        """
        self.model = model
        self.rubric = rubric
        self.examples = examples

    def build_prompt(self, draft_text: str) -> str:
        """Combine rubric, examples, and draft into the evaluator prompt.

        Args:
            draft_text: The description to be evaluated.

        Returns:
            The ``"user"`` template of the ``"evaluator"`` prompt, rendered
            with the formatted rubric, the formatted examples and the draft.
        """
        rubric_str = _build_rubric(self.rubric)
        examples_str = _build_few_shot_examples(self.examples)

        prompt_data = load_prompt("evaluator")
        return render(
            prompt_data["user"],
            rubric=rubric_str,
            examples=examples_str,
            description=draft_text,
        )

    def evaluate(self, draft_text: str) -> EvaluatorOutput:
        """Call the LLM and return a structured evaluation of ``draft_text``.

        The request is first sent asking for a JSON-object response format; if
        that call raises, it is retried once without the extra parameter.
        The reply is then parsed by trying, in order:

        1. the whole reply as JSON,
        2. the span from the first ``{`` to the last ``}``,
        3. a JSON object inside a markdown code fence,
        4. individual rubric items matched by regex.

        Args:
            draft_text: The description to be evaluated.

        Returns:
            The parsed evaluation. ``total_score`` is the sum of the item
            scores; ``comments`` is ``None`` when the regex fallback was used
            or the reply had no ``"comments"`` key.

        Raises:
            RuntimeError: If the client returns ``None``.
            ValueError: If no parsing strategy can interpret the reply.
        """
        prompt = self.build_prompt(draft_text)
        try:
            raw_resp = self.model.send_prompt(
                prompt, params={"response_format": {"type": "json_object"}}
            )
        except Exception as err:
            # Deliberate best-effort: any failure of the JSON-mode request is
            # answered with one plain retry, but the reason is logged rather
            # than silently discarded.
            logger.warning(
                "JSON-mode request failed (%s); retrying without response_format",
                err,
            )
            raw_resp = self.model.send_prompt(prompt)

        if raw_resp is None:
            raise RuntimeError(
                "LLM returned no response. Check your API key and network connection."
            )

        return self._parse_response(raw_resp)

    @staticmethod
    def _output_from_data(data: Dict[str, Any]) -> EvaluatorOutput:
        """Build an ``EvaluatorOutput`` from a decoded JSON evaluation."""
        rubric_breakdown = [
            RubricItem(
                criterion=item["criterion"],
                score=item["score"],
                explanation=item.get("explanation", ""),
            )
            for item in data["rubric_breakdown"]
        ]
        return EvaluatorOutput(
            total_score=sum(item.score for item in rubric_breakdown),
            rubric_breakdown=rubric_breakdown,
            comments=data.get("comments"),
        )

    def _parse_response(self, raw_resp: str) -> EvaluatorOutput:
        """Run the JSON parsing chain, ending with the item-regex fallback."""
        # ``except Exception`` is intentional at each step of the chain: a
        # decode error, a missing key, a non-dict payload or a model
        # validation error all mean "try the next strategy".
        try:
            return self._output_from_data(json.loads(raw_resp.strip()))
        except Exception as json_err:
            logger.warning("JSON parsing failed: %s", json_err)
            logger.debug("Raw response (first 500 chars): %s", raw_resp[:500])

        extraction_steps = (
            (
                self._JSON_BLOCK_RE,
                "Successfully parsed JSON from mixed-text response",
                "JSON block extraction failed: %s",
            ),
            (
                self._FENCED_JSON_RE,
                "Successfully parsed JSON from markdown code block",
                "Markdown JSON extraction failed: %s",
            ),
        )
        for pattern, success_msg, failure_msg in extraction_steps:
            found = pattern.search(raw_resp)
            if found is None:
                continue
            try:
                output = self._output_from_data(json.loads(found.group(1)))
            except Exception as err:
                logger.warning(failure_msg, err)
            else:
                logger.info(success_msg)
                return output

        return self._parse_with_item_regex(raw_resp)

    def _parse_with_item_regex(self, raw_resp: str) -> EvaluatorOutput:
        """Recover rubric items one by one when no JSON document decodes."""
        matches = list(self._ITEM_RE.finditer(raw_resp))
        if not matches:
            logger.error(
                "Failed to parse response from LLM. Response: %s", raw_resp[:1000]
            )
            raise ValueError(
                "Could not parse LLM response. Expected JSON format. "
                f"Got (first 200 chars): {raw_resp[:200]}"
            )

        # Keyword arguments, matching the JSON paths; positional construction
        # is not accepted by keyword-only model classes.
        rubric_breakdown = [
            RubricItem(
                criterion=match.group(1),
                score=float(match.group(2)),
                explanation=match.group(3),
            )
            for match in matches
        ]
        return EvaluatorOutput(
            total_score=sum(item.score for item in rubric_breakdown),
            rubric_breakdown=rubric_breakdown,
            comments=None,
        )

    def print_evaluation_result(self, evaluation_output: EvaluatorOutput) -> None:
        """Print the total score and each criterion's score and explanation.

        Args:
            evaluation_output: The evaluation to display on standard output.
        """
        print(f"Total Score: {evaluation_output.total_score}")
        print("Justifications:\n")
        print("-" * 80)
        for item in evaluation_output.rubric_breakdown:
            print(f"Criterion: {item.criterion} \t Score: {item.score}")
            print(f"Explanation: {item.explanation}\n")