# Source code for src.evaluator.evaluator

from src.llm.client import RoccoClient
from src.llm.schemas import EvaluatorOutput, RubricItem
from src.common.utils import _build_rubric, _build_few_shot_examples
from src.prompts.loader import load_prompt, render
import json
import re
import logging
from typing import List, Dict, Any

logger = logging.getLogger(__name__)



# [docs]
class DescriptionEvaluator:
    """Evaluates dataset descriptions against a rubric.

    The evaluator renders the "evaluator" prompt template with the rubric,
    the few-shot examples and the draft description, sends the prompt to the
    model, and converts the reply into an ``EvaluatorOutput`` whose
    ``rubric_breakdown`` holds one ``RubricItem`` (criterion, score,
    explanation) per scored criterion.
    """

    # Greedy span from the first "{" to the last "}": recovers a JSON object
    # embedded in surrounding text (e.g. "Here is my evaluation:\n{...}").
    _BRACED_JSON = re.compile(r'(\{[\s\S]*\})')

    # JSON object inside a markdown code fence (Gemini often wraps in ```json).
    _FENCED_JSON = re.compile(r'```(?:json)?\s*(\{[\s\S]*?\})\s*```')

    # Last resort: individual flat rubric-item objects. The score token only
    # accepts well-formed decimals, so a malformed number such as "1.2.3"
    # makes that single item unmatched instead of crashing float().
    _RUBRIC_ITEM = re.compile(
        r'\{"criterion":\s*"([^"]+)",\s*"score":\s*(\d+(?:\.\d*)?|\.\d+),'
        r'\s*"explanation":\s*"([^"]*)"\s*\}'
    )

    def __init__(self, model: RoccoClient, rubric: List[Dict[str, Any]], examples: List[Dict[str, Any]]):
        """Store the model client, the rubric and the few-shot examples.

        Args:
            model: Client whose ``send_prompt`` method is used by ``evaluate``.
            rubric: Rubric entries, passed to ``_build_rubric`` when prompting.
            examples: Few-shot examples, passed to ``_build_few_shot_examples``
                when prompting.
        """
        self.model = model
        self.rubric = rubric
        self.examples = examples

    def build_prompt(self, draft_text: str) -> str:
        """Combine rubric, examples, and draft into the evaluator prompt.

        Args:
            draft_text: The description to be evaluated.

        Returns:
            The "user" template of the "evaluator" prompt rendered with the
            rubric, the examples and ``draft_text``.
        """
        rubric_str = _build_rubric(self.rubric)
        examples_str = _build_few_shot_examples(self.examples)

        # Load prompt template and render
        prompt_data = load_prompt("evaluator")
        return render(
            prompt_data["user"],
            rubric=rubric_str,
            examples=examples_str,
            description=draft_text,
        )

    def evaluate(self, draft_text: str) -> EvaluatorOutput:
        """Call the LLM and return a structured evaluation.

        The reply is parsed with progressively more lenient strategies:

        1. the whole (stripped) reply as JSON;
        2. the outermost ``{...}`` span found in the reply;
        3. a JSON object inside a markdown code fence;
        4. a regex scan for individual rubric-item objects.

        Args:
            draft_text: The description to be evaluated.

        Returns:
            An ``EvaluatorOutput`` whose ``total_score`` is the sum of the
            item scores. ``comments`` is ``None`` when the regex scan (step 4)
            produced the result.

        Raises:
            RuntimeError: If the model returns ``None``.
            ValueError: If no strategy can extract any rubric item.
        """
        prompt = self.build_prompt(draft_text)
        try:
            raw_resp = self.model.send_prompt(
                prompt, params={"response_format": {"type": "json_object"}}
            )
        except Exception as send_err:
            # Best effort: retry as a plain request, but leave a trace of the
            # first failure instead of swallowing it silently.
            logger.warning(
                "Request with JSON response_format failed (%s); retrying without it",
                send_err,
            )
            raw_resp = self.model.send_prompt(prompt)

        if raw_resp is None:
            raise RuntimeError("LLM returned no response. Check your API key and network connection.")

        # Strategy 1: the reply is pure JSON.
        try:
            return self._build_output(json.loads(raw_resp.strip()))
        except Exception as json_err:
            logger.warning("JSON parsing failed: %s", json_err)
            logger.debug("Raw response (first 500 chars): %s", raw_resp[:500])

        # Strategies 2 and 3: pull a JSON object out of the surrounding text.
        fallbacks = (
            (
                self._BRACED_JSON,
                "JSON block extraction failed",
                "Successfully parsed JSON from mixed-text response",
            ),
            (
                self._FENCED_JSON,
                "Markdown JSON extraction failed",
                "Successfully parsed JSON from markdown code block",
            ),
        )
        for pattern, failure_msg, success_msg in fallbacks:
            found = pattern.search(raw_resp)
            if found is None:
                continue
            try:
                result = self._build_output(json.loads(found.group(1)))
            except Exception as extract_err:
                logger.warning("%s: %s", failure_msg, extract_err)
            else:
                logger.info(success_msg)
                return result

        # Strategy 4: scan for individual rubric items.
        return self._parse_rubric_items(raw_resp)

    @staticmethod
    def _build_output(data: Any) -> EvaluatorOutput:
        """Convert a decoded JSON payload into an ``EvaluatorOutput``.

        ``data`` must be a mapping with a "rubric_breakdown" list whose
        entries carry "criterion" and "score" ("explanation" defaults to an
        empty string). Any other shape raises, which ``evaluate`` treats as
        "try the next strategy".
        """
        items = [
            RubricItem(
                criterion=entry["criterion"],
                score=entry["score"],
                explanation=entry.get("explanation", ""),
            )
            for entry in data["rubric_breakdown"]
        ]
        return EvaluatorOutput(
            total_score=sum(item.score for item in items),
            rubric_breakdown=items,
            comments=data.get("comments"),
        )

    def _parse_rubric_items(self, raw_resp: str) -> EvaluatorOutput:
        """Build an ``EvaluatorOutput`` from regex-matched rubric items.

        Raises:
            ValueError: If ``raw_resp`` contains no recognisable rubric item.
        """
        matches = list(self._RUBRIC_ITEM.finditer(raw_resp))
        if not matches:
            logger.error("Failed to parse response from LLM. Response: %s", raw_resp[:1000])
            raise ValueError(
                f"Could not parse LLM response. Expected JSON format. "
                f"Got (first 200 chars): {raw_resp[:200]}"
            )

        items = []
        total_score = 0
        for match in matches:
            score = float(match.group(2))
            # Keyword arguments, consistent with the JSON paths; positional
            # construction breaks if the schema class is keyword-only.
            items.append(
                RubricItem(
                    criterion=match.group(1),
                    score=score,
                    explanation=match.group(3),
                )
            )
            total_score += score

        return EvaluatorOutput(
            total_score=total_score,
            rubric_breakdown=items,
            comments=None,
        )

    def print_evaluation_result(self, evaluation_output: EvaluatorOutput) -> None:
        """Print the total score followed by each criterion's score and explanation.

        Args:
            evaluation_output: The evaluation to display; its ``total_score``
                and ``rubric_breakdown`` attributes are read.
        """
        print(f"Total Score: {evaluation_output.total_score}")
        print("Justifications:\n")
        print("-" * 80)
        for item in evaluation_output.rubric_breakdown:
            print(f"Criterion: {item.criterion} \t Score: {item.score}")
            print(f"Explanation: {item.explanation}\n")