Source code for src.retriever.retriever

from typing import List, Optional, Union
from pathlib import Path
from langchain_core.documents import Document
from langchain_community.vectorstores import FAISS
from langchain_core.vectorstores import VectorStore
from src.ingestor.embedder import DocumentEmbedder


class VectorStoreManager:
    """Manages vector store operations (create, save, load, query).

    The manager owns at most one vector store at a time, held in
    ``self.vector_store``. It stays ``None`` until either
    ``create_from_documents`` or ``load`` succeeds; every other operation
    raises ``ValueError`` while no store is present.
    """

    def __init__(self, embedder: DocumentEmbedder):
        """
        Initialize vector store manager.

        Args:
            embedder: DocumentEmbedder instance to use for vectorization
        """
        self.embedder = embedder
        # Populated by create_from_documents() or load().
        self.vector_store: Optional[VectorStore] = None

    def create_from_documents(self, documents: List[Document]) -> VectorStore:
        """
        Create a new FAISS vector store from documents.

        Any store previously held by this manager is replaced.

        Args:
            documents: List of Document objects to index

        Returns:
            Created vector store

        Raises:
            ValueError: If ``documents`` is empty.
        """
        # FAISS sizes its index from the first embedding, so an empty batch
        # would otherwise fail deep inside the library with an opaque
        # IndexError. Fail early with an actionable message instead.
        if not documents:
            raise ValueError("Cannot create a vector store from an empty document list")

        # NOTE(review): this reads the `embeddings` attribute while load()
        # calls `get_embeddings()`; confirm against DocumentEmbedder that both
        # yield the same embedding model, then settle on one accessor.
        self.vector_store = FAISS.from_documents(
            documents,
            self.embedder.embeddings,
        )
        return self.vector_store

    def add_documents(self, documents: List[Document]) -> None:
        """
        Add documents to existing vector store.

        Args:
            documents: List of Document objects to add

        Raises:
            ValueError: If no vector store has been created or loaded yet.
        """
        if self.vector_store is None:
            raise ValueError("Vector store not initialized. Call create_from_documents first.")
        self.vector_store.add_documents(documents)

    def save(self, path: Union[str, Path]) -> None:
        """
        Save vector store to disk.

        The target directory (and any missing parents) is created if needed.

        Args:
            path: Directory path to save the vector store

        Raises:
            ValueError: If there is no vector store to save.
        """
        if self.vector_store is None:
            raise ValueError("No vector store to save")

        path = Path(path)
        path.mkdir(parents=True, exist_ok=True)
        self.vector_store.save_local(str(path))

    def load(self, path: Union[str, Path]) -> VectorStore:
        """
        Load vector store from disk.

        Any store previously held by this manager is replaced.

        Args:
            path: Directory path containing the saved vector store

        Returns:
            Loaded vector store

        Raises:
            FileNotFoundError: If ``path`` does not exist.
        """
        path = Path(path)
        if not path.exists():
            raise FileNotFoundError(f"Vector store not found at: {path}")

        # SECURITY: allow_dangerous_deserialization=True lets FAISS unpickle
        # the stored docstore, which can execute arbitrary code. Only load
        # directories written by a trusted process (e.g. this class's save()).
        self.vector_store = FAISS.load_local(
            str(path),
            self.embedder.get_embeddings(),
            allow_dangerous_deserialization=True,
        )
        print(f"Vector store loaded from: {path}")
        return self.vector_store

    def similarity_search_with_score(
        self,
        query: str,
        k: int = 4,
    ) -> List[tuple[Document, float]]:
        """
        Search for similar documents with similarity scores.

        Args:
            query: Query text
            k: Number of results to return

        Returns:
            List of (document, score) tuples

        Raises:
            ValueError: If no vector store has been created or loaded yet.
        """
        if self.vector_store is None:
            raise ValueError("Vector store not initialized")
        return self.vector_store.similarity_search_with_score(query, k=k)

    def get_vector_store(self) -> VectorStore:
        """Get the underlying vector store object.

        Raises:
            ValueError: If no vector store has been created or loaded yet.
        """
        if self.vector_store is None:
            raise ValueError("Vector store not initialized")
        return self.vector_store