145b0cc96f
- Phase 12: FeedbackRepository + td_feedback 테이블, Gradio 👍/👎 이벤트, run_id 추적, LangSmith create_feedback() 연동 - Phase 13: 커스텀 _SemanticSplitter 제거 → langchain_experimental.SemanticChunker 교체, buffer_size/threshold_type 환경변수 적용 - Phase 13-B: RerankService (Cross-Encoder), RetrieverService.search()에 reranker 통합, tools.py as_retriever() → search() 전환 - Bug 5: mlx_chat_model enable_thinking 런타임 오버라이드, agent_service stream_mode=["messages","custom"] 이중 스트림, thinking 토큰 custom 이벤트로 emit - ROADMAP: LLM 모델명 8B 반영, RAG에 Reranker 추가, 추천 진행 순서 갱신 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
76 lines
2.6 KiB
Python
76 lines
2.6 KiB
Python
from langchain_core.documents import Document
|
|
from langchain_qdrant import QdrantVectorStore
|
|
from qdrant_client import QdrantClient
|
|
from qdrant_client.models import Filter, FieldCondition, MatchValue, FilterSelector
|
|
|
|
|
|
class RetrieverService:
    """Qdrant vector search service.

    Supports both LangGraph tool usage and direct search.
    """

    # Number of points requested per ``scroll`` page in ``list_documents``.
    _SCROLL_PAGE_SIZE = 200

    def __init__(
        self,
        embeddings,
        qdrant_url: str,
        collection_name: str,
        top_k: int,
        reranker=None,
        rerank_fetch_k: int = 10,
    ):
        """Create the Qdrant client and the vector store bound to it.

        Args:
            embeddings: Embedding object handed to ``QdrantVectorStore``.
            qdrant_url: URL passed to ``QdrantClient``.
            collection_name: Collection used for search, listing and deletion.
            top_k: Number of documents returned to the caller.
            reranker: Optional object exposing
                ``rerank(query, docs, top_k=...)``. When ``None``, results
                come straight from the similarity search.
            rerank_fetch_k: Number of candidates fetched from the store
                before reranking.
        """
        self._client = QdrantClient(url=qdrant_url)
        self._collection_name = collection_name
        self._store = QdrantVectorStore(
            client=self._client,
            collection_name=collection_name,
            embedding=embeddings,
        )
        self._top_k = top_k
        self._reranker = reranker
        self._rerank_fetch_k = rerank_fetch_k

    def as_retriever(self):
        """Return the store's retriever configured with ``k = top_k``.

        Note that this path does not go through the reranker.
        """
        return self._store.as_retriever(search_kwargs={"k": self._top_k})

    def search(self, query: str) -> list[Document]:
        """Return up to ``top_k`` documents for *query*.

        Without a reranker this is a plain similarity search. With one, a
        wider candidate set is fetched and the reranker trims it to
        ``top_k``.
        """
        if self._reranker is None:
            return self._store.similarity_search(query, k=self._top_k)

        # Never fetch fewer candidates than we intend to return; otherwise a
        # small ``rerank_fetch_k`` would silently cap the result below top_k.
        fetch_k = max(self._rerank_fetch_k, self._top_k)
        candidates = self._store.similarity_search(query, k=fetch_k)
        return self._reranker.rerank(query, candidates, top_k=self._top_k)

    def list_documents(self) -> list[str]:
        """Return the sorted, unique file paths stored in Qdrant.

        Scrolls through the whole collection page by page and collects the
        ``metadata.source`` value of every point that has one.
        """
        sources: set[str] = set()
        offset = None
        while True:
            points, offset = self._client.scroll(
                collection_name=self._collection_name,
                with_payload=True,
                limit=self._SCROLL_PAGE_SIZE,
                offset=offset,
            )
            for point in points:
                metadata = (point.payload or {}).get("metadata")
                # Payloads may carry ``"metadata": None`` (or another
                # non-dict value); skip those instead of crashing.
                if not isinstance(metadata, dict):
                    continue
                source = metadata.get("source")
                if source:
                    sources.add(source)
            # ``scroll`` signals the last page with a ``None`` next offset.
            if offset is None:
                return sorted(sources)

    def delete_document(self, source: str) -> None:
        """Delete every chunk stored for the file path *source* from Qdrant.

        Deletion is best-effort: failures are logged with their traceback
        and not propagated to the caller.
        """
        try:
            self._client.delete(
                collection_name=self._collection_name,
                points_selector=FilterSelector(
                    filter=Filter(
                        must=[
                            FieldCondition(
                                key="metadata.source",
                                match=MatchValue(value=source),
                            )
                        ]
                    )
                ),
            )
        except Exception:
            # Keep the original no-raise contract, but leave a trace so a
            # failed deletion is not invisible.
            import logging

            logging.getLogger(__name__).exception(
                "Failed to delete document %r from collection %r",
                source,
                self._collection_name,
            )
|