Compare commits

...

2 Commits

Author SHA1 Message Date
6b9267af02 Move ClickHouse and Qdrant clients to settings
Some checks failed
release / docker (push) Failing after 48s
2025-11-01 17:49:38 +03:00
c8a9a1123c Switch bot agent to Postgres checkpointer 2025-11-01 17:49:38 +03:00
7 changed files with 662 additions and 616 deletions

View File

@ -11,6 +11,7 @@ dependencies = [
"langchain>=0.3.27",
"langchain-openai>=0.3.35",
"langchain-qdrant>=1.1.0",
"langgraph-checkpoint-postgres>=3.0.0",
"psycopg[binary]>=3.2.12",
"pydantic>=2.0",
"pypdf>=6.1.2",

1204
uv.lock

File diff suppressed because it is too large Load Diff

View File

@ -14,6 +14,8 @@ import os
from pathlib import Path
import sentry_sdk
import clickhouse_connect
from qdrant_client import QdrantClient
# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent.parent
@ -90,6 +92,8 @@ DATABASES = {
},
}
# Connection URI for the default Django database, assembled from
# DATABASES['default'] so that non-ORM consumers can connect with a single
# string.
#
# User, password and database name are percent-encoded: raw interpolation
# breaks the URI as soon as a credential contains a reserved character such
# as '@', ':', '/' or '#'.
from urllib.parse import quote as _quote

_default_db = DATABASES["default"]
DB_URI = (
    f"postgres://{_quote(str(_default_db['USER']), safe='')}"
    f":{_quote(str(_default_db['PASSWORD']), safe='')}"
    f"@{_default_db['HOST']}:{_default_db['PORT']}"
    f"/{_quote(str(_default_db['NAME']), safe='')}?sslmode=disable"
)
# Password validation
# https://docs.djangoproject.com/en/5.2/ref/settings/#auth-password-validators
@ -160,3 +164,7 @@ LOGGING = {
},
},
}
# Shared service clients, constructed once when this settings module is
# imported and reused by the modules that import them from here.
# NOTE(review): host, port and URL are hard-coded to the local machine —
# confirm this is intended for non-local deployments.
CLICKHOUSE_CLIENT = clickhouse_connect.create_client(host="127.0.0.1", port=18123)
# NOTE(review): "QDARNT" looks like a misspelling of "QDRANT". The vector
# store module imports the client under this exact name, so a rename has to
# change both places at once.
QDARNT_CLIENT = QdrantClient(url="http://localhost:6333")

View File

@ -7,8 +7,9 @@ from pypdf import PdfReader
from vacancies.main.models import Customer, CustomerCV
from langchain.agents import create_agent
from langchain_openai import ChatOpenAI
from langgraph.checkpoint.memory import InMemorySaver
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
from vacancies.main.vector_store import add_vectors, extract_features
from vacancies.conf.settings import DB_URI
SYSTEM_PROMPT = """
Ты карьерный копилот для ИТ. Ты можешь отвечать на любые вопросы по тематике карьеры.
@ -25,14 +26,6 @@ async def get_user_resume(user_id: int):
return customer_cv.content if customer_cv else ""
# Module-level agent, built once at import time from the chat model, the
# get_user_resume tool and SYSTEM_PROMPT.
# NOTE(review): InMemorySaver presumably keeps conversation checkpoints only
# in this process's memory, so threads would not survive a restart — confirm.
agent = create_agent(
model=ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal"),
tools=[get_user_resume],
system_prompt=SYSTEM_PROMPT,
checkpointer=InMemorySaver(),
)
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
await Customer.objects.aget_or_create(
telegram_id=update.effective_user.id,
@ -46,14 +39,22 @@ async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
async def prompt(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Answer a user's text message with the career-copilot agent.

    Sends a placeholder message, runs the agent on the message text within the
    sender's conversation thread, then replaces the placeholder with the
    agent's final reply.

    Args:
        update: Incoming Telegram update; ``effective_user``,
            ``effective_chat`` and ``message.text`` are read from it.
        context: Handler context; ``context.bot`` is used to send and edit
            messages.
    """
    chat_id = update.effective_chat.id
    user_id = update.effective_user.id

    # Post the placeholder first so the user gets feedback while the model runs.
    message = await context.bot.send_message(chat_id, "📝 Обрабатываю твой запрос. Пожалуйста, подождите...")

    # The agent is built and invoked exactly once, inside the checkpointer's
    # context: the saver is only usable while its connection is open.
    async with AsyncPostgresSaver.from_conn_string(DB_URI) as checkpointer:
        agent = create_agent(
            model=ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal"),
            tools=[get_user_resume],
            system_prompt=SYSTEM_PROMPT,
            checkpointer=checkpointer,
        )
        response = await agent.ainvoke(
            # The user id is prepended so the model can pass it to get_user_resume.
            input={"messages": [{"role": "user", "content": f'user_id = {user_id}\n{update.message.text}'}]},
            # One checkpoint thread per Telegram user.
            config={"configurable": {"thread_id": user_id}},
        )

    # Replace the placeholder with the last message produced by the agent.
    await context.bot.editMessageText(response['messages'][-1].content, chat_id, message.id)
async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:

View File

@ -1,8 +1,7 @@
from django.core.management import BaseCommand
import clickhouse_connect
from vacancies.main.vector_store import add_vectors, extract_features, client as qdrant
from vacancies.conf.settings import CLICKHOUSE_CLIENT
clickhouse_client = clickhouse_connect.create_client(host="127.0.0.1", port=18123)
query = """
SELECT id, chat_username, telegram_id, message, timestamp
@ -40,7 +39,7 @@ class Command(BaseCommand):
next_page_offset = response[1]
exist_points_set = tuple(set(exist_points_ids))
result_rows = clickhouse_client.query(query, parameters={"exist_points": exist_points_set}).result_rows
result_rows = CLICKHOUSE_CLIENT.query(query, parameters={"exist_points": exist_points_set}).result_rows
result_rows_len = len(result_rows)
for index, row in enumerate(result_rows):
(id, chat_username, telegram_id, message, timestamp) = row

View File

@ -1,9 +1,19 @@
import sys
import asyncio
from django.core.management import BaseCommand
from vacancies.main.bot import application
from langgraph.checkpoint.postgres import PostgresSaver
from vacancies.conf.settings import DB_URI
class Command(BaseCommand):
    """Management command that prepares checkpoint storage and runs the bot."""

    help = "Run bot"

    def handle(self, *args, **options):
        """Initialise the Postgres checkpointer, then start polling.

        Args:
            *args: Unused positional arguments passed by Django.
            **options: Unused parsed command-line options.
        """
        # Run the saver's one-time setup() with the synchronous client before
        # the bot starts. The connection is released as soon as the block
        # exits, so it is not held open for the lifetime of the polling loop.
        with PostgresSaver.from_conn_string(DB_URI) as checkpointer:
            checkpointer.setup()

        # NOTE(review): the selector policy is presumably needed because the
        # async Postgres driver does not work with Windows' default proactor
        # event loop — confirm against the psycopg documentation.
        if sys.platform == "win32":
            asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

        application.run_polling()

View File

@ -1,11 +1,10 @@
from qdrant_client import models
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from qdrant_client import QdrantClient
from qdrant_client.models import Filter
from vacancies.main.models import VacancyFeatures
from vacancies.conf.settings import QDARNT_CLIENT
client = QdrantClient(url="http://localhost:6333")
FEATURE_NAMES = [
"job_title", "employment_type", "work_format", "experience", "position_level", "industry", "tech_stack",
@ -32,13 +31,13 @@ vectors_config = {
name: models.VectorParams(size=3072, distance=models.Distance.COSINE) for name in FEATURE_NAMES
}
# Make sure both collections exist before any vectors are written or queried.
# Each is created with the same per-feature named-vector configuration.
# Uses the shared client from settings; checked in the original order.
for _collection_name in ("vacancies", "cvs"):
    if not QDARNT_CLIENT.collection_exists(_collection_name):
        QDARNT_CLIENT.create_collection(
            collection_name=_collection_name,
            vectors_config=vectors_config,
        )
@ -71,7 +70,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
max_similarities = {}
for name, vec in vectors.items():
if any(v != 0 for v in vec):
results = client.query_points(
results = QDARNT_CLIENT.query_points(
collection_name="vacancies",
query=vec,
using=name,
@ -93,7 +92,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
if scored and scored[0]["score"] > 33: # threshold
return
client.upsert(
QDARNT_CLIENT.upsert(
collection_name=collection_name,
points=[
models.PointStruct(
@ -106,7 +105,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
def search_similarities(query_filter: Filter, cv_id: int):
cv = client.retrieve(
cv = QDARNT_CLIENT.retrieve(
collection_name="cvs",
ids=[cv_id],
with_vectors=True,
@ -116,7 +115,7 @@ def search_similarities(query_filter: Filter, cv_id: int):
vacancies_content = {}
for name, vec in cv.vector.items():
if any(v != 0 for v in vec):
results = client.query_points(
results = QDARNT_CLIENT.query_points(
collection_name="vacancies",
query=vec,
using=name,