From 6b9267af02647662cf1eeb017d6e35108436ec3c Mon Sep 17 00:00:00 2001
From: estromenko
Date: Sat, 1 Nov 2025 17:49:26 +0300
Subject: [PATCH] Move ClickHouse and Qdrant clients to settings

---
Review notes (text in this section is ignored by `git am`):
- vector_store.py no longer defines `client`, so the management command
  must not import `client as qdrant` from it (that import would raise
  ImportError). The command now takes the Qdrant client from settings
  under the same local alias `qdrant`.
- The settings constant is spelled QDRANT_CLIENT (was QDARNT_CLIENT) at
  its definition and at every usage in this patch.
- Blob hashes on the `index` lines were not regenerated after these
  adjustments.

 vacancies/conf/settings.py                    |  6 ++++++
 ...ollect_vacancies_from_telegram_messages.py |  7 +++----
 vacancies/main/vector_store.py                | 19 +++++++++----------
 3 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/vacancies/conf/settings.py b/vacancies/conf/settings.py
index ae4fdf2..be1898f 100644
--- a/vacancies/conf/settings.py
+++ b/vacancies/conf/settings.py
@@ -14,6 +14,8 @@ import os
 from pathlib import Path
 
 import sentry_sdk
+import clickhouse_connect
+from qdrant_client import QdrantClient
 
 # Build paths inside the project like this: BASE_DIR / 'subdir'.
 BASE_DIR = Path(__file__).resolve().parent.parent.parent
@@ -162,3 +164,7 @@ LOGGING = {
         },
     },
 }
+
+CLICKHOUSE_CLIENT = clickhouse_connect.create_client(host="127.0.0.1", port=18123)
+
+QDRANT_CLIENT = QdrantClient(url="http://localhost:6333")
diff --git a/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py b/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py
index a96c2b8..21f67b5 100644
--- a/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py
+++ b/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py
@@ -1,8 +1,7 @@
 from django.core.management import BaseCommand
-import clickhouse_connect
-from vacancies.main.vector_store import add_vectors, extract_features, client as qdrant
+from vacancies.main.vector_store import add_vectors, extract_features
+from vacancies.conf.settings import CLICKHOUSE_CLIENT, QDRANT_CLIENT as qdrant
 
-clickhouse_client = clickhouse_connect.create_client(host="127.0.0.1", port=18123)
 
 query = """
 SELECT id, chat_username, telegram_id, message, timestamp
@@ -40,7 +39,7 @@ class Command(BaseCommand):
             next_page_offset = response[1]
 
         exist_points_set = tuple(set(exist_points_ids))
-        result_rows = clickhouse_client.query(query, parameters={"exist_points": exist_points_set}).result_rows
+        result_rows = CLICKHOUSE_CLIENT.query(query, parameters={"exist_points": exist_points_set}).result_rows
         result_rows_len = len(result_rows)
         for index, row in enumerate(result_rows):
             (id, chat_username, telegram_id, message, timestamp) = row
diff --git a/vacancies/main/vector_store.py b/vacancies/main/vector_store.py
index eecae07..1b287bd 100644
--- a/vacancies/main/vector_store.py
+++ b/vacancies/main/vector_store.py
@@ -1,11 +1,10 @@
 from qdrant_client import models
 from langchain_openai import OpenAIEmbeddings
 from langchain_openai import ChatOpenAI
-from qdrant_client import QdrantClient
 from qdrant_client.models import Filter
 from vacancies.main.models import VacancyFeatures
+from vacancies.conf.settings import QDRANT_CLIENT
 
-client = QdrantClient(url="http://localhost:6333")
 
 FEATURE_NAMES = [
     "job_title", "employment_type", "work_format", "experience", "position_level", "industry", "tech_stack",
@@ -32,13 +31,13 @@ vectors_config = {
     name: models.VectorParams(size=3072, distance=models.Distance.COSINE) for name in FEATURE_NAMES
 }
 
-if not client.collection_exists("vacancies"):
-    client.create_collection(
+if not QDRANT_CLIENT.collection_exists("vacancies"):
+    QDRANT_CLIENT.create_collection(
         collection_name="vacancies", vectors_config=vectors_config
     )
 
-if not client.collection_exists("cvs"):
-    client.create_collection(
+if not QDRANT_CLIENT.collection_exists("cvs"):
+    QDRANT_CLIENT.create_collection(
         collection_name="cvs", vectors_config=vectors_config
     )
 
@@ -71,7 +70,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
     max_similarities = {}
     for name, vec in vectors.items():
         if any(v != 0 for v in vec):
-            results = client.query_points(
+            results = QDRANT_CLIENT.query_points(
                 collection_name="vacancies",
                 query=vec,
                 using=name,
@@ -93,7 +92,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
     if scored and scored[0]["score"] > 33:  # threshold
         return
 
-    client.upsert(
+    QDRANT_CLIENT.upsert(
         collection_name=collection_name,
         points=[
             models.PointStruct(
@@ -106,7 +105,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
 
 
 def search_similarities(query_filter: Filter, cv_id: int):
-    cv = client.retrieve(
+    cv = QDRANT_CLIENT.retrieve(
         collection_name="cvs",
         ids=[cv_id],
         with_vectors=True,
@@ -116,7 +115,7 @@ def search_similarities(query_filter: Filter, cv_id: int):
     vacancies_content = {}
     for name, vec in cv.vector.items():
         if any(v != 0 for v in vec):
-            results = client.query_points(
+            results = QDRANT_CLIENT.query_points(
                 collection_name="vacancies",
                 query=vec,
                 using=name,