Move ClickHouse and Qdrant clients to settings
Some checks failed
release / docker (push) Failing after 48s

This commit is contained in:
estromenko 2025-11-01 17:49:26 +03:00
parent c8a9a1123c
commit 6b9267af02
3 changed files with 17 additions and 13 deletions

View File

@ -14,6 +14,8 @@ import os
from pathlib import Path
import sentry_sdk
import clickhouse_connect
from qdrant_client import QdrantClient
# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent.parent
@ -162,3 +164,7 @@ LOGGING = {
},
},
}
# Shared service clients, constructed once when this settings module is imported.
# Endpoints default to the local development values used previously and may be
# overridden through environment variables, so deployments need no code change.
CLICKHOUSE_CLIENT = clickhouse_connect.create_client(
    host=os.environ.get("CLICKHOUSE_HOST", "127.0.0.1"),
    port=int(os.environ.get("CLICKHOUSE_PORT", "18123")),
)
QDRANT_CLIENT = QdrantClient(url=os.environ.get("QDRANT_URL", "http://localhost:6333"))
# The misspelled name is kept as an alias because other modules import it.
QDARNT_CLIENT = QDRANT_CLIENT

View File

@ -1,8 +1,7 @@
from django.core.management import BaseCommand
import clickhouse_connect
from vacancies.main.vector_store import add_vectors, extract_features, client as qdrant
from vacancies.conf.settings import CLICKHOUSE_CLIENT
clickhouse_client = clickhouse_connect.create_client(host="127.0.0.1", port=18123)
query = """
SELECT id, chat_username, telegram_id, message, timestamp
@ -40,7 +39,7 @@ class Command(BaseCommand):
next_page_offset = response[1]
exist_points_set = tuple(set(exist_points_ids))
result_rows = clickhouse_client.query(query, parameters={"exist_points": exist_points_set}).result_rows
result_rows = CLICKHOUSE_CLIENT.query(query, parameters={"exist_points": exist_points_set}).result_rows
result_rows_len = len(result_rows)
for index, row in enumerate(result_rows):
(id, chat_username, telegram_id, message, timestamp) = row

View File

@ -1,11 +1,10 @@
from qdrant_client import models
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from qdrant_client import QdrantClient
from qdrant_client.models import Filter
from vacancies.main.models import VacancyFeatures
from vacancies.conf.settings import QDARNT_CLIENT
client = QdrantClient(url="http://localhost:6333")
FEATURE_NAMES = [
"job_title", "employment_type", "work_format", "experience", "position_level", "industry", "tech_stack",
@ -32,13 +31,13 @@ vectors_config = {
name: models.VectorParams(size=3072, distance=models.Distance.COSINE) for name in FEATURE_NAMES
}
if not client.collection_exists("vacancies"):
client.create_collection(
if not QDARNT_CLIENT.collection_exists("vacancies"):
QDARNT_CLIENT.create_collection(
collection_name="vacancies",
vectors_config=vectors_config
)
if not client.collection_exists("cvs"):
client.create_collection(
if not QDARNT_CLIENT.collection_exists("cvs"):
QDARNT_CLIENT.create_collection(
collection_name="cvs",
vectors_config=vectors_config
)
@ -71,7 +70,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
max_similarities = {}
for name, vec in vectors.items():
if any(v != 0 for v in vec):
results = client.query_points(
results = QDARNT_CLIENT.query_points(
collection_name="vacancies",
query=vec,
using=name,
@ -93,7 +92,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
if scored and scored[0]["score"] > 33: # threshold
return
client.upsert(
QDARNT_CLIENT.upsert(
collection_name=collection_name,
points=[
models.PointStruct(
@ -106,7 +105,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
def search_similarities(query_filter: Filter, cv_id: int):
cv = client.retrieve(
cv = QDARNT_CLIENT.retrieve(
collection_name="cvs",
ids=[cv_id],
with_vectors=True,
@ -116,7 +115,7 @@ def search_similarities(query_filter: Filter, cv_id: int):
vacancies_content = {}
for name, vec in cv.vector.items():
if any(v != 0 for v in vec):
results = client.query_points(
results = QDARNT_CLIENT.query_points(
collection_name="vacancies",
query=vec,
using=name,