diff --git a/vacancies/conf/settings.py b/vacancies/conf/settings.py index d1e2c6d..324b19e 100644 --- a/vacancies/conf/settings.py +++ b/vacancies/conf/settings.py @@ -14,8 +14,6 @@ import os from pathlib import Path import sentry_sdk -import clickhouse_connect -from qdrant_client import QdrantClient # Build paths inside the project like this: BASE_DIR / 'subdir'. BASE_DIR = Path(__file__).resolve().parent.parent.parent @@ -165,6 +163,7 @@ LOGGING = { }, } -CLICKHOUSE_CLIENT = clickhouse_connect.create_client(host=os.getenv("CLICKHOUSE_HOST", "127.0.0.1"), port=int(os.getenv("CLICKHOUSE_PORT", "18123"))) +CLICKHOUSE_HOST = os.getenv("CLICKHOUSE_HOST", "127.0.0.1") +CLICKHOUSE_PORT = int(os.getenv("CLICKHOUSE_PORT", "18123")) -QDARNT_CLIENT = QdrantClient(url=os.getenv("QDRANT_URL", "http://localhost:6333")) +QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333") diff --git a/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py b/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py index 21f67b5..b93b3f1 100644 --- a/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py +++ b/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py @@ -1,7 +1,9 @@ from django.core.management import BaseCommand +import clickhouse_connect from vacancies.main.vector_store import add_vectors, extract_features, client as qdrant -from vacancies.conf.settings import CLICKHOUSE_CLIENT +from vacancies.conf.settings import CLICKHOUSE_HOST, CLICKHOUSE_PORT +clickhouse_client = clickhouse_connect.create_client(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT) query = """ SELECT id, chat_username, telegram_id, message, timestamp @@ -39,7 +41,7 @@ class Command(BaseCommand): next_page_offset = response[1] exist_points_set = tuple(set(exist_points_ids)) - result_rows = CLICKHOUSE_CLIENT.query(query, parameters={"exist_points": exist_points_set}).result_rows + result_rows = clickhouse_client.query(query, parameters={"exist_points": exist_points_set}).result_rows result_rows_len = len(result_rows) for index, row in enumerate(result_rows): (id, chat_username, telegram_id, message, timestamp) = row diff --git a/vacancies/main/vector_store.py b/vacancies/main/vector_store.py index 1b287bd..bcd7e8d 100644 --- a/vacancies/main/vector_store.py +++ b/vacancies/main/vector_store.py @@ -1,10 +1,12 @@ from qdrant_client import models from langchain_openai import OpenAIEmbeddings from langchain_openai import ChatOpenAI +from qdrant_client import QdrantClient from qdrant_client.models import Filter from vacancies.main.models import VacancyFeatures -from vacancies.conf.settings import QDARNT_CLIENT +from vacancies.conf.settings import QDRANT_URL +qdrant_client = QdrantClient(url=QDRANT_URL) FEATURE_NAMES = [ "job_title", "employment_type", "work_format", "experience", "position_level", "industry", "tech_stack", @@ -31,13 +33,13 @@ vectors_config = { name: models.VectorParams(size=3072, distance=models.Distance.COSINE) for name in FEATURE_NAMES } -if not QDARNT_CLIENT.collection_exists("vacancies"): - QDARNT_CLIENT.create_collection( +if not qdrant_client.collection_exists("vacancies"): + qdrant_client.create_collection( collection_name="vacancies", vectors_config=vectors_config ) -if not QDARNT_CLIENT.collection_exists("cvs"): - QDARNT_CLIENT.create_collection( +if not qdrant_client.collection_exists("cvs"): + qdrant_client.create_collection( collection_name="cvs", vectors_config=vectors_config ) @@ -70,7 +72,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict): max_similarities = {} for name, vec in vectors.items(): if any(v != 0 for v in vec): - results = QDARNT_CLIENT.query_points( + results = qdrant_client.query_points( collection_name="vacancies", query=vec, using=name, @@ -92,7 +94,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict): if scored and scored[0]["score"] > 33: # threshold return - QDARNT_CLIENT.upsert( + qdrant_client.upsert( collection_name=collection_name, points=[ models.PointStruct( @@ -105,7 +107,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict): def search_similarities(query_filter: Filter, cv_id: int): - cv = QDARNT_CLIENT.retrieve( + cv = qdrant_client.retrieve( collection_name="cvs", ids=[cv_id], with_vectors=True, @@ -115,7 +117,7 @@ def search_similarities(query_filter: Filter, cv_id: int): vacancies_content = {} for name, vec in cv.vector.items(): if any(v != 0 for v in vec): - results = QDARNT_CLIENT.query_points( + results = qdrant_client.query_points( collection_name="vacancies", query=vec, using=name,