Use env vars for DB connections
All checks were successful
release / docker (push) Successful in 49s

This commit is contained in:
estromenko 2025-11-01 18:16:54 +03:00
parent 384df2c78b
commit 477307b993
3 changed files with 18 additions and 15 deletions

View File

@@ -14,8 +14,6 @@ import os
from pathlib import Path from pathlib import Path
import sentry_sdk import sentry_sdk
import clickhouse_connect
from qdrant_client import QdrantClient
# Build paths inside the project like this: BASE_DIR / 'subdir'. # Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent.parent BASE_DIR = Path(__file__).resolve().parent.parent.parent
@@ -165,6 +163,7 @@ LOGGING = {
}, },
} }
CLICKHOUSE_CLIENT = clickhouse_connect.create_client(host=os.getenv("CLICKHOUSE_HOST", "127.0.0.1"), port=int(os.getenv("CLICKHOUSE_PORT", "18123"))) CLICKHOUSE_HOST = os.getenv("CLICKHOUSE_HOST", "127.0.0.1")
CLICKHOUSE_PORT = int(os.getenv("CLICKHOUSE_PORT", "18123"))
QDARNT_CLIENT = QdrantClient(url=os.getenv("QDRANT_URL", "http://localhost:6333")) QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")

View File

@@ -1,7 +1,9 @@
from django.core.management import BaseCommand from django.core.management import BaseCommand
import clickhouse_connect
from vacancies.main.vector_store import add_vectors, extract_features, client as qdrant from vacancies.main.vector_store import add_vectors, extract_features, client as qdrant
from vacancies.conf.settings import CLICKHOUSE_CLIENT from vacancies.conf.settings import CLICKHOUSE_HOST, CLICKHOUSE_PORT
clickhouse_client = clickhouse_connect.create_client(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT)
query = """ query = """
SELECT id, chat_username, telegram_id, message, timestamp SELECT id, chat_username, telegram_id, message, timestamp
@@ -39,7 +41,7 @@ class Command(BaseCommand):
next_page_offset = response[1] next_page_offset = response[1]
exist_points_set = tuple(set(exist_points_ids)) exist_points_set = tuple(set(exist_points_ids))
result_rows = CLICKHOUSE_CLIENT.query(query, parameters={"exist_points": exist_points_set}).result_rows result_rows = clickhouse_client.query(query, parameters={"exist_points": exist_points_set}).result_rows
result_rows_len = len(result_rows) result_rows_len = len(result_rows)
for index, row in enumerate(result_rows): for index, row in enumerate(result_rows):
(id, chat_username, telegram_id, message, timestamp) = row (id, chat_username, telegram_id, message, timestamp) = row

View File

@@ -1,10 +1,12 @@
from qdrant_client import models from qdrant_client import models
from langchain_openai import OpenAIEmbeddings from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI from langchain_openai import ChatOpenAI
from qdrant_client import QdrantClient
from qdrant_client.models import Filter from qdrant_client.models import Filter
from vacancies.main.models import VacancyFeatures from vacancies.main.models import VacancyFeatures
from vacancies.conf.settings import QDARNT_CLIENT from vacancies.conf.settings import QDRANT_URL
qdrant_client = QdrantClient(url=QDRANT_URL)
FEATURE_NAMES = [ FEATURE_NAMES = [
"job_title", "employment_type", "work_format", "experience", "position_level", "industry", "tech_stack", "job_title", "employment_type", "work_format", "experience", "position_level", "industry", "tech_stack",
@@ -31,13 +33,13 @@ vectors_config = {
name: models.VectorParams(size=3072, distance=models.Distance.COSINE) for name in FEATURE_NAMES name: models.VectorParams(size=3072, distance=models.Distance.COSINE) for name in FEATURE_NAMES
} }
if not QDARNT_CLIENT.collection_exists("vacancies"): if not qdrant_client.collection_exists("vacancies"):
QDARNT_CLIENT.create_collection( qdrant_client.create_collection(
collection_name="vacancies", collection_name="vacancies",
vectors_config=vectors_config vectors_config=vectors_config
) )
if not QDARNT_CLIENT.collection_exists("cvs"): if not qdrant_client.collection_exists("cvs"):
QDARNT_CLIENT.create_collection( qdrant_client.create_collection(
collection_name="cvs", collection_name="cvs",
vectors_config=vectors_config vectors_config=vectors_config
) )
@@ -70,7 +72,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
max_similarities = {} max_similarities = {}
for name, vec in vectors.items(): for name, vec in vectors.items():
if any(v != 0 for v in vec): if any(v != 0 for v in vec):
results = QDARNT_CLIENT.query_points( results = qdrant_client.query_points(
collection_name="vacancies", collection_name="vacancies",
query=vec, query=vec,
using=name, using=name,
@@ -92,7 +94,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
if scored and scored[0]["score"] > 33: # threshold if scored and scored[0]["score"] > 33: # threshold
return return
QDARNT_CLIENT.upsert( qdrant_client.upsert(
collection_name=collection_name, collection_name=collection_name,
points=[ points=[
models.PointStruct( models.PointStruct(
@@ -105,7 +107,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
def search_similarities(query_filter: Filter, cv_id: int): def search_similarities(query_filter: Filter, cv_id: int):
cv = QDARNT_CLIENT.retrieve( cv = qdrant_client.retrieve(
collection_name="cvs", collection_name="cvs",
ids=[cv_id], ids=[cv_id],
with_vectors=True, with_vectors=True,
@@ -115,7 +117,7 @@ def search_similarities(query_filter: Filter, cv_id: int):
vacancies_content = {} vacancies_content = {}
for name, vec in cv.vector.items(): for name, vec in cv.vector.items():
if any(v != 0 for v in vec): if any(v != 0 for v in vec):
results = QDARNT_CLIENT.query_points( results = qdrant_client.query_points(
collection_name="vacancies", collection_name="vacancies",
query=vec, query=vec,
using=name, using=name,