Use env vars for DB connections
All checks were successful
release / docker (push) Successful in 49s
All checks were successful
release / docker (push) Successful in 49s
This commit is contained in:
parent
384df2c78b
commit
477307b993
@ -14,8 +14,6 @@ import os
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import sentry_sdk
|
import sentry_sdk
|
||||||
import clickhouse_connect
|
|
||||||
from qdrant_client import QdrantClient
|
|
||||||
|
|
||||||
# Build paths inside the project like this: BASE_DIR / 'subdir'.
|
# Build paths inside the project like this: BASE_DIR / 'subdir'.
|
||||||
BASE_DIR = Path(__file__).resolve().parent.parent.parent
|
BASE_DIR = Path(__file__).resolve().parent.parent.parent
|
||||||
@ -165,6 +163,7 @@ LOGGING = {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
CLICKHOUSE_CLIENT = clickhouse_connect.create_client(host=os.getenv("CLICKHOUSE_HOST", "127.0.0.1"), port=int(os.getenv("CLICKHOUSE_PORT", "18123")))
|
CLICKHOUSE_HOST = os.getenv("CLICKHOUSE_HOST", "127.0.0.1")
|
||||||
|
CLICKHOUSE_PORT = int(os.getenv("CLICKHOUSE_PORT", "18123"))
|
||||||
|
|
||||||
QDARNT_CLIENT = QdrantClient(url=os.getenv("QDRANT_URL", "http://localhost:6333"))
|
QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
|
||||||
|
|||||||
@ -1,7 +1,9 @@
|
|||||||
from django.core.management import BaseCommand
|
from django.core.management import BaseCommand
|
||||||
|
import clickhouse_connect
|
||||||
from vacancies.main.vector_store import add_vectors, extract_features, client as qdrant
|
from vacancies.main.vector_store import add_vectors, extract_features, client as qdrant
|
||||||
from vacancies.conf.settings import CLICKHOUSE_CLIENT
|
from vacancies.conf.settings import CLICKHOUSE_HOST, CLICKHOUSE_PORT
|
||||||
|
|
||||||
|
clickhouse_client = clickhouse_connect.create_client(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT)
|
||||||
|
|
||||||
query = """
|
query = """
|
||||||
SELECT id, chat_username, telegram_id, message, timestamp
|
SELECT id, chat_username, telegram_id, message, timestamp
|
||||||
@ -39,7 +41,7 @@ class Command(BaseCommand):
|
|||||||
next_page_offset = response[1]
|
next_page_offset = response[1]
|
||||||
exist_points_set = tuple(set(exist_points_ids))
|
exist_points_set = tuple(set(exist_points_ids))
|
||||||
|
|
||||||
result_rows = CLICKHOUSE_CLIENT.query(query, parameters={"exist_points": exist_points_set}).result_rows
|
result_rows = clickhouse_client.query(query, parameters={"exist_points": exist_points_set}).result_rows
|
||||||
result_rows_len = len(result_rows)
|
result_rows_len = len(result_rows)
|
||||||
for index, row in enumerate(result_rows):
|
for index, row in enumerate(result_rows):
|
||||||
(id, chat_username, telegram_id, message, timestamp) = row
|
(id, chat_username, telegram_id, message, timestamp) = row
|
||||||
|
|||||||
@ -1,10 +1,12 @@
|
|||||||
from qdrant_client import models
|
from qdrant_client import models
|
||||||
from langchain_openai import OpenAIEmbeddings
|
from langchain_openai import OpenAIEmbeddings
|
||||||
from langchain_openai import ChatOpenAI
|
from langchain_openai import ChatOpenAI
|
||||||
|
from qdrant_client import QdrantClient
|
||||||
from qdrant_client.models import Filter
|
from qdrant_client.models import Filter
|
||||||
from vacancies.main.models import VacancyFeatures
|
from vacancies.main.models import VacancyFeatures
|
||||||
from vacancies.conf.settings import QDARNT_CLIENT
|
from vacancies.conf.settings import QDRANT_URL
|
||||||
|
|
||||||
|
qdrant_client = QdrantClient(url=QDRANT_URL)
|
||||||
|
|
||||||
FEATURE_NAMES = [
|
FEATURE_NAMES = [
|
||||||
"job_title", "employment_type", "work_format", "experience", "position_level", "industry", "tech_stack",
|
"job_title", "employment_type", "work_format", "experience", "position_level", "industry", "tech_stack",
|
||||||
@ -31,13 +33,13 @@ vectors_config = {
|
|||||||
name: models.VectorParams(size=3072, distance=models.Distance.COSINE) for name in FEATURE_NAMES
|
name: models.VectorParams(size=3072, distance=models.Distance.COSINE) for name in FEATURE_NAMES
|
||||||
}
|
}
|
||||||
|
|
||||||
if not QDARNT_CLIENT.collection_exists("vacancies"):
|
if not qdrant_client.collection_exists("vacancies"):
|
||||||
QDARNT_CLIENT.create_collection(
|
qdrant_client.create_collection(
|
||||||
collection_name="vacancies",
|
collection_name="vacancies",
|
||||||
vectors_config=vectors_config
|
vectors_config=vectors_config
|
||||||
)
|
)
|
||||||
if not QDARNT_CLIENT.collection_exists("cvs"):
|
if not qdrant_client.collection_exists("cvs"):
|
||||||
QDARNT_CLIENT.create_collection(
|
qdrant_client.create_collection(
|
||||||
collection_name="cvs",
|
collection_name="cvs",
|
||||||
vectors_config=vectors_config
|
vectors_config=vectors_config
|
||||||
)
|
)
|
||||||
@ -70,7 +72,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
|
|||||||
max_similarities = {}
|
max_similarities = {}
|
||||||
for name, vec in vectors.items():
|
for name, vec in vectors.items():
|
||||||
if any(v != 0 for v in vec):
|
if any(v != 0 for v in vec):
|
||||||
results = QDARNT_CLIENT.query_points(
|
results = qdrant_client.query_points(
|
||||||
collection_name="vacancies",
|
collection_name="vacancies",
|
||||||
query=vec,
|
query=vec,
|
||||||
using=name,
|
using=name,
|
||||||
@ -92,7 +94,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
|
|||||||
if scored and scored[0]["score"] > 33: # threshold
|
if scored and scored[0]["score"] > 33: # threshold
|
||||||
return
|
return
|
||||||
|
|
||||||
QDARNT_CLIENT.upsert(
|
qdrant_client.upsert(
|
||||||
collection_name=collection_name,
|
collection_name=collection_name,
|
||||||
points=[
|
points=[
|
||||||
models.PointStruct(
|
models.PointStruct(
|
||||||
@ -105,7 +107,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
|
|||||||
|
|
||||||
|
|
||||||
def search_similarities(query_filter: Filter, cv_id: int):
|
def search_similarities(query_filter: Filter, cv_id: int):
|
||||||
cv = QDARNT_CLIENT.retrieve(
|
cv = qdrant_client.retrieve(
|
||||||
collection_name="cvs",
|
collection_name="cvs",
|
||||||
ids=[cv_id],
|
ids=[cv_id],
|
||||||
with_vectors=True,
|
with_vectors=True,
|
||||||
@ -115,7 +117,7 @@ def search_similarities(query_filter: Filter, cv_id: int):
|
|||||||
vacancies_content = {}
|
vacancies_content = {}
|
||||||
for name, vec in cv.vector.items():
|
for name, vec in cv.vector.items():
|
||||||
if any(v != 0 for v in vec):
|
if any(v != 0 for v in vec):
|
||||||
results = QDARNT_CLIENT.query_points(
|
results = qdrant_client.query_points(
|
||||||
collection_name="vacancies",
|
collection_name="vacancies",
|
||||||
query=vec,
|
query=vec,
|
||||||
using=name,
|
using=name,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user