Move ClickHouse and Qdrant clients to settings
Some checks failed
release / docker (push) Failing after 48s
Some checks failed
release / docker (push) Failing after 48s
This commit is contained in:
parent
c8a9a1123c
commit
6b9267af02
@ -14,6 +14,8 @@ import os
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import sentry_sdk
|
import sentry_sdk
|
||||||
|
import clickhouse_connect
|
||||||
|
from qdrant_client import QdrantClient
|
||||||
|
|
||||||
# Build paths inside the project like this: BASE_DIR / 'subdir'.
|
# Build paths inside the project like this: BASE_DIR / 'subdir'.
|
||||||
BASE_DIR = Path(__file__).resolve().parent.parent.parent
|
BASE_DIR = Path(__file__).resolve().parent.parent.parent
|
||||||
@ -162,3 +164,7 @@ LOGGING = {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Shared ClickHouse client, constructed when this settings module is imported.
# The endpoint is hard-coded to 127.0.0.1:18123.
CLICKHOUSE_CLIENT = clickhouse_connect.create_client(host="127.0.0.1", port=18123)
|
||||||
|
|
||||||
|
# Shared Qdrant client, constructed when this settings module is imported.
# The endpoint is hard-coded to http://localhost:6333.
# NOTE(review): "QDARNT" looks like a misspelling of "QDRANT"; other modules
# import this exact name, so a rename must update every importer together.
QDARNT_CLIENT = QdrantClient(url="http://localhost:6333")
|
||||||
|
|||||||
@ -1,8 +1,7 @@
|
|||||||
from django.core.management import BaseCommand
|
from django.core.management import BaseCommand
|
||||||
import clickhouse_connect
|
|
||||||
from vacancies.main.vector_store import add_vectors, extract_features, client as qdrant
|
from vacancies.main.vector_store import add_vectors, extract_features, client as qdrant
|
||||||
|
from vacancies.conf.settings import CLICKHOUSE_CLIENT
|
||||||
|
|
||||||
clickhouse_client = clickhouse_connect.create_client(host="127.0.0.1", port=18123)
|
|
||||||
|
|
||||||
query = """
|
query = """
|
||||||
SELECT id, chat_username, telegram_id, message, timestamp
|
SELECT id, chat_username, telegram_id, message, timestamp
|
||||||
@ -40,7 +39,7 @@ class Command(BaseCommand):
|
|||||||
next_page_offset = response[1]
|
next_page_offset = response[1]
|
||||||
exist_points_set = tuple(set(exist_points_ids))
|
exist_points_set = tuple(set(exist_points_ids))
|
||||||
|
|
||||||
result_rows = clickhouse_client.query(query, parameters={"exist_points": exist_points_set}).result_rows
|
result_rows = CLICKHOUSE_CLIENT.query(query, parameters={"exist_points": exist_points_set}).result_rows
|
||||||
result_rows_len = len(result_rows)
|
result_rows_len = len(result_rows)
|
||||||
for index, row in enumerate(result_rows):
|
for index, row in enumerate(result_rows):
|
||||||
(id, chat_username, telegram_id, message, timestamp) = row
|
(id, chat_username, telegram_id, message, timestamp) = row
|
||||||
|
|||||||
@ -1,11 +1,10 @@
|
|||||||
from qdrant_client import models
|
from qdrant_client import models
|
||||||
from langchain_openai import OpenAIEmbeddings
|
from langchain_openai import OpenAIEmbeddings
|
||||||
from langchain_openai import ChatOpenAI
|
from langchain_openai import ChatOpenAI
|
||||||
from qdrant_client import QdrantClient
|
|
||||||
from qdrant_client.models import Filter
|
from qdrant_client.models import Filter
|
||||||
from vacancies.main.models import VacancyFeatures
|
from vacancies.main.models import VacancyFeatures
|
||||||
|
from vacancies.conf.settings import QDARNT_CLIENT
|
||||||
|
|
||||||
client = QdrantClient(url="http://localhost:6333")
|
|
||||||
|
|
||||||
FEATURE_NAMES = [
|
FEATURE_NAMES = [
|
||||||
"job_title", "employment_type", "work_format", "experience", "position_level", "industry", "tech_stack",
|
"job_title", "employment_type", "work_format", "experience", "position_level", "industry", "tech_stack",
|
||||||
@ -32,13 +31,13 @@ vectors_config = {
|
|||||||
name: models.VectorParams(size=3072, distance=models.Distance.COSINE) for name in FEATURE_NAMES
|
name: models.VectorParams(size=3072, distance=models.Distance.COSINE) for name in FEATURE_NAMES
|
||||||
}
|
}
|
||||||
|
|
||||||
if not client.collection_exists("vacancies"):
|
if not QDARNT_CLIENT.collection_exists("vacancies"):
|
||||||
client.create_collection(
|
QDARNT_CLIENT.create_collection(
|
||||||
collection_name="vacancies",
|
collection_name="vacancies",
|
||||||
vectors_config=vectors_config
|
vectors_config=vectors_config
|
||||||
)
|
)
|
||||||
if not client.collection_exists("cvs"):
|
if not QDARNT_CLIENT.collection_exists("cvs"):
|
||||||
client.create_collection(
|
QDARNT_CLIENT.create_collection(
|
||||||
collection_name="cvs",
|
collection_name="cvs",
|
||||||
vectors_config=vectors_config
|
vectors_config=vectors_config
|
||||||
)
|
)
|
||||||
@ -71,7 +70,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
|
|||||||
max_similarities = {}
|
max_similarities = {}
|
||||||
for name, vec in vectors.items():
|
for name, vec in vectors.items():
|
||||||
if any(v != 0 for v in vec):
|
if any(v != 0 for v in vec):
|
||||||
results = client.query_points(
|
results = QDARNT_CLIENT.query_points(
|
||||||
collection_name="vacancies",
|
collection_name="vacancies",
|
||||||
query=vec,
|
query=vec,
|
||||||
using=name,
|
using=name,
|
||||||
@ -93,7 +92,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
|
|||||||
if scored and scored[0]["score"] > 33: # threshold
|
if scored and scored[0]["score"] > 33: # threshold
|
||||||
return
|
return
|
||||||
|
|
||||||
client.upsert(
|
QDARNT_CLIENT.upsert(
|
||||||
collection_name=collection_name,
|
collection_name=collection_name,
|
||||||
points=[
|
points=[
|
||||||
models.PointStruct(
|
models.PointStruct(
|
||||||
@ -106,7 +105,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
|
|||||||
|
|
||||||
|
|
||||||
def search_similarities(query_filter: Filter, cv_id: int):
|
def search_similarities(query_filter: Filter, cv_id: int):
|
||||||
cv = client.retrieve(
|
cv = QDARNT_CLIENT.retrieve(
|
||||||
collection_name="cvs",
|
collection_name="cvs",
|
||||||
ids=[cv_id],
|
ids=[cv_id],
|
||||||
with_vectors=True,
|
with_vectors=True,
|
||||||
@ -116,7 +115,7 @@ def search_similarities(query_filter: Filter, cv_id: int):
|
|||||||
vacancies_content = {}
|
vacancies_content = {}
|
||||||
for name, vec in cv.vector.items():
|
for name, vec in cv.vector.items():
|
||||||
if any(v != 0 for v in vec):
|
if any(v != 0 for v in vec):
|
||||||
results = client.query_points(
|
results = QDARNT_CLIENT.query_points(
|
||||||
collection_name="vacancies",
|
collection_name="vacancies",
|
||||||
query=vec,
|
query=vec,
|
||||||
using=name,
|
using=name,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user