Rebalance vector store weights and exclude extra CVs from vacancies
All checks were successful
release / docker (push) Successful in 36s

This commit is contained in:
estromenko 2025-11-03 15:44:30 +03:00
parent 54084a1021
commit 92c1d97727
2 changed files with 17 additions and 17 deletions

View File

@@ -23,7 +23,7 @@ WHERE timestamp >= now() - INTERVAL 30 DAY
'заниматься', 'формат', 'занятость', 'вилка', 'должност', 'контакт' 'заниматься', 'формат', 'занятость', 'вилка', 'должност', 'контакт'
]) >= 5 ]) >= 5
AND arrayCount(x -> position(lower(message), x) > 0, [ AND arrayCount(x -> position(lower(message), x) > 0, [
'о себе', 'обо мне', 'умею', '#ищу' 'о себе', 'обо мне', 'умею', '#ищу', '#резюме'
]) = 0 ]) = 0
AND id NOT IN %(exist_points)s AND id NOT IN %(exist_points)s
""" """

View File

@@ -14,19 +14,19 @@ FEATURE_NAMES = [
] ]
weights = { weights = {
"job_title": 10, "job_title": 25,
"employment_type": 2, "employment_type": 5,
"work_format": 2, "work_format": 5,
"experience": 3, "experience": 8,
"position_level": 5, "position_level": 12,
"industry": 4, "industry": 10,
"tech_stack": 5, "tech_stack": 14,
"location": 2, "location": 5,
"salary_range": 2, "salary_range": 5,
"languages": 2, "languages": 5,
"education": 1, "education": 2,
"schedule": 1, "schedule": 2,
"additional_requirements": 1, "additional_requirements": 2,
} }
vectors_config = { vectors_config = {
@@ -76,7 +76,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
collection_name="vacancies", collection_name="vacancies",
query=vec, query=vec,
using=name, using=name,
limit=1000, limit=100,
) )
for res in results.points: for res in results.points:
vid = res.id vid = res.id
@@ -91,7 +91,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
scored.append({"id": vid, "score": total}) scored.append({"id": vid, "score": total})
scored.sort(key=lambda x: x["score"], reverse=True) scored.sort(key=lambda x: x["score"], reverse=True)
if scored and scored[0]["score"] > 33: # threshold if scored and scored[0]["score"] > 98: # threshold
return return
qdrant_client.upsert( qdrant_client.upsert(
@@ -121,7 +121,7 @@ def search_similarities(query_filter: Filter, cv_id: int):
collection_name="vacancies", collection_name="vacancies",
query=vec, query=vec,
using=name, using=name,
limit=1000, limit=100,
with_payload=True, with_payload=True,
query_filter=query_filter, query_filter=query_filter,
) )