From 92c1d977271204bf351514f2220b1518df30b05b Mon Sep 17 00:00:00 2001 From: estromenko Date: Mon, 3 Nov 2025 15:44:30 +0300 Subject: [PATCH] Rebalance vector store weights and exclude extra cvs from vacancies --- ...ollect_vacancies_from_telegram_messages.py | 2 +- vacancies/main/vector_store.py | 32 +++++++++---------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py b/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py index 122018d..a360c6e 100644 --- a/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py +++ b/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py @@ -23,7 +23,7 @@ WHERE timestamp >= now() - INTERVAL 30 DAY 'заниматься', 'формат', 'занятость', 'вилка', 'должност', 'контакт' ]) >= 5 AND arrayCount(x -> position(lower(message), x) > 0, [ - 'о себе', 'обо мне', 'умею', '#ищу' + 'о себе', 'обо мне', 'умею', '#ищу', '#резюме' ]) = 0 AND id NOT IN %(exist_points)s """ diff --git a/vacancies/main/vector_store.py b/vacancies/main/vector_store.py index bcd7e8d..44dac9c 100644 --- a/vacancies/main/vector_store.py +++ b/vacancies/main/vector_store.py @@ -14,19 +14,19 @@ FEATURE_NAMES = [ ] weights = { - "job_title": 10, - "employment_type": 2, - "work_format": 2, - "experience": 3, - "position_level": 5, - "industry": 4, - "tech_stack": 5, - "location": 2, - "salary_range": 2, - "languages": 2, - "education": 1, - "schedule": 1, - "additional_requirements": 1, + "job_title": 25, + "employment_type": 5, + "work_format": 5, + "experience": 8, + "position_level": 12, + "industry": 10, + "tech_stack": 14, + "location": 5, + "salary_range": 5, + "languages": 5, + "education": 2, + "schedule": 2, + "additional_requirements": 2, } vectors_config = { @@ -76,7 +76,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict): collection_name="vacancies", query=vec, using=name, - limit=1000, + limit=100, ) for res in results.points: vid = res.id @@ -91,7 +91,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict): scored.append({"id": vid, "score": total}) scored.sort(key=lambda x: x["score"], reverse=True) - if scored and scored[0]["score"] > 33: # threshold + if scored and scored[0]["score"] > 98: # threshold return qdrant_client.upsert( @@ -121,7 +121,7 @@ def search_similarities(query_filter: Filter, cv_id: int): collection_name="vacancies", query=vec, using=name, - limit=1000, + limit=100, with_payload=True, query_filter=query_filter, )