From 92c1d977271204bf351514f2220b1518df30b05b Mon Sep 17 00:00:00 2001
From: estromenko <estromenko@mail.ru>
Date: Mon, 3 Nov 2025 15:44:30 +0300
Subject: [PATCH] Rebalance vector store weights and exclude extra CVs from
 vacancies

---
 ...ollect_vacancies_from_telegram_messages.py |  2 +-
 vacancies/main/vector_store.py                | 32 +++++++++----------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py b/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py
index 122018d..a360c6e 100644
--- a/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py
+++ b/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py
@@ -23,7 +23,7 @@ WHERE timestamp >= now() - INTERVAL 30 DAY
     'заниматься', 'формат', 'занятость', 'вилка', 'должност', 'контакт'
   ]) >= 5
   AND arrayCount(x -> position(lower(message), x) > 0, [
-    'о себе', 'обо мне', 'умею', '#ищу'
+    'о себе', 'обо мне', 'умею', '#ищу', '#резюме'
   ]) = 0
   AND id NOT IN %(exist_points)s
 """
diff --git a/vacancies/main/vector_store.py b/vacancies/main/vector_store.py
index bcd7e8d..44dac9c 100644
--- a/vacancies/main/vector_store.py
+++ b/vacancies/main/vector_store.py
@@ -14,19 +14,19 @@ FEATURE_NAMES = [
 ]
 
 weights = {
-    "job_title": 10,
-    "employment_type": 2,
-    "work_format": 2,
-    "experience": 3,
-    "position_level": 5,
-    "industry": 4,
-    "tech_stack": 5,
-    "location": 2,
-    "salary_range": 2,
-    "languages": 2,
-    "education": 1,
-    "schedule": 1,
-    "additional_requirements": 1,
+    "job_title": 25,
+    "employment_type": 5,
+    "work_format": 5,
+    "experience": 8,
+    "position_level": 12,
+    "industry": 10,
+    "tech_stack": 14,
+    "location": 5,
+    "salary_range": 5,
+    "languages": 5,
+    "education": 2,
+    "schedule": 2,
+    "additional_requirements": 2,
 }
 
 vectors_config = {
@@ -76,7 +76,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
                 collection_name="vacancies",
                 query=vec,
                 using=name,
-                limit=1000,
+                limit=100,
             )
             for res in results.points:
                 vid = res.id
@@ -91,7 +91,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
         scored.append({"id": vid, "score": total})
 
     scored.sort(key=lambda x: x["score"], reverse=True)
-    if scored and scored[0]["score"] > 33:  # threshold
+    if scored and scored[0]["score"] > 98:  # threshold
         return
 
     qdrant_client.upsert(
@@ -121,7 +121,7 @@ def search_similarities(query_filter: Filter, cv_id: int):
                 collection_name="vacancies",
                 query=vec,
                 using=name,
-                limit=1000,
+                limit=100,
                 with_payload=True,
                 query_filter=query_filter,
             )