Rebalance vector store weights and exclude extra CVs from vacancies

2025-11-03 15:44:30 +03:00 · 2025-11-03 15:44:30 +03:00 · 92c1d97727
commit 92c1d97727
parent 54084a1021
2 changed files with 17 additions and 17 deletions
--- a/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py
+++ b/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py
@@ -23,7 +23,7 @@ WHERE timestamp >= now() - INTERVAL 30 DAY
    'заниматься', 'формат', 'занятость', 'вилка', 'должност', 'контакт'
  ]) >= 5
  AND arrayCount(x -> position(lower(message), x) > 0, [
-    'о себе', 'обо мне', 'умею', '#ищу'
+    'о себе', 'обо мне', 'умею', '#ищу', '#резюме'
  ]) = 0
  AND id NOT IN %(exist_points)s
 """
--- a/vacancies/main/vector_store.py
+++ b/vacancies/main/vector_store.py
@@ -14,19 +14,19 @@ FEATURE_NAMES = [
 ]
 weights = {
-    "job_title": 10,
+    "job_title": 25,
-    "employment_type": 2,
+    "employment_type": 5,
-    "work_format": 2,
+    "work_format": 5,
-    "experience": 3,
+    "experience": 8,
-    "position_level": 5,
+    "position_level": 12,
-    "industry": 4,
+    "industry": 10,
-    "tech_stack": 5,
+    "tech_stack": 14,
-    "location": 2,
+    "location": 5,
-    "salary_range": 2,
+    "salary_range": 5,
-    "languages": 2,
+    "languages": 5,
-    "education": 1,
+    "education": 2,
-    "schedule": 1,
+    "schedule": 2,
-    "additional_requirements": 1,
+    "additional_requirements": 2,
 }
 vectors_config = {
@@ -76,7 +76,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
                collection_name="vacancies",
                query=vec,
                using=name,
-                limit=1000,
+                limit=100,
            )
            for res in results.points:
                vid = res.id
@@ -91,7 +91,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
        scored.append({"id": vid, "score": total})
    scored.sort(key=lambda x: x["score"], reverse=True)
-    if scored and scored[0]["score"] > 33:  # threshold
+    if scored and scored[0]["score"] > 98:  # threshold
        return
    qdrant_client.upsert(
@@ -121,7 +121,7 @@ def search_similarities(query_filter: Filter, cv_id: int):
                collection_name="vacancies",
                query=vec,
                using=name,
-                limit=1000,
+                limit=100,
                with_payload=True,
                query_filter=query_filter,
            )