Use LLM for vacancies post-processing

This commit is contained in:
estromenko 2025-10-31 16:37:03 +03:00
parent 44ca1dcf98
commit 59eac494ba
3 changed files with 22 additions and 7 deletions

View File

@ -62,6 +62,8 @@ async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> N
async def handle_document(update: Update, context: ContextTypes.DEFAULT_TYPE):
message = await context.bot.send_message(update.effective_chat.id, "📝 Обрабатываю твой запрос. Пожалуйста, подождите...")
if not update.message.document:
await context.bot.send_message(chat_id=update.effective_chat.id, text="Не удалось прочитать информацию из файла! Попробуйте другой формат.")
return
@ -84,7 +86,7 @@ async def handle_document(update: Update, context: ContextTypes.DEFAULT_TYPE):
{'content': customer_cv.content, 'features_json': features.model_dump()},
)
await context.bot.send_message(chat_id=update.effective_chat.id, text="Отлично! Запомнил Ваше резюме.")
await context.bot.editMessageText("Отлично! Запомнил Ваше резюме.", update.effective_chat.id, message.id)
application = ApplicationBuilder().token(os.environ["BOT_TOKEN"]).build()

View File

@ -20,7 +20,11 @@ class Command(BaseCommand):
).values_list('vacancy_id', flat=True)
query_filter = Filter(must_not=[HasIdCondition(has_id=recommended_vacancy_ids)])
search_result_id, vacancy_content, link = search_similarities(query_filter, customer_cv.id)
result = search_similarities(query_filter, customer_cv.id)
if not result:
continue
search_result_id, vacancy_content, link = result
recommendation = RecommendedVacancy.objects.create(
customer=customer_cv.customer,

View File

@ -106,7 +106,7 @@ def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
)
def search_similarities(query_filter: Filter, cv_id: int) -> list[dict]:
def search_similarities(query_filter: Filter, cv_id: int):
vectors = client.retrieve(
collection_name="cvs",
ids=[cv_id],
@ -141,10 +141,19 @@ def search_similarities(query_filter: Filter, cv_id: int) -> list[dict]:
total = sum(feature_sims[feature] * weights.get(feature, 1) for feature in feature_sims)
scored.append({"id": vid, "score": total, "content": vacancies_content[vid]["content"], "link": vacancies_content[vid]["link"]})
scored.sort(key=lambda x: x["score"], reverse=True)
for i in range(20):
print(f"{scored[i]['content']} {scored[i]['score']}")
return scored[0]["id"], scored[0]["content"], scored[0]["link"]
prompt = f"""
Среди вакансий ниже выбери одну наиболее релевантную и выведи ее индекс.
Если среди вакансий нет подходящих, то верни -1.
В ответе выведи только число.
{scored}
"""
openai_client = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal", temperature=0, seed=42, top_p=1)
response = openai_client.invoke(prompt)
index = int(response.content)
if index == -1:
return None
return scored[index]["id"], scored[index]["content"], scored[index]["link"]
def extract_features(content: str) -> VacancyFeatures: