From 8a8dd532dc7c2b5e4ebcfdb02f7d99bd6d4529d0 Mon Sep 17 00:00:00 2001 From: V1ammer Date: Thu, 4 Dec 2025 00:51:28 +0300 Subject: [PATCH] Extract original title from vacancies --- manage.py | 0 vacancies/main/bot.py | 8 ++-- .../collect_vacancies_from_hh_parser.py | 39 ++++++++++++++++--- ...ollect_vacancies_from_telegram_messages.py | 2 + .../migrations/0014_vacancy_original_title.py | 18 +++++++++ vacancies/main/models.py | 1 + vacancies/main/prompts.py | 11 +++--- 7 files changed, 64 insertions(+), 15 deletions(-) mode change 100755 => 100644 manage.py create mode 100644 vacancies/main/migrations/0014_vacancy_original_title.py diff --git a/manage.py b/manage.py old mode 100755 new mode 100644 diff --git a/vacancies/main/bot.py b/vacancies/main/bot.py index c265b81..91a5fb0 100644 --- a/vacancies/main/bot.py +++ b/vacancies/main/bot.py @@ -5,11 +5,14 @@ import traceback from typing import Literal from asgiref.sync import sync_to_async +from django.conf import settings from langchain.agents import create_agent from langchain_openai import ChatOpenAI from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver +from openai import AsyncOpenAI from pydantic import BaseModel from pypdf import PdfReader +from qdrant_client import AsyncQdrantClient from telegram import ( InlineKeyboardButton, InlineKeyboardMarkup, @@ -29,9 +32,6 @@ from vacancies.conf.settings import DB_URI from vacancies.main import prompts from vacancies.main.models import Customer, CustomerCV, JobTitle from vacancies.main.recommendations import get_next_vacancy -from django.conf import settings -from qdrant_client import AsyncQdrantClient -from openai import AsyncOpenAI qdrant_client = AsyncQdrantClient(url=settings.QDRANT_URL) openai_client = AsyncOpenAI(base_url="https://openrouter.ai/api/v1") @@ -150,7 +150,7 @@ async def handle_document(update: Update, context: ContextTypes.DEFAULT_TYPE): min_salary_rub: int | None max_salary_rub: int | None - openai_client = 
ChatOpenAI(model_name="gpt-5-mini", temperature=0, seed=42, top_p=1) + openai_client = ChatOpenAI(base_url="https://openrouter.ai/api/v1", model_name="gpt-5-mini", temperature=0, seed=42, top_p=1) structured_llm = openai_client.with_structured_output(Structure) prompt = f'{prompts.STRUCTURED_OUTPUT_PROMPT} {resume}' diff --git a/vacancies/main/management/commands/collect_vacancies_from_hh_parser.py b/vacancies/main/management/commands/collect_vacancies_from_hh_parser.py index a3dbc0f..728f4ab 100644 --- a/vacancies/main/management/commands/collect_vacancies_from_hh_parser.py +++ b/vacancies/main/management/commands/collect_vacancies_from_hh_parser.py @@ -1,18 +1,24 @@ +import re +from typing import Literal + from django.core.management import BaseCommand from django.utils import timezone -from vacancies.hh_parser.models import Vacancy as ExternalVacancy -from vacancies.main.models import Vacancy, JobTitle from flashrank import Ranker, RerankRequest -import re +from langchain_openai import ChatOpenAI +from pydantic import BaseModel -tags_regex = re.compile('<.*?>') +from vacancies.hh_parser.models import Vacancy as ExternalVacancy +from vacancies.main import prompts +from vacancies.main.models import JobTitle, Vacancy + +tags_regex = re.compile('<.*?>') reranker = Ranker("ms-marco-TinyBERT-L-2-v2") class Command(BaseCommand): help = "Collect vacancies from hh.ru parser" - + def _remove_tags(self, text): return re.sub(tags_regex, "", text) @@ -23,6 +29,26 @@ class Command(BaseCommand): queryset = ExternalVacancy.objects.filter(title__isnull=False, description__isnull=False) total_vacancies = queryset.count() + # job_titles = JobTitle.objects.values_list('title', flat=True) + class Structure(BaseModel): + # job_title: Literal[tuple(job_titles)] + # original_title: str + # min_salary_rub: int | None + # max_salary_rub: int | None + # company_name: str + requirements: str + + openai_client = ChatOpenAI( + model_name="openai/gpt-5-mini", + 
openai_api_base="https://openrouter.ai/api/v1", + temperature=0, + seed=42, + top_p=1, + ) + structured_llm = openai_client.with_structured_output(Structure) + prompt = prompts.STRUCTURED_OUTPUT_PROMPT + response = structured_llm.invoke(prompt) + for index, vacancy in enumerate(queryset): results = reranker.rerank(RerankRequest(query=vacancy.title, passages=passages)) ordered_results = sorted(results, key=lambda i: i["score"], reverse=True) @@ -32,10 +58,11 @@ class Command(BaseCommand): external_id=vacancy.id, defaults=dict( job_title_id=job_title_id, + original_title=vacancy.title, min_salary_rub=vacancy.min_payment, max_salary_rub=vacancy.max_payment, company_name=vacancy.company, - requirements=self._remove_tags(vacancy.description), + requirements=response.requirements, content=self._remove_tags(vacancy.description), timestamp=timezone.make_aware(vacancy.created_at), link=vacancy.link, diff --git a/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py b/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py index 2e76d32..670a086 100644 --- a/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py +++ b/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py @@ -42,6 +42,7 @@ class Command(BaseCommand): class Structure(BaseModel): job_title: Literal[tuple(job_titles)] + original_title: str min_salary_rub: int | None max_salary_rub: int | None company_name: str @@ -73,6 +74,7 @@ class Command(BaseCommand): vacancies.append(Vacancy( external_id=id, job_title_id=job_title_map[response.job_title], + original_title=response.original_title, min_salary_rub=response.min_salary_rub, max_salary_rub=response.max_salary_rub, company_name=response.company_name, diff --git a/vacancies/main/migrations/0014_vacancy_original_title.py b/vacancies/main/migrations/0014_vacancy_original_title.py new file mode 100644 index 0000000..e5c1e77 --- /dev/null +++ 
b/vacancies/main/migrations/0014_vacancy_original_title.py @@ -0,0 +1,18 @@ +# Generated by Django 5.2.7 on 2025-12-03 19:19 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('main', '0013_alter_vacancy_options'), + ] + + operations = [ + migrations.AddField( + model_name='vacancy', + name='original_title', + field=models.CharField(blank=True, max_length=255, null=True), + ), + ] diff --git a/vacancies/main/models.py b/vacancies/main/models.py index c0b1428..f454b47 100644 --- a/vacancies/main/models.py +++ b/vacancies/main/models.py @@ -42,6 +42,7 @@ class CustomerCV(models.Model): class Vacancy(models.Model): job_title = models.ForeignKey(JobTitle, on_delete=models.CASCADE) + original_title = models.CharField(max_length=255, null=True, blank=True) external_id = models.CharField(max_length=255, unique=True) min_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None) max_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None) diff --git a/vacancies/main/prompts.py b/vacancies/main/prompts.py index 4f1c204..4433b5a 100644 --- a/vacancies/main/prompts.py +++ b/vacancies/main/prompts.py @@ -16,7 +16,7 @@ BOT_SYSTEM_PROMPT = """ STRUCTURED_OUTPUT_PROMPT = """ -You are an HR specialist. Your task is to review vacansies and independently select a suitable topic (e.g., DevSecOps, Java Developer, Information Security Specialist, etc.). +You are an HR specialist. Your task is to review vacansies and independently select a suitable topic (e.g., DevSecOps, Java Developer, Information Security Specialist, etc.). You also need to analyze vacansies and structure the information from them according to the scheme. You don't need to change or invent anything in the job posting below. You only need to structure the information provided. 
@@ -49,15 +49,16 @@ Example vacancy: 🔥 Что мы предлагаем: • Полная удаленка или свободное посещение офисов в Москве и Санкт-Петербурге -• IT-ипотека и оформление в аккредитованную IT-компанию -• Бесплатное питание в офисах, ДМС со стоматологией (после испытательного срока) +• IT-ипотека и оформление в аккредитованную IT-компанию +• Бесплатное питание в офисах, ДМС со стоматологией (после испытательного срока) • Оплачиваемые Day Off, корпоративное обучение и IT-мероприятия 💘 Контакты: @Alens_HR' -Structured output of the example vacansy: +Structured output of the example vacancy: { - job_title: "Network Security Team lead - Infrastructure Security", + job_title: "Network Security lead", + original_title: "Network Security Team lead - Infrastructure Security", company_name: "Wildberries", min_salary_rub: None, max_salary_rub: 500000,