Extract original title from vacancies

This commit is contained in:
V1ammer 2025-12-04 00:51:28 +03:00
parent d3d4766abb
commit 8a8dd532dc
7 changed files with 64 additions and 15 deletions

0
manage.py Executable file → Normal file
View File

View File

@ -5,11 +5,14 @@ import traceback
from typing import Literal from typing import Literal
from asgiref.sync import sync_to_async from asgiref.sync import sync_to_async
from django.conf import settings
from langchain.agents import create_agent from langchain.agents import create_agent
from langchain_openai import ChatOpenAI from langchain_openai import ChatOpenAI
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
from openai import AsyncOpenAI
from pydantic import BaseModel from pydantic import BaseModel
from pypdf import PdfReader from pypdf import PdfReader
from qdrant_client import AsyncQdrantClient
from telegram import ( from telegram import (
InlineKeyboardButton, InlineKeyboardButton,
InlineKeyboardMarkup, InlineKeyboardMarkup,
@ -29,9 +32,6 @@ from vacancies.conf.settings import DB_URI
from vacancies.main import prompts from vacancies.main import prompts
from vacancies.main.models import Customer, CustomerCV, JobTitle from vacancies.main.models import Customer, CustomerCV, JobTitle
from vacancies.main.recommendations import get_next_vacancy from vacancies.main.recommendations import get_next_vacancy
from django.conf import settings
from qdrant_client import AsyncQdrantClient
from openai import AsyncOpenAI
qdrant_client = AsyncQdrantClient(url=settings.QDRANT_URL) qdrant_client = AsyncQdrantClient(url=settings.QDRANT_URL)
openai_client = AsyncOpenAI(base_url="https://openrouter.ai/api/v1") openai_client = AsyncOpenAI(base_url="https://openrouter.ai/api/v1")
@ -150,7 +150,7 @@ async def handle_document(update: Update, context: ContextTypes.DEFAULT_TYPE):
min_salary_rub: int | None min_salary_rub: int | None
max_salary_rub: int | None max_salary_rub: int | None
openai_client = ChatOpenAI(model_name="gpt-5-mini", temperature=0, seed=42, top_p=1) openai_client = ChatOpenAI(base_url="https://openrouter.ai/api/v1", model_name="gpt-5-mini", temperature=0, seed=42, top_p=1)
structured_llm = openai_client.with_structured_output(Structure) structured_llm = openai_client.with_structured_output(Structure)
prompt = f'{prompts.STRUCTURED_OUTPUT_PROMPT} {resume}' prompt = f'{prompts.STRUCTURED_OUTPUT_PROMPT} {resume}'

View File

@ -1,18 +1,24 @@
import re
from typing import Literal
from django.core.management import BaseCommand from django.core.management import BaseCommand
from django.utils import timezone from django.utils import timezone
from vacancies.hh_parser.models import Vacancy as ExternalVacancy
from vacancies.main.models import Vacancy, JobTitle
from flashrank import Ranker, RerankRequest from flashrank import Ranker, RerankRequest
import re from langchain_openai import ChatOpenAI
from pydantic import BaseModel
tags_regex = re.compile('<.*?>') from vacancies.hh_parser.models import Vacancy as ExternalVacancy
from vacancies.main import prompts
from vacancies.main.models import JobTitle, Vacancy
tags_regex = re.compile('<.*?>')
reranker = Ranker("ms-marco-TinyBERT-L-2-v2") reranker = Ranker("ms-marco-TinyBERT-L-2-v2")
class Command(BaseCommand): class Command(BaseCommand):
help = "Collect vacancies from hh.ru parser" help = "Collect vacancies from hh.ru parser"
def _remove_tags(self, text): def _remove_tags(self, text):
return re.sub(tags_regex, "", text) return re.sub(tags_regex, "", text)
@ -23,6 +29,26 @@ class Command(BaseCommand):
queryset = ExternalVacancy.objects.filter(title__isnull=False, description__isnull=False) queryset = ExternalVacancy.objects.filter(title__isnull=False, description__isnull=False)
total_vacancies = queryset.count() total_vacancies = queryset.count()
# job_titles = JobTitle.objects.values_list('title', flat=True)
class Structure(BaseModel):
# job_title: Literal[tuple(job_titles)]
# original_title: str
# min_salary_rub: int | None
# max_salary_rub: int | None
# company_name: str
requirements: str
openai_client = ChatOpenAI(
model_name="openai/gpt-5-mini",
openai_api_base="https://openrouter.ai/api/v1",
temperature=0,
seed=42,
top_p=1,
)
structured_llm = openai_client.with_structured_output(Structure)
prompt = prompts.STRUCTURED_OUTPUT_PROMPT
response = structured_llm.invoke(prompt)
for index, vacancy in enumerate(queryset): for index, vacancy in enumerate(queryset):
results = reranker.rerank(RerankRequest(query=vacancy.title, passages=passages)) results = reranker.rerank(RerankRequest(query=vacancy.title, passages=passages))
ordered_results = sorted(results, key=lambda i: i["score"], reverse=True) ordered_results = sorted(results, key=lambda i: i["score"], reverse=True)
@ -32,10 +58,11 @@ class Command(BaseCommand):
external_id=vacancy.id, external_id=vacancy.id,
defaults=dict( defaults=dict(
job_title_id=job_title_id, job_title_id=job_title_id,
original_title=vacancy.title,
min_salary_rub=vacancy.min_payment, min_salary_rub=vacancy.min_payment,
max_salary_rub=vacancy.max_payment, max_salary_rub=vacancy.max_payment,
company_name=vacancy.company, company_name=vacancy.company,
requirements=self._remove_tags(vacancy.description), requirements=response.requirements,
content=self._remove_tags(vacancy.description), content=self._remove_tags(vacancy.description),
timestamp=timezone.make_aware(vacancy.created_at), timestamp=timezone.make_aware(vacancy.created_at),
link=vacancy.link, link=vacancy.link,

View File

@ -42,6 +42,7 @@ class Command(BaseCommand):
class Structure(BaseModel): class Structure(BaseModel):
job_title: Literal[tuple(job_titles)] job_title: Literal[tuple(job_titles)]
original_title: str
min_salary_rub: int | None min_salary_rub: int | None
max_salary_rub: int | None max_salary_rub: int | None
company_name: str company_name: str
@ -73,6 +74,7 @@ class Command(BaseCommand):
vacancies.append(Vacancy( vacancies.append(Vacancy(
external_id=id, external_id=id,
job_title_id=job_title_map[response.job_title], job_title_id=job_title_map[response.job_title],
original_title=response.original_title,
min_salary_rub=response.min_salary_rub, min_salary_rub=response.min_salary_rub,
max_salary_rub=response.max_salary_rub, max_salary_rub=response.max_salary_rub,
company_name=response.company_name, company_name=response.company_name,

View File

@ -0,0 +1,18 @@
# Generated by Django 5.2.7 on 2025-12-03 19:19
from django.db import migrations, models
# Schema migration that adds the `original_title` field to the `vacancy` model.
class Migration(migrations.Migration):
# Depends on migration 0013_alter_vacancy_options of the `main` app.
dependencies = [
('main', '0013_alter_vacancy_options'),
]
operations = [
# Add a CharField limited to 255 characters; null=True and blank=True
# make the field optional, so no default value is declared here.
migrations.AddField(
model_name='vacancy',
name='original_title',
field=models.CharField(blank=True, max_length=255, null=True),
),
]

View File

@ -42,6 +42,7 @@ class CustomerCV(models.Model):
class Vacancy(models.Model): class Vacancy(models.Model):
job_title = models.ForeignKey(JobTitle, on_delete=models.CASCADE) job_title = models.ForeignKey(JobTitle, on_delete=models.CASCADE)
original_title = models.CharField(max_length=255, null=True, blank=True)
external_id = models.CharField(max_length=255, unique=True) external_id = models.CharField(max_length=255, unique=True)
min_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None) min_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
max_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None) max_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)

View File

@ -16,7 +16,7 @@ BOT_SYSTEM_PROMPT = """
STRUCTURED_OUTPUT_PROMPT = """ STRUCTURED_OUTPUT_PROMPT = """
You are an HR specialist. Your task is to review vacansies and independently select a suitable topic (e.g., DevSecOps, Java Developer, Information Security Specialist, etc.). You are an HR specialist. Your task is to review vacansies and independently select a suitable topic (e.g., DevSecOps, Java Developer, Information Security Specialist, etc.).
You also need to analyze vacansies and structure the information from them according to the scheme. You also need to analyze vacansies and structure the information from them according to the scheme.
You don't need to change or invent anything in the job posting below. You only need to structure the information provided. You don't need to change or invent anything in the job posting below. You only need to structure the information provided.
@ -49,15 +49,16 @@ Example vacancy:
🔥 Что мы предлагаем: 🔥 Что мы предлагаем:
Полная удаленка или свободное посещение офисов в Москве и Санкт-Петербурге Полная удаленка или свободное посещение офисов в Москве и Санкт-Петербурге
IT-ипотека и оформление в аккредитованную IT-компанию IT-ипотека и оформление в аккредитованную IT-компанию
Бесплатное питание в офисах, ДМС со стоматологией (после испытательного срока) Бесплатное питание в офисах, ДМС со стоматологией (после испытательного срока)
Оплачиваемые Day Off, корпоративное обучение и IT-мероприятия Оплачиваемые Day Off, корпоративное обучение и IT-мероприятия
💘 Контакты: @Alens_HR' 💘 Контакты: @Alens_HR'
Structured output of the example vacansy: Structured output of the example vacancy:
{ {
job_title: "Network Security Team lead - Infrastructure Security", job_title: "Network Security lead",
original_title: "Network Security Team lead - Infrastructure Security",
company_name: "Wildberries", company_name: "Wildberries",
min_salary_rub: None, min_salary_rub: None,
max_salary_rub: 500000, max_salary_rub: 500000,