Extract original title from vacancies
This commit is contained in:
parent
d3d4766abb
commit
8a8dd532dc
@ -5,11 +5,14 @@ import traceback
|
|||||||
from typing import Literal
|
from typing import Literal
|
||||||
|
|
||||||
from asgiref.sync import sync_to_async
|
from asgiref.sync import sync_to_async
|
||||||
|
from django.conf import settings
|
||||||
from langchain.agents import create_agent
|
from langchain.agents import create_agent
|
||||||
from langchain_openai import ChatOpenAI
|
from langchain_openai import ChatOpenAI
|
||||||
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
|
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
|
||||||
|
from openai import AsyncOpenAI
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from pypdf import PdfReader
|
from pypdf import PdfReader
|
||||||
|
from qdrant_client import AsyncQdrantClient
|
||||||
from telegram import (
|
from telegram import (
|
||||||
InlineKeyboardButton,
|
InlineKeyboardButton,
|
||||||
InlineKeyboardMarkup,
|
InlineKeyboardMarkup,
|
||||||
@ -29,9 +32,6 @@ from vacancies.conf.settings import DB_URI
|
|||||||
from vacancies.main import prompts
|
from vacancies.main import prompts
|
||||||
from vacancies.main.models import Customer, CustomerCV, JobTitle
|
from vacancies.main.models import Customer, CustomerCV, JobTitle
|
||||||
from vacancies.main.recommendations import get_next_vacancy
|
from vacancies.main.recommendations import get_next_vacancy
|
||||||
from django.conf import settings
|
|
||||||
from qdrant_client import AsyncQdrantClient
|
|
||||||
from openai import AsyncOpenAI
|
|
||||||
|
|
||||||
qdrant_client = AsyncQdrantClient(url=settings.QDRANT_URL)
|
qdrant_client = AsyncQdrantClient(url=settings.QDRANT_URL)
|
||||||
openai_client = AsyncOpenAI(base_url="https://openrouter.ai/api/v1")
|
openai_client = AsyncOpenAI(base_url="https://openrouter.ai/api/v1")
|
||||||
@ -150,7 +150,7 @@ async def handle_document(update: Update, context: ContextTypes.DEFAULT_TYPE):
|
|||||||
min_salary_rub: int | None
|
min_salary_rub: int | None
|
||||||
max_salary_rub: int | None
|
max_salary_rub: int | None
|
||||||
|
|
||||||
openai_client = ChatOpenAI(model_name="gpt-5-mini", temperature=0, seed=42, top_p=1)
|
openai_client = ChatOpenAI(base_url="https://openrouter.ai/api/v1", model_name="gpt-5-mini", temperature=0, seed=42, top_p=1)
|
||||||
structured_llm = openai_client.with_structured_output(Structure)
|
structured_llm = openai_client.with_structured_output(Structure)
|
||||||
|
|
||||||
prompt = f'{prompts.STRUCTURED_OUTPUT_PROMPT} {resume}'
|
prompt = f'{prompts.STRUCTURED_OUTPUT_PROMPT} {resume}'
|
||||||
|
|||||||
@ -1,18 +1,24 @@
|
|||||||
|
import re
|
||||||
|
from typing import Literal
|
||||||
|
|
||||||
from django.core.management import BaseCommand
|
from django.core.management import BaseCommand
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
from vacancies.hh_parser.models import Vacancy as ExternalVacancy
|
|
||||||
from vacancies.main.models import Vacancy, JobTitle
|
|
||||||
from flashrank import Ranker, RerankRequest
|
from flashrank import Ranker, RerankRequest
|
||||||
import re
|
from langchain_openai import ChatOpenAI
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
tags_regex = re.compile('<.*?>')
|
from vacancies.hh_parser.models import Vacancy as ExternalVacancy
|
||||||
|
from vacancies.main import prompts
|
||||||
|
from vacancies.main.models import JobTitle, Vacancy
|
||||||
|
|
||||||
|
tags_regex = re.compile('<.*?>')
|
||||||
|
|
||||||
reranker = Ranker("ms-marco-TinyBERT-L-2-v2")
|
reranker = Ranker("ms-marco-TinyBERT-L-2-v2")
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(BaseCommand):
|
||||||
help = "Collect vacancies from hh.ru parser"
|
help = "Collect vacancies from hh.ru parser"
|
||||||
|
|
||||||
def _remove_tags(self, text):
|
def _remove_tags(self, text):
|
||||||
return re.sub(tags_regex, "", text)
|
return re.sub(tags_regex, "", text)
|
||||||
|
|
||||||
@ -23,6 +29,26 @@ class Command(BaseCommand):
|
|||||||
queryset = ExternalVacancy.objects.filter(title__isnull=False, description__isnull=False)
|
queryset = ExternalVacancy.objects.filter(title__isnull=False, description__isnull=False)
|
||||||
total_vacancies = queryset.count()
|
total_vacancies = queryset.count()
|
||||||
|
|
||||||
|
# job_titles = JobTitle.objects.values_list('title', flat=True)
|
||||||
|
class Structure(BaseModel):
|
||||||
|
# job_title: Literal[tuple(job_titles)]
|
||||||
|
# original_title: str
|
||||||
|
# min_salary_rub: int | None
|
||||||
|
# max_salary_rub: int | None
|
||||||
|
# company_name: str
|
||||||
|
requirements: str
|
||||||
|
|
||||||
|
openai_client = ChatOpenAI(
|
||||||
|
model_name="openai/gpt-5-mini",
|
||||||
|
openai_api_base="https://openrouter.ai/api/v1",
|
||||||
|
temperature=0,
|
||||||
|
seed=42,
|
||||||
|
top_p=1,
|
||||||
|
)
|
||||||
|
structured_llm = openai_client.with_structured_output(Structure)
|
||||||
|
prompt = prompts.STRUCTURED_OUTPUT_PROMPT
|
||||||
|
response = structured_llm.invoke(prompt)
|
||||||
|
|
||||||
for index, vacancy in enumerate(queryset):
|
for index, vacancy in enumerate(queryset):
|
||||||
results = reranker.rerank(RerankRequest(query=vacancy.title, passages=passages))
|
results = reranker.rerank(RerankRequest(query=vacancy.title, passages=passages))
|
||||||
ordered_results = sorted(results, key=lambda i: i["score"], reverse=True)
|
ordered_results = sorted(results, key=lambda i: i["score"], reverse=True)
|
||||||
@ -32,10 +58,11 @@ class Command(BaseCommand):
|
|||||||
external_id=vacancy.id,
|
external_id=vacancy.id,
|
||||||
defaults=dict(
|
defaults=dict(
|
||||||
job_title_id=job_title_id,
|
job_title_id=job_title_id,
|
||||||
|
original_title=vacancy.title,
|
||||||
min_salary_rub=vacancy.min_payment,
|
min_salary_rub=vacancy.min_payment,
|
||||||
max_salary_rub=vacancy.max_payment,
|
max_salary_rub=vacancy.max_payment,
|
||||||
company_name=vacancy.company,
|
company_name=vacancy.company,
|
||||||
requirements=self._remove_tags(vacancy.description),
|
requirements=response.requirements,
|
||||||
content=self._remove_tags(vacancy.description),
|
content=self._remove_tags(vacancy.description),
|
||||||
timestamp=timezone.make_aware(vacancy.created_at),
|
timestamp=timezone.make_aware(vacancy.created_at),
|
||||||
link=vacancy.link,
|
link=vacancy.link,
|
||||||
|
|||||||
@ -42,6 +42,7 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
class Structure(BaseModel):
|
class Structure(BaseModel):
|
||||||
job_title: Literal[tuple(job_titles)]
|
job_title: Literal[tuple(job_titles)]
|
||||||
|
original_title: str
|
||||||
min_salary_rub: int | None
|
min_salary_rub: int | None
|
||||||
max_salary_rub: int | None
|
max_salary_rub: int | None
|
||||||
company_name: str
|
company_name: str
|
||||||
@ -73,6 +74,7 @@ class Command(BaseCommand):
|
|||||||
vacancies.append(Vacancy(
|
vacancies.append(Vacancy(
|
||||||
external_id=id,
|
external_id=id,
|
||||||
job_title_id=job_title_map[response.job_title],
|
job_title_id=job_title_map[response.job_title],
|
||||||
|
original_title=response.original_title,
|
||||||
min_salary_rub=response.min_salary_rub,
|
min_salary_rub=response.min_salary_rub,
|
||||||
max_salary_rub=response.max_salary_rub,
|
max_salary_rub=response.max_salary_rub,
|
||||||
company_name=response.company_name,
|
company_name=response.company_name,
|
||||||
|
|||||||
18
vacancies/main/migrations/0014_vacancy_original_title.py
Normal file
18
vacancies/main/migrations/0014_vacancy_original_title.py
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
# Generated by Django 5.2.7 on 2025-12-03 19:19
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('main', '0013_alter_vacancy_options'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='vacancy',
|
||||||
|
name='original_title',
|
||||||
|
field=models.CharField(blank=True, max_length=255, null=True),
|
||||||
|
),
|
||||||
|
]
|
||||||
@ -42,6 +42,7 @@ class CustomerCV(models.Model):
|
|||||||
|
|
||||||
class Vacancy(models.Model):
|
class Vacancy(models.Model):
|
||||||
job_title = models.ForeignKey(JobTitle, on_delete=models.CASCADE)
|
job_title = models.ForeignKey(JobTitle, on_delete=models.CASCADE)
|
||||||
|
original_title = models.CharField(max_length=255, null=True, blank=True)
|
||||||
external_id = models.CharField(max_length=255, unique=True)
|
external_id = models.CharField(max_length=255, unique=True)
|
||||||
min_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
|
min_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
|
||||||
max_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
|
max_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
|
||||||
|
|||||||
@ -16,7 +16,7 @@ BOT_SYSTEM_PROMPT = """
|
|||||||
|
|
||||||
|
|
||||||
STRUCTURED_OUTPUT_PROMPT = """
|
STRUCTURED_OUTPUT_PROMPT = """
|
||||||
You are an HR specialist. Your task is to review vacansies and independently select a suitable topic (e.g., DevSecOps, Java Developer, Information Security Specialist, etc.).
|
You are an HR specialist. Your task is to review vacansies and independently select a suitable topic (e.g., DevSecOps, Java Developer, Information Security Specialist, etc.).
|
||||||
You also need to analyze vacansies and structure the information from them according to the scheme.
|
You also need to analyze vacansies and structure the information from them according to the scheme.
|
||||||
|
|
||||||
You don't need to change or invent anything in the job posting below. You only need to structure the information provided.
|
You don't need to change or invent anything in the job posting below. You only need to structure the information provided.
|
||||||
@ -49,15 +49,16 @@ Example vacancy:
|
|||||||
|
|
||||||
🔥 Что мы предлагаем:
|
🔥 Что мы предлагаем:
|
||||||
• Полная удаленка или свободное посещение офисов в Москве и Санкт-Петербурге
|
• Полная удаленка или свободное посещение офисов в Москве и Санкт-Петербурге
|
||||||
• IT-ипотека и оформление в аккредитованную IT-компанию
|
• IT-ипотека и оформление в аккредитованную IT-компанию
|
||||||
• Бесплатное питание в офисах, ДМС со стоматологией (после испытательного срока)
|
• Бесплатное питание в офисах, ДМС со стоматологией (после испытательного срока)
|
||||||
• Оплачиваемые Day Off, корпоративное обучение и IT-мероприятия
|
• Оплачиваемые Day Off, корпоративное обучение и IT-мероприятия
|
||||||
|
|
||||||
💘 Контакты: @Alens_HR'
|
💘 Контакты: @Alens_HR'
|
||||||
|
|
||||||
Structured output of the example vacansy:
|
Structured output of the example vacancy:
|
||||||
{
|
{
|
||||||
job_title: "Network Security Team lead - Infrastructure Security",
|
job_title: "Network Security lead",
|
||||||
|
original_title: "Network Security Team lead - Infrastructure Security",
|
||||||
company_name: "Wildberries",
|
company_name: "Wildberries",
|
||||||
min_salary_rub: None,
|
min_salary_rub: None,
|
||||||
max_salary_rub: 500000,
|
max_salary_rub: 500000,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user