Compare commits

..

No commits in common. "extract-original-title-from-vacancies" and "master" have entirely different histories.

7 changed files with 15 additions and 64 deletions

0
manage.py Normal file → Executable file
View File

View File

@@ -5,14 +5,11 @@ import traceback
from typing import Literal
from asgiref.sync import sync_to_async
from django.conf import settings
from langchain.agents import create_agent
from langchain_openai import ChatOpenAI
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
from openai import AsyncOpenAI
from pydantic import BaseModel
from pypdf import PdfReader
from qdrant_client import AsyncQdrantClient
from telegram import (
InlineKeyboardButton,
InlineKeyboardMarkup,
@@ -32,6 +29,9 @@ from vacancies.conf.settings import DB_URI
from vacancies.main import prompts
from vacancies.main.models import Customer, CustomerCV, JobTitle
from vacancies.main.recommendations import get_next_vacancy
from django.conf import settings
from qdrant_client import AsyncQdrantClient
from openai import AsyncOpenAI
qdrant_client = AsyncQdrantClient(url=settings.QDRANT_URL)
openai_client = AsyncOpenAI(base_url="https://openrouter.ai/api/v1")
@@ -150,7 +150,7 @@ async def handle_document(update: Update, context: ContextTypes.DEFAULT_TYPE):
min_salary_rub: int | None
max_salary_rub: int | None
openai_client = ChatOpenAI(base_url="https://openrouter.ai/api/v1", model_name="gpt-5-mini", temperature=0, seed=42, top_p=1)
openai_client = ChatOpenAI(model_name="gpt-5-mini", temperature=0, seed=42, top_p=1)
structured_llm = openai_client.with_structured_output(Structure)
prompt = f'{prompts.STRUCTURED_OUTPUT_PROMPT} {resume}'

View File

@@ -1,24 +1,18 @@
import re
from typing import Literal
from django.core.management import BaseCommand
from django.utils import timezone
from flashrank import Ranker, RerankRequest
from langchain_openai import ChatOpenAI
from pydantic import BaseModel
from vacancies.hh_parser.models import Vacancy as ExternalVacancy
from vacancies.main import prompts
from vacancies.main.models import JobTitle, Vacancy
from vacancies.main.models import Vacancy, JobTitle
from flashrank import Ranker, RerankRequest
import re
tags_regex = re.compile('<.*?>')
tags_regex = re.compile('<.*?>')
reranker = Ranker("ms-marco-TinyBERT-L-2-v2")
class Command(BaseCommand):
help = "Collect vacancies from hh.ru parser"
def _remove_tags(self, text):
    """Return ``text`` with every match of ``tags_regex`` removed.

    ``tags_regex`` is the module-level compiled pattern ``<.*?>``, so each
    shortest ``<...>`` run is replaced with the empty string.
    """
    # Call the compiled pattern's own substitution method directly.
    stripped = tags_regex.sub("", text)
    return stripped
@@ -29,26 +23,6 @@ class Command(BaseCommand):
queryset = ExternalVacancy.objects.filter(title__isnull=False, description__isnull=False)
total_vacancies = queryset.count()
# job_titles = JobTitle.objects.values_list('title', flat=True)
class Structure(BaseModel):
# job_title: Literal[tuple(job_titles)]
# original_title: str
# min_salary_rub: int | None
# max_salary_rub: int | None
# company_name: str
requirements: str
openai_client = ChatOpenAI(
model_name="openai/gpt-5-mini",
openai_api_base="https://openrouter.ai/api/v1",
temperature=0,
seed=42,
top_p=1,
)
structured_llm = openai_client.with_structured_output(Structure)
prompt = prompts.STRUCTURED_OUTPUT_PROMPT
response = structured_llm.invoke(prompt)
for index, vacancy in enumerate(queryset):
results = reranker.rerank(RerankRequest(query=vacancy.title, passages=passages))
ordered_results = sorted(results, key=lambda i: i["score"], reverse=True)
@@ -58,11 +32,10 @@ class Command(BaseCommand):
external_id=vacancy.id,
defaults=dict(
job_title_id=job_title_id,
original_title=vacancy.title,
min_salary_rub=vacancy.min_payment,
max_salary_rub=vacancy.max_payment,
company_name=vacancy.company,
requirements=response.requirements,
requirements=self._remove_tags(vacancy.description),
content=self._remove_tags(vacancy.description),
timestamp=timezone.make_aware(vacancy.created_at),
link=vacancy.link,

View File

@@ -42,7 +42,6 @@ class Command(BaseCommand):
class Structure(BaseModel):
job_title: Literal[tuple(job_titles)]
original_title: str
min_salary_rub: int | None
max_salary_rub: int | None
company_name: str
@@ -74,7 +73,6 @@ class Command(BaseCommand):
vacancies.append(Vacancy(
external_id=id,
job_title_id=job_title_map[response.job_title],
original_title=response.original_title,
min_salary_rub=response.min_salary_rub,
max_salary_rub=response.max_salary_rub,
company_name=response.company_name,

View File

@@ -1,18 +0,0 @@
# Generated by Django 5.2.7 on 2025-12-03 19:19
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the ``original_title`` column to the ``vacancy`` model.

    The new field is a ``CharField`` limited to 255 characters that may be
    left blank and stored as NULL.
    """

    # Must be applied after migration 0013 of the ``main`` app.
    dependencies = [("main", "0013_alter_vacancy_options")]

    operations = [
        migrations.AddField(
            model_name="vacancy",
            name="original_title",
            field=models.CharField(max_length=255, null=True, blank=True),
        ),
    ]

View File

@@ -42,7 +42,6 @@ class CustomerCV(models.Model):
class Vacancy(models.Model):
job_title = models.ForeignKey(JobTitle, on_delete=models.CASCADE)
original_title = models.CharField(max_length=255, null=True, blank=True)
external_id = models.CharField(max_length=255, unique=True)
min_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
max_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)

View File

@@ -16,7 +16,7 @@ BOT_SYSTEM_PROMPT = """
STRUCTURED_OUTPUT_PROMPT = """
You are an HR specialist. Your task is to review vacansies and independently select a suitable topic (e.g., DevSecOps, Java Developer, Information Security Specialist, etc.).
You are an HR specialist. Your task is to review vacansies and independently select a suitable topic (e.g., DevSecOps, Java Developer, Information Security Specialist, etc.).
You also need to analyze vacansies and structure the information from them according to the scheme.
You don't need to change or invent anything in the job posting below. You only need to structure the information provided.
@@ -49,16 +49,15 @@ Example vacancy:
🔥 Что мы предлагаем:
Полная удаленка или свободное посещение офисов в Москве и Санкт-Петербурге
IT-ипотека и оформление в аккредитованную IT-компанию
Бесплатное питание в офисах, ДМС со стоматологией (после испытательного срока)
IT-ипотека и оформление в аккредитованную IT-компанию
Бесплатное питание в офисах, ДМС со стоматологией (после испытательного срока)
Оплачиваемые Day Off, корпоративное обучение и IT-мероприятия
💘 Контакты: @Alens_HR'
Structured output of the example vacancy:
Structured output of the example vacansy:
{
job_title: "Network Security lead",
original_title: "Network Security Team lead - Infrastructure Security",
job_title: "Network Security Team lead - Infrastructure Security",
company_name: "Wildberries",
min_salary_rub: None,
max_salary_rub: 500000,