Compare commits

..

1 Commits

Author SHA1 Message Date
V1ammer
8a8dd532dc Extract original title from vacancies 2025-12-04 00:51:28 +03:00
7 changed files with 64 additions and 15 deletions

0
manage.py Executable file → Normal file
View File

View File

@ -5,11 +5,14 @@ import traceback
from typing import Literal
from asgiref.sync import sync_to_async
from django.conf import settings
from langchain.agents import create_agent
from langchain_openai import ChatOpenAI
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
from openai import AsyncOpenAI
from pydantic import BaseModel
from pypdf import PdfReader
from qdrant_client import AsyncQdrantClient
from telegram import (
InlineKeyboardButton,
InlineKeyboardMarkup,
@ -29,9 +32,6 @@ from vacancies.conf.settings import DB_URI
from vacancies.main import prompts
from vacancies.main.models import Customer, CustomerCV, JobTitle
from vacancies.main.recommendations import get_next_vacancy
from django.conf import settings
from qdrant_client import AsyncQdrantClient
from openai import AsyncOpenAI
qdrant_client = AsyncQdrantClient(url=settings.QDRANT_URL)
openai_client = AsyncOpenAI(base_url="https://openrouter.ai/api/v1")
@ -150,7 +150,7 @@ async def handle_document(update: Update, context: ContextTypes.DEFAULT_TYPE):
min_salary_rub: int | None
max_salary_rub: int | None
openai_client = ChatOpenAI(model_name="gpt-5-mini", temperature=0, seed=42, top_p=1)
openai_client = ChatOpenAI(base_url="https://openrouter.ai/api/v1", model_name="gpt-5-mini", temperature=0, seed=42, top_p=1)
structured_llm = openai_client.with_structured_output(Structure)
prompt = f'{prompts.STRUCTURED_OUTPUT_PROMPT} {resume}'

View File

@ -1,9 +1,15 @@
import re
from typing import Literal
from django.core.management import BaseCommand
from django.utils import timezone
from vacancies.hh_parser.models import Vacancy as ExternalVacancy
from vacancies.main.models import Vacancy, JobTitle
from flashrank import Ranker, RerankRequest
import re
from langchain_openai import ChatOpenAI
from pydantic import BaseModel
from vacancies.hh_parser.models import Vacancy as ExternalVacancy
from vacancies.main import prompts
from vacancies.main.models import JobTitle, Vacancy
tags_regex = re.compile('<.*?>')
@ -23,6 +29,26 @@ class Command(BaseCommand):
queryset = ExternalVacancy.objects.filter(title__isnull=False, description__isnull=False)
total_vacancies = queryset.count()
# job_titles = JobTitle.objects.values_list('title', flat=True)
class Structure(BaseModel):
# job_title: Literal[tuple(job_titles)]
# original_title: str
# min_salary_rub: int | None
# max_salary_rub: int | None
# company_name: str
requirements: str
openai_client = ChatOpenAI(
model_name="openai/gpt-5-mini",
openai_api_base="https://openrouter.ai/api/v1",
temperature=0,
seed=42,
top_p=1,
)
structured_llm = openai_client.with_structured_output(Structure)
prompt = prompts.STRUCTURED_OUTPUT_PROMPT
response = structured_llm.invoke(prompt)
for index, vacancy in enumerate(queryset):
results = reranker.rerank(RerankRequest(query=vacancy.title, passages=passages))
ordered_results = sorted(results, key=lambda i: i["score"], reverse=True)
@ -32,10 +58,11 @@ class Command(BaseCommand):
external_id=vacancy.id,
defaults=dict(
job_title_id=job_title_id,
original_title=vacancy.title,
min_salary_rub=vacancy.min_payment,
max_salary_rub=vacancy.max_payment,
company_name=vacancy.company,
requirements=self._remove_tags(vacancy.description),
requirements=response.requirements,
content=self._remove_tags(vacancy.description),
timestamp=timezone.make_aware(vacancy.created_at),
link=vacancy.link,

View File

@ -42,6 +42,7 @@ class Command(BaseCommand):
class Structure(BaseModel):
job_title: Literal[tuple(job_titles)]
original_title: str
min_salary_rub: int | None
max_salary_rub: int | None
company_name: str
@ -73,6 +74,7 @@ class Command(BaseCommand):
vacancies.append(Vacancy(
external_id=id,
job_title_id=job_title_map[response.job_title],
original_title=response.original_title,
min_salary_rub=response.min_salary_rub,
max_salary_rub=response.max_salary_rub,
company_name=response.company_name,

View File

@ -0,0 +1,18 @@
# Generated by Django 5.2.7 on 2025-12-03 19:19
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the optional ``original_title`` column to the ``vacancy`` model.

    The column is a nullable, blank-able ``CharField`` capped at 255
    characters, so rows that already exist need no backfill.
    """

    # Must run after the previous migration of the ``main`` app.
    dependencies = [('main', '0013_alter_vacancy_options')]

    operations = [
        migrations.AddField(
            model_name='vacancy',
            name='original_title',
            field=models.CharField(
                max_length=255,
                null=True,
                blank=True,
            ),
        ),
    ]

View File

@ -42,6 +42,7 @@ class CustomerCV(models.Model):
class Vacancy(models.Model):
job_title = models.ForeignKey(JobTitle, on_delete=models.CASCADE)
original_title = models.CharField(max_length=255, null=True, blank=True)
external_id = models.CharField(max_length=255, unique=True)
min_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
max_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)

View File

@ -55,9 +55,10 @@ Example vacancy:
💘 Контакты: @Alens_HR'
Structured output of the example vacansy:
Structured output of the example vacancy:
{
job_title: "Network Security Team lead - Infrastructure Security",
job_title: "Network Security lead",
original_title: "Network Security Team lead - Infrastructure Security",
company_name: "Wildberries",
min_salary_rub: None,
max_salary_rub: 500000,