Compare commits

..

4 Commits

Author SHA1 Message Date
3a7098f751 Extend structured output 2025-11-09 16:27:41 +03:00
41b3a250a7 Clean invalid vacancies from clickhouse query
All checks were successful
release / docker (push) Successful in 35s
2025-11-09 15:35:34 +03:00
96d8621d49 Remove blocking call in bot method 2025-11-09 15:30:32 +03:00
b23502ee6a Improve quality for classification 2025-11-09 15:28:51 +03:00
6 changed files with 66 additions and 16 deletions

View File

@ -0,0 +1,3 @@
# Logging bootstrap: runs at import time and asks the standard library to
# configure the root logger with INFO as its level.
import logging
logging.basicConfig(level=logging.INFO)

View File

@ -1,5 +1,6 @@
import io import io
import os import os
import asyncio
import traceback import traceback
from asgiref.sync import sync_to_async from asgiref.sync import sync_to_async
@ -66,7 +67,7 @@ async def next_vacancy(update: Update, context: ContextTypes.DEFAULT_TYPE):
await context.bot.send_message(chat_id=update.effective_chat.id, text=message) await context.bot.send_message(chat_id=update.effective_chat.id, text=message)
return return
vacancy = get_next_vacancy(customer_cv) vacancy = await asyncio.to_thread(get_next_vacancy, customer_cv)
if not vacancy: if not vacancy:
message = "Вакансии закончились, возвращайтесь позже!" message = "Вакансии закончились, возвращайтесь позже!"
await context.bot.send_message(chat_id=update.effective_chat.id, text=message) await context.bot.send_message(chat_id=update.effective_chat.id, text=message)
@ -125,25 +126,24 @@ async def handle_document(update: Update, context: ContextTypes.DEFAULT_TYPE):
job_titles: list[Literal[tuple(db_job_titles)]] job_titles: list[Literal[tuple(db_job_titles)]]
min_salary_rub: int | None min_salary_rub: int | None
max_salary_rub: int | None max_salary_rub: int | None
years_of_experience: int | None
work_format: Literal["Удаленный", "Офис", "Гибрид", None]
openai_client = ChatOpenAI(model_name="gpt-5-mini", temperature=0, seed=42, top_p=1) openai_client = ChatOpenAI(model_name="gpt-5-mini")
structured_llm = openai_client.with_structured_output(Structure) structured_llm = openai_client.with_structured_output(Structure)
prompt = f""" prompt = f"Extract sturcture from following CV. {resume}"
Ты HR-классификатор. Ниже приведён список допустимых профессий. print('1')
Твоя задача выбрать наиболее подходящие по смыслу.
Качество классификации - самое важное.
Игнорируй орфографические и стилистические различия.
Резюме:
{resume}
"""
response = await structured_llm.ainvoke(prompt) response = await structured_llm.ainvoke(prompt)
print('2')
customer = await Customer.objects.aget(telegram_id=update.effective_user.id) customer = await Customer.objects.aget(telegram_id=update.effective_user.id)
customer_cv, _ = await CustomerCV.objects.aupdate_or_create(customer=customer, defaults=dict( customer_cv, _ = await CustomerCV.objects.aupdate_or_create(customer=customer, defaults=dict(
content=resume, content=resume,
min_salary_rub=response.min_salary_rub, min_salary_rub=response.min_salary_rub,
max_salary_rub=response.max_salary_rub, max_salary_rub=response.max_salary_rub,
years_of_experience=response.years_of_experience,
work_format=response.work_format,
)) ))
await customer_cv.job_titles.aset([job_title_map[job_title] for job_title in response.job_titles]) await customer_cv.job_titles.aset([job_title_map[job_title] for job_title in response.job_titles])

View File

@ -24,7 +24,8 @@ WHERE timestamp >= %(timestamp)s
'заниматься', 'формат', 'занятость', 'вилка', 'должност', 'контакт' 'заниматься', 'формат', 'занятость', 'вилка', 'должност', 'контакт'
]) >= 5 ]) >= 5
AND arrayCount(x -> position(lower(message), x) > 0, [ AND arrayCount(x -> position(lower(message), x) > 0, [
'о себе', 'обо мне', 'умею', '#ищу', '#резюме', 'университет', 'колледж' 'о себе', 'обо мне', 'умею', '#ищу', '#резюме', 'университет', 'колледж',
'не будет опубликовано'
]) = 0 ]) = 0
ORDER BY timestamp ASC ORDER BY timestamp ASC
""" """
@ -41,8 +42,10 @@ class Command(BaseCommand):
job_title: Literal[tuple(job_titles)] job_title: Literal[tuple(job_titles)]
min_salary_rub: int | None min_salary_rub: int | None
max_salary_rub: int | None max_salary_rub: int | None
min_years_of_experience: int
work_format: Literal["remote", "office", None]
openai_client = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal", temperature=0, seed=42, top_p=1) openai_client = ChatOpenAI(model_name="gpt-5-mini", temperature=0, seed=42, top_p=1)
structured_llm = openai_client.with_structured_output(Structure) structured_llm = openai_client.with_structured_output(Structure)
last_timestamp = timezone.now() - timedelta(days=30) last_timestamp = timezone.now() - timedelta(days=30)
@ -59,6 +62,7 @@ class Command(BaseCommand):
Ты HR-классификатор. Ниже приведён список допустимых профессий. Ты HR-классификатор. Ниже приведён список допустимых профессий.
Твоя задача выбрать наиболее подходящую по смыслу. Твоя задача выбрать наиболее подходящую по смыслу.
Качество классификации - самое важное. Качество классификации - самое важное.
Если не уверен, то лучше укажи "Другое", ошибки недопустимы.
Игнорируй орфографические и стилистические различия. Игнорируй орфографические и стилистические различия.
Вакансия: Вакансия:
{row[3]} {row[3]}
@ -74,6 +78,8 @@ class Command(BaseCommand):
job_title_id=job_title_map[response.job_title], job_title_id=job_title_map[response.job_title],
min_salary_rub=response.min_salary_rub, min_salary_rub=response.min_salary_rub,
max_salary_rub=response.max_salary_rub, max_salary_rub=response.max_salary_rub,
min_years_of_experience=response.min_years_of_experience,
work_format=response.work_format,
content=message, content=message,
timestamp=timezone.make_aware(timestamp), timestamp=timezone.make_aware(timestamp),
link=f"https://t.me/{chat_username}/{telegram_id}", link=f"https://t.me/{chat_username}/{telegram_id}",

View File

@ -0,0 +1,35 @@
# Generated by Django 5.2.7 on 2025-11-09 12:54
from django.db import migrations, models
class Migration(migrations.Migration):
    # Adds a work-format column and an experience column to both the
    # `customercv` and `vacancy` models of the `main` app.

    # Depends on migration 0011 of the `main` app.
    dependencies = [
        ("main", "0011_remove_customercv_job_title_customercv_job_titles_and_more"),
    ]

    operations = [
        # customercv.work_format: nullable, may be left blank.
        migrations.AddField(
            model_name="customercv",
            name="work_format",
            field=models.CharField(blank=True, max_length=64, null=True),
        ),
        # customercv.years_of_experience: added with a default of 0;
        # preserve_default=False means that default is not kept on the field.
        migrations.AddField(
            model_name="customercv",
            name="years_of_experience",
            field=models.PositiveBigIntegerField(default=0),
            preserve_default=False,
        ),
        # vacancy.min_years_of_experience: same one-off default of 0.
        migrations.AddField(
            model_name="vacancy",
            name="min_years_of_experience",
            field=models.PositiveBigIntegerField(default=0),
            preserve_default=False,
        ),
        # vacancy.work_format: nullable, may be left blank.
        migrations.AddField(
            model_name="vacancy",
            name="work_format",
            field=models.CharField(blank=True, max_length=64, null=True),
        ),
    ]

View File

@ -28,6 +28,8 @@ class CustomerCV(models.Model):
job_titles = models.ManyToManyField(JobTitle, related_name="vacancies") job_titles = models.ManyToManyField(JobTitle, related_name="vacancies")
min_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None) min_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
max_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None) max_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
years_of_experience = models.PositiveBigIntegerField()
work_format = models.CharField(max_length=64, null=True, blank=True)
content = models.TextField() content = models.TextField()
created_at = models.DateTimeField(auto_now_add=True) created_at = models.DateTimeField(auto_now_add=True)
@ -45,6 +47,8 @@ class Vacancy(models.Model):
external_id = models.CharField(max_length=255, unique=True) external_id = models.CharField(max_length=255, unique=True)
min_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None) min_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
max_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None) max_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
min_years_of_experience = models.PositiveBigIntegerField()
work_format = models.CharField(max_length=64, null=True, blank=True)
content = models.TextField() content = models.TextField()
timestamp = models.DateTimeField() timestamp = models.DateTimeField()
link = models.URLField() link = models.URLField()

View File

@ -1,12 +1,14 @@
from vacancies.main.models import Vacancy from vacancies.main.models import Vacancy
from django.db.models import Q
def get_next_vacancy(customer_cv): def get_next_vacancy(customer_cv):
vacancy = Vacancy.objects.exclude( vacancy = Vacancy.objects.filter(
id__in=customer_cv.customer.recommended_vacancies.values_list("vacancy_id", flat=True), ~Q(id__in=customer_cv.customer.recommended_vacancies.values_list("vacancy_id", flat=True)),
).filter( Q(min_salary_rub__isnull=True) | Q(min_salary_rub__gt=customer_cv.min_salary_rub),
Q(work_format__isnull=True) | Q(work_format=customer_cv.work_format),
job_title__title__in=customer_cv.job_titles.values_list("title", flat=True), job_title__title__in=customer_cv.job_titles.values_list("title", flat=True),
min_salary_rub__gt=customer_cv.min_salary_rub, min_years_of_experience__lte=customer_cv.years_of_experience,
).first() ).first()
if vacancy: if vacancy:
customer_cv.customer.recommended_vacancies.create(vacancy=vacancy) customer_cv.customer.recommended_vacancies.create(vacancy=vacancy)