Compare commits

..

2 Commits

Author SHA1 Message Date
55ee3b7ba4 Implement simplified recommendations
All checks were successful
release / docker (push) Successful in 45s
2025-11-09 13:04:31 +03:00
b31ef06ec0 Update readme 2025-11-08 19:19:26 +03:00
15 changed files with 798 additions and 992 deletions

View File

@ -1,24 +1,40 @@
# vision-career-backend # vision-career
Sample `.env`: Sample `.env`:
```dotenv ```dotenv
DEEPINFRA_API_TOKEN=your-token-here
OPENAI_API_KEY=your-token-here OPENAI_API_KEY=your-token-here
OPENAI_PROXY=http://user:password@host:port
BOT_TOKEN=your-token-here BOT_TOKEN=your-token-here
SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt
SECRET_KEY=secret
DEBUG=true
``` ```
Commands: Commands:
```bash ```bash
docker compose up -d
KUBECONFIG=clickhouse-kubeconfig.yaml kubectl port-forward svc/clickhouse-clickhouse -n clickhouse 18123:8123
uv sync
uv run --env-file .env manage.py migrate uv run --env-file .env manage.py migrate
uv run --env-file .env manage.py createsuperuser --username stromenko_es --email estromenko@mail.ru uv run --env-file .env manage.py createsuperuser --username stromenko_es --email estromenko@mail.ru
uv run --env-file .env manage.py collectstatic
uv run --env-file .env manage.py runserver uv run --env-file .env manage.py runserver
uv run --env-file .env manage.py generate_recommended_vacancies uv run --env-file .env manage.py generate_recommended_vacancies
uv run --env-file .env manage.py collect_vacancies_from_telegram_messages uv run --env-file .env manage.py collect_vacancies_from_telegram_messages
uv run --env-file .env manage.py runbot uv run --env-file .env manage.py runbot
``` ```
Production port-forwards:
```bash
KUBECONFIG=production-kubeconfig.yaml kubectl port-forward svc/main-cluster-rw -n postgresql-cluster 5432
```

View File

@ -1,11 +1,4 @@
services: services:
qdrant:
image: qdrant/qdrant:latest
restart: always
ports:
- "127.0.0.1:6333:6333"
volumes:
- "/srv/vision-career/qdrant:/qdrant/storage"
postgres: postgres:
image: postgres:17-alpine3.20 image: postgres:17-alpine3.20
restart: always restart: always

View File

@ -10,7 +10,6 @@ dependencies = [
"gunicorn>=23.0.0", "gunicorn>=23.0.0",
"langchain>=0.3.27", "langchain>=0.3.27",
"langchain-openai>=0.3.35", "langchain-openai>=0.3.35",
"langchain-qdrant>=1.1.0",
"langgraph-checkpoint-postgres>=3.0.0", "langgraph-checkpoint-postgres>=3.0.0",
"psycopg[binary]>=3.2.12", "psycopg[binary]>=3.2.12",
"pydantic>=2.0", "pydantic>=2.0",

1256
uv.lock

File diff suppressed because it is too large Load Diff

View File

@ -1,16 +1,27 @@
from django.contrib import admin from django.contrib import admin
from vacancies.main import models from vacancies.main import models
@admin.register(models.Customer) @admin.register(models.Customer)
class CustomerAdmin(admin.ModelAdmin): class CustomerAdmin(admin.ModelAdmin):
pass pass
@admin.register(models.CustomerCV) @admin.register(models.CustomerCV)
class CustomerCVADMIN(admin.ModelAdmin): class CustomerCVAdmin(admin.ModelAdmin):
pass pass
@admin.register(models.RecommendedVacancy) @admin.register(models.RecommendedVacancy)
class RecommendedVacancyAdmin(admin.ModelAdmin): class RecommendedVacancyAdmin(admin.ModelAdmin):
pass pass
@admin.register(models.Vacancy)
class VacancyAdmin(admin.ModelAdmin):
pass
@admin.register(models.JobTitle)
class JobTitleAdmin(admin.ModelAdmin):
pass

View File

@ -1,8 +1,8 @@
import io import io
import asyncio
import os import os
import traceback import traceback
from asgiref.sync import sync_to_async
from langchain.agents import create_agent from langchain.agents import create_agent
from langchain_openai import ChatOpenAI from langchain_openai import ChatOpenAI
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
@ -22,14 +22,11 @@ from telegram.ext import (
filters, filters,
) )
from pydantic import BaseModel
from typing import Literal
from vacancies.conf.settings import DB_URI from vacancies.conf.settings import DB_URI
from vacancies.main.models import Customer, CustomerCV from vacancies.main.models import Customer, CustomerCV, JobTitle
from vacancies.main.vector_store import ( from vacancies.main.recommendations import get_next_vacancy
add_vectors,
batch_extract_features,
get_next_vacancy,
embed_features,
)
SYSTEM_PROMPT = """ SYSTEM_PROMPT = """
Ты карьерный копилот для ИТ. Ты можешь отвечать на любые вопросы по тематике карьеры. Ты карьерный копилот для ИТ. Ты можешь отвечать на любые вопросы по тематике карьеры.
@ -69,19 +66,17 @@ async def next_vacancy(update: Update, context: ContextTypes.DEFAULT_TYPE):
await context.bot.send_message(chat_id=update.effective_chat.id, text=message) await context.bot.send_message(chat_id=update.effective_chat.id, text=message)
return return
result = get_next_vacancy(customer_cv) vacancy = get_next_vacancy(customer_cv)
if not result: if not vacancy:
message = "Вакансии закончились, возвращайтесь позже!" message = "Вакансии закончились, возвращайтесь позже!"
await context.bot.send_message(chat_id=update.effective_chat.id, text=message) await context.bot.send_message(chat_id=update.effective_chat.id, text=message)
return return
recommendation, vacancy_content, link = result
await context.bot.send_message( await context.bot.send_message(
chat_id=update.effective_chat.id, chat_id=update.effective_chat.id,
text=vacancy_content, text=vacancy.content,
reply_markup=InlineKeyboardMarkup([[ reply_markup=InlineKeyboardMarkup([[
InlineKeyboardButton("Откликнуться", url=link), InlineKeyboardButton("Откликнуться", url=vacancy.link),
]]), ]]),
) )
@ -123,22 +118,34 @@ async def handle_document(update: Update, context: ContextTypes.DEFAULT_TYPE):
reader = PdfReader(buffer) reader = PdfReader(buffer)
resume = "\n".join(page.extract_text() for page in reader.pages) resume = "\n".join(page.extract_text() for page in reader.pages)
db_job_titles = await sync_to_async(list)(JobTitle.objects.values_list('title', flat=True))
job_title_map = await sync_to_async(dict)(JobTitle.objects.values_list('title', 'id'))
class Structure(BaseModel):
job_titles: list[Literal[tuple(db_job_titles)]]
min_salary_rub: int | None
max_salary_rub: int | None
openai_client = ChatOpenAI(model_name="gpt-5-mini", temperature=0, seed=42, top_p=1)
structured_llm = openai_client.with_structured_output(Structure)
prompt = f"""
Ты HR-классификатор. Ниже приведён список допустимых профессий.
Твоя задача выбрать наиболее подходящие по смыслу.
Качество классификации - самое важное.
Игнорируй орфографические и стилистические различия.
Резюме:
{resume}
"""
response = await structured_llm.ainvoke(prompt)
customer = await Customer.objects.aget(telegram_id=update.effective_user.id) customer = await Customer.objects.aget(telegram_id=update.effective_user.id)
customer_cv, _ = await CustomerCV.objects.aupdate_or_create(customer=customer, defaults=dict( customer_cv, _ = await CustomerCV.objects.aupdate_or_create(customer=customer, defaults=dict(
content=resume, content=resume,
min_salary_rub=response.min_salary_rub,
max_salary_rub=response.max_salary_rub,
)) ))
await customer_cv.job_titles.aset([job_title_map[job_title] for job_title in response.job_titles])
def upload_vectors():
features = batch_extract_features([customer_cv.content])[0]
add_vectors(
"cvs",
customer_cv.id,
features.model_dump(),
{'content': customer_cv.content, 'features_json': features.model_dump()},
embed_features(features.model_dump()),
)
await asyncio.to_thread(upload_vectors)
await context.bot.editMessageText("Отлично! Запомнил Ваше резюме.", update.effective_chat.id, message.id) await context.bot.editMessageText("Отлично! Запомнил Ваше резюме.", update.effective_chat.id, message.id)

View File

@ -1,18 +1,14 @@
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta
from itertools import batched from itertools import batched
from datetime import timedelta
from django.utils import timezone
from pydantic import BaseModel
from typing import Literal
from vacancies.main.models import Vacancy, JobTitle
from langchain_openai import ChatOpenAI
import clickhouse_connect import clickhouse_connect
from django.core.management import BaseCommand from django.core.management import BaseCommand
from django.conf import settings from django.conf import settings
from qdrant_client.models import OrderBy
from vacancies.main.vector_store import (
add_vectors,
batch_extract_features,
embed_features,
qdrant_client,
)
query = """ query = """
SELECT DISTINCT ON (message) id, chat_username, telegram_id, message, timestamp SELECT DISTINCT ON (message) id, chat_username, telegram_id, message, timestamp
@ -38,23 +34,49 @@ class Command(BaseCommand):
help = "Collect vacancies from telegram messages" help = "Collect vacancies from telegram messages"
def handle(self, *args, **options): def handle(self, *args, **options):
response = qdrant_client.scroll(collection_name="vacancies", limit=1, order_by=OrderBy(key="timestamp", direction="desc")) job_titles = JobTitle.objects.values_list('title', flat=True)
last_point_timestamp = datetime.now() - timedelta(days=30) job_title_map = dict(JobTitle.objects.values_list('title', 'id'))
if response[0]:
last_point_timestamp = response[0][0].payload["timestamp"] class Structure(BaseModel):
job_title: Literal[tuple(job_titles)]
min_salary_rub: int | None
max_salary_rub: int | None
openai_client = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal", temperature=0, seed=42, top_p=1)
structured_llm = openai_client.with_structured_output(Structure)
last_timestamp = timezone.now() - timedelta(days=30)
if last_vacancy := Vacancy.objects.order_by("-timestamp").first():
last_timestamp = last_vacancy.timestamp
clickhouse_client = clickhouse_connect.create_client(host=settings.CLICKHOUSE_HOST, port=settings.CLICKHOUSE_PORT) clickhouse_client = clickhouse_connect.create_client(host=settings.CLICKHOUSE_HOST, port=settings.CLICKHOUSE_PORT)
result_rows = clickhouse_client.query(query, parameters={"timestamp": last_point_timestamp}).result_rows result_rows = clickhouse_client.query(query, parameters={"timestamp": last_timestamp}).result_rows
for index, rows in enumerate(batched(result_rows, settings.COLLECT_VACANCIES_BATCH_SIZE)): batches = list(batched(result_rows, settings.COLLECT_VACANCIES_BATCH_SIZE))
vacancies_features = batch_extract_features([row[3] for row in rows]) for index, rows in enumerate(batches):
prompts = [
print(f"Processing {index+1}/{len(result_rows)//settings.COLLECT_VACANCIES_BATCH_SIZE}") f"""
with ThreadPoolExecutor() as pool: Ты HR-классификатор. Ниже приведён список допустимых профессий.
vacancies_vectors = pool.map(embed_features, [vacancy_features.model_dump() for vacancy_features in vacancies_features]) Твоя задача выбрать наиболее подходящую по смыслу.
Качество классификации - самое важное.
for row, vacancy_features, vacancy_vectors in zip(rows, vacancies_features, vacancies_vectors): Игнорируй орфографические и стилистические различия.
Вакансия:
{row[3]}
"""
for row in rows
]
responses = structured_llm.batch(prompts)
vacancies = []
for row, response in zip(rows, responses):
(id, chat_username, telegram_id, message, timestamp) = row (id, chat_username, telegram_id, message, timestamp) = row
link = f"https://t.me/{chat_username}/{telegram_id}" vacancies.append(Vacancy(
payload = {'content': message, 'features_json': vacancy_features.model_dump(), "link": link, "timestamp": timestamp} external_id=id,
add_vectors("vacancies", id, vacancy_features.model_dump(), payload, vacancy_vectors) job_title_id=job_title_map[response.job_title],
min_salary_rub=response.min_salary_rub,
max_salary_rub=response.max_salary_rub,
content=message,
timestamp=timezone.make_aware(timestamp),
link=f"https://t.me/{chat_username}/{telegram_id}",
))
Vacancy.objects.bulk_create(vacancies, ignore_conflicts=True)
print(f"Processed {index+1}/{len(batches)}")

View File

@ -3,7 +3,7 @@ import asyncio
from django.core.management import BaseCommand from django.core.management import BaseCommand
from vacancies.main.models import CustomerCV from vacancies.main.models import CustomerCV
from vacancies.main.bot import application from vacancies.main.bot import application
from vacancies.main.vector_store import get_next_vacancy from vacancies.main.recommendations import get_next_vacancy
from telegram import InlineKeyboardButton, InlineKeyboardMarkup from telegram import InlineKeyboardButton, InlineKeyboardMarkup
@ -15,16 +15,11 @@ class Command(BaseCommand):
async def ahandle(self, *args, **options): async def ahandle(self, *args, **options):
for customer_cv in CustomerCV.objects.all(): for customer_cv in CustomerCV.objects.all():
result = get_next_vacancy(customer_cv) if vacancy := get_next_vacancy(customer_cv):
if not result:
continue
recommendation, vacancy_content, link = result
await application.bot.send_message( await application.bot.send_message(
chat_id=recommendation.customer.chat_id, chat_id=customer_cv.customer.chat_id,
text=vacancy_content, text=vacancy.content,
reply_markup=InlineKeyboardMarkup([[ reply_markup=InlineKeyboardMarkup([[
InlineKeyboardButton("Откликнуться", url=link), InlineKeyboardButton("Откликнуться", url=vacancy.link),
]]), ]]),
) )

View File

@@ -0,0 +1,55 @@
# Generated by Django 5.2.7 on 2025-11-08 19:11
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    """Introduce JobTitle and Vacancy models; add salary fields to CustomerCV."""

    dependencies = [
        ('main', '0008_alter_recommendedvacancy_vacancy_id'),
    ]

    operations = [
        # Lookup table of allowed profession names (titles are unique).
        migrations.CreateModel(
            name='JobTitle',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('title', models.CharField(max_length=255, unique=True)),
            ],
        ),
        # Optional salary expectations extracted from the customer's CV.
        migrations.AddField(
            model_name='customercv',
            name='max_salary_rub',
            field=models.PositiveIntegerField(blank=True, default=None, null=True),
        ),
        migrations.AddField(
            model_name='customercv',
            name='min_salary_rub',
            field=models.PositiveIntegerField(blank=True, default=None, null=True),
        ),
        # NOTE(review): default=0 only backfills existing rows and is dropped
        # afterwards (preserve_default=False); pk 0 is unlikely to reference a
        # real JobTitle — confirm the table was empty when this migration ran.
        migrations.AddField(
            model_name='customercv',
            name='job_title',
            field=models.ForeignKey(default=0, on_delete=django.db.models.deletion.CASCADE, to='main.jobtitle'),
            preserve_default=False,
        ),
        # Vacancies collected from Telegram; external_id deduplicates messages.
        migrations.CreateModel(
            name='Vacancy',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('external_id', models.CharField(max_length=255, unique=True)),
                ('min_salary_rub', models.PositiveIntegerField(blank=True, default=None, null=True)),
                ('max_salary_rub', models.PositiveIntegerField(blank=True, default=None, null=True)),
                ('content', models.TextField()),
                ('timestamp', models.DateTimeField()),
                ('link', models.URLField()),
                ('job_title', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.jobtitle')),
            ],
        ),
        # Turn RecommendedVacancy.vacancy_id from a raw integer into a real FK.
        migrations.AlterField(
            model_name='recommendedvacancy',
            name='vacancy_id',
            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.vacancy'),
        ),
    ]

View File

@@ -0,0 +1,18 @@
# Generated by Django 5.2.7 on 2025-11-09 08:06
from django.db import migrations
class Migration(migrations.Migration):
    """Rename RecommendedVacancy.vacancy_id to vacancy (Django FK naming)."""

    dependencies = [
        ('main', '0009_jobtitle_customercv_max_salary_rub_and_more'),
    ]

    operations = [
        # The field became a ForeignKey in 0009; drop the redundant _id suffix
        # so the database column is vacancy_id rather than vacancy_id_id.
        migrations.RenameField(
            model_name='recommendedvacancy',
            old_name='vacancy_id',
            new_name='vacancy',
        ),
    ]

View File

@@ -0,0 +1,33 @@
# Generated by Django 5.2.7 on 2025-11-09 09:35
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    """Allow several job titles per CV and add reverse relations for recommendations."""

    dependencies = [
        ('main', '0010_rename_vacancy_id_recommendedvacancy_vacancy'),
    ]

    operations = [
        # Replace the single FK with a many-to-many: a CV may match several titles.
        migrations.RemoveField(
            model_name='customercv',
            name='job_title',
        ),
        # NOTE(review): related_name='vacancies' on CustomerCV.job_titles reads
        # oddly (JobTitle.vacancies yields CVs) — confirm the name is intended.
        migrations.AddField(
            model_name='customercv',
            name='job_titles',
            field=models.ManyToManyField(related_name='vacancies', to='main.jobtitle'),
        ),
        # Expose customer.recommended_vacancies / vacancy.recommended_vacancies.
        migrations.AlterField(
            model_name='recommendedvacancy',
            name='customer',
            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='recommended_vacancies', to='main.customer'),
        ),
        migrations.AlterField(
            model_name='recommendedvacancy',
            name='vacancy',
            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='recommended_vacancies', to='main.vacancy'),
        ),
    ]

View File

@ -1,5 +1,4 @@
from django.db import models from django.db import models
from pydantic import BaseModel
class Customer(models.Model): class Customer(models.Model):
@ -17,8 +16,18 @@ class Customer(models.Model):
db_table = "customers" db_table = "customers"
class JobTitle(models.Model):
title = models.CharField(max_length=255, unique=True)
def __str__(self):
return self.title
class CustomerCV(models.Model): class CustomerCV(models.Model):
customer = models.OneToOneField(Customer, on_delete=models.CASCADE) customer = models.OneToOneField(Customer, on_delete=models.CASCADE)
job_titles = models.ManyToManyField(JobTitle, related_name="vacancies")
min_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
max_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
content = models.TextField() content = models.TextField()
created_at = models.DateTimeField(auto_now_add=True) created_at = models.DateTimeField(auto_now_add=True)
@ -31,9 +40,22 @@ class CustomerCV(models.Model):
db_table = "customer_vcs" db_table = "customer_vcs"
class Vacancy(models.Model):
job_title = models.ForeignKey(JobTitle, on_delete=models.CASCADE)
external_id = models.CharField(max_length=255, unique=True)
min_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
max_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
content = models.TextField()
timestamp = models.DateTimeField()
link = models.URLField()
def __str__(self):
return self.job_title.title
class RecommendedVacancy(models.Model): class RecommendedVacancy(models.Model):
customer = models.ForeignKey(Customer, on_delete=models.CASCADE) customer = models.ForeignKey(Customer, on_delete=models.CASCADE, related_name="recommended_vacancies")
vacancy_id = models.BigIntegerField() vacancy = models.ForeignKey(Vacancy, on_delete=models.CASCADE, related_name="recommended_vacancies")
created_at = models.DateTimeField(auto_now_add=True) created_at = models.DateTimeField(auto_now_add=True)
objects = models.Manager() objects = models.Manager()
@ -44,19 +66,3 @@ class RecommendedVacancy(models.Model):
class Meta: class Meta:
verbose_name_plural = 'Recommended Vacancies' verbose_name_plural = 'Recommended Vacancies'
db_table = "recommended_vacancies" db_table = "recommended_vacancies"
class VacancyFeatures(BaseModel):
job_title: str | None = None # Должность
employment_type: str | None = None # Тип занятости
work_format: str | None = None # Формат работы
experience: str | None = None # Опыт работы
position_level: str | None = None # Уровень позиции
industry: str | None = None # Отрасль / Сфера деятельности
tech_stack: list[str] | None = None # Технологический стек / Ключевые навыки
location: str | None = None # География
salary_range: str | None = None # Зарплатные ожидания / вилка
languages: list[str] | None = None # Языки
education: str | None = None # Образование
schedule: str | None = None # График работы
additional_requirements: list[str] | None = None # Дополнительные предпочтения / требования

View File

@@ -0,0 +1,14 @@
from vacancies.main.models import Vacancy
def get_next_vacancy(customer_cv):
    """Return the next vacancy to recommend for *customer_cv*, or None.

    Picks the first vacancy whose job title matches one of the CV's job
    titles and that has not yet been recommended to this customer, records
    it as recommended, and returns it.

    :param customer_cv: CustomerCV instance with ``customer``, ``job_titles``
        and optional ``min_salary_rub``.
    :returns: a ``Vacancy`` instance, or ``None`` when nothing is left.
    """
    candidates = Vacancy.objects.exclude(
        id__in=customer_cv.customer.recommended_vacancies.values_list("vacancy_id", flat=True),
    ).filter(
        job_title__title__in=customer_cv.job_titles.values_list("title", flat=True),
    )
    # BUG FIX: min_salary_rub is nullable; filtering with __gt=None raises
    # ValueError in the Django ORM, so apply the salary floor only when set.
    if customer_cv.min_salary_rub is not None:
        # NOTE(review): __gt also drops vacancies paying exactly the CV minimum
        # and vacancies with no published salary — confirm this is intended.
        candidates = candidates.filter(min_salary_rub__gt=customer_cv.min_salary_rub)
    vacancy = candidates.first()
    if vacancy:
        customer_cv.customer.recommended_vacancies.create(vacancy=vacancy)
    return vacancy

View File

@ -1,171 +0,0 @@
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from qdrant_client import QdrantClient, models
from qdrant_client.models import Filter, HasIdCondition
from vacancies.conf.settings import QDRANT_URL
from vacancies.main.models import RecommendedVacancy, VacancyFeatures
# Shared Qdrant client; QDRANT_URL comes from project settings.
qdrant_client = QdrantClient(url=QDRANT_URL)

# One named vector per extracted CV/vacancy feature.
FEATURE_NAMES = [
    "job_title", "employment_type", "work_format", "experience", "position_level", "industry", "tech_stack",
    "location", "salary_range", "languages", "education", "schedule", "additional_requirements"
]

# Relative importance when summing per-feature similarity scores; any feature
# absent from this dict implicitly weighs 1.
weights = {
    "job_title": 70,
    "tech_stack": 10,
    "salary_range": 10,
}

# One cosine-similarity vector slot per feature; size 3072 matches the output
# dimension of text-embedding-3-large used below.
vectors_config = {
    name: models.VectorParams(size=3072, distance=models.Distance.COSINE) for name in FEATURE_NAMES
}

# NOTE(review): collections are created at import time, so importing this
# module requires a reachable Qdrant instance.
if not qdrant_client.collection_exists("vacancies"):
    qdrant_client.create_collection(
        collection_name="vacancies",
        vectors_config=vectors_config,
    )
    # Payload index used to scroll vacancies ordered by message timestamp.
    qdrant_client.create_payload_index(
        collection_name="vacancies",
        field_name="timestamp",
        field_schema="datetime",
    )
if not qdrant_client.collection_exists("cvs"):
    qdrant_client.create_collection(
        collection_name="cvs",
        vectors_config=vectors_config,
    )

# Embedding model shared by all feature texts.
embedding = OpenAIEmbeddings(model="text-embedding-3-large")
def _prepare_texts(features):
    """Render every known feature as a plain string keyed by feature name.

    List values are joined with spaces, scalars are stringified, and falsy or
    missing values become "" — so the result always has one entry per name in
    FEATURE_NAMES.
    """
    def _as_text(value):
        # Falsy covers None, "", and the empty list in one branch.
        if not value:
            return ""
        if isinstance(value, list):
            return " ".join(value)
        return str(value)

    return {name: _as_text(features.get(name)) for name in FEATURE_NAMES}
def embed_features(features):
    """Embed each non-empty feature's text and return {feature_name: vector}."""
    # Drop falsy values up front; _prepare_texts renders the rest as "".
    non_empty = {key: value for key, value in features.items() if value}
    texts_by_name = _prepare_texts(non_empty)
    embedded = embedding.embed_documents(list(texts_by_name.values()))
    return dict(zip(texts_by_name.keys(), embedded))
def add_vectors(collection_name: str, _id: int, features: dict, payload: dict, vectors):
    """Upsert a point into *collection_name* unless a near-duplicate exists.

    Queries the target collection per named vector, accumulates weighted
    similarity scores per existing point, and skips the upsert when the best
    weighted total exceeds the threshold of 80.

    :param collection_name: Qdrant collection to deduplicate against and write to.
    :param _id: point id for the upsert.
    :param features: raw feature dict (unused here; kept for interface
        compatibility with existing callers).
    :param payload: payload stored with the point.
    :param vectors: mapping of feature name -> embedding vector.
    """
    max_similarities = {}
    for name, vec in vectors.items():
        # BUG FIX: deduplication must query the target collection, not a
        # hardcoded "vacancies" — otherwise CV uploads were deduplicated
        # against the vacancies collection and could be silently dropped.
        results = qdrant_client.query_points(collection_name=collection_name, query=vec, using=name, limit=100)
        for res in results.points:
            max_similarities.setdefault(res.id, {})
            max_similarities[res.id][name] = res.score
    scored = []
    for vid, feature_sims in max_similarities.items():
        total = sum(feature_sims[feature] * weights.get(feature, 1) for feature in feature_sims)
        scored.append({"id": vid, "score": total})
    scored.sort(key=lambda x: x["score"], reverse=True)
    if scored and scored[0]["score"] > 80:  # duplicate threshold on weighted total
        return
    qdrant_client.upsert(
        collection_name=collection_name,
        points=[models.PointStruct(id=_id, vector=vectors, payload=payload)]
    )
def search_similarities(query_filter: Filter, cv_id: int):
    """Rank vacancies by weighted per-feature similarity to a stored CV.

    Retrieves the CV point (with vectors) from the "cvs" collection, queries
    the "vacancies" collection once per feature vector, sums the per-feature
    scores weighted by ``weights``, and returns ``(id, content, link)`` of the
    best match — or ``None`` when ``query_filter`` excludes every vacancy.
    """
    cv = qdrant_client.retrieve(collection_name="cvs", ids=[cv_id], with_vectors=True)[0]
    max_similarities, vacancies_content = {}, {}
    for name, vec in cv.vector.items():
        results = qdrant_client.query_points(
            collection_name="vacancies",
            query=vec,
            using=name,
            limit=100000,
            with_payload=True,
            query_filter=query_filter,
        )
        for res in results.points:
            max_similarities.setdefault(res.id, {})
            vacancies_content.setdefault(res.id, {})
            max_similarities[res.id][name] = res.score
            vacancies_content[res.id]["content"] = res.payload["content"]
            vacancies_content[res.id]["features_json"] = res.payload["features_json"]
            vacancies_content[res.id]["link"] = res.payload["link"]
    scored = []
    for vid, feature_sims in max_similarities.items():
        total = sum(feature_sims[feature] * weights.get(feature, 1) for feature in feature_sims)
        scored.append({
            "id": vid,
            "score": total,
            "content": vacancies_content[vid]["content"],
            "features_json": vacancies_content[vid]["features_json"],
            "link": vacancies_content[vid]["link"],
            "sims": feature_sims,
        })
    # BUG FIX: indexing scored[0] unconditionally raised IndexError when the
    # filter excluded every vacancy; callers test `if not result`, so return
    # None in that case instead.
    if not scored:
        return None
    scored.sort(key=lambda x: x["score"], reverse=True)
    return scored[0]["id"], scored[0]["content"], scored[0]["link"]
def batch_extract_features(contents: list[str]) -> list[VacancyFeatures]:
    """Extract structured VacancyFeatures from each vacancy text via the LLM.

    Builds one extraction prompt per input text and runs them as a single
    structured-output batch; results are returned in input order.
    """
    llm = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal", temperature=0, seed=42, top_p=1)
    extractor = llm.with_structured_output(VacancyFeatures)
    prompts = [
        f"""
Extract the following features from the job vacancy description. If a feature is not mentioned, set it to null.
Features:
- job_title: Должность (e.g., DevOps, Python программист)
- employment_type: Тип занятости (e.g., Полная занятость, Частичная)
- work_format: Формат работы (e.g., Офис, Удалённо, Гибрид)
- experience: Опыт работы (e.g., 3-5 лет, Нет опыта)
- position_level: Уровень позиции (e.g., Junior, Senior)
- industry: Отрасль / Сфера деятельности (e.g., IT, Финансы)
- tech_stack: Технологический стек / Ключевые навыки (list of strings)
- location: География (e.g., Москва, Россия)
- salary_range: Зарплатные ожидания / вилка (e.g., 100000-200000 руб)
- languages: Языки (list of strings, e.g., ["Русский", "Английский"])
- education: Образование (e.g., Высшее, Среднее специальное)
- schedule: График работы (e.g., Полный день, Сменный)
- additional_requirements: Дополнительные предпочтения / требования (list of strings)
Vacancy content:
{content}
"""
        for content in contents
    ]
    return extractor.batch(prompts)
def get_next_vacancy(customer_cv):
    """Find the best not-yet-recommended vacancy for a CV and record it.

    Excludes every vacancy already recommended to the CV's customer, runs the
    weighted similarity search against the CV's stored vectors, persists a new
    RecommendedVacancy for the winner, and returns
    ``(recommendation, vacancy_content, link)`` — or ``None`` when the search
    yields nothing.
    """
    customer = customer_cv.customer
    already_recommended = RecommendedVacancy.objects.filter(
        customer=customer,
    ).values_list('vacancy_id', flat=True)
    excluded = Filter(must_not=[HasIdCondition(has_id=already_recommended)])

    match = search_similarities(excluded, customer_cv.id)
    if not match:
        return None

    vacancy_id, vacancy_content, link = match
    recommendation = RecommendedVacancy.objects.create(
        customer=customer,
        vacancy_id=vacancy_id,
    )
    return recommendation, vacancy_content, link