Compare commits

...

20 Commits

Author SHA1 Message Date
d3d4766abb Add context from vector search to RAG pipeline 2025-12-02 23:38:08 +03:00
3d9e1f2239 Add sync_clickhouse_and_qdrant command 2025-12-02 22:55:58 +03:00
af7dbf7246 Add missing company field to external vacancy model 2025-12-02 20:39:01 +03:00
5e7b44f611 Make hh parser models editable in admin 2025-12-02 20:19:10 +03:00
9fd6c9ae97 Improve all system prompts 2025-12-02 20:18:41 +03:00
b4310bba55 Sync vacancies from hh parser 2025-12-01 22:28:10 +03:00
70366f8f15 Add missing migration for main app 2025-12-01 19:26:09 +03:00
2cd2862db1 Improve admin for hh parser models 2025-12-01 19:25:43 +03:00
dab73dc4c1 Add external database with parser vacancies 2025-11-30 14:30:34 +03:00
c053a5676d Turn on web search for RAG 2025-11-30 13:56:30 +03:00
80ddee9378 Use openrouter instead of vanilla openai 2025-11-30 13:56:11 +03:00
5d80864674 Extend filter for get_next_vacancy function 2025-11-29 16:54:02 +03:00
b224ef29d3 Improve vacancy rendering in messages 2025-11-09 23:41:52 +03:00
6ff25281e2 Order recommendations by timestamp 2025-11-09 22:52:05 +03:00
41b3a250a7 Clean invalid vacancies from clickhouse query 2025-11-09 15:35:34 +03:00
96d8621d49 Remove blocking call in bot method 2025-11-09 15:30:32 +03:00
b23502ee6a Improve quality for classification 2025-11-09 15:28:51 +03:00
55ee3b7ba4 Implement simplified recommendations 2025-11-09 13:04:31 +03:00
b31ef06ec0 Update readme 2025-11-08 19:19:26 +03:00
750683fb5c Fix bot cv uploading 2025-11-08 16:32:20 +03:00
26 changed files with 1479 additions and 918 deletions

View File

@ -1,24 +1,40 @@
# vision-career-backend
# vision-career
Sample `.env`:
```dotenv
DEEPINFRA_API_TOKEN=your-token-here
OPENAI_API_KEY=your-token-here
OPENAI_PROXY=http://user:password@host:port
BOT_TOKEN=your-token-here
SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt
SECRET_KEY=secret
DEBUG=true
```
Commands:
```bash
docker compose up -d
KUBECONFIG=clickhouse-kubeconfig.yaml kubectl port-forward svc/clickhouse-clickhouse -n clickhouse 18123:8123
uv sync
uv run --env-file .env manage.py migrate
uv run --env-file .env manage.py createsuperuser --username stromenko_es --email estromenko@mail.ru
uv run --env-file .env manage.py collectstatic
uv run --env-file .env manage.py runserver
uv run --env-file .env manage.py generate_recommended_vacancies
uv run --env-file .env manage.py collect_vacancies_from_telegram_messages
uv run --env-file .env manage.py runbot
```
Production port-forwards:
```bash
KUBECONFIG=production-kubeconfig.yaml kubectl port-forward svc/main-cluster-rw -n postgresql-cluster 5432
```
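
As a usage sketch, the sync_clickhouse_and_qdrant command introduced in this compare would be invoked the same way as the management commands listed above (invocation pattern assumed from those examples):
```bash
uv run --env-file .env manage.py sync_clickhouse_and_qdrant
```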

View File

@ -1,11 +1,4 @@
services:
  qdrant:
    image: qdrant/qdrant:latest
    restart: always
    ports:
      - "127.0.0.1:6333:6333"
    volumes:
      - "/srv/vision-career/qdrant:/qdrant/storage"
  postgres:
    image: postgres:17-alpine3.20
    restart: always
@ -16,3 +9,10 @@ services:
- "127.0.0.1:5432:5432"
volumes:
- "/srv/vision-career/postgres:/var/lib/postgresql/data"
qdrant:
image: qdrant/qdrant:latest
restart: always
ports:
- "127.0.0.1:6333:6333"
volumes:
- "/srv/vision-career/qdrant:/qdrant/storage"

View File

@ -7,15 +7,16 @@ requires-python = ">=3.13"
dependencies = [
"clickhouse-connect>=0.9.2",
"django>=5.2.7",
"flashrank>=0.2.10",
"gunicorn>=23.0.0",
"langchain>=0.3.27",
"langchain-openai>=0.3.35",
"langchain-qdrant>=1.1.0",
"langgraph-checkpoint-postgres>=3.0.0",
"psycopg[binary]>=3.2.12",
"pydantic>=2.0",
"pypdf>=6.1.2",
"python-telegram-bot>=22.5",
"qdrant-client>=1.16.1",
"sentry-sdk>=2.43.0",
"whitenoise>=6.11.0",
]

1497
uv.lock

File diff suppressed because it is too large

View File

@ -43,6 +43,7 @@ INSTALLED_APPS = [
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'vacancies.main',
    'vacancies.hh_parser',
]
MIDDLEWARE = [
@ -89,8 +90,19 @@ DATABASES = {
"PORT": os.getenv("PG_PORT", "5432"),
"CONN_HEALTH_CHECKS": True,
},
"hh_parser": {
"ENGINE": "django.db.backends.postgresql",
"NAME": os.getenv("HH_PARSER_PG_NAME", "postgres"),
"USER": os.getenv("HH_PARSER_PG_USER", "postgres"),
"PASSWORD": os.getenv("HH_PARSER_PG_PASSWORD", "postgres"),
"HOST": os.getenv("HH_PARSER_PG_HOST", "localhost"),
"PORT": os.getenv("HH_PARSER_PG_PORT", "5432"),
"CONN_HEALTH_CHECKS": True,
},
}
DATABASE_ROUTERS = ["vacancies.hh_parser.routers.AppRouter"]
DB_URI = f"postgres://{DATABASES['default']['USER']}:{quote(DATABASES['default']['PASSWORD'])}@{DATABASES['default']['HOST']}:{DATABASES['default']['PORT']}/{DATABASES['default']['NAME']}?sslmode=disable"
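
For reference, DB_URI built here feeds the LangGraph Postgres checkpointer used by the bot in this same changeset; a minimal sketch of that wiring (the wrapper function name is illustrative, the import path is the one used in bot.py):
```python
# Sketch: DB_URI as consumed by the bot's checkpointer (see the bot.py hunk further down).
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
from vacancies.conf.settings import DB_URI

async def run_with_checkpointer():
    async with AsyncPostgresSaver.from_conn_string(DB_URI) as checkpointer:
        ...  # pass `checkpointer` to create_agent(...), as bot.py does
```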

View File

View File

@ -0,0 +1,9 @@
from django.contrib import admin

from vacancies.hh_parser import models


@admin.register(models.Vacancy)
class VacancyAdmin(admin.ModelAdmin):
    list_filter = ("source", "created_at")
    search_fields = ("title",)
    list_display = ("company", "title", "source", "link", "created_at")

View File

@ -0,0 +1,6 @@
from django.apps import AppConfig


class HHParserConfig(AppConfig):
    default_auto_field = 'django.db.models.BigAutoField'
    name = 'vacancies.hh_parser'

View File

@ -0,0 +1,32 @@
from django.db import models


class Vacancy(models.Model):
    id = models.CharField(max_length=255, primary_key=True)
    title = models.CharField(max_length=255, null=True, blank=True)
    description = models.TextField(null=True, blank=True)
    min_payment = models.IntegerField(null=True, blank=True)
    max_payment = models.IntegerField(null=True, blank=True)
    payment_currency = models.CharField(max_length=255, null=True, blank=True)
    payment_type = models.CharField(max_length=255, null=True, blank=True)
    payment_period = models.CharField(max_length=255, null=True, blank=True)
    work_experience = models.CharField(max_length=255, null=True, blank=True)
    type_of_employment = models.CharField(max_length=255, null=True, blank=True)
    work_schedule = models.CharField(max_length=255, null=True, blank=True)
    work_format = models.CharField(max_length=255, null=True, blank=True)
    link = models.CharField(max_length=255, null=True, blank=True)
    address = models.CharField(max_length=255, null=True, blank=True)
    created_at = models.DateTimeField(null=True, blank=True)
    company = models.CharField(max_length=255, null=True, blank=True)
    processed = models.BooleanField()
    source = models.CharField()

    objects = models.Manager()

    def __str__(self):
        return f'{self.source}: {self.company} -> {self.id} {self.title}'

    class Meta:
        managed = False
        db_table = "vacancies"
        verbose_name_plural = 'Vacancies'

View File

@ -0,0 +1,23 @@
class AppRouter:
    route_app_labels = {"hh_parser"}
    db_name = "hh_parser"

    def db_for_read(self, model, **hints):
        if model._meta.app_label in self.route_app_labels:
            return self.db_name
        return None

    def db_for_write(self, model, **hints):
        if model._meta.app_label in self.route_app_labels:
            return self.db_name
        return None

    def allow_relation(self, obj1, obj2, **hints):
        if obj1._meta.app_label in self.route_app_labels and obj2._meta.app_label in self.route_app_labels:
            return True
        return None

    def allow_migrate(self, db, app_label, model_name=None, **hints):
        if app_label in self.route_app_labels:
            return db == self.db_name
        return None
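
A small sketch of what this router gives you under standard Django multi-database semantics; the external model import matches the one used elsewhere in this diff, and the return values follow from the methods above:
```python
# Sketch of AppRouter behaviour (assumes Django's standard router contract).
from vacancies.hh_parser.models import Vacancy as ExternalVacancy
from vacancies.hh_parser.routers import AppRouter

router = AppRouter()
router.db_for_read(ExternalVacancy)           # "hh_parser": reads on parser models hit the external database
router.allow_migrate("default", "hh_parser")  # False: parser tables are never migrated into the default database
router.allow_migrate("hh_parser", "main")     # None: other apps fall through to the default routing
```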

View File

@ -1,16 +1,27 @@
from django.contrib import admin
from vacancies.main import models
@admin.register(models.Customer)
class CustomerAdmin(admin.ModelAdmin):
pass
@admin.register(models.CustomerCV)
class CustomerCVADMIN(admin.ModelAdmin):
class CustomerCVAdmin(admin.ModelAdmin):
pass
@admin.register(models.RecommendedVacancy)
class RecommendedVacancyAdmin(admin.ModelAdmin):
pass
@admin.register(models.Vacancy)
class VacancyAdmin(admin.ModelAdmin):
pass
@admin.register(models.JobTitle)
class JobTitleAdmin(admin.ModelAdmin):
pass

View File

@ -1,10 +1,14 @@
import asyncio
import io
import os
import traceback
from typing import Literal
from asgiref.sync import sync_to_async
from langchain.agents import create_agent
from langchain_openai import ChatOpenAI
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
from pydantic import BaseModel
from pypdf import PdfReader
from telegram import (
InlineKeyboardButton,
@ -22,21 +26,34 @@ from telegram.ext import (
)
from vacancies.conf.settings import DB_URI
from vacancies.main.models import Customer, CustomerCV
from vacancies.main.vector_store import (
add_vectors,
batch_extract_features,
get_next_vacancy,
embed_features,
)
from vacancies.main import prompts
from vacancies.main.models import Customer, CustomerCV, JobTitle
from vacancies.main.recommendations import get_next_vacancy
from django.conf import settings
from qdrant_client import AsyncQdrantClient
from openai import AsyncOpenAI
SYSTEM_PROMPT = """
Ты карьерный копилот для ИТ. Ты можешь отвечать на любые вопросы по тематике карьеры.
У тебя есть доступ к резюме пользователя при необходимости.
Пиши кратко (до 5-6 строк, буллеты приветствуются).
После полезного ответа предложи что-нибудь, чем ты можешь помочь еще.
Отвечай простым текстом, не используй форматирование markdown.
"""
qdrant_client = AsyncQdrantClient(url=settings.QDRANT_URL)
openai_client = AsyncOpenAI(base_url="https://openrouter.ai/api/v1")
async def get_relevant_messages(query: str):
"""Получает релевантные сообщения по запросу пользователя."""
embedding = await openai_client.embeddings.create(
model="qwen/qwen3-embedding-8b",
input=query,
encoding_format="float",
)
response = await qdrant_client.query_points(
collection_name="messages",
query=embedding.data[0].embedding,
limit=20,
)
messages = [hit.payload["message"] for hit in response.points]
return "\n\n".join(messages)
async def get_user_resume(user_id: int):
@ -60,7 +77,7 @@ async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
async def next_vacancy(update: Update, context: ContextTypes.DEFAULT_TYPE):
await context.bot.send_message(update.effective_chat.id, "📝 Обрабатываю твой запрос. Пожалуйста, подождите...")
await context.bot.send_message(update.effective_chat.id, " Обрабатываю твой запрос. Пожалуйста, подождите...")
customer_cv = await CustomerCV.objects.filter(customer__telegram_id=update.effective_user.id).afirst()
if not customer_cv:
@ -68,33 +85,36 @@ async def next_vacancy(update: Update, context: ContextTypes.DEFAULT_TYPE):
await context.bot.send_message(chat_id=update.effective_chat.id, text=message)
return
result = get_next_vacancy(customer_cv)
if not result:
vacancy = await asyncio.to_thread(get_next_vacancy, customer_cv)
if not vacancy:
message = "Вакансии закончились, возвращайтесь позже!"
await context.bot.send_message(chat_id=update.effective_chat.id, text=message)
return
recommendation, vacancy_content, link = result
await context.bot.send_message(
chat_id=update.effective_chat.id,
text=vacancy_content,
parse_mode="Markdown",
text=vacancy.get_formatted_response(),
reply_markup=InlineKeyboardMarkup([[
InlineKeyboardButton("Откликнуться", url=link),
InlineKeyboardButton("Откликнуться", url=vacancy.link),
]]),
)
async def prompt(update: Update, context: ContextTypes.DEFAULT_TYPE):
async with AsyncPostgresSaver.from_conn_string(DB_URI) as checkpointer:
chat_model = ChatOpenAI(
model_name="openai/gpt-5-mini:online",
openai_api_base="https://openrouter.ai/api/v1",
)
agent = create_agent(
model=ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal"),
tools=[get_user_resume],
system_prompt=SYSTEM_PROMPT,
model=chat_model,
tools=[get_user_resume, get_relevant_messages],
system_prompt=prompts.BOT_SYSTEM_PROMPT,
checkpointer=checkpointer,
)
message = await context.bot.send_message(update.effective_chat.id, "📝 Обрабатываю твой запрос. Пожалуйста, подождите...")
message = await context.bot.send_message(update.effective_chat.id, " Обрабатываю твой запрос. Пожалуйста, подождите...")
response = await agent.ainvoke(
input={"messages": [{"role": "user", "content": f'user_id = {update.effective_user.id}\n{update.message.text}'}]},
@ -110,7 +130,7 @@ async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> N
async def handle_document(update: Update, context: ContextTypes.DEFAULT_TYPE):
message = await context.bot.send_message(update.effective_chat.id, "📝 Обрабатываю твой запрос. Пожалуйста, подождите...")
message = await context.bot.send_message(update.effective_chat.id, " Обрабатываю твой запрос. Пожалуйста, подождите...")
if not update.message.document:
await context.bot.send_message(chat_id=update.effective_chat.id, text="Не удалось прочитать информацию из файла! Попробуйте другой формат.")
@ -122,17 +142,27 @@ async def handle_document(update: Update, context: ContextTypes.DEFAULT_TYPE):
reader = PdfReader(buffer)
resume = "\n".join(page.extract_text() for page in reader.pages)
db_job_titles = await sync_to_async(list)(JobTitle.objects.values_list('title', flat=True))
job_title_map = await sync_to_async(dict)(JobTitle.objects.values_list('title', 'id'))
class Structure(BaseModel):
job_titles: list[Literal[tuple(db_job_titles)]]
min_salary_rub: int | None
max_salary_rub: int | None
openai_client = ChatOpenAI(model_name="gpt-5-mini", temperature=0, seed=42, top_p=1)
structured_llm = openai_client.with_structured_output(Structure)
prompt = f'{prompts.STRUCTURED_OUTPUT_PROMPT} {resume}'
response = await structured_llm.ainvoke(prompt)
customer = await Customer.objects.aget(telegram_id=update.effective_user.id)
customer_cv, _ = await CustomerCV.objects.aupdate_or_create(customer=customer, defaults=dict(
content=resume,
min_salary_rub=response.min_salary_rub,
max_salary_rub=response.max_salary_rub,
))
features = batch_extract_features(customer_cv.content)[0]
add_vectors(
"cvs",
customer_cv.id,
embed_features(features.model_dump())[0],
{'content': customer_cv.content, 'features_json': features.model_dump()},
)
await customer_cv.job_titles.aset([job_title_map[job_title] for job_title in response.job_titles])
await context.bot.editMessageText("Отлично! Запомнил Ваше резюме.", update.effective_chat.id, message.id)

View File

@ -0,0 +1,45 @@
from django.core.management import BaseCommand
from django.utils import timezone
from vacancies.hh_parser.models import Vacancy as ExternalVacancy
from vacancies.main.models import Vacancy, JobTitle
from flashrank import Ranker, RerankRequest
import re

tags_regex = re.compile('<.*?>')
reranker = Ranker("ms-marco-TinyBERT-L-2-v2")


class Command(BaseCommand):
    help = "Collect vacancies from hh.ru parser"

    def _remove_tags(self, text):
        return re.sub(tags_regex, "", text)

    def handle(self, *args, **options):
        job_titles = list(JobTitle.objects.values("id", "title"))
        passages = [{"text": job_title["title"], **job_title} for job_title in job_titles]
        queryset = ExternalVacancy.objects.filter(title__isnull=False, description__isnull=False)
        total_vacancies = queryset.count()
        for index, vacancy in enumerate(queryset):
            results = reranker.rerank(RerankRequest(query=vacancy.title, passages=passages))
            ordered_results = sorted(results, key=lambda i: i["score"], reverse=True)
            job_title_id = ordered_results[0]["id"]
            vacancy, created = Vacancy.objects.get_or_create(
                external_id=vacancy.id,
                defaults=dict(
                    job_title_id=job_title_id,
                    min_salary_rub=vacancy.min_payment,
                    max_salary_rub=vacancy.max_payment,
                    company_name=vacancy.company,
                    requirements=self._remove_tags(vacancy.description),
                    content=self._remove_tags(vacancy.description),
                    timestamp=timezone.make_aware(vacancy.created_at),
                    link=vacancy.link,
                ),
            )
            print(f"{index+1}/{total_vacancies} Vacancy: {vacancy}, created: {created}")

View File

@ -1,18 +1,16 @@
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta
from datetime import timedelta
from itertools import batched
from typing import Literal
import clickhouse_connect
from django.core.management import BaseCommand
from django.conf import settings
from qdrant_client.models import OrderBy
from django.core.management import BaseCommand
from django.utils import timezone
from langchain_openai import ChatOpenAI
from pydantic import BaseModel
from vacancies.main.vector_store import (
add_vectors,
batch_extract_features,
embed_features,
qdrant_client,
)
from vacancies.main import prompts
from vacancies.main.models import JobTitle, Vacancy
query = """
SELECT DISTINCT ON (message) id, chat_username, telegram_id, message, timestamp
@ -28,7 +26,8 @@ WHERE timestamp >= %(timestamp)s
'заниматься', 'формат', 'занятость', 'вилка', 'должност', 'контакт'
]) >= 5
AND arrayCount(x -> position(lower(message), x) > 0, [
'о себе', 'обо мне', 'умею', '#ищу', '#резюме', 'университет', 'колледж'
'о себе', 'обо мне', 'умею', '#ищу', '#резюме', 'университет', 'колледж',
'не будет опубликовано'
]) = 0
ORDER BY timestamp ASC
"""
@ -38,23 +37,49 @@ class Command(BaseCommand):
help = "Collect vacancies from telegram messages"
def handle(self, *args, **options):
response = qdrant_client.scroll(collection_name="vacancies", limit=1, order_by=OrderBy(key="timestamp", direction="desc"))
last_point_timestamp = datetime.now() - timedelta(days=30)
if response[0]:
last_point_timestamp = response[0][0].payload["timestamp"]
job_titles = JobTitle.objects.values_list('title', flat=True)
job_title_map = dict(JobTitle.objects.values_list('title', 'id'))
class Structure(BaseModel):
job_title: Literal[tuple(job_titles)]
min_salary_rub: int | None
max_salary_rub: int | None
company_name: str
requirements: str
openai_client = ChatOpenAI(
model_name="openai/gpt-5-mini",
openai_api_base="https://openrouter.ai/api/v1",
temperature=0,
seed=42,
top_p=1,
)
structured_llm = openai_client.with_structured_output(Structure)
last_timestamp = timezone.now() - timedelta(days=30)
if last_vacancy := Vacancy.objects.order_by("-timestamp").first():
last_timestamp = last_vacancy.timestamp
clickhouse_client = clickhouse_connect.create_client(host=settings.CLICKHOUSE_HOST, port=settings.CLICKHOUSE_PORT)
result_rows = clickhouse_client.query(query, parameters={"timestamp": last_point_timestamp}).result_rows
result_rows = clickhouse_client.query(query, parameters={"timestamp": last_timestamp}).result_rows
for index, rows in enumerate(batched(result_rows, settings.COLLECT_VACANCIES_BATCH_SIZE)):
vacancies_features = batch_extract_features([row[3] for row in rows])
print(f"Processing {index+1}/{len(result_rows)//settings.COLLECT_VACANCIES_BATCH_SIZE}")
with ThreadPoolExecutor() as pool:
vacancies_vectors = pool.map(embed_features, [vacancy_features.model_dump() for vacancy_features in vacancies_features])
for row, vacancy_features, vacancy_vectors in zip(rows, vacancies_features, vacancies_vectors):
batches = list(batched(result_rows, settings.COLLECT_VACANCIES_BATCH_SIZE))
for index, rows in enumerate(batches):
prompts = [f"{prompts.STRUCTURED_OUTPUT_PROMPT} {row[3]}" for row in rows]
responses = structured_llm.batch(prompts)
vacancies = []
for row, response in zip(rows, responses):
(id, chat_username, telegram_id, message, timestamp) = row
link = f"https://t.me/{chat_username}/{telegram_id}"
payload = {'content': message, 'features_json': vacancy_features.model_dump(), "link": link, "timestamp": timestamp}
add_vectors("vacancies", id, vacancy_features.model_dump(), payload, vacancy_vectors)
vacancies.append(Vacancy(
external_id=id,
job_title_id=job_title_map[response.job_title],
min_salary_rub=response.min_salary_rub,
max_salary_rub=response.max_salary_rub,
company_name=response.company_name,
requirements=response.requirements,
content=message,
timestamp=timezone.make_aware(timestamp),
link=f"https://t.me/{chat_username}/{telegram_id}",
))
Vacancy.objects.bulk_create(vacancies, ignore_conflicts=True)
print(f"Processed {index+1}/{len(batches)}")

View File

@ -1,11 +1,12 @@
import asyncio
from django.core.management import BaseCommand
from vacancies.main.models import CustomerCV
from vacancies.main.bot import application
from vacancies.main.vector_store import get_next_vacancy
from telegram import InlineKeyboardButton, InlineKeyboardMarkup
from vacancies.main.bot import application
from vacancies.main.models import CustomerCV
from vacancies.main.recommendations import get_next_vacancy
class Command(BaseCommand):
help = "Generates new recommended vacancies"
@ -15,16 +16,12 @@ class Command(BaseCommand):
async def ahandle(self, *args, **options):
for customer_cv in CustomerCV.objects.all():
result = get_next_vacancy(customer_cv)
if not result:
continue
recommendation, vacancy_content, link = result
if vacancy := get_next_vacancy(customer_cv):
await application.bot.send_message(
chat_id=recommendation.customer.chat_id,
text=vacancy_content,
chat_id=customer_cv.customer.chat_id,
text=vacancy.get_formatted_response(),
parse_mode="Markdown",
reply_markup=InlineKeyboardMarkup([[
InlineKeyboardButton("Откликнуться", url=link),
InlineKeyboardButton("Откликнуться", url=vacancy.link),
]]),
)

View File

@ -0,0 +1,48 @@
import clickhouse_connect
from django.conf import settings
from django.core.management import BaseCommand
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct
from openai import OpenAI
from itertools import batched

query = """
SELECT DISTINCT ON (message) id, message
FROM telegram_parser_chatmessage
WHERE timestamp >= now() - INTERVAL 30 DAYS AND length(message) > 200
AND position(message, '?') = 0 AND position(message, 'spam') = 0
ORDER BY timestamp ASC
"""


class Command(BaseCommand):
    help = "Sync clickhouse and qdrant"

    def handle(self, *args, **options):
        clickhouse_client = clickhouse_connect.create_client(host=settings.CLICKHOUSE_HOST, port=settings.CLICKHOUSE_PORT)
        qdrant_client = QdrantClient(url=settings.QDRANT_URL)
        if not qdrant_client.collection_exists("messages"):
            qdrant_client.create_collection(
                collection_name="messages",
                vectors_config=VectorParams(size=4096, distance=Distance.COSINE),
            )
        openai_client = OpenAI(base_url="https://openrouter.ai/api/v1")
        result_rows = clickhouse_client.query(query).result_rows
        batches = list(batched(result_rows, 100))
        batches_quantity = len(batches)
        for index, batch in enumerate(batches):
            ids, messages = list(zip(*batch))
            embedding = openai_client.embeddings.create(model="qwen/qwen3-embedding-8b", input=messages, encoding_format="float")
            embeddings = [row.embedding for row in embedding.data]
            qdrant_client.upsert(
                collection_name="messages",
                points=[
                    PointStruct(id=idx, vector=vector, payload={"message": message})
                    for idx, message, vector in zip(ids, messages, embeddings)
                ]
            )
            print(f"{index+1}/{batches_quantity} processed")

View File

@ -0,0 +1,55 @@
# Generated by Django 5.2.7 on 2025-11-08 19:11
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('main', '0008_alter_recommendedvacancy_vacancy_id'),
]
operations = [
migrations.CreateModel(
name='JobTitle',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('title', models.CharField(max_length=255, unique=True)),
],
),
migrations.AddField(
model_name='customercv',
name='max_salary_rub',
field=models.PositiveIntegerField(blank=True, default=None, null=True),
),
migrations.AddField(
model_name='customercv',
name='min_salary_rub',
field=models.PositiveIntegerField(blank=True, default=None, null=True),
),
migrations.AddField(
model_name='customercv',
name='job_title',
field=models.ForeignKey(default=0, on_delete=django.db.models.deletion.CASCADE, to='main.jobtitle'),
preserve_default=False,
),
migrations.CreateModel(
name='Vacancy',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('external_id', models.CharField(max_length=255, unique=True)),
('min_salary_rub', models.PositiveIntegerField(blank=True, default=None, null=True)),
('max_salary_rub', models.PositiveIntegerField(blank=True, default=None, null=True)),
('content', models.TextField()),
('timestamp', models.DateTimeField()),
('link', models.URLField()),
('job_title', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.jobtitle')),
],
),
migrations.AlterField(
model_name='recommendedvacancy',
name='vacancy_id',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.vacancy'),
),
]

View File

@ -0,0 +1,18 @@
# Generated by Django 5.2.7 on 2025-11-09 08:06
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
('main', '0009_jobtitle_customercv_max_salary_rub_and_more'),
]
operations = [
migrations.RenameField(
model_name='recommendedvacancy',
old_name='vacancy_id',
new_name='vacancy',
),
]

View File

@ -0,0 +1,33 @@
# Generated by Django 5.2.7 on 2025-11-09 09:35
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('main', '0010_rename_vacancy_id_recommendedvacancy_vacancy'),
]
operations = [
migrations.RemoveField(
model_name='customercv',
name='job_title',
),
migrations.AddField(
model_name='customercv',
name='job_titles',
field=models.ManyToManyField(related_name='vacancies', to='main.jobtitle'),
),
migrations.AlterField(
model_name='recommendedvacancy',
name='customer',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='recommended_vacancies', to='main.customer'),
),
migrations.AlterField(
model_name='recommendedvacancy',
name='vacancy',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='recommended_vacancies', to='main.vacancy'),
),
]

View File

@ -0,0 +1,25 @@
# Generated by Django 5.2.7 on 2025-11-09 19:56
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('main', '0011_remove_customercv_job_title_customercv_job_titles_and_more'),
]
operations = [
migrations.AddField(
model_name='vacancy',
name='company_name',
field=models.CharField(default='test', max_length=255),
preserve_default=False,
),
migrations.AddField(
model_name='vacancy',
name='requirements',
field=models.TextField(default='test'),
preserve_default=False,
),
]

View File

@ -0,0 +1,17 @@
# Generated by Django 5.2.7 on 2025-11-30 11:32
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
('main', '0012_vacancy_company_name_vacancy_requirements'),
]
operations = [
migrations.AlterModelOptions(
name='vacancy',
options={'verbose_name_plural': 'Vacancies'},
),
]

View File

@ -1,5 +1,4 @@
from django.db import models
from pydantic import BaseModel
class Customer(models.Model):
@ -17,8 +16,18 @@ class Customer(models.Model):
db_table = "customers"
class JobTitle(models.Model):
title = models.CharField(max_length=255, unique=True)
def __str__(self):
return self.title
class CustomerCV(models.Model):
customer = models.OneToOneField(Customer, on_delete=models.CASCADE)
job_titles = models.ManyToManyField(JobTitle, related_name="vacancies")
min_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
max_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
content = models.TextField()
created_at = models.DateTimeField(auto_now_add=True)
@ -31,9 +40,43 @@ class CustomerCV(models.Model):
db_table = "customer_vcs"
class Vacancy(models.Model):
job_title = models.ForeignKey(JobTitle, on_delete=models.CASCADE)
external_id = models.CharField(max_length=255, unique=True)
min_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
max_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
company_name = models.CharField(max_length=255)
requirements = models.TextField()
content = models.TextField()
timestamp = models.DateTimeField()
link = models.URLField()
def __str__(self):
return self.job_title.title
def get_formatted_response(self):
response = f"""
💼 **Вакансия**: {self.job_title}
\n🏢 **Компания**: {self.company_name}
\n📝 **Требования**: {self.requirements}
"""
if self.min_salary_rub:
if self.max_salary_rub:
response += f"\n💸 **ЗП**: от {self.min_salary_rub} т.р."
else:
response += f"\n💸 **ЗП**: {self.min_salary_rub} т.р. - {self.max_salary_rub} т.р."
elif self.max_salary_rub:
response += f"\n💸 **ЗП**: до {self.max_salary_rub} т.р."
return response
class Meta:
verbose_name_plural = 'Vacancies'
class RecommendedVacancy(models.Model):
customer = models.ForeignKey(Customer, on_delete=models.CASCADE)
vacancy_id = models.BigIntegerField()
customer = models.ForeignKey(Customer, on_delete=models.CASCADE, related_name="recommended_vacancies")
vacancy = models.ForeignKey(Vacancy, on_delete=models.CASCADE, related_name="recommended_vacancies")
created_at = models.DateTimeField(auto_now_add=True)
objects = models.Manager()
@ -44,19 +87,3 @@ class RecommendedVacancy(models.Model):
class Meta:
verbose_name_plural = 'Recommended Vacancies'
db_table = "recommended_vacancies"
class VacancyFeatures(BaseModel):
job_title: str | None = None # Должность
employment_type: str | None = None # Тип занятости
work_format: str | None = None # Формат работы
experience: str | None = None # Опыт работы
position_level: str | None = None # Уровень позиции
industry: str | None = None # Отрасль / Сфера деятельности
tech_stack: list[str] | None = None # Технологический стек / Ключевые навыки
location: str | None = None # География
salary_range: str | None = None # Зарплатные ожидания / вилка
languages: list[str] | None = None # Языки
education: str | None = None # Образование
schedule: str | None = None # График работы
additional_requirements: list[str] | None = None # Дополнительные предпочтения / требования

68
vacancies/main/prompts.py Normal file
View File

@ -0,0 +1,68 @@
BOT_SYSTEM_PROMPT = """
Ты IT Career Copilot, высококвалифицированный HR-советчик и эксперт по рынку труда в ИТ. Твоя главная задача предоставлять пользователю практические и стратегические рекомендации по любым вопросам, связанным с карьерой в технологической сфере (поиск работы, развитие навыков, переговоры по зарплате, адаптация, увольнение).
Твой стиль общения: Фокус на действии (Action-Oriented): Отвечай максимально конкретно и применимо, предлагая шаги, которые пользователь может предпринять немедленно.
Консультация: Используй инсайты и данные о текущих трендах ИТ-рынка.
Краткость: Пиши лаконично, не более 4-5 содержательных пунктов или предложений. Используй маркированные списки (буллеты) для структурирования информации.
Формат: Отвечай простым текстом, не используя форматирование Markdown (заголовки, жирный шрифт, курсив, кодблоки).
Использование данных: У тебя есть дополнительный доступ к резюме пользователя (или фрагментам его карьерной истории) для персонализации ответов, а также ты можешь обратиться к базе данных с релевантными сообщениями, если тебе требуются дополнительные данные, например реальные данные о зарплатах или мнения людей.
Завершение: После ответа обязательно предложи один-два релевантных следующих шага, чтобы продолжить карьерное планирование.
"""
STRUCTURED_OUTPUT_PROMPT = """
You are an HR specialist. Your task is to review vacancies and independently select a suitable topic (e.g., DevSecOps, Java Developer, Information Security Specialist, etc.).
You also need to analyze vacancies and structure the information from them according to the scheme.
You don't need to change or invent anything in the job posting below. You only need to structure the information provided.
Example vacancy:
'Network Security Team lead - Infrastructure Security, Wildberries 💜
ЗП: до 500 000 рублей net
Уровень: Lead
Формат работы: удалёнка или гибрид
🚀 Вместе с масштабным развитием IT направления Wildberries и Russ развивает информационную безопасность. Мы решаем сложные и разнообразные задачи: от повышения защищенности каждого сервиса до развития безопасности в рамках всей нашей инфраструктуры.
Мы ищем эксперта, который вместе с командой займется разработкой сложных технических решений и внедрением практик безопасности для повышения защищенности нашей сетевой инфраструктуры.
📝 Основные задачи:
Составление проектных планов и управление командой
Взаимодействие с сетевыми командами для сбора потребностей и согласования технических решений
Построение плана развития безопасности сетевой инфраструктуры
Внедрение практик ИБ и контроль состояния защищенности внешнего и внутреннего сетевого периметра
Разработка и внедрение мер по повышению прозрачности и контролируемости сетевых доступов в компании
💫 Необходимый опыт и навыки:
Опыт в организации командной работы
Опыт в построении сложных кросс-командных процессов
Умение разрабатывать комплексные решения по безопасности для серверной инфраструктуры на базе Linux
Хорошее знание сетевых технологий
Опыт решения проблем ИБ в сетевой инфраструктуре
Знание сетевых атак и способов защиты от них
Опыт работы с NGFW
🔥 Что мы предлагаем:
Полная удаленка или свободное посещение офисов в Москве и Санкт-Петербурге
IT-ипотека и оформление в аккредитованную IT-компанию
Бесплатное питание в офисах, ДМС со стоматологией (после испытательного срока)
Оплачиваемые Day Off, корпоративное обучение и IT-мероприятия
💘 Контакты: @Alens_HR'
Structured output of the example vacancy:
{
job_title: "Network Security Team lead - Infrastructure Security",
company_name: "Wildberries",
min_salary_rub: None,
max_salary_rub: 500000,
requirements: "Опыт в организации командной работы. Опыт в построении сложных кросс-командных процессов. Умение разрабатывать комплексные решения по безопасности для серверной инфраструктуры на базе Linux, Хорошее знание сетевых технологий. Опыт решения проблем ИБ в сетевой инфраструктуре. Знание сетевых атак и способов защиты от них. Опыт работы с NGFW]
}
Vacancy:
"""

View File

@ -0,0 +1,17 @@
from django.db.models import Q

from vacancies.main.models import Vacancy


def get_next_vacancy(customer_cv):
    vacancy = Vacancy.objects.filter(
        ~Q(id__in=customer_cv.customer.recommended_vacancies.values_list("vacancy_id", flat=True)),
        job_title__title__in=customer_cv.job_titles.values_list("title", flat=True),
    ).filter(
        (Q(min_salary_rub__isnull=True) | Q(min_salary_rub__gt=customer_cv.min_salary_rub)) |
        (Q(max_salary_rub__isnull=False) & Q(max_salary_rub__gte=customer_cv.min_salary_rub)),
    ).order_by("-timestamp").first()
    if vacancy:
        customer_cv.customer.recommended_vacancies.create(vacancy=vacancy)
    return vacancy
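
get_next_vacancy runs blocking ORM queries, so the bot handler in this changeset calls it off the event loop with asyncio.to_thread; a minimal sketch of that calling pattern (the wrapper function name is illustrative):
```python
# Sketch only: mirrors how bot.py calls get_next_vacancy without blocking the event loop.
import asyncio

from vacancies.main.models import CustomerCV
from vacancies.main.recommendations import get_next_vacancy

async def recommend_for(telegram_id: int):
    customer_cv = await CustomerCV.objects.filter(customer__telegram_id=telegram_id).afirst()
    if customer_cv is None:
        return None
    return await asyncio.to_thread(get_next_vacancy, customer_cv)
```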

View File

@ -1,171 +0,0 @@
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from qdrant_client import QdrantClient, models
from qdrant_client.models import Filter, HasIdCondition
from vacancies.conf.settings import QDRANT_URL
from vacancies.main.models import RecommendedVacancy, VacancyFeatures
qdrant_client = QdrantClient(url=QDRANT_URL)
FEATURE_NAMES = [
"job_title", "employment_type", "work_format", "experience", "position_level", "industry", "tech_stack",
"location", "salary_range", "languages", "education", "schedule", "additional_requirements"
]
weights = {
"job_title": 70,
"tech_stack": 10,
"salary_range": 10,
}
vectors_config = {
name: models.VectorParams(size=3072, distance=models.Distance.COSINE) for name in FEATURE_NAMES
}
if not qdrant_client.collection_exists("vacancies"):
qdrant_client.create_collection(
collection_name="vacancies",
vectors_config=vectors_config,
)
qdrant_client.create_payload_index(
collection_name="vacancies",
field_name="timestamp",
field_schema="datetime",
)
if not qdrant_client.collection_exists("cvs"):
qdrant_client.create_collection(
collection_name="cvs",
vectors_config=vectors_config,
)
embedding = OpenAIEmbeddings(model="text-embedding-3-large")
def _prepare_texts(features):
texts = {}
for name in FEATURE_NAMES:
value = features.get(name)
if isinstance(value, list):
text = " ".join(value) if value else ""
else:
text = str(value) if value else ""
texts[name] = text
return texts
def embed_features(features):
features = {key: value for key, value in features.items() if value}
features_texts = _prepare_texts(features)
names, texts = features_texts.keys(), features_texts.values()
vectors = dict(zip(names, embedding.embed_documents(texts)))
return vectors
def add_vectors(collection_name: str, _id: int, features: dict, payload: dict, vectors):
max_similarities = {}
for name, vec in vectors.items():
results = qdrant_client.query_points(collection_name="vacancies", query=vec, using=name, limit=100)
for res in results.points:
max_similarities.setdefault(res.id, {})
max_similarities[res.id][name] = res.score
scored = []
for vid, feature_sims in max_similarities.items():
total = sum(feature_sims[feature] * weights.get(feature, 1) for feature in feature_sims)
scored.append({"id": vid, "score": total})
scored.sort(key=lambda x: x["score"], reverse=True)
if scored and scored[0]["score"] > 80: # threshold
return
qdrant_client.upsert(
collection_name=collection_name,
points=[models.PointStruct(id=_id, vector=vectors, payload=payload)]
)
def search_similarities(query_filter: Filter, cv_id: int):
cv = qdrant_client.retrieve(collection_name="cvs", ids=[cv_id], with_vectors=True)[0]
max_similarities, vacancies_content = {}, {}
for name, vec in cv.vector.items():
results = qdrant_client.query_points(
collection_name="vacancies",
query=vec,
using=name,
limit=100000,
with_payload=True,
query_filter=query_filter,
)
for res in results.points:
max_similarities.setdefault(res.id, {})
vacancies_content.setdefault(res.id, {})
max_similarities[res.id][name] = res.score
vacancies_content[res.id]["content"] = res.payload["content"]
vacancies_content[res.id]["features_json"] = res.payload["features_json"]
vacancies_content[res.id]["link"] = res.payload["link"]
scored = []
for vid, feature_sims in max_similarities.items():
total = sum(feature_sims[feature] * weights.get(feature, 1) for feature in feature_sims)
scored.append({
"id": vid,
"score": total,
"content": vacancies_content[vid]["content"],
"features_json": vacancies_content[vid]["features_json"],
"link": vacancies_content[vid]["link"],
"sims": feature_sims,
})
scored.sort(key=lambda x: x["score"], reverse=True)
return scored[0]["id"], scored[0]["content"], scored[0]["link"]
def batch_extract_features(contents: list[str]) -> list[VacancyFeatures]:
prompts = [
f"""
Extract the following features from the job vacancy description. If a feature is not mentioned, set it to null.
Features:
- job_title: Должность (e.g., DevOps, Python программист)
- employment_type: Тип занятости (e.g., Полная занятость, Частичная)
- work_format: Формат работы (e.g., Офис, Удалённо, Гибрид)
- experience: Опыт работы (e.g., 3-5 лет, Нет опыта)
- position_level: Уровень позиции (e.g., Junior, Senior)
- industry: Отрасль / Сфера деятельности (e.g., IT, Финансы)
- tech_stack: Технологический стек / Ключевые навыки (list of strings)
- location: География (e.g., Москва, Россия)
- salary_range: Зарплатные ожидания / вилка (e.g., 100000-200000 руб)
- languages: Языки (list of strings, e.g., ["Русский", "Английский"])
- education: Образование (e.g., Высшее, Среднее специальное)
- schedule: График работы (e.g., Полный день, Сменный)
- additional_requirements: Дополнительные предпочтения / требования (list of strings)
Vacancy content:
{content}
"""
for content in contents
]
openai_client = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal", temperature=0, seed=42, top_p=1)
structured_llm = openai_client.with_structured_output(VacancyFeatures)
response = structured_llm.batch(prompts)
return response
def get_next_vacancy(customer_cv):
recommended_vacancy_ids = RecommendedVacancy.objects.filter(
customer=customer_cv.customer,
).values_list('vacancy_id', flat=True)
query_filter = Filter(must_not=[HasIdCondition(has_id=recommended_vacancy_ids)])
result = search_similarities(query_filter, customer_cv.id)
if not result:
return None
search_result_id, vacancy_content, link = result
recommendation = RecommendedVacancy.objects.create(
customer=customer_cv.customer,
vacancy_id=search_result_id,
)
return recommendation, vacancy_content, link