Compare commits


No commits in common. "55ee3b7ba43757cd76c93cc6b901aab21a2543a4" and "750683fb5c572da92f832ed414b2446553206dc1" have entirely different histories.

15 changed files with 992 additions and 798 deletions

View File

@@ -1,40 +1,24 @@
# vision-career
# vision-career-backend
Sample `.env`:
```dotenv
DEEPINFRA_API_TOKEN=your-token-here
OPENAI_API_KEY=your-token-here
OPENAI_PROXY=http://user:password@host:port
BOT_TOKEN=your-token-here
SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt
SECRET_KEY=secret
DEBUG=true
```
Commands:
```bash
docker compose up -d
KUBECONFIG=clickhouse-kubeconfig.yaml kubectl port-forward svc/clickhouse-clickhouse -n clickhouse 18123:8123
uv sync
uv run --env-file .env manage.py migrate
uv run --env-file .env manage.py createsuperuser --username stromenko_es --email estromenko@mail.ru
uv run --env-file .env manage.py collectstatic
uv run --env-file .env manage.py runserver
uv run --env-file .env manage.py generate_recommended_vacancies
uv run --env-file .env manage.py collect_vacancies_from_telegram_messages
uv run --env-file .env manage.py runbot
```
Production port-forwards:
```bash
KUBECONFIG=production-kubeconfig.yaml kubectl port-forward svc/main-cluster-rw -n postgresql-cluster 5432
```
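With the production port-forward above running, the PostgreSQL cluster is reachable on 127.0.0.1:5432. A minimal connectivity sketch using psycopg, which is already a project dependency; the database name, user, and password below are placeholders, not the cluster's real credentials:
```python
import psycopg

# Connect through the local port-forward; credentials are placeholders.
with psycopg.connect(
    host="127.0.0.1",
    port=5432,
    dbname="vacancies",   # placeholder
    user="postgres",      # placeholder
    password="changeme",  # placeholder
) as conn:
    with conn.cursor() as cur:
        cur.execute("SELECT version()")
        print(cur.fetchone()[0])
```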

View File

@@ -1,4 +1,11 @@
services:
qdrant:
image: qdrant/qdrant:latest
restart: always
ports:
- "127.0.0.1:6333:6333"
volumes:
- "/srv/vision-career/qdrant:/qdrant/storage"
postgres:
image: postgres:17-alpine3.20
restart: always
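The new qdrant service is published only on the loopback interface and persists to /srv/vision-career/qdrant. A quick sketch to verify the instance is reachable and that the collections created on import by vacancies.main.vector_store exist:
```python
from qdrant_client import QdrantClient

# Point at the compose-exposed Qdrant instance and list its collections.
client = QdrantClient(url="http://127.0.0.1:6333")
existing = {c.name for c in client.get_collections().collections}
print("vacancies" in existing, "cvs" in existing)
```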

View File

@@ -10,6 +10,7 @@ dependencies = [
"gunicorn>=23.0.0",
"langchain>=0.3.27",
"langchain-openai>=0.3.35",
"langchain-qdrant>=1.1.0",
"langgraph-checkpoint-postgres>=3.0.0",
"psycopg[binary]>=3.2.12",
"pydantic>=2.0",

uv.lock (1256 changed lines)

File diff suppressed because it is too large.

View File

@@ -1,27 +1,16 @@
from django.contrib import admin
from vacancies.main import models
@admin.register(models.Customer)
class CustomerAdmin(admin.ModelAdmin):
pass
@admin.register(models.CustomerCV)
class CustomerCVAdmin(admin.ModelAdmin):
class CustomerCVADMIN(admin.ModelAdmin):
pass
@admin.register(models.RecommendedVacancy)
class RecommendedVacancyAdmin(admin.ModelAdmin):
pass
@admin.register(models.Vacancy)
class VacancyAdmin(admin.ModelAdmin):
pass
@admin.register(models.JobTitle)
class JobTitleAdmin(admin.ModelAdmin):
pass

View File

@@ -1,8 +1,8 @@
import io
import asyncio
import os
import traceback
from asgiref.sync import sync_to_async
from langchain.agents import create_agent
from langchain_openai import ChatOpenAI
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
@ -22,11 +22,14 @@ from telegram.ext import (
filters,
)
from pydantic import BaseModel
from typing import Literal
from vacancies.conf.settings import DB_URI
from vacancies.main.models import Customer, CustomerCV, JobTitle
from vacancies.main.recommendations import get_next_vacancy
from vacancies.main.models import Customer, CustomerCV
from vacancies.main.vector_store import (
add_vectors,
batch_extract_features,
get_next_vacancy,
embed_features,
)
SYSTEM_PROMPT = """
Ты карьерный копилот для ИТ. Ты можешь отвечать на любые вопросы по тематике карьеры.
@@ -66,17 +69,19 @@ async def next_vacancy(update: Update, context: ContextTypes.DEFAULT_TYPE):
await context.bot.send_message(chat_id=update.effective_chat.id, text=message)
return
vacancy = get_next_vacancy(customer_cv)
if not vacancy:
result = get_next_vacancy(customer_cv)
if not result:
message = "Вакансии закончились, возвращайтесь позже!"
await context.bot.send_message(chat_id=update.effective_chat.id, text=message)
return
recommendation, vacancy_content, link = result
await context.bot.send_message(
chat_id=update.effective_chat.id,
text=vacancy.content,
text=vacancy_content,
reply_markup=InlineKeyboardMarkup([[
InlineKeyboardButton("Откликнуться", url=vacancy.link),
InlineKeyboardButton("Откликнуться", url=link),
]]),
)
@@ -118,34 +123,22 @@ async def handle_document(update: Update, context: ContextTypes.DEFAULT_TYPE):
reader = PdfReader(buffer)
resume = "\n".join(page.extract_text() for page in reader.pages)
db_job_titles = await sync_to_async(list)(JobTitle.objects.values_list('title', flat=True))
job_title_map = await sync_to_async(dict)(JobTitle.objects.values_list('title', 'id'))
class Structure(BaseModel):
job_titles: list[Literal[tuple(db_job_titles)]]
min_salary_rub: int | None
max_salary_rub: int | None
openai_client = ChatOpenAI(model_name="gpt-5-mini", temperature=0, seed=42, top_p=1)
structured_llm = openai_client.with_structured_output(Structure)
prompt = f"""
Ты HR-классификатор. Ниже приведён список допустимых профессий.
Твоя задача выбрать наиболее подходящие по смыслу.
Качество классификации - самое важное.
Игнорируй орфографические и стилистические различия.
Резюме:
{resume}
"""
response = await structured_llm.ainvoke(prompt)
customer = await Customer.objects.aget(telegram_id=update.effective_user.id)
customer_cv, _ = await CustomerCV.objects.aupdate_or_create(customer=customer, defaults=dict(
content=resume,
min_salary_rub=response.min_salary_rub,
max_salary_rub=response.max_salary_rub,
))
await customer_cv.job_titles.aset([job_title_map[job_title] for job_title in response.job_titles])
def upload_vectors():
features = batch_extract_features([customer_cv.content])[0]
add_vectors(
"cvs",
customer_cv.id,
features.model_dump(),
{'content': customer_cv.content, 'features_json': features.model_dump()},
embed_features(features.model_dump()),
)
await asyncio.to_thread(upload_vectors)
await context.bot.editMessageText("Отлично! Запомнил Ваше резюме.", update.effective_chat.id, message.id)
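The resume handler now stores CVs in Qdrant instead of classifying them against JobTitle rows: the extracted features go into the point payload, and each feature is embedded into its own named vector. A sketch of that path, runnable from a Django shell; the resume text and the choice of customer are illustrative:
```python
from vacancies.main.models import Customer, CustomerCV
from vacancies.main.vector_store import add_vectors, batch_extract_features, embed_features

resume = "Senior Python developer: Django, PostgreSQL, Qdrant; remote"  # illustrative
customer = Customer.objects.first()  # illustrative choice of customer
customer_cv, _ = CustomerCV.objects.update_or_create(customer=customer, defaults={"content": resume})

# Extract structured features once, then embed and upsert into the "cvs" collection.
features = batch_extract_features([customer_cv.content])[0]
add_vectors(
    "cvs",
    customer_cv.id,
    features.model_dump(),
    {"content": customer_cv.content, "features_json": features.model_dump()},
    embed_features(features.model_dump()),
)
```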

View File

@@ -1,14 +1,18 @@
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta
from itertools import batched
from datetime import timedelta
from django.utils import timezone
from pydantic import BaseModel
from typing import Literal
from vacancies.main.models import Vacancy, JobTitle
from langchain_openai import ChatOpenAI
import clickhouse_connect
from django.core.management import BaseCommand
from django.conf import settings
from qdrant_client.models import OrderBy
from vacancies.main.vector_store import (
add_vectors,
batch_extract_features,
embed_features,
qdrant_client,
)
query = """
SELECT DISTINCT ON (message) id, chat_username, telegram_id, message, timestamp
@@ -34,49 +38,23 @@ class Command(BaseCommand):
help = "Collect vacancies from telegram messages"
def handle(self, *args, **options):
job_titles = JobTitle.objects.values_list('title', flat=True)
job_title_map = dict(JobTitle.objects.values_list('title', 'id'))
class Structure(BaseModel):
job_title: Literal[tuple(job_titles)]
min_salary_rub: int | None
max_salary_rub: int | None
openai_client = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal", temperature=0, seed=42, top_p=1)
structured_llm = openai_client.with_structured_output(Structure)
last_timestamp = timezone.now() - timedelta(days=30)
if last_vacancy := Vacancy.objects.order_by("-timestamp").first():
last_timestamp = last_vacancy.timestamp
response = qdrant_client.scroll(collection_name="vacancies", limit=1, order_by=OrderBy(key="timestamp", direction="desc"))
last_point_timestamp = datetime.now() - timedelta(days=30)
if response[0]:
last_point_timestamp = response[0][0].payload["timestamp"]
clickhouse_client = clickhouse_connect.create_client(host=settings.CLICKHOUSE_HOST, port=settings.CLICKHOUSE_PORT)
result_rows = clickhouse_client.query(query, parameters={"timestamp": last_timestamp}).result_rows
result_rows = clickhouse_client.query(query, parameters={"timestamp": last_point_timestamp}).result_rows
batches = list(batched(result_rows, settings.COLLECT_VACANCIES_BATCH_SIZE))
for index, rows in enumerate(batches):
prompts = [
f"""
Ты HR-классификатор. Ниже приведён список допустимых профессий.
Твоя задача выбрать наиболее подходящую по смыслу.
Качество классификации - самое важное.
Игнорируй орфографические и стилистические различия.
Вакансия:
{row[3]}
"""
for row in rows
]
responses = structured_llm.batch(prompts)
vacancies = []
for row, response in zip(rows, responses):
for index, rows in enumerate(batched(result_rows, settings.COLLECT_VACANCIES_BATCH_SIZE)):
vacancies_features = batch_extract_features([row[3] for row in rows])
print(f"Processing {index+1}/{len(result_rows)//settings.COLLECT_VACANCIES_BATCH_SIZE}")
with ThreadPoolExecutor() as pool:
vacancies_vectors = pool.map(embed_features, [vacancy_features.model_dump() for vacancy_features in vacancies_features])
for row, vacancy_features, vacancy_vectors in zip(rows, vacancies_features, vacancies_vectors):
(id, chat_username, telegram_id, message, timestamp) = row
vacancies.append(Vacancy(
external_id=id,
job_title_id=job_title_map[response.job_title],
min_salary_rub=response.min_salary_rub,
max_salary_rub=response.max_salary_rub,
content=message,
timestamp=timezone.make_aware(timestamp),
link=f"https://t.me/{chat_username}/{telegram_id}",
))
Vacancy.objects.bulk_create(vacancies, ignore_conflicts=True)
print(f"Processed {index+1}/{len(batches)}")
link = f"https://t.me/{chat_username}/{telegram_id}"
payload = {'content': message, 'features_json': vacancy_features.model_dump(), "link": link, "timestamp": timestamp}
add_vectors("vacancies", id, vacancy_features.model_dump(), payload, vacancy_vectors)

View File

@@ -3,7 +3,7 @@ import asyncio
from django.core.management import BaseCommand
from vacancies.main.models import CustomerCV
from vacancies.main.bot import application
from vacancies.main.recommendations import get_next_vacancy
from vacancies.main.vector_store import get_next_vacancy
from telegram import InlineKeyboardButton, InlineKeyboardMarkup
@@ -15,11 +15,16 @@ class Command(BaseCommand):
async def ahandle(self, *args, **options):
for customer_cv in CustomerCV.objects.all():
if vacancy := get_next_vacancy(customer_cv):
result = get_next_vacancy(customer_cv)
if not result:
continue
recommendation, vacancy_content, link = result
await application.bot.send_message(
chat_id=customer_cv.customer.chat_id,
text=vacancy.content,
chat_id=recommendation.customer.chat_id,
text=vacancy_content,
reply_markup=InlineKeyboardMarkup([[
InlineKeyboardButton("Откликнуться", url=vacancy.link),
InlineKeyboardButton("Откликнуться", url=link),
]]),
)
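Both callers, the bot's next_vacancy handler and the management command above, now consume the new contract of vacancies.main.vector_store.get_next_vacancy: None when nothing is left, otherwise a (RecommendedVacancy, vacancy_content, link) tuple. A sketch of the unpacking, with an illustrative CV:
```python
from vacancies.main.models import CustomerCV
from vacancies.main.vector_store import get_next_vacancy

customer_cv = CustomerCV.objects.first()  # illustrative
result = get_next_vacancy(customer_cv)
if result is None:
    print("no vacancies left")
else:
    recommendation, vacancy_content, link = result  # RecommendedVacancy row, payload text, t.me link
    print(link)
```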

View File

@@ -1,55 +0,0 @@
# Generated by Django 5.2.7 on 2025-11-08 19:11
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('main', '0008_alter_recommendedvacancy_vacancy_id'),
]
operations = [
migrations.CreateModel(
name='JobTitle',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('title', models.CharField(max_length=255, unique=True)),
],
),
migrations.AddField(
model_name='customercv',
name='max_salary_rub',
field=models.PositiveIntegerField(blank=True, default=None, null=True),
),
migrations.AddField(
model_name='customercv',
name='min_salary_rub',
field=models.PositiveIntegerField(blank=True, default=None, null=True),
),
migrations.AddField(
model_name='customercv',
name='job_title',
field=models.ForeignKey(default=0, on_delete=django.db.models.deletion.CASCADE, to='main.jobtitle'),
preserve_default=False,
),
migrations.CreateModel(
name='Vacancy',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('external_id', models.CharField(max_length=255, unique=True)),
('min_salary_rub', models.PositiveIntegerField(blank=True, default=None, null=True)),
('max_salary_rub', models.PositiveIntegerField(blank=True, default=None, null=True)),
('content', models.TextField()),
('timestamp', models.DateTimeField()),
('link', models.URLField()),
('job_title', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.jobtitle')),
],
),
migrations.AlterField(
model_name='recommendedvacancy',
name='vacancy_id',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.vacancy'),
),
]

View File

@@ -1,18 +0,0 @@
# Generated by Django 5.2.7 on 2025-11-09 08:06
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
('main', '0009_jobtitle_customercv_max_salary_rub_and_more'),
]
operations = [
migrations.RenameField(
model_name='recommendedvacancy',
old_name='vacancy_id',
new_name='vacancy',
),
]

View File

@@ -1,33 +0,0 @@
# Generated by Django 5.2.7 on 2025-11-09 09:35
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('main', '0010_rename_vacancy_id_recommendedvacancy_vacancy'),
]
operations = [
migrations.RemoveField(
model_name='customercv',
name='job_title',
),
migrations.AddField(
model_name='customercv',
name='job_titles',
field=models.ManyToManyField(related_name='vacancies', to='main.jobtitle'),
),
migrations.AlterField(
model_name='recommendedvacancy',
name='customer',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='recommended_vacancies', to='main.customer'),
),
migrations.AlterField(
model_name='recommendedvacancy',
name='vacancy',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='recommended_vacancies', to='main.vacancy'),
),
]

View File

@@ -1,4 +1,5 @@
from django.db import models
from pydantic import BaseModel
class Customer(models.Model):
@@ -16,18 +17,8 @@ class Customer(models.Model):
db_table = "customers"
class JobTitle(models.Model):
title = models.CharField(max_length=255, unique=True)
def __str__(self):
return self.title
class CustomerCV(models.Model):
customer = models.OneToOneField(Customer, on_delete=models.CASCADE)
job_titles = models.ManyToManyField(JobTitle, related_name="vacancies")
min_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
max_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
content = models.TextField()
created_at = models.DateTimeField(auto_now_add=True)
@@ -40,22 +31,9 @@ class CustomerCV(models.Model):
db_table = "customer_vcs"
class Vacancy(models.Model):
job_title = models.ForeignKey(JobTitle, on_delete=models.CASCADE)
external_id = models.CharField(max_length=255, unique=True)
min_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
max_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
content = models.TextField()
timestamp = models.DateTimeField()
link = models.URLField()
def __str__(self):
return self.job_title.title
class RecommendedVacancy(models.Model):
customer = models.ForeignKey(Customer, on_delete=models.CASCADE, related_name="recommended_vacancies")
vacancy = models.ForeignKey(Vacancy, on_delete=models.CASCADE, related_name="recommended_vacancies")
customer = models.ForeignKey(Customer, on_delete=models.CASCADE)
vacancy_id = models.BigIntegerField()
created_at = models.DateTimeField(auto_now_add=True)
objects = models.Manager()
@@ -66,3 +44,19 @@ class RecommendedVacancy(models.Model):
class Meta:
verbose_name_plural = 'Recommended Vacancies'
db_table = "recommended_vacancies"
class VacancyFeatures(BaseModel):
job_title: str | None = None # Должность
employment_type: str | None = None # Тип занятости
work_format: str | None = None # Формат работы
experience: str | None = None # Опыт работы
position_level: str | None = None # Уровень позиции
industry: str | None = None # Отрасль / Сфера деятельности
tech_stack: list[str] | None = None # Технологический стек / Ключевые навыки
location: str | None = None # География
salary_range: str | None = None # Зарплатные ожидания / вилка
languages: list[str] | None = None # Языки
education: str | None = None # Образование
schedule: str | None = None # График работы
additional_requirements: list[str] | None = None # Дополнительные предпочтения / требования
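VacancyFeatures is a plain pydantic model, so model_dump() produces the features_json dict stored in Qdrant payloads, with unset fields serialized as None. An illustrative instance, constructed from a Django shell:
```python
from vacancies.main.models import VacancyFeatures

# Illustrative values; any field not mentioned in a vacancy stays None.
features = VacancyFeatures(
    job_title="Python программист",
    work_format="Удалённо",
    position_level="Senior",
    tech_stack=["Python", "Django", "PostgreSQL"],
    salary_range="250000-300000 руб",
)
print(features.model_dump())  # unset fields come back as None
```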

View File

@@ -1,14 +0,0 @@
from vacancies.main.models import Vacancy
def get_next_vacancy(customer_cv):
vacancy = Vacancy.objects.exclude(
id__in=customer_cv.customer.recommended_vacancies.values_list("vacancy_id", flat=True),
).filter(
job_title__title__in=customer_cv.job_titles.values_list("title", flat=True),
min_salary_rub__gt=customer_cv.min_salary_rub,
).first()
if vacancy:
customer_cv.customer.recommended_vacancies.create(vacancy=vacancy)
return vacancy

View File

@@ -0,0 +1,171 @@
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from qdrant_client import QdrantClient, models
from qdrant_client.models import Filter, HasIdCondition
from vacancies.conf.settings import QDRANT_URL
from vacancies.main.models import RecommendedVacancy, VacancyFeatures
qdrant_client = QdrantClient(url=QDRANT_URL)
FEATURE_NAMES = [
"job_title", "employment_type", "work_format", "experience", "position_level", "industry", "tech_stack",
"location", "salary_range", "languages", "education", "schedule", "additional_requirements"
]
weights = {
"job_title": 70,
"tech_stack": 10,
"salary_range": 10,
}
vectors_config = {
name: models.VectorParams(size=3072, distance=models.Distance.COSINE) for name in FEATURE_NAMES
}
if not qdrant_client.collection_exists("vacancies"):
qdrant_client.create_collection(
collection_name="vacancies",
vectors_config=vectors_config,
)
qdrant_client.create_payload_index(
collection_name="vacancies",
field_name="timestamp",
field_schema="datetime",
)
if not qdrant_client.collection_exists("cvs"):
qdrant_client.create_collection(
collection_name="cvs",
vectors_config=vectors_config,
)
embedding = OpenAIEmbeddings(model="text-embedding-3-large")
def _prepare_texts(features):
texts = {}
for name in FEATURE_NAMES:
value = features.get(name)
if isinstance(value, list):
text = " ".join(value) if value else ""
else:
text = str(value) if value else ""
texts[name] = text
return texts
def embed_features(features):
features = {key: value for key, value in features.items() if value}
features_texts = _prepare_texts(features)
names, texts = features_texts.keys(), features_texts.values()
vectors = dict(zip(names, embedding.embed_documents(texts)))
return vectors
def add_vectors(collection_name: str, _id: int, features: dict, payload: dict, vectors):
max_similarities = {}
for name, vec in vectors.items():
results = qdrant_client.query_points(collection_name="vacancies", query=vec, using=name, limit=100)
for res in results.points:
max_similarities.setdefault(res.id, {})
max_similarities[res.id][name] = res.score
scored = []
for vid, feature_sims in max_similarities.items():
total = sum(feature_sims[feature] * weights.get(feature, 1) for feature in feature_sims)
scored.append({"id": vid, "score": total})
scored.sort(key=lambda x: x["score"], reverse=True)
if scored and scored[0]["score"] > 80: # threshold
return
qdrant_client.upsert(
collection_name=collection_name,
points=[models.PointStruct(id=_id, vector=vectors, payload=payload)]
)
def search_similarities(query_filter: Filter, cv_id: int):
cv = qdrant_client.retrieve(collection_name="cvs", ids=[cv_id], with_vectors=True)[0]
max_similarities, vacancies_content = {}, {}
for name, vec in cv.vector.items():
results = qdrant_client.query_points(
collection_name="vacancies",
query=vec,
using=name,
limit=100000,
with_payload=True,
query_filter=query_filter,
)
for res in results.points:
max_similarities.setdefault(res.id, {})
vacancies_content.setdefault(res.id, {})
max_similarities[res.id][name] = res.score
vacancies_content[res.id]["content"] = res.payload["content"]
vacancies_content[res.id]["features_json"] = res.payload["features_json"]
vacancies_content[res.id]["link"] = res.payload["link"]
scored = []
for vid, feature_sims in max_similarities.items():
total = sum(feature_sims[feature] * weights.get(feature, 1) for feature in feature_sims)
scored.append({
"id": vid,
"score": total,
"content": vacancies_content[vid]["content"],
"features_json": vacancies_content[vid]["features_json"],
"link": vacancies_content[vid]["link"],
"sims": feature_sims,
})
scored.sort(key=lambda x: x["score"], reverse=True)
return scored[0]["id"], scored[0]["content"], scored[0]["link"]
def batch_extract_features(contents: list[str]) -> list[VacancyFeatures]:
prompts = [
f"""
Extract the following features from the job vacancy description. If a feature is not mentioned, set it to null.
Features:
- job_title: Должность (e.g., DevOps, Python программист)
- employment_type: Тип занятости (e.g., Полная занятость, Частичная)
- work_format: Формат работы (e.g., Офис, Удалённо, Гибрид)
- experience: Опыт работы (e.g., 3-5 лет, Нет опыта)
- position_level: Уровень позиции (e.g., Junior, Senior)
- industry: Отрасль / Сфера деятельности (e.g., IT, Финансы)
- tech_stack: Технологический стек / Ключевые навыки (list of strings)
- location: География (e.g., Москва, Россия)
- salary_range: Зарплатные ожидания / вилка (e.g., 100000-200000 руб)
- languages: Языки (list of strings, e.g., ["Русский", "Английский"])
- education: Образование (e.g., Высшее, Среднее специальное)
- schedule: График работы (e.g., Полный день, Сменный)
- additional_requirements: Дополнительные предпочтения / требования (list of strings)
Vacancy content:
{content}
"""
for content in contents
]
openai_client = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal", temperature=0, seed=42, top_p=1)
structured_llm = openai_client.with_structured_output(VacancyFeatures)
response = structured_llm.batch(prompts)
return response
def get_next_vacancy(customer_cv):
recommended_vacancy_ids = RecommendedVacancy.objects.filter(
customer=customer_cv.customer,
).values_list('vacancy_id', flat=True)
query_filter = Filter(must_not=[HasIdCondition(has_id=recommended_vacancy_ids)])
result = search_similarities(query_filter, customer_cv.id)
if not result:
return None
search_result_id, vacancy_content, link = result
recommendation = RecommendedVacancy.objects.create(
customer=customer_cv.customer,
vacancy_id=search_result_id,
)
return recommendation, vacancy_content, link
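For reference, the scoring used by add_vectors and search_similarities is a weighted sum of per-feature cosine similarities, with job_title dominating; in add_vectors, a best existing score above 80 is treated as a near-duplicate and the point is not upserted. A worked example with made-up similarities:
```python
weights = {"job_title": 70, "tech_stack": 10, "salary_range": 10}  # every other feature weighs 1

# Made-up per-feature cosine similarities against the closest stored vacancy.
feature_sims = {"job_title": 0.97, "tech_stack": 0.80, "salary_range": 0.65, "location": 0.90}
total = sum(score * weights.get(name, 1) for name, score in feature_sims.items())
print(total)  # 0.97*70 + 0.80*10 + 0.65*10 + 0.90*1 = 83.3, above the 80 threshold, so add_vectors skips the upsert
```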