Compare commits

..

No commits in common. "45e89be6d029c9cf7c1849e26681a88c0ffd8dbd" and "750683fb5c572da92f832ed414b2446553206dc1" have entirely different histories.

10 changed files with 246 additions and 182 deletions

View File

@@ -1,41 +1,24 @@
# vision-career # vision-career-backend
Sample `.env`: Sample `.env`:
```dotenv ```dotenv
DEEPINFRA_API_TOKEN=your-token-here
OPENAI_API_KEY=your-token-here OPENAI_API_KEY=your-token-here
OPENAI_PROXY=http://user:password@host:port
BOT_TOKEN=your-token-here BOT_TOKEN=your-token-here
SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt
SECRET_KEY=secret
DEBUG=true
``` ```
Commands: Commands:
```bash ```bash
docker compose up -d
KUBECONFIG=clickhouse-kubeconfig.yaml kubectl port-forward svc/clickhouse-clickhouse -n clickhouse 18123:8123
uv sync
uv run --env-file .env manage.py migrate uv run --env-file .env manage.py migrate
uv run --env-file .env manage.py createsuperuser --username stromenko_es --email estromenko@mail.ru uv run --env-file .env manage.py createsuperuser --username stromenko_es --email estromenko@mail.ru
uv run --env-file .env manage.py collectstatic
uv run --env-file .env manage.py runserver uv run --env-file .env manage.py runserver
uv run --env-file .env manage.py generate_recommended_vacancies uv run --env-file .env manage.py generate_recommended_vacancies
uv run --env-file .env manage.py collect_vacancies_from_telegram_messages uv run --env-file .env manage.py collect_vacancies_from_telegram_messages
uv run --env-file .env manage.py runbot uv run --env-file .env manage.py runbot
``` ```
Production port-forwards:
```bash
KUBECONFIG=production-kubeconfig.yaml kubectl port-forward svc/qdrant -n qdrant 6333:6333
KUBECONFIG=production-kubeconfig.yaml kubectl port-forward svc/main-cluster-rw -n postgresql-cluster 5432
```

View File

@@ -1,4 +1,11 @@
services: services:
qdrant:
image: qdrant/qdrant:latest
restart: always
ports:
- "127.0.0.1:6333:6333"
volumes:
- "/srv/vision-career/qdrant:/qdrant/storage"
postgres: postgres:
image: postgres:17-alpine3.20 image: postgres:17-alpine3.20
restart: always restart: always

View File

@@ -14,13 +14,3 @@ class CustomerCVADMIN(admin.ModelAdmin):
@admin.register(models.RecommendedVacancy) @admin.register(models.RecommendedVacancy)
class RecommendedVacancyAdmin(admin.ModelAdmin): class RecommendedVacancyAdmin(admin.ModelAdmin):
pass pass
@admin.register(models.Vacancy)
class VacancyAdmin(admin.ModelAdmin):
pass
@admin.register(models.JobTitle)
class JobTitleAdmin(admin.ModelAdmin):
pass

View File

@@ -1,4 +1,5 @@
import io import io
import asyncio
import os import os
import traceback import traceback
@@ -21,11 +22,14 @@ from telegram.ext import (
filters, filters,
) )
from pydantic import BaseModel
from typing import Literal
from vacancies.conf.settings import DB_URI from vacancies.conf.settings import DB_URI
from vacancies.main.models import Customer, CustomerCV, JobTitle from vacancies.main.models import Customer, CustomerCV
from vacancies.main.vector_store import get_next_vacancy from vacancies.main.vector_store import (
add_vectors,
batch_extract_features,
get_next_vacancy,
embed_features,
)
SYSTEM_PROMPT = """ SYSTEM_PROMPT = """
Ты карьерный копилот для ИТ. Ты можешь отвечать на любые вопросы по тематике карьеры. Ты карьерный копилот для ИТ. Ты можешь отвечать на любые вопросы по тематике карьеры.
@@ -65,17 +69,19 @@ async def next_vacancy(update: Update, context: ContextTypes.DEFAULT_TYPE):
await context.bot.send_message(chat_id=update.effective_chat.id, text=message) await context.bot.send_message(chat_id=update.effective_chat.id, text=message)
return return
vacancy = get_next_vacancy(customer_cv) result = get_next_vacancy(customer_cv)
if not vacancy: if not result:
message = "Вакансии закончились, возвращайтесь позже!" message = "Вакансии закончились, возвращайтесь позже!"
await context.bot.send_message(chat_id=update.effective_chat.id, text=message) await context.bot.send_message(chat_id=update.effective_chat.id, text=message)
return return
recommendation, vacancy_content, link = result
await context.bot.send_message( await context.bot.send_message(
chat_id=update.effective_chat.id, chat_id=update.effective_chat.id,
text=vacancy.content, text=vacancy_content,
reply_markup=InlineKeyboardMarkup([[ reply_markup=InlineKeyboardMarkup([[
InlineKeyboardButton("Откликнуться", url=vacancy.link), InlineKeyboardButton("Откликнуться", url=link),
]]), ]]),
) )
@@ -117,28 +123,23 @@ async def handle_document(update: Update, context: ContextTypes.DEFAULT_TYPE):
reader = PdfReader(buffer) reader = PdfReader(buffer)
resume = "\n".join(page.extract_text() for page in reader.pages) resume = "\n".join(page.extract_text() for page in reader.pages)
job_titles = JobTitle.objects.values_list('title', flat=True)
job_title_map = dict(JobTitle.objects.values_list('title', 'id'))
class Structure(BaseModel):
job_title: Literal[tuple(job_titles)]
min_salary_rub: int | None
max_salary_rub: int | None
openai_client = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal", temperature=0, seed=42, top_p=1)
structured_llm = openai_client.with_structured_output(Structure)
prompt = f"Extract fields from following CV: {resume}"
response = await structured_llm.ainvoke(prompt)
customer = await Customer.objects.aget(telegram_id=update.effective_user.id) customer = await Customer.objects.aget(telegram_id=update.effective_user.id)
customer_cv, _ = await CustomerCV.objects.aupdate_or_create(customer=customer, defaults=dict( customer_cv, _ = await CustomerCV.objects.aupdate_or_create(customer=customer, defaults=dict(
content=resume, content=resume,
job_title_id=job_title_map[response.job_title],
min_salary_rub=response.min_salary_rub,
max_salary_rub=response.max_salary_rub,
)) ))
def upload_vectors():
features = batch_extract_features([customer_cv.content])[0]
add_vectors(
"cvs",
customer_cv.id,
features.model_dump(),
{'content': customer_cv.content, 'features_json': features.model_dump()},
embed_features(features.model_dump()),
)
await asyncio.to_thread(upload_vectors)
await context.bot.editMessageText("Отлично! Запомнил Ваше резюме.", update.effective_chat.id, message.id) await context.bot.editMessageText("Отлично! Запомнил Ваше резюме.", update.effective_chat.id, message.id)

View File

@@ -1,14 +1,18 @@
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta
from itertools import batched from itertools import batched
from datetime import timedelta
from django.utils import timezone
from pydantic import BaseModel
from typing import Literal
from vacancies.main.models import Vacancy, JobTitle
from langchain_openai import ChatOpenAI
import clickhouse_connect import clickhouse_connect
from django.core.management import BaseCommand from django.core.management import BaseCommand
from django.conf import settings from django.conf import settings
from qdrant_client.models import OrderBy
from vacancies.main.vector_store import (
add_vectors,
batch_extract_features,
embed_features,
qdrant_client,
)
query = """ query = """
SELECT DISTINCT ON (message) id, chat_username, telegram_id, message, timestamp SELECT DISTINCT ON (message) id, chat_username, telegram_id, message, timestamp
@@ -34,38 +38,23 @@ class Command(BaseCommand):
help = "Collect vacancies from telegram messages" help = "Collect vacancies from telegram messages"
def handle(self, *args, **options): def handle(self, *args, **options):
job_titles = JobTitle.objects.values_list('title', flat=True) response = qdrant_client.scroll(collection_name="vacancies", limit=1, order_by=OrderBy(key="timestamp", direction="desc"))
job_title_map = dict(JobTitle.objects.values_list('title', 'id')) last_point_timestamp = datetime.now() - timedelta(days=30)
if response[0]:
class Structure(BaseModel): last_point_timestamp = response[0][0].payload["timestamp"]
job_title: Literal[tuple(job_titles)]
min_salary_rub: int | None
max_salary_rub: int | None
openai_client = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal", temperature=0, seed=42, top_p=1)
structured_llm = openai_client.with_structured_output(Structure)
last_timestamp = timezone.now() - timedelta(days=30)
if last_vacancy := Vacancy.objects.order_by("-timestamp").first():
last_timestamp = last_vacancy.timestamp
clickhouse_client = clickhouse_connect.create_client(host=settings.CLICKHOUSE_HOST, port=settings.CLICKHOUSE_PORT) clickhouse_client = clickhouse_connect.create_client(host=settings.CLICKHOUSE_HOST, port=settings.CLICKHOUSE_PORT)
result_rows = clickhouse_client.query(query, parameters={"timestamp": last_timestamp}).result_rows result_rows = clickhouse_client.query(query, parameters={"timestamp": last_point_timestamp}).result_rows
for index, rows in enumerate(batched(result_rows, settings.COLLECT_VACANCIES_BATCH_SIZE)): for index, rows in enumerate(batched(result_rows, settings.COLLECT_VACANCIES_BATCH_SIZE)):
prompts = [f"Extract fields from following vacancies: {row[3]}" for row in rows] vacancies_features = batch_extract_features([row[3] for row in rows])
responses = structured_llm.batch(prompts)
vacancies = [] print(f"Processing {index+1}/{len(result_rows)//settings.COLLECT_VACANCIES_BATCH_SIZE}")
for row, response in zip(rows, responses): with ThreadPoolExecutor() as pool:
print(response) vacancies_vectors = pool.map(embed_features, [vacancy_features.model_dump() for vacancy_features in vacancies_features])
for row, vacancy_features, vacancy_vectors in zip(rows, vacancies_features, vacancies_vectors):
(id, chat_username, telegram_id, message, timestamp) = row (id, chat_username, telegram_id, message, timestamp) = row
vacancies.append(Vacancy( link = f"https://t.me/{chat_username}/{telegram_id}"
external_id=id, payload = {'content': message, 'features_json': vacancy_features.model_dump(), "link": link, "timestamp": timestamp}
job_title_id=job_title_map[response.job_title], add_vectors("vacancies", id, vacancy_features.model_dump(), payload, vacancy_vectors)
min_salary_rub=response.min_salary_rub,
max_salary_rub=response.max_salary_rub,
content=message,
timestamp=timestamp,
link=f"https://t.me/{chat_username}/{telegram_id}",
))
print(Vacancy.objects.bulk_create(vacancies, ignore_conflicts=True))

View File

@@ -15,14 +15,16 @@ class Command(BaseCommand):
async def ahandle(self, *args, **options): async def ahandle(self, *args, **options):
for customer_cv in CustomerCV.objects.all(): for customer_cv in CustomerCV.objects.all():
vacancy = get_next_vacancy(customer_cv) result = get_next_vacancy(customer_cv)
if not vacancy: if not result:
continue continue
recommendation, vacancy_content, link = result
await application.bot.send_message( await application.bot.send_message(
chat_id=customer_cv.customer.chat_id, chat_id=recommendation.customer.chat_id,
text=vacancy.content, text=vacancy_content,
reply_markup=InlineKeyboardMarkup([[ reply_markup=InlineKeyboardMarkup([[
InlineKeyboardButton("Откликнуться", url=vacancy.link), InlineKeyboardButton("Откликнуться", url=link),
]]), ]]),
) )

View File

@@ -14,6 +14,6 @@ class Command(BaseCommand):
checkpointer.setup() checkpointer.setup()
if sys.platform == "win32": if sys.platform == "win32":
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
application.run_polling() application.run_polling()

View File

@@ -1,55 +0,0 @@
# Generated by Django 5.2.7 on 2025-11-08 19:11
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('main', '0008_alter_recommendedvacancy_vacancy_id'),
]
operations = [
migrations.CreateModel(
name='JobTitle',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('title', models.CharField(max_length=255, unique=True)),
],
),
migrations.AddField(
model_name='customercv',
name='max_salary_rub',
field=models.PositiveIntegerField(blank=True, default=None, null=True),
),
migrations.AddField(
model_name='customercv',
name='min_salary_rub',
field=models.PositiveIntegerField(blank=True, default=None, null=True),
),
migrations.AddField(
model_name='customercv',
name='job_title',
field=models.ForeignKey(default=0, on_delete=django.db.models.deletion.CASCADE, to='main.jobtitle'),
preserve_default=False,
),
migrations.CreateModel(
name='Vacancy',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('external_id', models.CharField(max_length=255, unique=True)),
('min_salary_rub', models.PositiveIntegerField(blank=True, default=None, null=True)),
('max_salary_rub', models.PositiveIntegerField(blank=True, default=None, null=True)),
('content', models.TextField()),
('timestamp', models.DateTimeField()),
('link', models.URLField()),
('job_title', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.jobtitle')),
],
),
migrations.AlterField(
model_name='recommendedvacancy',
name='vacancy_id',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.vacancy'),
),
]

View File

@@ -1,4 +1,5 @@
from django.db import models from django.db import models
from pydantic import BaseModel
class Customer(models.Model): class Customer(models.Model):
@@ -16,18 +17,8 @@ class Customer(models.Model):
db_table = "customers" db_table = "customers"
class JobTitle(models.Model):
title = models.CharField(max_length=255, unique=True)
def __str__(self):
return self.title
class CustomerCV(models.Model): class CustomerCV(models.Model):
customer = models.OneToOneField(Customer, on_delete=models.CASCADE) customer = models.OneToOneField(Customer, on_delete=models.CASCADE)
job_title = models.ForeignKey(JobTitle, on_delete=models.CASCADE)
min_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
max_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
content = models.TextField() content = models.TextField()
created_at = models.DateTimeField(auto_now_add=True) created_at = models.DateTimeField(auto_now_add=True)
@@ -40,21 +31,9 @@ class CustomerCV(models.Model):
db_table = "customer_vcs" db_table = "customer_vcs"
class Vacancy(models.Model):
job_title = models.ForeignKey(JobTitle, on_delete=models.CASCADE)
external_id = models.CharField(max_length=255, unique=True)
min_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
max_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
content = models.TextField()
timestamp = models.DateTimeField()
link = models.URLField()
def __str__(self):
return self.job_title.title
class RecommendedVacancy(models.Model): class RecommendedVacancy(models.Model):
customer = models.ForeignKey(Customer, on_delete=models.CASCADE) customer = models.ForeignKey(Customer, on_delete=models.CASCADE)
vacancy_id = models.ForeignKey(Vacancy, on_delete=models.CASCADE) vacancy_id = models.BigIntegerField()
created_at = models.DateTimeField(auto_now_add=True) created_at = models.DateTimeField(auto_now_add=True)
objects = models.Manager() objects = models.Manager()
@@ -65,3 +44,19 @@ class RecommendedVacancy(models.Model):
class Meta: class Meta:
verbose_name_plural = 'Recommended Vacancies' verbose_name_plural = 'Recommended Vacancies'
db_table = "recommended_vacancies" db_table = "recommended_vacancies"
class VacancyFeatures(BaseModel):
job_title: str | None = None # Должность
employment_type: str | None = None # Тип занятости
work_format: str | None = None # Формат работы
experience: str | None = None # Опыт работы
position_level: str | None = None # Уровень позиции
industry: str | None = None # Отрасль / Сфера деятельности
tech_stack: list[str] | None = None # Технологический стек / Ключевые навыки
location: str | None = None # География
salary_range: str | None = None # Зарплатные ожидания / вилка
languages: list[str] | None = None # Языки
education: str | None = None # Образование
schedule: str | None = None # График работы
additional_requirements: list[str] | None = None # Дополнительные предпочтения / требования

View File

@@ -1,4 +1,154 @@
from vacancies.main.models import RecommendedVacancy, Vacancy from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from qdrant_client import QdrantClient, models
from qdrant_client.models import Filter, HasIdCondition
from vacancies.conf.settings import QDRANT_URL
from vacancies.main.models import RecommendedVacancy, VacancyFeatures
qdrant_client = QdrantClient(url=QDRANT_URL)
FEATURE_NAMES = [
"job_title", "employment_type", "work_format", "experience", "position_level", "industry", "tech_stack",
"location", "salary_range", "languages", "education", "schedule", "additional_requirements"
]
weights = {
"job_title": 70,
"tech_stack": 10,
"salary_range": 10,
}
vectors_config = {
name: models.VectorParams(size=3072, distance=models.Distance.COSINE) for name in FEATURE_NAMES
}
if not qdrant_client.collection_exists("vacancies"):
qdrant_client.create_collection(
collection_name="vacancies",
vectors_config=vectors_config,
)
qdrant_client.create_payload_index(
collection_name="vacancies",
field_name="timestamp",
field_schema="datetime",
)
if not qdrant_client.collection_exists("cvs"):
qdrant_client.create_collection(
collection_name="cvs",
vectors_config=vectors_config,
)
embedding = OpenAIEmbeddings(model="text-embedding-3-large")
def _prepare_texts(features):
texts = {}
for name in FEATURE_NAMES:
value = features.get(name)
if isinstance(value, list):
text = " ".join(value) if value else ""
else:
text = str(value) if value else ""
texts[name] = text
return texts
def embed_features(features):
features = {key: value for key, value in features.items() if value}
features_texts = _prepare_texts(features)
names, texts = features_texts.keys(), features_texts.values()
vectors = dict(zip(names, embedding.embed_documents(texts)))
return vectors
def add_vectors(collection_name: str, _id: int, features: dict, payload: dict, vectors):
max_similarities = {}
for name, vec in vectors.items():
results = qdrant_client.query_points(collection_name="vacancies", query=vec, using=name, limit=100)
for res in results.points:
max_similarities.setdefault(res.id, {})
max_similarities[res.id][name] = res.score
scored = []
for vid, feature_sims in max_similarities.items():
total = sum(feature_sims[feature] * weights.get(feature, 1) for feature in feature_sims)
scored.append({"id": vid, "score": total})
scored.sort(key=lambda x: x["score"], reverse=True)
if scored and scored[0]["score"] > 80: # threshold
return
qdrant_client.upsert(
collection_name=collection_name,
points=[models.PointStruct(id=_id, vector=vectors, payload=payload)]
)
def search_similarities(query_filter: Filter, cv_id: int):
cv = qdrant_client.retrieve(collection_name="cvs", ids=[cv_id], with_vectors=True)[0]
max_similarities, vacancies_content = {}, {}
for name, vec in cv.vector.items():
results = qdrant_client.query_points(
collection_name="vacancies",
query=vec,
using=name,
limit=100000,
with_payload=True,
query_filter=query_filter,
)
for res in results.points:
max_similarities.setdefault(res.id, {})
vacancies_content.setdefault(res.id, {})
max_similarities[res.id][name] = res.score
vacancies_content[res.id]["content"] = res.payload["content"]
vacancies_content[res.id]["features_json"] = res.payload["features_json"]
vacancies_content[res.id]["link"] = res.payload["link"]
scored = []
for vid, feature_sims in max_similarities.items():
total = sum(feature_sims[feature] * weights.get(feature, 1) for feature in feature_sims)
scored.append({
"id": vid,
"score": total,
"content": vacancies_content[vid]["content"],
"features_json": vacancies_content[vid]["features_json"],
"link": vacancies_content[vid]["link"],
"sims": feature_sims,
})
scored.sort(key=lambda x: x["score"], reverse=True)
return scored[0]["id"], scored[0]["content"], scored[0]["link"]
def batch_extract_features(contents: list[str]) -> list[VacancyFeatures]:
prompts = [
f"""
Extract the following features from the job vacancy description. If a feature is not mentioned, set it to null.
Features:
- job_title: Должность (e.g., DevOps, Python программист)
- employment_type: Тип занятости (e.g., Полная занятость, Частичная)
- work_format: Формат работы (e.g., Офис, Удалённо, Гибрид)
- experience: Опыт работы (e.g., 3-5 лет, Нет опыта)
- position_level: Уровень позиции (e.g., Junior, Senior)
- industry: Отрасль / Сфера деятельности (e.g., IT, Финансы)
- tech_stack: Технологический стек / Ключевые навыки (list of strings)
- location: География (e.g., Москва, Россия)
- salary_range: Зарплатные ожидания / вилка (e.g., 100000-200000 руб)
- languages: Языки (list of strings, e.g., ["Русский", "Английский"])
- education: Образование (e.g., Высшее, Среднее специальное)
- schedule: График работы (e.g., Полный день, Сменный)
- additional_requirements: Дополнительные предпочтения / требования (list of strings)
Vacancy content:
{content}
"""
for content in contents
]
openai_client = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal", temperature=0, seed=42, top_p=1)
structured_llm = openai_client.with_structured_output(VacancyFeatures)
response = structured_llm.batch(prompts)
return response
def get_next_vacancy(customer_cv): def get_next_vacancy(customer_cv):
@@ -6,14 +156,16 @@ def get_next_vacancy(customer_cv):
customer=customer_cv.customer, customer=customer_cv.customer,
).values_list('vacancy_id', flat=True) ).values_list('vacancy_id', flat=True)
vacancy = Vacancy.objects.exclude(id__in=recommended_vacancy_ids).filter( query_filter = Filter(must_not=[HasIdCondition(has_id=recommended_vacancy_ids)])
job_title=customer_cv.job_title, result = search_similarities(query_filter, customer_cv.id)
min_salary_rub__gt=customer_cv.min_salary_rub, if not result:
).first() return None
RecommendedVacancy.objects.create( search_result_id, vacancy_content, link = result
recommendation = RecommendedVacancy.objects.create(
customer=customer_cv.customer, customer=customer_cv.customer,
vacancy=vacancy, vacancy_id=search_result_id,
) )
return vacancy return recommendation, vacancy_content, link