Compare commits
No commits in common. "45e89be6d029c9cf7c1849e26681a88c0ffd8dbd" and "750683fb5c572da92f832ed414b2446553206dc1" have entirely different histories.
45e89be6d0
...
750683fb5c
21
README.md
21
README.md
@ -1,41 +1,24 @@
|
|||||||
# vision-career
|
# vision-career-backend
|
||||||
|
|
||||||
Sample `.env`:
|
Sample `.env`:
|
||||||
|
|
||||||
```dotenv
|
```dotenv
|
||||||
|
DEEPINFRA_API_TOKEN=your-token-here
|
||||||
OPENAI_API_KEY=your-token-here
|
OPENAI_API_KEY=your-token-here
|
||||||
OPENAI_PROXY=http://user:password@host:port
|
|
||||||
|
|
||||||
BOT_TOKEN=your-token-here
|
BOT_TOKEN=your-token-here
|
||||||
|
|
||||||
SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt
|
SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt
|
||||||
|
|
||||||
SECRET_KEY=secret
|
|
||||||
DEBUG=true
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Commands:
|
Commands:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker compose up -d
|
|
||||||
|
|
||||||
KUBECONFIG=clickhouse-kubeconfig.yaml kubectl port-forward svc/clickhouse-clickhouse -n clickhouse 18123:8123
|
|
||||||
|
|
||||||
uv sync
|
|
||||||
|
|
||||||
uv run --env-file .env manage.py migrate
|
uv run --env-file .env manage.py migrate
|
||||||
uv run --env-file .env manage.py createsuperuser --username stromenko_es --email estromenko@mail.ru
|
uv run --env-file .env manage.py createsuperuser --username stromenko_es --email estromenko@mail.ru
|
||||||
uv run --env-file .env manage.py collectstatic
|
|
||||||
uv run --env-file .env manage.py runserver
|
uv run --env-file .env manage.py runserver
|
||||||
|
|
||||||
uv run --env-file .env manage.py generate_recommended_vacancies
|
uv run --env-file .env manage.py generate_recommended_vacancies
|
||||||
uv run --env-file .env manage.py collect_vacancies_from_telegram_messages
|
uv run --env-file .env manage.py collect_vacancies_from_telegram_messages
|
||||||
uv run --env-file .env manage.py runbot
|
uv run --env-file .env manage.py runbot
|
||||||
```
|
```
|
||||||
|
|
||||||
Production port-forwards:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
KUBECONFIG=production-kubeconfig.yaml kubectl port-forward svc/qdrant -n qdrant 6333:6333
|
|
||||||
KUBECONFIG=production-kubeconfig.yaml kubectl port-forward svc/main-cluster-rw -n postgresql-cluster 5432
|
|
||||||
```
|
|
||||||
|
|||||||
@ -1,4 +1,11 @@
|
|||||||
services:
|
services:
|
||||||
|
qdrant:
|
||||||
|
image: qdrant/qdrant:latest
|
||||||
|
restart: always
|
||||||
|
ports:
|
||||||
|
- "127.0.0.1:6333:6333"
|
||||||
|
volumes:
|
||||||
|
- "/srv/vision-career/qdrant:/qdrant/storage"
|
||||||
postgres:
|
postgres:
|
||||||
image: postgres:17-alpine3.20
|
image: postgres:17-alpine3.20
|
||||||
restart: always
|
restart: always
|
||||||
|
|||||||
@ -14,13 +14,3 @@ class CustomerCVADMIN(admin.ModelAdmin):
|
|||||||
@admin.register(models.RecommendedVacancy)
|
@admin.register(models.RecommendedVacancy)
|
||||||
class RecommendedVacancyAdmin(admin.ModelAdmin):
|
class RecommendedVacancyAdmin(admin.ModelAdmin):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
@admin.register(models.Vacancy)
|
|
||||||
class VacancyAdmin(admin.ModelAdmin):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
@admin.register(models.JobTitle)
|
|
||||||
class JobTitleAdmin(admin.ModelAdmin):
|
|
||||||
pass
|
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
import io
|
import io
|
||||||
|
import asyncio
|
||||||
import os
|
import os
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
@ -21,11 +22,14 @@ from telegram.ext import (
|
|||||||
filters,
|
filters,
|
||||||
)
|
)
|
||||||
|
|
||||||
from pydantic import BaseModel
|
|
||||||
from typing import Literal
|
|
||||||
from vacancies.conf.settings import DB_URI
|
from vacancies.conf.settings import DB_URI
|
||||||
from vacancies.main.models import Customer, CustomerCV, JobTitle
|
from vacancies.main.models import Customer, CustomerCV
|
||||||
from vacancies.main.vector_store import get_next_vacancy
|
from vacancies.main.vector_store import (
|
||||||
|
add_vectors,
|
||||||
|
batch_extract_features,
|
||||||
|
get_next_vacancy,
|
||||||
|
embed_features,
|
||||||
|
)
|
||||||
|
|
||||||
SYSTEM_PROMPT = """
|
SYSTEM_PROMPT = """
|
||||||
Ты — карьерный копилот для ИТ. Ты можешь отвечать на любые вопросы по тематике карьеры.
|
Ты — карьерный копилот для ИТ. Ты можешь отвечать на любые вопросы по тематике карьеры.
|
||||||
@ -65,17 +69,19 @@ async def next_vacancy(update: Update, context: ContextTypes.DEFAULT_TYPE):
|
|||||||
await context.bot.send_message(chat_id=update.effective_chat.id, text=message)
|
await context.bot.send_message(chat_id=update.effective_chat.id, text=message)
|
||||||
return
|
return
|
||||||
|
|
||||||
vacancy = get_next_vacancy(customer_cv)
|
result = get_next_vacancy(customer_cv)
|
||||||
if not vacancy:
|
if not result:
|
||||||
message = "Вакансии закончились, возвращайтесь позже!"
|
message = "Вакансии закончились, возвращайтесь позже!"
|
||||||
await context.bot.send_message(chat_id=update.effective_chat.id, text=message)
|
await context.bot.send_message(chat_id=update.effective_chat.id, text=message)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
recommendation, vacancy_content, link = result
|
||||||
|
|
||||||
await context.bot.send_message(
|
await context.bot.send_message(
|
||||||
chat_id=update.effective_chat.id,
|
chat_id=update.effective_chat.id,
|
||||||
text=vacancy.content,
|
text=vacancy_content,
|
||||||
reply_markup=InlineKeyboardMarkup([[
|
reply_markup=InlineKeyboardMarkup([[
|
||||||
InlineKeyboardButton("Откликнуться", url=vacancy.link),
|
InlineKeyboardButton("Откликнуться", url=link),
|
||||||
]]),
|
]]),
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -117,28 +123,23 @@ async def handle_document(update: Update, context: ContextTypes.DEFAULT_TYPE):
|
|||||||
reader = PdfReader(buffer)
|
reader = PdfReader(buffer)
|
||||||
resume = "\n".join(page.extract_text() for page in reader.pages)
|
resume = "\n".join(page.extract_text() for page in reader.pages)
|
||||||
|
|
||||||
job_titles = JobTitle.objects.values_list('title', flat=True)
|
|
||||||
job_title_map = dict(JobTitle.objects.values_list('title', 'id'))
|
|
||||||
|
|
||||||
class Structure(BaseModel):
|
|
||||||
job_title: Literal[tuple(job_titles)]
|
|
||||||
min_salary_rub: int | None
|
|
||||||
max_salary_rub: int | None
|
|
||||||
|
|
||||||
openai_client = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal", temperature=0, seed=42, top_p=1)
|
|
||||||
structured_llm = openai_client.with_structured_output(Structure)
|
|
||||||
|
|
||||||
prompt = f"Extract fields from following CV: {resume}"
|
|
||||||
response = await structured_llm.ainvoke(prompt)
|
|
||||||
|
|
||||||
customer = await Customer.objects.aget(telegram_id=update.effective_user.id)
|
customer = await Customer.objects.aget(telegram_id=update.effective_user.id)
|
||||||
customer_cv, _ = await CustomerCV.objects.aupdate_or_create(customer=customer, defaults=dict(
|
customer_cv, _ = await CustomerCV.objects.aupdate_or_create(customer=customer, defaults=dict(
|
||||||
content=resume,
|
content=resume,
|
||||||
job_title_id=job_title_map[response.job_title],
|
|
||||||
min_salary_rub=response.min_salary_rub,
|
|
||||||
max_salary_rub=response.max_salary_rub,
|
|
||||||
))
|
))
|
||||||
|
|
||||||
|
def upload_vectors():
|
||||||
|
features = batch_extract_features([customer_cv.content])[0]
|
||||||
|
add_vectors(
|
||||||
|
"cvs",
|
||||||
|
customer_cv.id,
|
||||||
|
features.model_dump(),
|
||||||
|
{'content': customer_cv.content, 'features_json': features.model_dump()},
|
||||||
|
embed_features(features.model_dump()),
|
||||||
|
)
|
||||||
|
|
||||||
|
await asyncio.to_thread(upload_vectors)
|
||||||
|
|
||||||
await context.bot.editMessageText("Отлично! Запомнил Ваше резюме.", update.effective_chat.id, message.id)
|
await context.bot.editMessageText("Отлично! Запомнил Ваше резюме.", update.effective_chat.id, message.id)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,14 +1,18 @@
|
|||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
from datetime import datetime, timedelta
|
||||||
from itertools import batched
|
from itertools import batched
|
||||||
from datetime import timedelta
|
|
||||||
from django.utils import timezone
|
|
||||||
from pydantic import BaseModel
|
|
||||||
from typing import Literal
|
|
||||||
from vacancies.main.models import Vacancy, JobTitle
|
|
||||||
from langchain_openai import ChatOpenAI
|
|
||||||
|
|
||||||
import clickhouse_connect
|
import clickhouse_connect
|
||||||
from django.core.management import BaseCommand
|
from django.core.management import BaseCommand
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
from qdrant_client.models import OrderBy
|
||||||
|
|
||||||
|
from vacancies.main.vector_store import (
|
||||||
|
add_vectors,
|
||||||
|
batch_extract_features,
|
||||||
|
embed_features,
|
||||||
|
qdrant_client,
|
||||||
|
)
|
||||||
|
|
||||||
query = """
|
query = """
|
||||||
SELECT DISTINCT ON (message) id, chat_username, telegram_id, message, timestamp
|
SELECT DISTINCT ON (message) id, chat_username, telegram_id, message, timestamp
|
||||||
@ -34,38 +38,23 @@ class Command(BaseCommand):
|
|||||||
help = "Collect vacancies from telegram messages"
|
help = "Collect vacancies from telegram messages"
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
job_titles = JobTitle.objects.values_list('title', flat=True)
|
response = qdrant_client.scroll(collection_name="vacancies", limit=1, order_by=OrderBy(key="timestamp", direction="desc"))
|
||||||
job_title_map = dict(JobTitle.objects.values_list('title', 'id'))
|
last_point_timestamp = datetime.now() - timedelta(days=30)
|
||||||
|
if response[0]:
|
||||||
class Structure(BaseModel):
|
last_point_timestamp = response[0][0].payload["timestamp"]
|
||||||
job_title: Literal[tuple(job_titles)]
|
|
||||||
min_salary_rub: int | None
|
|
||||||
max_salary_rub: int | None
|
|
||||||
|
|
||||||
openai_client = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal", temperature=0, seed=42, top_p=1)
|
|
||||||
structured_llm = openai_client.with_structured_output(Structure)
|
|
||||||
|
|
||||||
last_timestamp = timezone.now() - timedelta(days=30)
|
|
||||||
if last_vacancy := Vacancy.objects.order_by("-timestamp").first():
|
|
||||||
last_timestamp = last_vacancy.timestamp
|
|
||||||
|
|
||||||
clickhouse_client = clickhouse_connect.create_client(host=settings.CLICKHOUSE_HOST, port=settings.CLICKHOUSE_PORT)
|
clickhouse_client = clickhouse_connect.create_client(host=settings.CLICKHOUSE_HOST, port=settings.CLICKHOUSE_PORT)
|
||||||
result_rows = clickhouse_client.query(query, parameters={"timestamp": last_timestamp}).result_rows
|
result_rows = clickhouse_client.query(query, parameters={"timestamp": last_point_timestamp}).result_rows
|
||||||
|
|
||||||
for index, rows in enumerate(batched(result_rows, settings.COLLECT_VACANCIES_BATCH_SIZE)):
|
for index, rows in enumerate(batched(result_rows, settings.COLLECT_VACANCIES_BATCH_SIZE)):
|
||||||
prompts = [f"Extract fields from following vacancies: {row[3]}" for row in rows]
|
vacancies_features = batch_extract_features([row[3] for row in rows])
|
||||||
responses = structured_llm.batch(prompts)
|
|
||||||
vacancies = []
|
print(f"Processing {index+1}/{len(result_rows)//settings.COLLECT_VACANCIES_BATCH_SIZE}")
|
||||||
for row, response in zip(rows, responses):
|
with ThreadPoolExecutor() as pool:
|
||||||
print(response)
|
vacancies_vectors = pool.map(embed_features, [vacancy_features.model_dump() for vacancy_features in vacancies_features])
|
||||||
|
|
||||||
|
for row, vacancy_features, vacancy_vectors in zip(rows, vacancies_features, vacancies_vectors):
|
||||||
(id, chat_username, telegram_id, message, timestamp) = row
|
(id, chat_username, telegram_id, message, timestamp) = row
|
||||||
vacancies.append(Vacancy(
|
link = f"https://t.me/{chat_username}/{telegram_id}"
|
||||||
external_id=id,
|
payload = {'content': message, 'features_json': vacancy_features.model_dump(), "link": link, "timestamp": timestamp}
|
||||||
job_title_id=job_title_map[response.job_title],
|
add_vectors("vacancies", id, vacancy_features.model_dump(), payload, vacancy_vectors)
|
||||||
min_salary_rub=response.min_salary_rub,
|
|
||||||
max_salary_rub=response.max_salary_rub,
|
|
||||||
content=message,
|
|
||||||
timestamp=timestamp,
|
|
||||||
link=f"https://t.me/{chat_username}/{telegram_id}",
|
|
||||||
))
|
|
||||||
print(Vacancy.objects.bulk_create(vacancies, ignore_conflicts=True))
|
|
||||||
|
|||||||
@ -15,14 +15,16 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
async def ahandle(self, *args, **options):
|
async def ahandle(self, *args, **options):
|
||||||
for customer_cv in CustomerCV.objects.all():
|
for customer_cv in CustomerCV.objects.all():
|
||||||
vacancy = get_next_vacancy(customer_cv)
|
result = get_next_vacancy(customer_cv)
|
||||||
if not vacancy:
|
if not result:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
recommendation, vacancy_content, link = result
|
||||||
|
|
||||||
await application.bot.send_message(
|
await application.bot.send_message(
|
||||||
chat_id=customer_cv.customer.chat_id,
|
chat_id=recommendation.customer.chat_id,
|
||||||
text=vacancy.content,
|
text=vacancy_content,
|
||||||
reply_markup=InlineKeyboardMarkup([[
|
reply_markup=InlineKeyboardMarkup([[
|
||||||
InlineKeyboardButton("Откликнуться", url=vacancy.link),
|
InlineKeyboardButton("Откликнуться", url=link),
|
||||||
]]),
|
]]),
|
||||||
)
|
)
|
||||||
|
|||||||
@ -14,6 +14,6 @@ class Command(BaseCommand):
|
|||||||
checkpointer.setup()
|
checkpointer.setup()
|
||||||
|
|
||||||
if sys.platform == "win32":
|
if sys.platform == "win32":
|
||||||
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
|
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
|
||||||
|
|
||||||
application.run_polling()
|
application.run_polling()
|
||||||
|
|||||||
@ -1,55 +0,0 @@
|
|||||||
# Generated by Django 5.2.7 on 2025-11-08 19:11
|
|
||||||
|
|
||||||
import django.db.models.deletion
|
|
||||||
from django.db import migrations, models
|
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
|
||||||
|
|
||||||
dependencies = [
|
|
||||||
('main', '0008_alter_recommendedvacancy_vacancy_id'),
|
|
||||||
]
|
|
||||||
|
|
||||||
operations = [
|
|
||||||
migrations.CreateModel(
|
|
||||||
name='JobTitle',
|
|
||||||
fields=[
|
|
||||||
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
|
||||||
('title', models.CharField(max_length=255, unique=True)),
|
|
||||||
],
|
|
||||||
),
|
|
||||||
migrations.AddField(
|
|
||||||
model_name='customercv',
|
|
||||||
name='max_salary_rub',
|
|
||||||
field=models.PositiveIntegerField(blank=True, default=None, null=True),
|
|
||||||
),
|
|
||||||
migrations.AddField(
|
|
||||||
model_name='customercv',
|
|
||||||
name='min_salary_rub',
|
|
||||||
field=models.PositiveIntegerField(blank=True, default=None, null=True),
|
|
||||||
),
|
|
||||||
migrations.AddField(
|
|
||||||
model_name='customercv',
|
|
||||||
name='job_title',
|
|
||||||
field=models.ForeignKey(default=0, on_delete=django.db.models.deletion.CASCADE, to='main.jobtitle'),
|
|
||||||
preserve_default=False,
|
|
||||||
),
|
|
||||||
migrations.CreateModel(
|
|
||||||
name='Vacancy',
|
|
||||||
fields=[
|
|
||||||
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
|
||||||
('external_id', models.CharField(max_length=255, unique=True)),
|
|
||||||
('min_salary_rub', models.PositiveIntegerField(blank=True, default=None, null=True)),
|
|
||||||
('max_salary_rub', models.PositiveIntegerField(blank=True, default=None, null=True)),
|
|
||||||
('content', models.TextField()),
|
|
||||||
('timestamp', models.DateTimeField()),
|
|
||||||
('link', models.URLField()),
|
|
||||||
('job_title', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.jobtitle')),
|
|
||||||
],
|
|
||||||
),
|
|
||||||
migrations.AlterField(
|
|
||||||
model_name='recommendedvacancy',
|
|
||||||
name='vacancy_id',
|
|
||||||
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.vacancy'),
|
|
||||||
),
|
|
||||||
]
|
|
||||||
@ -1,4 +1,5 @@
|
|||||||
from django.db import models
|
from django.db import models
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
class Customer(models.Model):
|
class Customer(models.Model):
|
||||||
@ -16,18 +17,8 @@ class Customer(models.Model):
|
|||||||
db_table = "customers"
|
db_table = "customers"
|
||||||
|
|
||||||
|
|
||||||
class JobTitle(models.Model):
|
|
||||||
title = models.CharField(max_length=255, unique=True)
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return self.title
|
|
||||||
|
|
||||||
|
|
||||||
class CustomerCV(models.Model):
|
class CustomerCV(models.Model):
|
||||||
customer = models.OneToOneField(Customer, on_delete=models.CASCADE)
|
customer = models.OneToOneField(Customer, on_delete=models.CASCADE)
|
||||||
job_title = models.ForeignKey(JobTitle, on_delete=models.CASCADE)
|
|
||||||
min_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
|
|
||||||
max_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
|
|
||||||
content = models.TextField()
|
content = models.TextField()
|
||||||
created_at = models.DateTimeField(auto_now_add=True)
|
created_at = models.DateTimeField(auto_now_add=True)
|
||||||
|
|
||||||
@ -40,21 +31,9 @@ class CustomerCV(models.Model):
|
|||||||
db_table = "customer_vcs"
|
db_table = "customer_vcs"
|
||||||
|
|
||||||
|
|
||||||
class Vacancy(models.Model):
|
|
||||||
job_title = models.ForeignKey(JobTitle, on_delete=models.CASCADE)
|
|
||||||
external_id = models.CharField(max_length=255, unique=True)
|
|
||||||
min_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
|
|
||||||
max_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
|
|
||||||
content = models.TextField()
|
|
||||||
timestamp = models.DateTimeField()
|
|
||||||
link = models.URLField()
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return self.job_title.title
|
|
||||||
|
|
||||||
class RecommendedVacancy(models.Model):
|
class RecommendedVacancy(models.Model):
|
||||||
customer = models.ForeignKey(Customer, on_delete=models.CASCADE)
|
customer = models.ForeignKey(Customer, on_delete=models.CASCADE)
|
||||||
vacancy_id = models.ForeignKey(Vacancy, on_delete=models.CASCADE)
|
vacancy_id = models.BigIntegerField()
|
||||||
created_at = models.DateTimeField(auto_now_add=True)
|
created_at = models.DateTimeField(auto_now_add=True)
|
||||||
|
|
||||||
objects = models.Manager()
|
objects = models.Manager()
|
||||||
@ -65,3 +44,19 @@ class RecommendedVacancy(models.Model):
|
|||||||
class Meta:
|
class Meta:
|
||||||
verbose_name_plural = 'Recommended Vacancies'
|
verbose_name_plural = 'Recommended Vacancies'
|
||||||
db_table = "recommended_vacancies"
|
db_table = "recommended_vacancies"
|
||||||
|
|
||||||
|
|
||||||
|
# Structured set of features that the LLM extracts from a free-form text.
# Every field is optional and defaults to None when it is not filled in.
# NOTE(review): documented with `#` comments rather than a class docstring,
# because pydantic exposes a model docstring as the schema description.
class VacancyFeatures(BaseModel):
    job_title: str | None = None  # Job title / position
    employment_type: str | None = None  # Employment type
    work_format: str | None = None  # Work format
    experience: str | None = None  # Work experience
    position_level: str | None = None  # Position level
    industry: str | None = None  # Industry / field of activity
    tech_stack: list[str] | None = None  # Tech stack / key skills
    location: str | None = None  # Geography
    salary_range: str | None = None  # Salary expectations / range
    languages: list[str] | None = None  # Languages
    education: str | None = None  # Education
    schedule: str | None = None  # Work schedule
    additional_requirements: list[str] | None = None  # Additional preferences / requirements
|
||||||
|
|||||||
@ -1,4 +1,154 @@
|
|||||||
from vacancies.main.models import RecommendedVacancy, Vacancy
|
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
||||||
|
from qdrant_client import QdrantClient, models
|
||||||
|
from qdrant_client.models import Filter, HasIdCondition
|
||||||
|
|
||||||
|
from vacancies.conf.settings import QDRANT_URL
|
||||||
|
from vacancies.main.models import RecommendedVacancy, VacancyFeatures
|
||||||
|
|
||||||
|
# Shared Qdrant client; the collections below are created when this module is imported.
qdrant_client = QdrantClient(url=QDRANT_URL)

# Each feature is stored as its own named vector.
FEATURE_NAMES = [
    "job_title",
    "employment_type",
    "work_format",
    "experience",
    "position_level",
    "industry",
    "tech_stack",
    "location",
    "salary_range",
    "languages",
    "education",
    "schedule",
    "additional_requirements",
]

# Multipliers applied to per-feature similarity scores.
weights = {
    "job_title": 70,
    "tech_stack": 10,
    "salary_range": 10,
}

# Same vector layout for every feature: 3072-dimensional, cosine distance.
vectors_config = {
    feature_name: models.VectorParams(size=3072, distance=models.Distance.COSINE)
    for feature_name in FEATURE_NAMES
}

if not qdrant_client.collection_exists("vacancies"):
    qdrant_client.create_collection(collection_name="vacancies", vectors_config=vectors_config)
    # The datetime index on "timestamp" is created together with the collection.
    qdrant_client.create_payload_index(
        collection_name="vacancies",
        field_name="timestamp",
        field_schema="datetime",
    )

if not qdrant_client.collection_exists("cvs"):
    qdrant_client.create_collection(collection_name="cvs", vectors_config=vectors_config)

embedding = OpenAIEmbeddings(model="text-embedding-3-large")
|
||||||
|
|
||||||
|
def _prepare_texts(features):
    """Turn a features mapping into one text per name in FEATURE_NAMES.

    Lists are joined with single spaces, other truthy values are passed
    through ``str()``, and anything falsy (including a missing key) becomes
    an empty string.

    Returns a dict keyed by feature name, in FEATURE_NAMES order.
    """
    def as_text(value):
        if not value:
            return ""
        if isinstance(value, list):
            return " ".join(value)
        return str(value)

    return {feature_name: as_text(features.get(feature_name)) for feature_name in FEATURE_NAMES}
|
||||||
|
|
||||||
|
|
||||||
|
def embed_features(features):
    """Embed every feature of ``features`` and return a name -> vector dict.

    ``features`` is a plain dict of feature values (callers pass
    ``VacancyFeatures.model_dump()``). The texts are produced by
    ``_prepare_texts``, so the result has one vector for each name in
    FEATURE_NAMES.

    NOTE(review): empty or missing features are embedded as the empty string,
    so they still get a vector. The previous pre-filter on falsy values had no
    effect because ``_prepare_texts`` maps them to "" anyway. Confirm whether
    such features should be left out of the point instead.
    """
    features_texts = _prepare_texts(features)
    names = list(features_texts)
    # embed_documents is declared to take a list of strings, so pass a real list
    # rather than a dict view.
    texts = [features_texts[name] for name in names]
    return dict(zip(names, embedding.embed_documents(texts)))
|
||||||
|
|
||||||
|
|
||||||
|
def add_vectors(collection_name: str, _id: int, features: dict, payload: dict, vectors):
    """Store a point with its named vectors, skipping near-duplicate vacancies.

    Args:
        collection_name: Target Qdrant collection.
        _id: Point id.
        features: Feature dict of the point (currently unused; kept for callers).
        payload: Payload stored with the point.
        vectors: Mapping of feature name -> embedding vector.

    For the "vacancies" collection the point is first compared against the
    stored vacancies: per-feature similarity scores are multiplied by
    ``weights`` (default 1) and summed per candidate. If the best total is
    above the threshold, the point is treated as a duplicate and not stored.

    Other collections are always upserted. Previously a CV was also checked
    against the vacancies and could be silently dropped, leaving later lookups
    of that CV without a point.
    """
    duplicate_threshold = 80

    if collection_name == "vacancies":
        totals = {}
        for name, vec in vectors.items():
            results = qdrant_client.query_points(collection_name=collection_name, query=vec, using=name, limit=100)
            weight = weights.get(name, 1)
            for res in results.points:
                totals[res.id] = totals.get(res.id, 0) + res.score * weight

        if totals and max(totals.values()) > duplicate_threshold:
            return

    qdrant_client.upsert(
        collection_name=collection_name,
        points=[models.PointStruct(id=_id, vector=vectors, payload=payload)],
    )
|
||||||
|
|
||||||
|
|
||||||
|
def search_similarities(query_filter: Filter, cv_id: int):
    """Find the best-matching vacancy for a stored CV.

    Each named vector of the CV is queried against the "vacancies"
    collection, restricted by ``query_filter``. Per-feature scores are
    multiplied by ``weights`` (default 1) and summed per vacancy.

    Args:
        query_filter: Qdrant filter applied to every vacancy query.
        cv_id: Id of the point in the "cvs" collection.

    Returns:
        ``(vacancy_id, content, link)`` for the vacancy with the highest
        total, or ``None`` when the CV point does not exist or no vacancy
        matched. Both cases used to raise IndexError.
    """
    cv_points = qdrant_client.retrieve(collection_name="cvs", ids=[cv_id], with_vectors=True)
    if not cv_points:
        return None
    cv = cv_points[0]

    totals, payloads = {}, {}
    for name, vec in cv.vector.items():
        results = qdrant_client.query_points(
            collection_name="vacancies",
            query=vec,
            using=name,
            limit=100000,
            with_payload=True,
            query_filter=query_filter,
        )
        weight = weights.get(name, 1)
        for res in results.points:
            totals[res.id] = totals.get(res.id, 0) + res.score * weight
            payloads[res.id] = res.payload

    if not totals:
        return None

    # max() keeps the first-seen vacancy on ties, the same as a stable descending sort.
    best_id = max(totals, key=totals.get)
    best_payload = payloads[best_id]
    return best_id, best_payload["content"], best_payload["link"]
|
||||||
|
|
||||||
|
|
||||||
|
def batch_extract_features(contents: list[str]) -> list[VacancyFeatures]:
    """Extract structured features from several texts in one batch call.

    One prompt is built per entry of ``contents``. The prompts are sent
    through a ChatOpenAI client configured for structured output, and the
    batch result is returned as-is.

    NOTE(review): the prompt template is kept flush-left exactly as it
    appears in this view; confirm against the original file's whitespace.
    """
    def build_prompt(content):
        return f"""
Extract the following features from the job vacancy description. If a feature is not mentioned, set it to null.
Features:
- job_title: Должность (e.g., DevOps, Python программист)
- employment_type: Тип занятости (e.g., Полная занятость, Частичная)
- work_format: Формат работы (e.g., Офис, Удалённо, Гибрид)
- experience: Опыт работы (e.g., 3-5 лет, Нет опыта)
- position_level: Уровень позиции (e.g., Junior, Senior)
- industry: Отрасль / Сфера деятельности (e.g., IT, Финансы)
- tech_stack: Технологический стек / Ключевые навыки (list of strings)
- location: География (e.g., Москва, Россия)
- salary_range: Зарплатные ожидания / вилка (e.g., 100000-200000 руб)
- languages: Языки (list of strings, e.g., ["Русский", "Английский"])
- education: Образование (e.g., Высшее, Среднее специальное)
- schedule: График работы (e.g., Полный день, Сменный)
- additional_requirements: Дополнительные предпочтения / требования (list of strings)
Vacancy content:
{content}
"""

    prompts = [build_prompt(content) for content in contents]
    llm = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal", temperature=0, seed=42, top_p=1)
    return llm.with_structured_output(VacancyFeatures).batch(prompts)
|
||||||
|
|
||||||
|
|
||||||
def get_next_vacancy(customer_cv):
|
def get_next_vacancy(customer_cv):
|
||||||
@ -6,14 +156,16 @@ def get_next_vacancy(customer_cv):
|
|||||||
customer=customer_cv.customer,
|
customer=customer_cv.customer,
|
||||||
).values_list('vacancy_id', flat=True)
|
).values_list('vacancy_id', flat=True)
|
||||||
|
|
||||||
vacancy = Vacancy.objects.exclude(id__in=recommended_vacancy_ids).filter(
|
query_filter = Filter(must_not=[HasIdCondition(has_id=recommended_vacancy_ids)])
|
||||||
job_title=customer_cv.job_title,
|
result = search_similarities(query_filter, customer_cv.id)
|
||||||
min_salary_rub__gt=customer_cv.min_salary_rub,
|
if not result:
|
||||||
).first()
|
return None
|
||||||
|
|
||||||
RecommendedVacancy.objects.create(
|
search_result_id, vacancy_content, link = result
|
||||||
|
|
||||||
|
recommendation = RecommendedVacancy.objects.create(
|
||||||
customer=customer_cv.customer,
|
customer=customer_cv.customer,
|
||||||
vacancy=vacancy,
|
vacancy_id=search_result_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
return vacancy
|
return recommendation, vacancy_content, link
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user