Compare commits: change-rec...master (20 commits)
| SHA1 |
|---|
| d3d4766abb |
| 3d9e1f2239 |
| af7dbf7246 |
| 5e7b44f611 |
| 9fd6c9ae97 |
| b4310bba55 |
| 70366f8f15 |
| 2cd2862db1 |
| dab73dc4c1 |
| c053a5676d |
| 80ddee9378 |
| 5d80864674 |
| b224ef29d3 |
| 6ff25281e2 |
| 41b3a250a7 |
| 96d8621d49 |
| b23502ee6a |
| 55ee3b7ba4 |
| b31ef06ec0 |
| 750683fb5c |
20 README.md
@@ -1,24 +1,40 @@
# vision-career-backend
# vision-career

Sample `.env`:

```dotenv
DEEPINFRA_API_TOKEN=your-token-here
OPENAI_API_KEY=your-token-here
OPENAI_PROXY=http://user:password@host:port

BOT_TOKEN=your-token-here

SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt

SECRET_KEY=secret
DEBUG=true
```

Commands:

```bash
docker compose up -d

KUBECONFIG=clickhouse-kubeconfig.yaml kubectl port-forward svc/clickhouse-clickhouse -n clickhouse 18123:8123

uv sync

uv run --env-file .env manage.py migrate
uv run --env-file .env manage.py createsuperuser --username stromenko_es --email estromenko@mail.ru
uv run --env-file .env manage.py collectstatic
uv run --env-file .env manage.py runserver

uv run --env-file .env manage.py generate_recommended_vacancies
uv run --env-file .env manage.py collect_vacancies_from_telegram_messages
uv run --env-file .env manage.py runbot
```

Production port-forwards:

```bash
KUBECONFIG=production-kubeconfig.yaml kubectl port-forward svc/main-cluster-rw -n postgresql-cluster 5432
```
14 compose.yaml
@@ -1,11 +1,4 @@
services:
  qdrant:
    image: qdrant/qdrant:latest
    restart: always
    ports:
      - "127.0.0.1:6333:6333"
    volumes:
      - "/srv/vision-career/qdrant:/qdrant/storage"
  postgres:
    image: postgres:17-alpine3.20
    restart: always
@@ -16,3 +9,10 @@ services:
      - "127.0.0.1:5432:5432"
    volumes:
      - "/srv/vision-career/postgres:/var/lib/postgresql/data"
  qdrant:
    image: qdrant/qdrant:latest
    restart: always
    ports:
      - "127.0.0.1:6333:6333"
    volumes:
      - "/srv/vision-career/qdrant:/qdrant/storage"
@@ -7,15 +7,16 @@ requires-python = ">=3.13"
dependencies = [
    "clickhouse-connect>=0.9.2",
    "django>=5.2.7",
    "flashrank>=0.2.10",
    "gunicorn>=23.0.0",
    "langchain>=0.3.27",
    "langchain-openai>=0.3.35",
    "langchain-qdrant>=1.1.0",
    "langgraph-checkpoint-postgres>=3.0.0",
    "psycopg[binary]>=3.2.12",
    "pydantic>=2.0",
    "pypdf>=6.1.2",
    "python-telegram-bot>=22.5",
    "qdrant-client>=1.16.1",
    "sentry-sdk>=2.43.0",
    "whitenoise>=6.11.0",
]
@@ -43,6 +43,7 @@ INSTALLED_APPS = [
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'vacancies.main',
    'vacancies.hh_parser',
]

MIDDLEWARE = [
@@ -89,8 +90,19 @@ DATABASES = {
        "PORT": os.getenv("PG_PORT", "5432"),
        "CONN_HEALTH_CHECKS": True,
    },
    "hh_parser": {
        "ENGINE": "django.db.backends.postgresql",
        "NAME": os.getenv("HH_PARSER_PG_NAME", "postgres"),
        "USER": os.getenv("HH_PARSER_PG_USER", "postgres"),
        "PASSWORD": os.getenv("HH_PARSER_PG_PASSWORD", "postgres"),
        "HOST": os.getenv("HH_PARSER_PG_HOST", "localhost"),
        "PORT": os.getenv("HH_PARSER_PG_PORT", "5432"),
        "CONN_HEALTH_CHECKS": True,
    },
}

DATABASE_ROUTERS = ["vacancies.hh_parser.routers.AppRouter"]

DB_URI = f"postgres://{DATABASES['default']['USER']}:{quote(DATABASES['default']['PASSWORD'])}@{DATABASES['default']['HOST']}:{DATABASES['default']['PORT']}/{DATABASES['default']['NAME']}?sslmode=disable"
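A side note on the connection string above: the password is passed through `urllib.parse.quote` so that characters like `@` or `:` cannot break the `user:password@host` part of the URI. A quick sketch with a hypothetical password value:

```python
from urllib.parse import quote

# "p@ss:word" would otherwise be misparsed as part of the host section of the URI.
print(quote("p@ss:word"))  # -> p%40ss%3Aword
```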
0 vacancies/hh_parser/__init__.py Normal file
9 vacancies/hh_parser/admin.py Normal file
@@ -0,0 +1,9 @@
from django.contrib import admin
from vacancies.hh_parser import models


@admin.register(models.Vacancy)
class VacancyAdmin(admin.ModelAdmin):
    list_filter = ("source", "created_at")
    search_fields = ("title",)
    list_display = ("company", "title", "source", "link", "created_at")
6 vacancies/hh_parser/apps.py Normal file
@@ -0,0 +1,6 @@
from django.apps import AppConfig


class HHParserConfig(AppConfig):
    default_auto_field = 'django.db.models.BigAutoField'
    name = 'vacancies.hh_parser'
32 vacancies/hh_parser/models.py Normal file
@@ -0,0 +1,32 @@
from django.db import models


class Vacancy(models.Model):
    id = models.CharField(max_length=255, primary_key=True)
    title = models.CharField(max_length=255, null=True, blank=True)
    description = models.TextField(null=True, blank=True)
    min_payment = models.IntegerField(null=True, blank=True)
    max_payment = models.IntegerField(null=True, blank=True)
    payment_currency = models.CharField(max_length=255, null=True, blank=True)
    payment_type = models.CharField(max_length=255, null=True, blank=True)
    payment_period = models.CharField(max_length=255, null=True, blank=True)
    work_experience = models.CharField(max_length=255, null=True, blank=True)
    type_of_employment = models.CharField(max_length=255, null=True, blank=True)
    work_schedule = models.CharField(max_length=255, null=True, blank=True)
    work_format = models.CharField(max_length=255, null=True, blank=True)
    link = models.CharField(max_length=255, null=True, blank=True)
    address = models.CharField(max_length=255, null=True, blank=True)
    created_at = models.DateTimeField(null=True, blank=True)
    company = models.CharField(max_length=255, null=True, blank=True)
    processed = models.BooleanField()
    source = models.CharField()

    objects = models.Manager()

    def __str__(self):
        return f'{self.source}: {self.company} -> {self.id} {self.title}'

    class Meta:
        managed = False
        db_table = "vacancies"
        verbose_name_plural = 'Vacancies'
23 vacancies/hh_parser/routers.py Normal file
@@ -0,0 +1,23 @@
class AppRouter:
    route_app_labels = {"hh_parser"}
    db_name = "hh_parser"

    def db_for_read(self, model, **hints):
        if model._meta.app_label in self.route_app_labels:
            return self.db_name
        return None

    def db_for_write(self, model, **hints):
        if model._meta.app_label in self.route_app_labels:
            return self.db_name
        return None

    def allow_relation(self, obj1, obj2, **hints):
        if obj1._meta.app_label in self.route_app_labels and obj2._meta.app_label in self.route_app_labels:
            return True
        return None

    def allow_migrate(self, db, app_label, model_name=None, **hints):
        if app_label in self.route_app_labels:
            return db == self.db_name
        return None
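For context, Django consults `DATABASE_ROUTERS` whenever it resolves the connection for a query, so reads and writes on `hh_parser` models land on the `hh_parser` alias without explicit `.using()` calls. A minimal sketch of checking this from `manage.py shell` (not part of the diff, assumes the settings change above is applied):

```python
from django.db import router  # Django's ConnectionRouter, driven by DATABASE_ROUTERS

from vacancies.hh_parser.models import Vacancy as ExternalVacancy

# Both should report the "hh_parser" alias configured in settings above.
print(router.db_for_read(ExternalVacancy))   # hh_parser
print(router.db_for_write(ExternalVacancy))  # hh_parser

# The model is unmanaged (Meta.managed = False), so allow_migrate() keeps
# migrations for this app away from the default database.
```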
@@ -1,16 +1,27 @@
from django.contrib import admin
from vacancies.main import models


@admin.register(models.Customer)
class CustomerAdmin(admin.ModelAdmin):
    pass


@admin.register(models.CustomerCV)
class CustomerCVADMIN(admin.ModelAdmin):
class CustomerCVAdmin(admin.ModelAdmin):
    pass


@admin.register(models.RecommendedVacancy)
class RecommendedVacancyAdmin(admin.ModelAdmin):
    pass


@admin.register(models.Vacancy)
class VacancyAdmin(admin.ModelAdmin):
    pass


@admin.register(models.JobTitle)
class JobTitleAdmin(admin.ModelAdmin):
    pass
@@ -1,10 +1,14 @@
import asyncio
import io
import os
import traceback
from typing import Literal

from asgiref.sync import sync_to_async
from langchain.agents import create_agent
from langchain_openai import ChatOpenAI
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
from pydantic import BaseModel
from pypdf import PdfReader
from telegram import (
    InlineKeyboardButton,
@@ -22,21 +26,34 @@ from telegram.ext import (
)

from vacancies.conf.settings import DB_URI
from vacancies.main.models import Customer, CustomerCV
from vacancies.main.vector_store import (
    add_vectors,
    batch_extract_features,
    get_next_vacancy,
    embed_features,
)
from vacancies.main import prompts
from vacancies.main.models import Customer, CustomerCV, JobTitle
from vacancies.main.recommendations import get_next_vacancy
from django.conf import settings
from qdrant_client import AsyncQdrantClient
from openai import AsyncOpenAI

SYSTEM_PROMPT = """
Ты — карьерный копилот для ИТ. Ты можешь отвечать на любые вопросы по тематике карьеры.
У тебя есть доступ к резюме пользователя при необходимости.
Пиши кратко (до 5–6 строк, буллеты приветствуются).
После полезного ответа предложи что-нибудь, чем ты можешь помочь еще.
Отвечай простым текстом, не используй форматирование markdown.
"""
qdrant_client = AsyncQdrantClient(url=settings.QDRANT_URL)
openai_client = AsyncOpenAI(base_url="https://openrouter.ai/api/v1")


async def get_relevant_messages(query: str):
    """Получает релевантные сообщения по запросу пользователя."""

    embedding = await openai_client.embeddings.create(
        model="qwen/qwen3-embedding-8b",
        input=query,
        encoding_format="float",
    )

    response = await qdrant_client.query_points(
        collection_name="messages",
        query=embedding.data[0].embedding,
        limit=20,
    )
    messages = [hit.payload["message"] for hit in response.points]

    return "\n\n".join(messages)


async def get_user_resume(user_id: int):
@@ -60,7 +77,7 @@ async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):


async def next_vacancy(update: Update, context: ContextTypes.DEFAULT_TYPE):
    await context.bot.send_message(update.effective_chat.id, "📝 Обрабатываю твой запрос. Пожалуйста, подождите...")
    await context.bot.send_message(update.effective_chat.id, "⏳ Обрабатываю твой запрос. Пожалуйста, подождите...")

    customer_cv = await CustomerCV.objects.filter(customer__telegram_id=update.effective_user.id).afirst()
    if not customer_cv:
@@ -68,33 +85,36 @@ async def next_vacancy(update: Update, context: ContextTypes.DEFAULT_TYPE):
        await context.bot.send_message(chat_id=update.effective_chat.id, text=message)
        return

    result = get_next_vacancy(customer_cv)
    if not result:
    vacancy = await asyncio.to_thread(get_next_vacancy, customer_cv)
    if not vacancy:
        message = "Вакансии закончились, возвращайтесь позже!"
        await context.bot.send_message(chat_id=update.effective_chat.id, text=message)
        return

    recommendation, vacancy_content, link = result

    await context.bot.send_message(
        chat_id=update.effective_chat.id,
        text=vacancy_content,
        parse_mode="Markdown",
        text=vacancy.get_formatted_response(),
        reply_markup=InlineKeyboardMarkup([[
            InlineKeyboardButton("Откликнуться", url=link),
            InlineKeyboardButton("Откликнуться", url=vacancy.link),
        ]]),
    )


async def prompt(update: Update, context: ContextTypes.DEFAULT_TYPE):
    async with AsyncPostgresSaver.from_conn_string(DB_URI) as checkpointer:
        chat_model = ChatOpenAI(
            model_name="openai/gpt-5-mini:online",
            openai_api_base="https://openrouter.ai/api/v1",
        )
        agent = create_agent(
            model=ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal"),
            tools=[get_user_resume],
            system_prompt=SYSTEM_PROMPT,
            model=chat_model,
            tools=[get_user_resume, get_relevant_messages],
            system_prompt=prompts.BOT_SYSTEM_PROMPT,
            checkpointer=checkpointer,
        )

        message = await context.bot.send_message(update.effective_chat.id, "📝 Обрабатываю твой запрос. Пожалуйста, подождите...")
        message = await context.bot.send_message(update.effective_chat.id, "⏳ Обрабатываю твой запрос. Пожалуйста, подождите...")

        response = await agent.ainvoke(
            input={"messages": [{"role": "user", "content": f'user_id = {update.effective_user.id}\n{update.message.text}'}]},
@@ -110,7 +130,7 @@ async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> N


async def handle_document(update: Update, context: ContextTypes.DEFAULT_TYPE):
    message = await context.bot.send_message(update.effective_chat.id, "📝 Обрабатываю твой запрос. Пожалуйста, подождите...")
    message = await context.bot.send_message(update.effective_chat.id, "⏳ Обрабатываю твой запрос. Пожалуйста, подождите...")

    if not update.message.document:
        await context.bot.send_message(chat_id=update.effective_chat.id, text="Не удалось прочитать информацию из файла! Попробуйте другой формат.")
@@ -122,17 +142,27 @@ async def handle_document(update: Update, context: ContextTypes.DEFAULT_TYPE):
    reader = PdfReader(buffer)
    resume = "\n".join(page.extract_text() for page in reader.pages)

    db_job_titles = await sync_to_async(list)(JobTitle.objects.values_list('title', flat=True))
    job_title_map = await sync_to_async(dict)(JobTitle.objects.values_list('title', 'id'))

    class Structure(BaseModel):
        job_titles: list[Literal[tuple(db_job_titles)]]
        min_salary_rub: int | None
        max_salary_rub: int | None

    openai_client = ChatOpenAI(model_name="gpt-5-mini", temperature=0, seed=42, top_p=1)
    structured_llm = openai_client.with_structured_output(Structure)

    prompt = f'{prompts.STRUCTURED_OUTPUT_PROMPT} {resume}'
    response = await structured_llm.ainvoke(prompt)

    customer = await Customer.objects.aget(telegram_id=update.effective_user.id)
    customer_cv, _ = await CustomerCV.objects.aupdate_or_create(customer=customer, defaults=dict(
        content=resume,
        min_salary_rub=response.min_salary_rub,
        max_salary_rub=response.max_salary_rub,
    ))
    features = batch_extract_features(customer_cv.content)[0]
    add_vectors(
        "cvs",
        customer_cv.id,
        embed_features(features.model_dump())[0],
        {'content': customer_cv.content, 'features_json': features.model_dump()},
    )
    await customer_cv.job_titles.aset([job_title_map[job_title] for job_title in response.job_titles])

    await context.bot.editMessageText("Отлично! Запомнил Ваше резюме.", update.effective_chat.id, message.id)
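A note on the dynamic `Structure` model above: subscripting `Literal` with a tuple built from the `JobTitle` table restricts the structured output to job titles that actually exist in the database. A minimal sketch with hypothetical titles (not part of the diff):

```python
from typing import Literal

from pydantic import BaseModel

titles = ("Python Developer", "DevOps Engineer")  # hypothetical JobTitle values


class Structure(BaseModel):
    job_titles: list[Literal[titles]]  # only the known titles validate


Structure(job_titles=["DevOps Engineer"])  # ok
# Structure(job_titles=["Astronaut"])      # raises pydantic.ValidationError
```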
@@ -0,0 +1,45 @@
from django.core.management import BaseCommand
from django.utils import timezone
from vacancies.hh_parser.models import Vacancy as ExternalVacancy
from vacancies.main.models import Vacancy, JobTitle
from flashrank import Ranker, RerankRequest
import re

tags_regex = re.compile('<.*?>')

reranker = Ranker("ms-marco-TinyBERT-L-2-v2")


class Command(BaseCommand):
    help = "Collect vacancies from hh.ru parser"

    def _remove_tags(self, text):
        return re.sub(tags_regex, "", text)

    def handle(self, *args, **options):
        job_titles = list(JobTitle.objects.values("id", "title"))
        passages = [{"text": job_title["title"], **job_title} for job_title in job_titles]

        queryset = ExternalVacancy.objects.filter(title__isnull=False, description__isnull=False)
        total_vacancies = queryset.count()

        for index, vacancy in enumerate(queryset):
            results = reranker.rerank(RerankRequest(query=vacancy.title, passages=passages))
            ordered_results = sorted(results, key=lambda i: i["score"], reverse=True)
            job_title_id = ordered_results[0]["id"]

            vacancy, created = Vacancy.objects.get_or_create(
                external_id=vacancy.id,
                defaults=dict(
                    job_title_id=job_title_id,
                    min_salary_rub=vacancy.min_payment,
                    max_salary_rub=vacancy.max_payment,
                    company_name=vacancy.company,
                    requirements=self._remove_tags(vacancy.description),
                    content=self._remove_tags(vacancy.description),
                    timestamp=timezone.make_aware(vacancy.created_at),
                    link=vacancy.link,
                ),
            )

            print(f"{index+1}/{total_vacancies} Vacancy: {vacancy}, created: {created}")
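The loop above uses flashrank as a zero-shot classifier: every `JobTitle` is offered as a passage, and the top-scored passage decides the vacancy's job title. A minimal sketch of that idea with hypothetical titles (uses the same model name as above; the model is downloaded on first use):

```python
from flashrank import Ranker, RerankRequest

ranker = Ranker("ms-marco-TinyBERT-L-2-v2")
passages = [
    {"id": 1, "text": "Python Developer"},
    {"id": 2, "text": "DevOps Engineer"},
]
results = ranker.rerank(RerankRequest(query="Senior Python/Django developer", passages=passages))
best = max(results, key=lambda item: item["score"])
print(best["id"], best["score"])  # expected to pick the "Python Developer" passage
```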
@@ -1,18 +1,16 @@
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta
from datetime import timedelta
from itertools import batched
from typing import Literal

import clickhouse_connect
from django.core.management import BaseCommand
from django.conf import settings
from qdrant_client.models import OrderBy
from django.core.management import BaseCommand
from django.utils import timezone
from langchain_openai import ChatOpenAI
from pydantic import BaseModel

from vacancies.main.vector_store import (
    add_vectors,
    batch_extract_features,
    embed_features,
    qdrant_client,
)
from vacancies.main import prompts
from vacancies.main.models import JobTitle, Vacancy

query = """
SELECT DISTINCT ON (message) id, chat_username, telegram_id, message, timestamp
@@ -28,7 +26,8 @@ WHERE timestamp >= %(timestamp)s
        'заниматься', 'формат', 'занятость', 'вилка', 'должност', 'контакт'
    ]) >= 5
    AND arrayCount(x -> position(lower(message), x) > 0, [
        'о себе', 'обо мне', 'умею', '#ищу', '#резюме', 'университет', 'колледж'
        'о себе', 'обо мне', 'умею', '#ищу', '#резюме', 'университет', 'колледж',
        'не будет опубликовано'
    ]) = 0
ORDER BY timestamp ASC
"""
@@ -38,23 +37,49 @@ class Command(BaseCommand):
    help = "Collect vacancies from telegram messages"

    def handle(self, *args, **options):
        response = qdrant_client.scroll(collection_name="vacancies", limit=1, order_by=OrderBy(key="timestamp", direction="desc"))
        last_point_timestamp = datetime.now() - timedelta(days=30)
        if response[0]:
            last_point_timestamp = response[0][0].payload["timestamp"]
        job_titles = JobTitle.objects.values_list('title', flat=True)
        job_title_map = dict(JobTitle.objects.values_list('title', 'id'))

        class Structure(BaseModel):
            job_title: Literal[tuple(job_titles)]
            min_salary_rub: int | None
            max_salary_rub: int | None
            company_name: str
            requirements: str

        openai_client = ChatOpenAI(
            model_name="openai/gpt-5-mini",
            openai_api_base="https://openrouter.ai/api/v1",
            temperature=0,
            seed=42,
            top_p=1,
        )
        structured_llm = openai_client.with_structured_output(Structure)

        last_timestamp = timezone.now() - timedelta(days=30)
        if last_vacancy := Vacancy.objects.order_by("-timestamp").first():
            last_timestamp = last_vacancy.timestamp

        clickhouse_client = clickhouse_connect.create_client(host=settings.CLICKHOUSE_HOST, port=settings.CLICKHOUSE_PORT)
        result_rows = clickhouse_client.query(query, parameters={"timestamp": last_point_timestamp}).result_rows
        result_rows = clickhouse_client.query(query, parameters={"timestamp": last_timestamp}).result_rows

        for index, rows in enumerate(batched(result_rows, settings.COLLECT_VACANCIES_BATCH_SIZE)):
            vacancies_features = batch_extract_features([row[3] for row in rows])

            print(f"Processing {index+1}/{len(result_rows)//settings.COLLECT_VACANCIES_BATCH_SIZE}")
            with ThreadPoolExecutor() as pool:
                vacancies_vectors = pool.map(embed_features, [vacancy_features.model_dump() for vacancy_features in vacancies_features])

            for row, vacancy_features, vacancy_vectors in zip(rows, vacancies_features, vacancies_vectors):
        batches = list(batched(result_rows, settings.COLLECT_VACANCIES_BATCH_SIZE))
        for index, rows in enumerate(batches):
            structured_prompts = [f"{prompts.STRUCTURED_OUTPUT_PROMPT} {row[3]}" for row in rows]
            responses = structured_llm.batch(structured_prompts)
            vacancies = []
            for row, response in zip(rows, responses):
                (id, chat_username, telegram_id, message, timestamp) = row
                link = f"https://t.me/{chat_username}/{telegram_id}"
                payload = {'content': message, 'features_json': vacancy_features.model_dump(), "link": link, "timestamp": timestamp}
                add_vectors("vacancies", id, vacancy_features.model_dump(), payload, vacancy_vectors)
                vacancies.append(Vacancy(
                    external_id=id,
                    job_title_id=job_title_map[response.job_title],
                    min_salary_rub=response.min_salary_rub,
                    max_salary_rub=response.max_salary_rub,
                    company_name=response.company_name,
                    requirements=response.requirements,
                    content=message,
                    timestamp=timezone.make_aware(timestamp),
                    link=f"https://t.me/{chat_username}/{telegram_id}",
                ))
            Vacancy.objects.bulk_create(vacancies, ignore_conflicts=True)
            print(f"Processed {index+1}/{len(batches)}")
@@ -1,11 +1,12 @@
import asyncio

from django.core.management import BaseCommand
from vacancies.main.models import CustomerCV
from vacancies.main.bot import application
from vacancies.main.vector_store import get_next_vacancy
from telegram import InlineKeyboardButton, InlineKeyboardMarkup

from vacancies.main.bot import application
from vacancies.main.models import CustomerCV
from vacancies.main.recommendations import get_next_vacancy


class Command(BaseCommand):
    help = "Generates new recommended vacancies"
@@ -15,16 +16,12 @@ class Command(BaseCommand):

    async def ahandle(self, *args, **options):
        for customer_cv in CustomerCV.objects.all():
            result = get_next_vacancy(customer_cv)
            if not result:
                continue

            recommendation, vacancy_content, link = result

            if vacancy := get_next_vacancy(customer_cv):
                await application.bot.send_message(
                    chat_id=recommendation.customer.chat_id,
                    text=vacancy_content,
                    chat_id=customer_cv.customer.chat_id,
                    text=vacancy.get_formatted_response(),
                    parse_mode="Markdown",
                    reply_markup=InlineKeyboardMarkup([[
                        InlineKeyboardButton("Откликнуться", url=link),
                        InlineKeyboardButton("Откликнуться", url=vacancy.link),
                    ]]),
                )
@@ -0,0 +1,48 @@
import clickhouse_connect
from django.conf import settings
from django.core.management import BaseCommand
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct
from openai import OpenAI
from itertools import batched

query = """
SELECT DISTINCT ON (message) id, message
FROM telegram_parser_chatmessage
WHERE timestamp >= now() - INTERVAL 30 DAYS AND length(message) > 200
    AND position(message, '?') = 0 AND position(message, 'spam') = 0
ORDER BY timestamp ASC
"""


class Command(BaseCommand):
    help = "Sync clickhouse and qdrant"

    def handle(self, *args, **options):
        clickhouse_client = clickhouse_connect.create_client(host=settings.CLICKHOUSE_HOST, port=settings.CLICKHOUSE_PORT)

        qdrant_client = QdrantClient(url=settings.QDRANT_URL)
        if not qdrant_client.collection_exists("messages"):
            qdrant_client.create_collection(
                collection_name="messages",
                vectors_config=VectorParams(size=4096, distance=Distance.COSINE),
            )

        openai_client = OpenAI(base_url="https://openrouter.ai/api/v1")

        result_rows = clickhouse_client.query(query).result_rows
        batches = list(batched(result_rows, 100))
        batches_quantity = len(batches)
        for index, batch in enumerate(batches):
            ids, messages = list(zip(*batch))
            embedding = openai_client.embeddings.create(model="qwen/qwen3-embedding-8b", input=messages, encoding_format="float")
            embeddings = [row.embedding for row in embedding.data]

            qdrant_client.upsert(
                collection_name="messages",
                points=[
                    PointStruct(id=idx, vector=vector, payload={"message": message})
                    for idx, message, vector in zip(ids, messages, embeddings)
                ]
            )
            print(f"{index+1}/{batches_quantity} processed")
@@ -0,0 +1,55 @@
# Generated by Django 5.2.7 on 2025-11-08 19:11

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('main', '0008_alter_recommendedvacancy_vacancy_id'),
    ]

    operations = [
        migrations.CreateModel(
            name='JobTitle',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('title', models.CharField(max_length=255, unique=True)),
            ],
        ),
        migrations.AddField(
            model_name='customercv',
            name='max_salary_rub',
            field=models.PositiveIntegerField(blank=True, default=None, null=True),
        ),
        migrations.AddField(
            model_name='customercv',
            name='min_salary_rub',
            field=models.PositiveIntegerField(blank=True, default=None, null=True),
        ),
        migrations.AddField(
            model_name='customercv',
            name='job_title',
            field=models.ForeignKey(default=0, on_delete=django.db.models.deletion.CASCADE, to='main.jobtitle'),
            preserve_default=False,
        ),
        migrations.CreateModel(
            name='Vacancy',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('external_id', models.CharField(max_length=255, unique=True)),
                ('min_salary_rub', models.PositiveIntegerField(blank=True, default=None, null=True)),
                ('max_salary_rub', models.PositiveIntegerField(blank=True, default=None, null=True)),
                ('content', models.TextField()),
                ('timestamp', models.DateTimeField()),
                ('link', models.URLField()),
                ('job_title', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.jobtitle')),
            ],
        ),
        migrations.AlterField(
            model_name='recommendedvacancy',
            name='vacancy_id',
            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='main.vacancy'),
        ),
    ]
@@ -0,0 +1,18 @@
# Generated by Django 5.2.7 on 2025-11-09 08:06

from django.db import migrations


class Migration(migrations.Migration):

    dependencies = [
        ('main', '0009_jobtitle_customercv_max_salary_rub_and_more'),
    ]

    operations = [
        migrations.RenameField(
            model_name='recommendedvacancy',
            old_name='vacancy_id',
            new_name='vacancy',
        ),
    ]
@@ -0,0 +1,33 @@
# Generated by Django 5.2.7 on 2025-11-09 09:35

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('main', '0010_rename_vacancy_id_recommendedvacancy_vacancy'),
    ]

    operations = [
        migrations.RemoveField(
            model_name='customercv',
            name='job_title',
        ),
        migrations.AddField(
            model_name='customercv',
            name='job_titles',
            field=models.ManyToManyField(related_name='vacancies', to='main.jobtitle'),
        ),
        migrations.AlterField(
            model_name='recommendedvacancy',
            name='customer',
            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='recommended_vacancies', to='main.customer'),
        ),
        migrations.AlterField(
            model_name='recommendedvacancy',
            name='vacancy',
            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='recommended_vacancies', to='main.vacancy'),
        ),
    ]
@@ -0,0 +1,25 @@
# Generated by Django 5.2.7 on 2025-11-09 19:56

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('main', '0011_remove_customercv_job_title_customercv_job_titles_and_more'),
    ]

    operations = [
        migrations.AddField(
            model_name='vacancy',
            name='company_name',
            field=models.CharField(default='test', max_length=255),
            preserve_default=False,
        ),
        migrations.AddField(
            model_name='vacancy',
            name='requirements',
            field=models.TextField(default='test'),
            preserve_default=False,
        ),
    ]
17 vacancies/main/migrations/0013_alter_vacancy_options.py Normal file
@@ -0,0 +1,17 @@
# Generated by Django 5.2.7 on 2025-11-30 11:32

from django.db import migrations


class Migration(migrations.Migration):

    dependencies = [
        ('main', '0012_vacancy_company_name_vacancy_requirements'),
    ]

    operations = [
        migrations.AlterModelOptions(
            name='vacancy',
            options={'verbose_name_plural': 'Vacancies'},
        ),
    ]
@@ -1,5 +1,4 @@
from django.db import models
from pydantic import BaseModel


class Customer(models.Model):
@@ -17,8 +16,18 @@ class Customer(models.Model):
        db_table = "customers"


class JobTitle(models.Model):
    title = models.CharField(max_length=255, unique=True)

    def __str__(self):
        return self.title


class CustomerCV(models.Model):
    customer = models.OneToOneField(Customer, on_delete=models.CASCADE)
    job_titles = models.ManyToManyField(JobTitle, related_name="vacancies")
    min_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
    max_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
    content = models.TextField()
    created_at = models.DateTimeField(auto_now_add=True)

@@ -31,9 +40,43 @@ class CustomerCV(models.Model):
        db_table = "customer_vcs"


class Vacancy(models.Model):
    job_title = models.ForeignKey(JobTitle, on_delete=models.CASCADE)
    external_id = models.CharField(max_length=255, unique=True)
    min_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
    max_salary_rub = models.PositiveIntegerField(null=True, blank=True, default=None)
    company_name = models.CharField(max_length=255)
    requirements = models.TextField()
    content = models.TextField()
    timestamp = models.DateTimeField()
    link = models.URLField()

    def __str__(self):
        return self.job_title.title

    def get_formatted_response(self):
        response = f"""
💼 **Вакансия**: {self.job_title}
\n🏢 **Компания**: {self.company_name}
\n📝 **Требования**: {self.requirements}
"""
        if self.min_salary_rub:
            if self.max_salary_rub:
                response += f"\n💸 **ЗП**: {self.min_salary_rub} т.р. - {self.max_salary_rub} т.р."
            else:
                response += f"\n💸 **ЗП**: от {self.min_salary_rub} т.р."
        elif self.max_salary_rub:
            response += f"\n💸 **ЗП**: до {self.max_salary_rub} т.р."

        return response

    class Meta:
        verbose_name_plural = 'Vacancies'


class RecommendedVacancy(models.Model):
    customer = models.ForeignKey(Customer, on_delete=models.CASCADE)
    vacancy_id = models.BigIntegerField()
    customer = models.ForeignKey(Customer, on_delete=models.CASCADE, related_name="recommended_vacancies")
    vacancy = models.ForeignKey(Vacancy, on_delete=models.CASCADE, related_name="recommended_vacancies")
    created_at = models.DateTimeField(auto_now_add=True)

    objects = models.Manager()
@@ -44,19 +87,3 @@ class RecommendedVacancy(models.Model):

    class Meta:
        verbose_name_plural = 'Recommended Vacancies'
        db_table = "recommended_vacancies"


class VacancyFeatures(BaseModel):
    job_title: str | None = None  # Должность
    employment_type: str | None = None  # Тип занятости
    work_format: str | None = None  # Формат работы
    experience: str | None = None  # Опыт работы
    position_level: str | None = None  # Уровень позиции
    industry: str | None = None  # Отрасль / Сфера деятельности
    tech_stack: list[str] | None = None  # Технологический стек / Ключевые навыки
    location: str | None = None  # География
    salary_range: str | None = None  # Зарплатные ожидания / вилка
    languages: list[str] | None = None  # Языки
    education: str | None = None  # Образование
    schedule: str | None = None  # График работы
    additional_requirements: list[str] | None = None  # Дополнительные предпочтения / требования
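For illustration, a rough sketch of what the new `Vacancy.get_formatted_response()` helper in this file renders; the instances and values below are hypothetical and unsaved, shown only to demonstrate the formatting:

```python
# Hypothetical, unsaved instances, only to illustrate the helper above.
job_title = JobTitle(title="Python Developer")
vacancy = Vacancy(
    job_title=job_title,
    company_name="Acme",
    requirements="Django, PostgreSQL, Qdrant",
    min_salary_rub=200000,
    max_salary_rub=300000,
)
print(vacancy.get_formatted_response())
# Renders the vacancy card (title, company, requirements), ending with the
# salary line: 💸 **ЗП**: 200000 т.р. - 300000 т.р.
```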
68 vacancies/main/prompts.py Normal file
@@ -0,0 +1,68 @@
BOT_SYSTEM_PROMPT = """
Ты — IT Career Copilot, высококвалифицированный HR-советчик и эксперт по рынку труда в ИТ. Твоя главная задача — предоставлять пользователю практические и стратегические рекомендации по любым вопросам, связанным с карьерой в технологической сфере (поиск работы, развитие навыков, переговоры по зарплате, адаптация, увольнение).

Твой стиль общения: Фокус на действии (Action-Oriented): Отвечай максимально конкретно и применимо, предлагая шаги, которые пользователь может предпринять немедленно.

Консультация: Используй инсайты и данные о текущих трендах ИТ-рынка.

Краткость: Пиши лаконично, не более 4-5 содержательных пунктов или предложений. Используй маркированные списки (буллеты) для структурирования информации.

Формат: Отвечай простым текстом, не используя форматирование Markdown (заголовки, жирный шрифт, курсив, кодблоки).

Использование данных: У тебя есть дополнительный доступ к резюме пользователя (или фрагментам его карьерной истории) для персонализации ответов, а также ты можешь обратиться к базе данных с релевантными сообщениями, если тебе требуются дополнительные данные, например реальные данные о зарплатах или мнения людей.

Завершение: После ответа обязательно предложи один-два релевантных следующих шага, чтобы продолжить карьерное планирование.
"""


STRUCTURED_OUTPUT_PROMPT = """
You are an HR specialist. Your task is to review vacancies and independently select a suitable topic (e.g., DevSecOps, Java Developer, Information Security Specialist, etc.).
You also need to analyze vacancies and structure the information from them according to the scheme.

You don't need to change or invent anything in the job posting below. You only need to structure the information provided.
Example vacancy:
'Network Security Team lead - Infrastructure Security, Wildberries 💜

ЗП: до 500 000 рублей net
Уровень: Lead
Формат работы: удалёнка или гибрид

🚀 Вместе с масштабным развитием IT направления Wildberries и Russ развивает информационную безопасность. Мы решаем сложные и разнообразные задачи: от повышения защищенности каждого сервиса до развития безопасности в рамках всей нашей инфраструктуры.

Мы ищем эксперта, который вместе с командой займется разработкой сложных технических решений и внедрением практик безопасности для повышения защищенности нашей сетевой инфраструктуры.

📝 Основные задачи:
• Составление проектных планов и управление командой
• Взаимодействие с сетевыми командами для сбора потребностей и согласования технических решений
• Построение плана развития безопасности сетевой инфраструктуры
• Внедрение практик ИБ и контроль состояния защищенности внешнего и внутреннего сетевого периметра
• Разработка и внедрение мер по повышению прозрачности и контролируемости сетевых доступов в компании

💫 Необходимый опыт и навыки:
• Опыт в организации командной работы
• Опыт в построении сложных кросс-командных процессов
• Умение разрабатывать комплексные решения по безопасности для серверной инфраструктуры на базе Linux
• Хорошее знание сетевых технологий
• Опыт решения проблем ИБ в сетевой инфраструктуре
• Знание сетевых атак и способов защиты от них
• Опыт работы с NGFW

🔥 Что мы предлагаем:
• Полная удаленка или свободное посещение офисов в Москве и Санкт-Петербурге
• IT-ипотека и оформление в аккредитованную IT-компанию
• Бесплатное питание в офисах, ДМС со стоматологией (после испытательного срока)
• Оплачиваемые Day Off, корпоративное обучение и IT-мероприятия

💘 Контакты: @Alens_HR'

Structured output of the example vacancy:
{
    job_title: "Network Security Team lead - Infrastructure Security",
    company_name: "Wildberries",
    min_salary_rub: None,
    max_salary_rub: 500000,
    requirements: "Опыт в организации командной работы. Опыт в построении сложных кросс-командных процессов. Умение разрабатывать комплексные решения по безопасности для серверной инфраструктуры на базе Linux, Хорошее знание сетевых технологий. Опыт решения проблем ИБ в сетевой инфраструктуре. Знание сетевых атак и способов защиты от них. Опыт работы с NGFW"
}

Vacancy:
"""
17 vacancies/main/recommendations.py Normal file
@@ -0,0 +1,17 @@
from django.db.models import Q

from vacancies.main.models import Vacancy


def get_next_vacancy(customer_cv):
    vacancy = Vacancy.objects.filter(
        ~Q(id__in=customer_cv.customer.recommended_vacancies.values_list("vacancy_id", flat=True)),
        job_title__title__in=customer_cv.job_titles.values_list("title", flat=True),
    ).filter(
        (Q(min_salary_rub__isnull=True) | Q(min_salary_rub__gt=customer_cv.min_salary_rub)) |
        (Q(max_salary_rub__isnull=False) & Q(max_salary_rub__gte=customer_cv.min_salary_rub)),
    ).order_by("-timestamp").first()
    if vacancy:
        customer_cv.customer.recommended_vacancies.create(vacancy=vacancy)

    return vacancy
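Read as a predicate, the query above admits a vacancy that has not been recommended to the customer yet, whose job title matches one of the CV's titles, and whose salary is either unspecified or compatible with the CV's minimum. A rough plain-Python paraphrase, a hypothetical helper shown for illustration only:

```python
def matches(vacancy, customer_cv, already_recommended_ids):
    # Paraphrase of the ORM filter in get_next_vacancy above (sketch, not the real query).
    if vacancy.id in already_recommended_ids:
        return False
    cv_titles = {job_title.title for job_title in customer_cv.job_titles.all()}
    if vacancy.job_title.title not in cv_titles:
        return False
    return (
        vacancy.min_salary_rub is None
        or vacancy.min_salary_rub > customer_cv.min_salary_rub
        or (vacancy.max_salary_rub is not None
            and vacancy.max_salary_rub >= customer_cv.min_salary_rub)
    )
```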
@@ -1,171 +0,0 @@
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from qdrant_client import QdrantClient, models
from qdrant_client.models import Filter, HasIdCondition

from vacancies.conf.settings import QDRANT_URL
from vacancies.main.models import RecommendedVacancy, VacancyFeatures

qdrant_client = QdrantClient(url=QDRANT_URL)

FEATURE_NAMES = [
    "job_title", "employment_type", "work_format", "experience", "position_level", "industry", "tech_stack",
    "location", "salary_range", "languages", "education", "schedule", "additional_requirements"
]

weights = {
    "job_title": 70,
    "tech_stack": 10,
    "salary_range": 10,
}

vectors_config = {
    name: models.VectorParams(size=3072, distance=models.Distance.COSINE) for name in FEATURE_NAMES
}

if not qdrant_client.collection_exists("vacancies"):
    qdrant_client.create_collection(
        collection_name="vacancies",
        vectors_config=vectors_config,
    )
    qdrant_client.create_payload_index(
        collection_name="vacancies",
        field_name="timestamp",
        field_schema="datetime",
    )
if not qdrant_client.collection_exists("cvs"):
    qdrant_client.create_collection(
        collection_name="cvs",
        vectors_config=vectors_config,
    )

embedding = OpenAIEmbeddings(model="text-embedding-3-large")


def _prepare_texts(features):
    texts = {}
    for name in FEATURE_NAMES:
        value = features.get(name)
        if isinstance(value, list):
            text = " ".join(value) if value else ""
        else:
            text = str(value) if value else ""
        texts[name] = text
    return texts


def embed_features(features):
    features = {key: value for key, value in features.items() if value}
    features_texts = _prepare_texts(features)
    names, texts = features_texts.keys(), features_texts.values()
    vectors = dict(zip(names, embedding.embed_documents(texts)))
    return vectors


def add_vectors(collection_name: str, _id: int, features: dict, payload: dict, vectors):
    max_similarities = {}
    for name, vec in vectors.items():
        results = qdrant_client.query_points(collection_name="vacancies", query=vec, using=name, limit=100)
        for res in results.points:
            max_similarities.setdefault(res.id, {})
            max_similarities[res.id][name] = res.score

    scored = []
    for vid, feature_sims in max_similarities.items():
        total = sum(feature_sims[feature] * weights.get(feature, 1) for feature in feature_sims)
        scored.append({"id": vid, "score": total})

    scored.sort(key=lambda x: x["score"], reverse=True)
    if scored and scored[0]["score"] > 80:  # threshold
        return

    qdrant_client.upsert(
        collection_name=collection_name,
        points=[models.PointStruct(id=_id, vector=vectors, payload=payload)]
    )


def search_similarities(query_filter: Filter, cv_id: int):
    cv = qdrant_client.retrieve(collection_name="cvs", ids=[cv_id], with_vectors=True)[0]

    max_similarities, vacancies_content = {}, {}
    for name, vec in cv.vector.items():
        results = qdrant_client.query_points(
            collection_name="vacancies",
            query=vec,
            using=name,
            limit=100000,
            with_payload=True,
            query_filter=query_filter,
        )
        for res in results.points:
            max_similarities.setdefault(res.id, {})
            vacancies_content.setdefault(res.id, {})

            max_similarities[res.id][name] = res.score
            vacancies_content[res.id]["content"] = res.payload["content"]
            vacancies_content[res.id]["features_json"] = res.payload["features_json"]
            vacancies_content[res.id]["link"] = res.payload["link"]

    scored = []
    for vid, feature_sims in max_similarities.items():
        total = sum(feature_sims[feature] * weights.get(feature, 1) for feature in feature_sims)
        scored.append({
            "id": vid,
            "score": total,
            "content": vacancies_content[vid]["content"],
            "features_json": vacancies_content[vid]["features_json"],
            "link": vacancies_content[vid]["link"],
            "sims": feature_sims,
        })

    scored.sort(key=lambda x: x["score"], reverse=True)

    return scored[0]["id"], scored[0]["content"], scored[0]["link"]


def batch_extract_features(contents: list[str]) -> list[VacancyFeatures]:
    prompts = [
        f"""
        Extract the following features from the job vacancy description. If a feature is not mentioned, set it to null.
        Features:
        - job_title: Должность (e.g., DevOps, Python программист)
        - employment_type: Тип занятости (e.g., Полная занятость, Частичная)
        - work_format: Формат работы (e.g., Офис, Удалённо, Гибрид)
        - experience: Опыт работы (e.g., 3-5 лет, Нет опыта)
        - position_level: Уровень позиции (e.g., Junior, Senior)
        - industry: Отрасль / Сфера деятельности (e.g., IT, Финансы)
        - tech_stack: Технологический стек / Ключевые навыки (list of strings)
        - location: География (e.g., Москва, Россия)
        - salary_range: Зарплатные ожидания / вилка (e.g., 100000-200000 руб)
        - languages: Языки (list of strings, e.g., ["Русский", "Английский"])
        - education: Образование (e.g., Высшее, Среднее специальное)
        - schedule: График работы (e.g., Полный день, Сменный)
        - additional_requirements: Дополнительные предпочтения / требования (list of strings)
        Vacancy content:
        {content}
        """
        for content in contents
    ]
    openai_client = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal", temperature=0, seed=42, top_p=1)
    structured_llm = openai_client.with_structured_output(VacancyFeatures)
    response = structured_llm.batch(prompts)
    return response


def get_next_vacancy(customer_cv):
    recommended_vacancy_ids = RecommendedVacancy.objects.filter(
        customer=customer_cv.customer,
    ).values_list('vacancy_id', flat=True)

    query_filter = Filter(must_not=[HasIdCondition(has_id=recommended_vacancy_ids)])
    result = search_similarities(query_filter, customer_cv.id)
    if not result:
        return None

    search_result_id, vacancy_content, link = result

    recommendation = RecommendedVacancy.objects.create(
        customer=customer_cv.customer,
        vacancy_id=search_result_id,
    )

    return recommendation, vacancy_content, link