Simplify vacancy recommendations

This commit is contained in:
estromenko 2025-10-26 18:06:45 +03:00
parent b72460110e
commit fb95e6e799
8 changed files with 53 additions and 69 deletions

View File

@ -18,7 +18,6 @@ uv run --env-file .env manage.py migrate
uv run --env-file .env manage.py createsuperuser --username stromenko_es --email estromenko@mail.ru uv run --env-file .env manage.py createsuperuser --username stromenko_es --email estromenko@mail.ru
uv run --env-file .env manage.py runserver uv run --env-file .env manage.py runserver
uv run --env-file .env manage.py process_vacancies
uv run --env-file .env manage.py generate_recommended_vacancies uv run --env-file .env manage.py generate_recommended_vacancies
uv run --env-file .env manage.py collect_vacancies_from_telegram_messages uv run --env-file .env manage.py collect_vacancies_from_telegram_messages
uv run --env-file .env manage.py runbot uv run --env-file .env manage.py runbot

View File

@ -1,29 +0,0 @@
from langchain_openai import ChatOpenAI
from vacancies.main.models import VacancyFeatures
def extract_vacancy_features(content: str) -> VacancyFeatures:
    """Ask the chat model to turn free-form vacancy text into structured features.

    Args:
        content: Raw text of the vacancy description; it is interpolated
            verbatim at the end of the prompt.

    Returns:
        Whatever the structured-output runnable yields for the
        ``VacancyFeatures`` schema. The prompt instructs the model to set
        features that are not mentioned to null.
    """
    # A new client is created on every call, so no state is shared between
    # invocations.
    llm = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal")
    extractor = llm.with_structured_output(VacancyFeatures)

    # The prompt text is kept flush-left so the literal sent to the model
    # carries no extra indentation.
    prompt = f"""
Extract the following features from the job vacancy description. If a feature is not mentioned, set it to null.
Features:
- employment_type: Тип занятости (e.g., Полная занятость, Частичная)
- work_format: Формат работы (e.g., Офис, Удалённо, Гибрид)
- experience: Опыт работы (e.g., 3-5 лет, Нет опыта)
- position_level: Уровень позиции (e.g., Junior, Senior)
- industry: Отрасль / Сфера деятельности (e.g., IT, Финансы)
- tech_stack: Технологический стек / Ключевые навыки (list of strings)
- location: География (e.g., Москва, Россия)
- salary_range: Зарплатные ожидания / вилка (e.g., 100000-200000 руб)
- languages: Языки (list of strings, e.g., ["Русский", "Английский"])
- education: Образование (e.g., Высшее, Среднее специальное)
- schedule: График работы (e.g., Полный день, Сменный)
- additional_requirements: Дополнительные предпочтения / требования (list of strings)
Vacancy content:
{content}
"""
    return extractor.invoke(prompt)

View File

@ -1,8 +1,7 @@
from django.core.management import BaseCommand from django.core.management import BaseCommand
from vacancies.main.models import Vacancy from vacancies.main.models import Vacancy
import clickhouse_connect import clickhouse_connect
from vacancies.main.vector_store import add_vacancy_vectors from vacancies.main.vector_store import add_vacancy_vectors, extract_vacancy_features
from vacancies.main.features_extractor import extract_vacancy_features
clickhouse_client = clickhouse_connect.create_client(host="127.0.0.1", port=18123) clickhouse_client = clickhouse_connect.create_client(host="127.0.0.1", port=18123)
@ -29,10 +28,11 @@ class Command(BaseCommand):
(id, chat_username, telegram_id, message, timestamp) = row (id, chat_username, telegram_id, message, timestamp) = row
link = f"https://t.me/{chat_username}/{telegram_id}" link = f"https://t.me/{chat_username}/{telegram_id}"
features = extract_vacancy_features(message)
vacancy, created = Vacancy.objects.get_or_create( vacancy, created = Vacancy.objects.get_or_create(
link=link, link=link,
defaults={'content': message} defaults={'content': message, 'features_json': features.model_dump()}
) )
print(index, link) print(index, link)
add_vacancy_vectors(vacancy.id, extract_vacancy_features(message).model_dump(), {"link": link}) add_vacancy_vectors(vacancy.id, features.model_dump(), {"link": link})

View File

@ -15,19 +15,14 @@ class Command(BaseCommand):
for customer_cv in customer_cvs: for customer_cv in customer_cvs:
features = extract_vacancy_features(customer_cv.content) features = extract_vacancy_features(customer_cv.content)
print(features)
recommended_vacancy_ids = RecommendedVacancy.objects.filter( recommended_vacancy_ids = RecommendedVacancy.objects.filter(
customer=customer_cv.customer customer=customer_cv.customer,
).values_list('vacancy_id', flat=True) ).values_list('vacancy_id', flat=True)
query_filter = Filter( query_filter = Filter(must_not=[HasIdCondition(has_id=recommended_vacancy_ids)])
must_not = [
HasIdCondition(has_id=recommended_vacancy_ids),
]
)
search_result_id = search_similarities(features.model_dump(), query_filter) search_result_id = search_similarities(features.model_dump(), query_filter)
recommendation, _ = RecommendedVacancy.objects.get_or_create( recommendation = RecommendedVacancy.objects.create(
customer=customer_cv.customer, customer=customer_cv.customer,
vacancy_id=search_result_id, vacancy_id=search_result_id,
) )

View File

@ -1,20 +0,0 @@
from django.core.management import BaseCommand
from vacancies.main.models import Vacancy
from vacancies.main.features_extractor import extract_vacancy_features
from vacancies.main.vector_store import add_vacancy_vectors
class Command(BaseCommand):
    """Extract features for every unprocessed vacancy and index them.

    For each ``Vacancy`` with ``is_processed=False`` the command extracts
    structured features from the vacancy text, pushes them to the vector
    store, and then stores the features on the row and marks it processed.
    """

    help = "Process vacancies: extract features and index in vector store"

    def handle(self, *args, **options):
        """Run the extraction and indexing loop over unprocessed vacancies.

        Args:
            *args: Positional arguments passed by Django; unused.
            **options: Parsed command-line options; unused.
        """
        vacancies = Vacancy.objects.filter(is_processed=False)
        # len() evaluates the queryset once; the loop below reuses the
        # cached result instead of issuing a second query.
        len_vacancies = len(vacancies)
        # Count from 1 so the progress line reads "1/N" ... "N/N".
        for index, vacancy in enumerate(vacancies, start=1):
            print(f"Processing {index}/{len_vacancies} {vacancy}")
            features = extract_vacancy_features(vacancy.content)
            # Index BEFORE flagging the row as processed: if indexing raises,
            # the vacancy keeps is_processed=False and is retried on the next
            # run instead of being silently left out of the vector store.
            # NOTE(review): assumes add_vacancy_vectors can safely be called
            # again for the same vacancy id if save() below fails; confirm.
            add_vacancy_vectors(vacancy.id, features.model_dump(), {"link": vacancy.link})
            vacancy.features_json = features.model_dump()
            vacancy.is_processed = True
            vacancy.save()

View File

@ -0,0 +1,17 @@
# Generated by Django 5.2.7 on 2025-10-26 15:06
from django.db import migrations
class Migration(migrations.Migration):
    """Drop the ``is_processed`` column from the ``vacancy`` model."""

    # Must be applied after the migration that introduced the
    # features_json / is_processed fields.
    dependencies = [("main", "0004_vacancy_features_json_vacancy_is_processed")]

    operations = [
        migrations.RemoveField(model_name="vacancy", name="is_processed"),
    ]

View File

@ -35,7 +35,6 @@ class Vacancy(models.Model):
content = models.TextField() content = models.TextField()
link = models.URLField(unique=True) link = models.URLField(unique=True)
features_json = models.JSONField(null=True, blank=True) features_json = models.JSONField(null=True, blank=True)
is_processed = models.BooleanField(default=False)
created_at = models.DateTimeField(auto_now_add=True) created_at = models.DateTimeField(auto_now_add=True)
objects = models.Manager() objects = models.Manager()

View File

@ -1,10 +1,12 @@
from qdrant_client import models from qdrant_client import models
from langchain_openai import OpenAIEmbeddings from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from qdrant_client import QdrantClient from qdrant_client import QdrantClient
from qdrant_client.models import Filter from qdrant_client.models import Filter
from vacancies.main.models import VacancyFeatures
# client = QdrantClient(path="./embeddings") client = QdrantClient(path="./embeddings")
client = QdrantClient(url="http://localhost:6333") #client = QdrantClient(url="http://localhost:6333")
FEATURE_NAMES = [ FEATURE_NAMES = [
"employment_type", "work_format", "experience", "position_level", "industry", "tech_stack", "employment_type", "work_format", "experience", "position_level", "industry", "tech_stack",
@ -59,10 +61,6 @@ def add_vacancy_vectors(vacancy_id: int, features: dict, payload: dict):
def search_similarities(query_features: dict, query_filter: Filter) -> list[dict]: def search_similarities(query_features: dict, query_filter: Filter) -> list[dict]:
"""Search vacancies using sum of max similarities.
For each feature, compute similarities, then for each vacancy, take max per feature, sum.
Return top vacancies.
"""
texts = _prepare_texts(query_features) texts = _prepare_texts(query_features)
vectors = {} vectors = {}
for name, text in texts.items(): for name, text in texts.items():
@ -95,3 +93,28 @@ def search_similarities(query_features: dict, query_filter: Filter) -> list[dict
scored.sort(key=lambda x: x["score"], reverse=True) scored.sort(key=lambda x: x["score"], reverse=True)
return scored[0]["id"] return scored[0]["id"]
def extract_vacancy_features(content: str) -> VacancyFeatures:
    """Ask the chat model to turn free-form vacancy text into structured features.

    Args:
        content: Raw text of the vacancy description; it is interpolated
            verbatim at the end of the prompt.

    Returns:
        Whatever the structured-output runnable yields for the
        ``VacancyFeatures`` schema. The prompt instructs the model to set
        features that are not mentioned to null.
    """
    # A new client is created on every call, so no state is shared between
    # invocations.
    llm = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal")
    extractor = llm.with_structured_output(VacancyFeatures)

    # The prompt text is kept flush-left so the literal sent to the model
    # carries no extra indentation.
    prompt = f"""
Extract the following features from the job vacancy description. If a feature is not mentioned, set it to null.
Features:
- employment_type: Тип занятости (e.g., Полная занятость, Частичная)
- work_format: Формат работы (e.g., Офис, Удалённо, Гибрид)
- experience: Опыт работы (e.g., 3-5 лет, Нет опыта)
- position_level: Уровень позиции (e.g., Junior, Senior)
- industry: Отрасль / Сфера деятельности (e.g., IT, Финансы)
- tech_stack: Технологический стек / Ключевые навыки (list of strings)
- location: География (e.g., Москва, Россия)
- salary_range: Зарплатные ожидания / вилка (e.g., 100000-200000 руб)
- languages: Языки (list of strings, e.g., ["Русский", "Английский"])
- education: Образование (e.g., Высшее, Среднее специальное)
- schedule: График работы (e.g., Полный день, Сменный)
- additional_requirements: Дополнительные предпочтения / требования (list of strings)
Vacancy content:
{content}
"""
    return extractor.invoke(prompt)