Compare commits
No commits in common. "b72460110ef82fe023c064ac8ae87458b0b4b5ee" and "d107d69d776ec7488685f994f0855306e43a7bc8" have entirely different histories.
b72460110e
...
d107d69d77
@ -18,7 +18,6 @@ uv run --env-file .env manage.py migrate
|
|||||||
uv run --env-file .env manage.py createsuperuser --username stromenko_es --email estromenko@mail.ru
|
uv run --env-file .env manage.py createsuperuser --username stromenko_es --email estromenko@mail.ru
|
||||||
uv run --env-file .env manage.py runserver
|
uv run --env-file .env manage.py runserver
|
||||||
|
|
||||||
uv run --env-file .env manage.py process_vacancies
|
|
||||||
uv run --env-file .env manage.py generate_recommended_vacancies
|
uv run --env-file .env manage.py generate_recommended_vacancies
|
||||||
uv run --env-file .env manage.py collect_vacancies_from_telegram_messages
|
uv run --env-file .env manage.py collect_vacancies_from_telegram_messages
|
||||||
uv run --env-file .env manage.py runbot
|
uv run --env-file .env manage.py runbot
|
||||||
|
|||||||
@ -10,7 +10,6 @@ dependencies = [
|
|||||||
"langchain>=0.3.27",
|
"langchain>=0.3.27",
|
||||||
"langchain-openai>=0.3.35",
|
"langchain-openai>=0.3.35",
|
||||||
"langchain-qdrant>=1.1.0",
|
"langchain-qdrant>=1.1.0",
|
||||||
"pydantic>=2.0",
|
|
||||||
"pypdf>=6.1.2",
|
"pypdf>=6.1.2",
|
||||||
"python-telegram-bot>=22.5",
|
"python-telegram-bot>=22.5",
|
||||||
]
|
]
|
||||||
|
|||||||
@ -1,29 +0,0 @@
|
|||||||
from langchain_openai import ChatOpenAI
|
|
||||||
from vacancies.main.models import VacancyFeatures
|
|
||||||
|
|
||||||
|
|
||||||
def extract_vacancy_features(content: str) -> VacancyFeatures:
|
|
||||||
"""Extract features from vacancy content using structured output."""
|
|
||||||
|
|
||||||
prompt = f"""
|
|
||||||
Extract the following features from the job vacancy description. If a feature is not mentioned, set it to null.
|
|
||||||
Features:
|
|
||||||
- employment_type: Тип занятости (e.g., Полная занятость, Частичная)
|
|
||||||
- work_format: Формат работы (e.g., Офис, Удалённо, Гибрид)
|
|
||||||
- experience: Опыт работы (e.g., 3-5 лет, Нет опыта)
|
|
||||||
- position_level: Уровень позиции (e.g., Junior, Senior)
|
|
||||||
- industry: Отрасль / Сфера деятельности (e.g., IT, Финансы)
|
|
||||||
- tech_stack: Технологический стек / Ключевые навыки (list of strings)
|
|
||||||
- location: География (e.g., Москва, Россия)
|
|
||||||
- salary_range: Зарплатные ожидания / вилка (e.g., 100000-200000 руб)
|
|
||||||
- languages: Языки (list of strings, e.g., ["Русский", "Английский"])
|
|
||||||
- education: Образование (e.g., Высшее, Среднее специальное)
|
|
||||||
- schedule: График работы (e.g., Полный день, Сменный)
|
|
||||||
- additional_requirements: Дополнительные предпочтения / требования (list of strings)
|
|
||||||
Vacancy content:
|
|
||||||
{content}
|
|
||||||
"""
|
|
||||||
openai_client = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal")
|
|
||||||
structured_llm = openai_client.with_structured_output(VacancyFeatures)
|
|
||||||
response = structured_llm.invoke(prompt)
|
|
||||||
return response
|
|
||||||
@ -1,8 +1,8 @@
|
|||||||
from django.core.management import BaseCommand
|
from django.core.management import BaseCommand
|
||||||
from vacancies.main.models import Vacancy
|
from vacancies.main.models import Vacancy
|
||||||
import clickhouse_connect
|
import clickhouse_connect
|
||||||
from vacancies.main.vector_store import add_vacancy_vectors
|
from langchain_core.documents import Document
|
||||||
from vacancies.main.features_extractor import extract_vacancy_features
|
from vacancies.main.vector_store import vector_store
|
||||||
|
|
||||||
clickhouse_client = clickhouse_connect.create_client(host="127.0.0.1", port=18123)
|
clickhouse_client = clickhouse_connect.create_client(host="127.0.0.1", port=18123)
|
||||||
|
|
||||||
@ -25,14 +25,21 @@ class Command(BaseCommand):
|
|||||||
help = "Collect vacancies from telegram messages"
|
help = "Collect vacancies from telegram messages"
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
|
ids, documents = [], []
|
||||||
|
|
||||||
for index, row in enumerate(clickhouse_client.query(query).result_rows):
|
for index, row in enumerate(clickhouse_client.query(query).result_rows):
|
||||||
(id, chat_username, telegram_id, message, timestamp) = row
|
(id, chat_username, telegram_id, message, timestamp) = row
|
||||||
|
|
||||||
link = f"https://t.me/{chat_username}/{telegram_id}"
|
link = f"https://t.me/c/{chat_username}/{telegram_id}"
|
||||||
vacancy, created = Vacancy.objects.get_or_create(
|
vacancy, created = Vacancy.objects.get_or_create(
|
||||||
link=link,
|
link=link,
|
||||||
defaults={'content': message}
|
defaults={'content': message}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
metadata = {"link": link}
|
||||||
|
ids.append(vacancy.id)
|
||||||
|
documents.append(Document(page_content=message, metadata=metadata))
|
||||||
|
|
||||||
print(index, link)
|
print(index, link)
|
||||||
add_vacancy_vectors(vacancy.id, extract_vacancy_features(message).model_dump(), {"link": link})
|
|
||||||
|
vector_store.add_documents(documents, ids=ids)
|
||||||
|
|||||||
@ -1,8 +1,8 @@
|
|||||||
from django.core.management import BaseCommand
|
from django.core.management import BaseCommand
|
||||||
from vacancies.main.vector_store import search_similarities
|
import asyncio
|
||||||
|
from vacancies.main.vector_store import vector_store
|
||||||
from vacancies.main.models import CustomerCV, RecommendedVacancy
|
from vacancies.main.models import CustomerCV, RecommendedVacancy
|
||||||
from vacancies.main.bot import application
|
from vacancies.main.bot import application
|
||||||
from vacancies.main.features_extractor import extract_vacancy_features
|
|
||||||
from telegram import InlineKeyboardButton, InlineKeyboardMarkup
|
from telegram import InlineKeyboardButton, InlineKeyboardMarkup
|
||||||
from qdrant_client.models import Filter, HasIdCondition
|
from qdrant_client.models import Filter, HasIdCondition
|
||||||
|
|
||||||
@ -12,10 +12,7 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
customer_cvs = CustomerCV.objects.all()
|
customer_cvs = CustomerCV.objects.all()
|
||||||
|
|
||||||
for customer_cv in customer_cvs:
|
for customer_cv in customer_cvs:
|
||||||
features = extract_vacancy_features(customer_cv.content)
|
|
||||||
print(features)
|
|
||||||
recommended_vacancy_ids = RecommendedVacancy.objects.filter(
|
recommended_vacancy_ids = RecommendedVacancy.objects.filter(
|
||||||
customer=customer_cv.customer
|
customer=customer_cv.customer
|
||||||
).values_list('vacancy_id', flat=True)
|
).values_list('vacancy_id', flat=True)
|
||||||
@ -25,17 +22,22 @@ class Command(BaseCommand):
|
|||||||
HasIdCondition(has_id=recommended_vacancy_ids),
|
HasIdCondition(has_id=recommended_vacancy_ids),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
search_result_id = search_similarities(features.model_dump(), query_filter)
|
|
||||||
|
|
||||||
|
document = vector_store.similarity_search(
|
||||||
|
customer_cv.content,
|
||||||
|
k=1,
|
||||||
|
filter=query_filter,
|
||||||
|
)[0]
|
||||||
|
print(document.__dict__)
|
||||||
recommendation, _ = RecommendedVacancy.objects.get_or_create(
|
recommendation, _ = RecommendedVacancy.objects.get_or_create(
|
||||||
customer=customer_cv.customer,
|
customer=customer_cv.customer,
|
||||||
vacancy_id=search_result_id,
|
vacancy_id=document.metadata["_id"],
|
||||||
)
|
)
|
||||||
|
|
||||||
application.bot.send_message(
|
asyncio.run(application.bot.send_message(
|
||||||
chat_id=recommendation.customer.chat_id,
|
chat_id=recommendation.customer.chat_id,
|
||||||
text=recommendation.vacancy.content,
|
text=recommendation.vacancy.content,
|
||||||
reply_markup=InlineKeyboardMarkup([[
|
reply_markup=InlineKeyboardMarkup([[
|
||||||
InlineKeyboardButton("Откликнуться", url=recommendation.vacancy.link),
|
InlineKeyboardButton("Откликнуться", url=recommendation.vacancy.link),
|
||||||
]]),
|
]]),
|
||||||
)
|
))
|
||||||
|
|||||||
@ -1,20 +0,0 @@
|
|||||||
from django.core.management import BaseCommand
|
|
||||||
from vacancies.main.models import Vacancy
|
|
||||||
from vacancies.main.features_extractor import extract_vacancy_features
|
|
||||||
from vacancies.main.vector_store import add_vacancy_vectors
|
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
|
||||||
help = "Process vacancies: extract features and index in vector store"
|
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
|
||||||
vacancies = Vacancy.objects.filter(is_processed=False)
|
|
||||||
|
|
||||||
len_vacancies = len(vacancies)
|
|
||||||
for index, vacancy in enumerate(vacancies):
|
|
||||||
print(f"Processing {index}/{len_vacancies} {vacancy}")
|
|
||||||
features = extract_vacancy_features(vacancy.content)
|
|
||||||
vacancy.features_json = features.model_dump()
|
|
||||||
vacancy.is_processed = True
|
|
||||||
vacancy.save()
|
|
||||||
add_vacancy_vectors(vacancy.id, features.model_dump(), {"link": vacancy.link})
|
|
||||||
@ -1,23 +0,0 @@
|
|||||||
# Generated by Django 5.2.7 on 2025-10-26 11:36
|
|
||||||
|
|
||||||
from django.db import migrations, models
|
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
|
||||||
|
|
||||||
dependencies = [
|
|
||||||
('main', '0003_remove_recommendedvacancy_is_shown_and_more'),
|
|
||||||
]
|
|
||||||
|
|
||||||
operations = [
|
|
||||||
migrations.AddField(
|
|
||||||
model_name='vacancy',
|
|
||||||
name='features_json',
|
|
||||||
field=models.JSONField(blank=True, null=True),
|
|
||||||
),
|
|
||||||
migrations.AddField(
|
|
||||||
model_name='vacancy',
|
|
||||||
name='is_processed',
|
|
||||||
field=models.BooleanField(default=False),
|
|
||||||
),
|
|
||||||
]
|
|
||||||
@ -1,5 +1,4 @@
|
|||||||
from django.db import models
|
from django.db import models
|
||||||
from pydantic import BaseModel
|
|
||||||
|
|
||||||
|
|
||||||
class Customer(models.Model):
|
class Customer(models.Model):
|
||||||
@ -34,8 +33,6 @@ class CustomerCV(models.Model):
|
|||||||
class Vacancy(models.Model):
|
class Vacancy(models.Model):
|
||||||
content = models.TextField()
|
content = models.TextField()
|
||||||
link = models.URLField(unique=True)
|
link = models.URLField(unique=True)
|
||||||
features_json = models.JSONField(null=True, blank=True)
|
|
||||||
is_processed = models.BooleanField(default=False)
|
|
||||||
created_at = models.DateTimeField(auto_now_add=True)
|
created_at = models.DateTimeField(auto_now_add=True)
|
||||||
|
|
||||||
objects = models.Manager()
|
objects = models.Manager()
|
||||||
@ -61,18 +58,3 @@ class RecommendedVacancy(models.Model):
|
|||||||
class Meta:
|
class Meta:
|
||||||
verbose_name_plural = 'Recommended Vacancies'
|
verbose_name_plural = 'Recommended Vacancies'
|
||||||
db_table = "recommended_vacancies"
|
db_table = "recommended_vacancies"
|
||||||
|
|
||||||
|
|
||||||
class VacancyFeatures(BaseModel):
|
|
||||||
employment_type: str | None = None # Тип занятости
|
|
||||||
work_format: str | None = None # Формат работы
|
|
||||||
experience: str | None = None # Опыт работы
|
|
||||||
position_level: str | None = None # Уровень позиции
|
|
||||||
industry: str | None = None # Отрасль / Сфера деятельности
|
|
||||||
tech_stack: list[str] | None = None # Технологический стек / Ключевые навыки
|
|
||||||
location: str | None = None # География
|
|
||||||
salary_range: str | None = None # Зарплатные ожидания / вилка
|
|
||||||
languages: list[str] | None = None # Языки
|
|
||||||
education: str | None = None # Образование
|
|
||||||
schedule: str | None = None # График работы
|
|
||||||
additional_requirements: list[str] | None = None # Дополнительные предпочтения / требования
|
|
||||||
|
|||||||
@ -1,97 +1,18 @@
|
|||||||
from qdrant_client import models
|
from qdrant_client.models import Distance, VectorParams
|
||||||
|
from langchain_qdrant import QdrantVectorStore
|
||||||
from langchain_openai import OpenAIEmbeddings
|
from langchain_openai import OpenAIEmbeddings
|
||||||
from qdrant_client import QdrantClient
|
from qdrant_client import QdrantClient
|
||||||
from qdrant_client.models import Filter
|
|
||||||
|
|
||||||
# client = QdrantClient(path="./embeddings")
|
client = QdrantClient(path="./embeddings")
|
||||||
client = QdrantClient(url="http://localhost:6333")
|
|
||||||
|
|
||||||
FEATURE_NAMES = [
|
if not client.collection_exists("vacancies"):
|
||||||
"employment_type", "work_format", "experience", "position_level", "industry", "tech_stack",
|
|
||||||
"location", "salary_range", "languages", "education", "schedule", "additional_requirements"
|
|
||||||
]
|
|
||||||
|
|
||||||
vectors_config = {
|
|
||||||
name: models.VectorParams(size=3072, distance=models.Distance.COSINE) for name in FEATURE_NAMES
|
|
||||||
}
|
|
||||||
|
|
||||||
collection_name = "vacancies"
|
|
||||||
if not client.collection_exists(collection_name):
|
|
||||||
client.create_collection(
|
client.create_collection(
|
||||||
collection_name=collection_name,
|
collection_name="vacancies",
|
||||||
vectors_config=vectors_config
|
vectors_config=VectorParams(size=3072, distance=Distance.COSINE)
|
||||||
)
|
)
|
||||||
|
|
||||||
embedding = OpenAIEmbeddings(model="text-embedding-3-large")
|
vector_store = QdrantVectorStore(
|
||||||
|
client=client,
|
||||||
def _prepare_texts(features):
|
collection_name="vacancies",
|
||||||
"""Prepare texts for each feature from features dict."""
|
embedding=OpenAIEmbeddings(model="text-embedding-3-large"),
|
||||||
texts = {}
|
|
||||||
for name in FEATURE_NAMES:
|
|
||||||
value = features.get(name)
|
|
||||||
if isinstance(value, list):
|
|
||||||
text = " ".join(value) if value else ""
|
|
||||||
else:
|
|
||||||
text = str(value) if value else ""
|
|
||||||
texts[name] = text
|
|
||||||
return texts
|
|
||||||
|
|
||||||
|
|
||||||
def add_vacancy_vectors(vacancy_id: int, features: dict, payload: dict):
|
|
||||||
"""Add vectors for a vacancy based on its features."""
|
|
||||||
texts = _prepare_texts(features)
|
|
||||||
vectors = {}
|
|
||||||
for name, text in texts.items():
|
|
||||||
vectors[name] = [0.0] * 3072
|
|
||||||
if text:
|
|
||||||
vec = embedding.embed_query(text)
|
|
||||||
vectors[name] = vec
|
|
||||||
client.upsert(
|
|
||||||
collection_name=collection_name,
|
|
||||||
points=[
|
|
||||||
models.PointStruct(
|
|
||||||
id=vacancy_id,
|
|
||||||
vector=vectors,
|
|
||||||
payload=payload,
|
|
||||||
)
|
)
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def search_similarities(query_features: dict, query_filter: Filter) -> list[dict]:
|
|
||||||
"""Search vacancies using sum of max similarities.
|
|
||||||
For each feature, compute similarities, then for each vacancy, take max per feature, sum.
|
|
||||||
Return top vacancies.
|
|
||||||
"""
|
|
||||||
texts = _prepare_texts(query_features)
|
|
||||||
vectors = {}
|
|
||||||
for name, text in texts.items():
|
|
||||||
vectors[name] = [0.0] * 3072
|
|
||||||
if text:
|
|
||||||
vec = embedding.embed_query(text)
|
|
||||||
vectors[name] = vec
|
|
||||||
|
|
||||||
max_similarities = {}
|
|
||||||
for name, vec in vectors.items():
|
|
||||||
if any(v != 0 for v in vec):
|
|
||||||
results = client.search(
|
|
||||||
collection_name=collection_name,
|
|
||||||
query_vector=(name, vec),
|
|
||||||
limit=1000,
|
|
||||||
with_payload=True,
|
|
||||||
query_filter=query_filter,
|
|
||||||
)
|
|
||||||
for res in results:
|
|
||||||
vid = res.id
|
|
||||||
sim = res.score
|
|
||||||
if vid not in max_similarities:
|
|
||||||
max_similarities[vid] = {}
|
|
||||||
max_similarities[vid][name] = sim
|
|
||||||
|
|
||||||
scored = []
|
|
||||||
for vid, feature_sims in max_similarities.items():
|
|
||||||
total = sum(feature_sims.values())
|
|
||||||
scored.append({"id": vid, "score": total})
|
|
||||||
|
|
||||||
scored.sort(key=lambda x: x["score"], reverse=True)
|
|
||||||
return scored[0]["id"]
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user