vision-career/vacancies/main/vector_store.py
estromenko ffffa973a6
All checks were successful
release / docker (push) Successful in 55s
Add button to force send next vacancy
2025-11-03 18:38:11 +03:00

209 lines
7.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import logging

from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from qdrant_client import QdrantClient
from qdrant_client import models
from qdrant_client.models import Filter
from qdrant_client.models import Filter, HasIdCondition

from vacancies.conf.settings import QDRANT_URL
from vacancies.main.models import CustomerCV, RecommendedVacancy
from vacancies.main.models import VacancyFeatures
# Qdrant connection shared by the functions in this module.
qdrant_client = QdrantClient(url=QDRANT_URL)

# Every point carries one named vector per feature listed here; the order
# also defines the order in which features are embedded and searched.
FEATURE_NAMES = [
    "job_title",
    "employment_type",
    "work_format",
    "experience",
    "position_level",
    "industry",
    "tech_stack",
    "location",
    "salary_range",
    "languages",
    "education",
    "schedule",
    "additional_requirements",
]

# Multiplier applied to each feature's similarity score when ranking.
# The values add up to 100.
weights = {
    "job_title": 25,
    "employment_type": 5,
    "work_format": 5,
    "experience": 8,
    "position_level": 12,
    "industry": 10,
    "tech_stack": 14,
    "location": 5,
    "salary_range": 5,
    "languages": 5,
    "education": 2,
    "schedule": 2,
    "additional_requirements": 2,
}

# One 3072-dimensional cosine-distance vector slot per feature.
vectors_config = {
    feature: models.VectorParams(size=3072, distance=models.Distance.COSINE)
    for feature in FEATURE_NAMES
}

# Create both collections on import if they do not exist yet.
for _collection_name in ("vacancies", "cvs"):
    if qdrant_client.collection_exists(_collection_name):
        continue
    qdrant_client.create_collection(
        collection_name=_collection_name,
        vectors_config=vectors_config,
    )
del _collection_name  # keep the module namespace unchanged

# Embedding model used to turn feature texts into vectors.
embedding = OpenAIEmbeddings(model="text-embedding-3-large")
def _prepare_texts(features):
    """Build one plain-text string per feature, ready for embedding.

    Args:
        features: Mapping of feature name to its value (a scalar, a list of
            items, or ``None``). Keys not listed in ``FEATURE_NAMES`` are
            ignored.

    Returns:
        Dict with exactly one entry per name in ``FEATURE_NAMES``. List
        values are joined with single spaces; missing or falsy values
        become ``""``.
    """
    texts = {}
    for name in FEATURE_NAMES:
        value = features.get(name)
        if not value:
            # Covers None, "", [] and a missing key alike.
            texts[name] = ""
        elif isinstance(value, list):
            # Stringify items and drop None so a stray non-string element
            # cannot make str.join raise TypeError.
            texts[name] = " ".join(str(item) for item in value if item is not None)
        else:
            texts[name] = str(value)
    return texts
def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
    """Embed a document's features and store them as one point of named vectors.

    Before writing, every feature that has text is searched against the
    "vacancies" collection (top 100 hits per feature). For each hit the
    per-feature similarity is multiplied by the feature's weight and summed;
    if the best total exceeds the duplicate threshold, the document is treated
    as already present and nothing is written.

    Args:
        collection_name: Collection that receives the new point.
        _id: Id of the point to upsert.
        features: Mapping of feature name to value, as accepted by
            ``_prepare_texts``.
        payload: Payload stored with the point.

    Returns:
        None. The point is either upserted or skipped as a near-duplicate.
    """
    vector_size = 3072  # must match the size used in the collections' vector config
    # Weights add up to 100 and similarities are cosine scores, so a total
    # above 98 means an almost identical document.
    duplicate_threshold = 98

    texts = _prepare_texts(features)
    non_empty = [name for name, text in texts.items() if text]

    # Features without text keep an all-zero placeholder vector.
    vectors = {name: [0.0] * vector_size for name in texts}
    if non_empty:
        # One batched embedding request instead of one request per feature.
        embedded = embedding.embed_documents([texts[name] for name in non_empty])
        vectors.update(zip(non_empty, embedded))

    # NOTE(review): the duplicate search always targets "vacancies", even when
    # collection_name is "cvs" - confirm this is intended.
    weighted_totals = {}
    for name in non_empty:
        hits = qdrant_client.query_points(
            collection_name="vacancies",
            query=vectors[name],
            using=name,
            limit=100,
        )
        for hit in hits.points:
            weighted_totals[hit.id] = (
                weighted_totals.get(hit.id, 0) + hit.score * weights.get(name, 1)
            )

    if weighted_totals and max(weighted_totals.values()) > duplicate_threshold:
        return

    qdrant_client.upsert(
        collection_name=collection_name,
        points=[
            models.PointStruct(
                id=_id,
                vector=vectors,
                payload=payload,
            )
        ],
    )
def search_similarities(query_filter: Filter, cv_id: int):
    """Pick the vacancy that best matches a stored CV.

    Each non-zero named vector of the CV is searched against the "vacancies"
    collection (top 100 hits per feature, restricted by ``query_filter``).
    Per-feature similarities are weighted and summed per vacancy, and the ten
    highest-scoring candidates are shown to an LLM, which picks one by index.

    Args:
        query_filter: Qdrant filter applied to every vacancy search.
        cv_id: Id of the CV point in the "cvs" collection.

    Returns:
        ``(vacancy_id, content, link)`` for the chosen vacancy, or ``None``
        when the CV is not stored, no candidate was found, or the model did
        not answer with a usable index (this includes its ``-1`` answer).
    """
    log = logging.getLogger(__name__)
    shortlist_size = 10

    found = qdrant_client.retrieve(
        collection_name="cvs",
        ids=[cv_id],
        with_vectors=True,
    )
    if not found:
        log.warning("CV %s is missing from the 'cvs' collection", cv_id)
        return None
    cv = found[0]

    # vacancy id -> candidate dict; key order matters because the shortlist
    # is rendered into the prompt via repr().
    candidates = {}
    for name, vec in cv.vector.items():
        if not any(v != 0 for v in vec):
            # All-zero vector: this feature carries no information, skip it.
            continue
        hits = qdrant_client.query_points(
            collection_name="vacancies",
            query=vec,
            using=name,
            limit=100,
            with_payload=True,
            query_filter=query_filter,
        )
        for hit in hits.points:
            if hit.id not in candidates:
                candidates[hit.id] = {
                    "id": hit.id,
                    "score": 0,
                    "content": hit.payload["content"],
                    "link": hit.payload["link"],
                }
            candidates[hit.id]["score"] += hit.score * weights.get(name, 1)

    if not candidates:
        # Nothing to choose from, so there is no point in asking the model.
        return None

    ranked = sorted(candidates.values(), key=lambda item: item["score"], reverse=True)
    shortlist = ranked[:shortlist_size]

    prompt = f"""
Резюме: {cv.payload['content']}
Среди вакансий ниже выбери одну наиболее релевантную и выведи ее индекс(от 0 до 9).
Иногда могут попадаться чужие резюме вместо вакансий, их отдавать нельзя.
В ответе выведи только число. Если среди вакансий нет подходящих, то верни -1.
{shortlist}
"""
    openai_client = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal", temperature=0, seed=42, top_p=1)
    answer = openai_client.invoke(prompt).content

    try:
        index = int(str(answer).strip())
    except ValueError:
        log.warning("Expected a vacancy index from the model, got %r", answer)
        return None

    # Only indexes into the shortlist the model actually saw are valid; this
    # also rejects -1 ("nothing suitable") and any other negative value that
    # would otherwise silently index from the end of the list.
    if not 0 <= index < len(shortlist):
        if index != -1:
            log.warning("Model returned out-of-range vacancy index %d", index)
        return None

    chosen = shortlist[index]
    return chosen["id"], chosen["content"], chosen["link"]
def extract_features(content: str) -> VacancyFeatures:
    """Extract structured features from free-form text with an LLM.

    Args:
        content: Text of the document to analyse; it is inserted into the
            prompt as the vacancy description.

    Returns:
        The model's answer parsed into ``VacancyFeatures`` via structured
        output.
    """
    llm = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal", temperature=0, seed=42, top_p=1)
    extractor = llm.with_structured_output(VacancyFeatures)
    instructions = f"""
Extract the following features from the job vacancy description. If a feature is not mentioned, set it to null.
Features:
- job_title: Должность (e.g., DevOps, Python программист)
- employment_type: Тип занятости (e.g., Полная занятость, Частичная)
- work_format: Формат работы (e.g., Офис, Удалённо, Гибрид)
- experience: Опыт работы (e.g., 3-5 лет, Нет опыта)
- position_level: Уровень позиции (e.g., Junior, Senior)
- industry: Отрасль / Сфера деятельности (e.g., IT, Финансы)
- tech_stack: Технологический стек / Ключевые навыки (list of strings)
- location: География (e.g., Москва, Россия)
- salary_range: Зарплатные ожидания / вилка (e.g., 100000-200000 руб)
- languages: Языки (list of strings, e.g., ["Русский", "Английский"])
- education: Образование (e.g., Высшее, Среднее специальное)
- schedule: График работы (e.g., Полный день, Сменный)
- additional_requirements: Дополнительные предпочтения / требования (list of strings)
Vacancy content:
{content}
"""
    return extractor.invoke(instructions)
def get_next_vacancy(customer_cv):
    """Recommend the next vacancy for a CV and record the recommendation.

    Vacancies already recommended to the CV's customer are excluded from the
    search. When a vacancy is selected, a ``RecommendedVacancy`` row is
    created for it.

    Args:
        customer_cv: Object whose ``customer`` and ``id`` attributes are used
            (presumably a ``CustomerCV`` instance - verify against callers).

    Returns:
        ``(recommendation, vacancy_content, link)`` where ``recommendation``
        is the newly created ``RecommendedVacancy``, or ``None`` when the
        search yields no vacancy.
    """
    # Materialise the lazy queryset: HasIdCondition.has_id is a list field,
    # so hand it a concrete list instead of relying on iterable coercion.
    already_recommended = list(
        RecommendedVacancy.objects.filter(
            customer=customer_cv.customer,
        ).values_list('vacancy_id', flat=True)
    )
    query_filter = Filter(must_not=[HasIdCondition(has_id=already_recommended)])

    result = search_similarities(query_filter, customer_cv.id)
    if not result:
        return None

    search_result_id, vacancy_content, link = result
    recommendation = RecommendedVacancy.objects.create(
        customer=customer_cv.customer,
        vacancy_id=search_result_id,
    )
    return recommendation, vacancy_content, link