"""Vector-based matching between CVs and vacancies, backed by Qdrant.

Every vacancy / CV is stored as a single Qdrant point with one named
vector per feature in FEATURE_NAMES (3072-dim OpenAI embeddings).
Match quality is a weighted sum of per-feature cosine similarities.
"""

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from qdrant_client import QdrantClient, models
from qdrant_client.models import Filter

from vacancies.main.models import VacancyFeatures

# client = QdrantClient(path="./embeddings")
client = QdrantClient(url="http://localhost:6333")

# Dimensionality of OpenAI "text-embedding-3-large" vectors.
EMBEDDING_DIM = 3072
# Maximum number of candidate points fetched per per-feature query.
QUERY_LIMIT = 1000
# Weighted-score threshold above which a new point is treated as a
# near-duplicate of an already-stored one and is not inserted.
DUPLICATE_THRESHOLD = 33

FEATURE_NAMES = [
    "job_title",
    "employment_type",
    "work_format",
    "experience",
    "position_level",
    "industry",
    "tech_stack",
    "location",
    "salary_range",
    "languages",
    "education",
    "schedule",
    "additional_requirements",
]

# Relative importance of each feature in the final match score.
weights = {
    "job_title": 10,
    "employment_type": 2,
    "work_format": 2,
    "experience": 3,
    "position_level": 5,
    "industry": 4,
    "tech_stack": 5,
    "location": 2,
    "salary_range": 2,
    "languages": 2,
    "education": 1,
    "schedule": 1,
    "additional_requirements": 1,
}

vectors_config = {
    name: models.VectorParams(size=EMBEDDING_DIM, distance=models.Distance.COSINE)
    for name in FEATURE_NAMES
}

# Both collections share the same named-vector layout.
for _collection in ("vacancies", "cvs"):
    if not client.collection_exists(_collection):
        client.create_collection(
            collection_name=_collection,
            vectors_config=vectors_config,
        )

embedding = OpenAIEmbeddings(model="text-embedding-3-large")


def _make_llm() -> ChatOpenAI:
    """Deterministic chat model shared by ranking and feature extraction."""
    return ChatOpenAI(
        model_name="gpt-5-mini",
        reasoning_effort="minimal",
        temperature=0,
        seed=42,
        top_p=1,
    )


def _prepare_texts(features: dict) -> dict:
    """Normalize each feature value to a plain text string.

    Lists are joined with spaces; scalars are str()-ed; missing / falsy
    values become "" (which later maps to an all-zero vector).
    """
    texts = {}
    for name in FEATURE_NAMES:
        value = features.get(name)
        if isinstance(value, list):
            # map(str, ...) guards against non-string list items.
            text = " ".join(map(str, value)) if value else ""
        else:
            text = str(value) if value else ""
        texts[name] = text
    return texts


def _embed_features(features: dict) -> dict:
    """Return one embedding per feature; absent features get zero vectors."""
    vectors = {}
    for name, text in _prepare_texts(features).items():
        # All-zero vector marks "feature absent"; it is skipped at query time.
        vectors[name] = embedding.embed_query(text) if text else [0.0] * EMBEDDING_DIM
    return vectors


def _weighted_score(feature_sims: dict) -> float:
    """Weighted sum of per-feature similarity scores (default weight 1)."""
    return sum(sim * weights.get(name, 1) for name, sim in feature_sims.items())


def add_vectors(collection_name: str, _id: int, features: dict, payload: dict):
    """Embed *features* and upsert them as point *_id* into *collection_name*.

    Before inserting, queries the target collection for near-duplicates:
    if any existing point's weighted similarity exceeds DUPLICATE_THRESHOLD,
    the new point is skipped.

    Returns the Qdrant upsert result, or None when skipped as a duplicate.
    """
    vectors = _embed_features(features)

    # point id -> {feature name -> similarity} for already-stored points.
    per_point_sims: dict = {}
    for name, vec in vectors.items():
        if not any(vec):
            continue  # feature absent — nothing to compare
        results = client.query_points(
            # Fix: the duplicate check must run against the collection we
            # are inserting into (was hardcoded to "vacancies", which is
            # wrong when adding a CV).
            collection_name=collection_name,
            query=vec,
            using=name,
            limit=QUERY_LIMIT,
        )
        for res in results.points:
            per_point_sims.setdefault(res.id, {})[name] = res.score

    if per_point_sims:
        best = max(_weighted_score(sims) for sims in per_point_sims.values())
        if best > DUPLICATE_THRESHOLD:
            return None  # near-duplicate already stored; skip insert

    return client.upsert(
        collection_name=collection_name,
        points=[
            models.PointStruct(
                id=_id,
                vector=vectors,
                payload=payload,
            )
        ],
    )


def search_similarities(query_filter: Filter, cv_id: int):
    """Pick the most relevant vacancy for the CV with id *cv_id*.

    Collects weighted per-feature similarities for every vacancy matching
    *query_filter*, then asks the LLM to choose the single best candidate.

    Returns (vacancy_id, content, link), or None when no vacancy fits.
    Raises IndexError if *cv_id* is not present in the "cvs" collection.
    """
    # Indexing [0] intentionally raises IndexError for an unknown cv_id.
    vectors = client.retrieve(
        collection_name="cvs",
        ids=[cv_id],
        with_vectors=True,
    )[0].vector

    # vacancy id -> {feature name -> similarity}.
    per_vacancy_sims: dict = {}
    vacancies_content: dict = {}
    for name, vec in vectors.items():
        if not any(vec):
            continue  # CV does not specify this feature
        results = client.query_points(
            collection_name="vacancies",
            query=vec,
            using=name,
            limit=QUERY_LIMIT,
            with_payload=True,
            query_filter=query_filter,
        )
        for res in results.points:
            per_vacancy_sims.setdefault(res.id, {})[name] = res.score
            if res.id not in vacancies_content:
                vacancies_content[res.id] = {
                    "content": res.payload["content"],
                    "link": res.payload["link"],
                }

    scored = [
        {
            "id": vid,
            "score": _weighted_score(sims),
            "content": vacancies_content[vid]["content"],
            "link": vacancies_content[vid]["link"],
        }
        for vid, sims in per_vacancy_sims.items()
    ]
    if not scored:
        return None  # nothing matched the filter — skip the LLM call

    prompt = f"""
    Среди вакансий ниже выбери одну наиболее релевантную и выведи ее индекс.
    Если среди вакансий нет подходящих, то верни -1.
    В ответе выведи только число.
    {scored}
    """
    response = _make_llm().invoke(prompt)
    try:
        index = int(response.content.strip())
    except ValueError:
        return None  # model ignored the "digits only" instruction
    if index < 0 or index >= len(scored):
        # -1 means "no suitable vacancy"; other negative or out-of-range
        # answers must not silently index from the end of the list.
        return None
    best = scored[index]
    return best["id"], best["content"], best["link"]


def extract_features(content: str) -> VacancyFeatures:
    """Extract structured VacancyFeatures from a raw vacancy description."""
    prompt = f"""
    Extract the following features from the job vacancy description.
    If a feature is not mentioned, set it to null.

    Features:
    - job_title: Должность (e.g., DevOps, Python программист)
    - employment_type: Тип занятости (e.g., Полная занятость, Частичная)
    - work_format: Формат работы (e.g., Офис, Удалённо, Гибрид)
    - experience: Опыт работы (e.g., 3-5 лет, Нет опыта)
    - position_level: Уровень позиции (e.g., Junior, Senior)
    - industry: Отрасль / Сфера деятельности (e.g., IT, Финансы)
    - tech_stack: Технологический стек / Ключевые навыки (list of strings)
    - location: География (e.g., Москва, Россия)
    - salary_range: Зарплатные ожидания / вилка (e.g., 100000-200000 руб)
    - languages: Языки (list of strings, e.g., ["Русский", "Английский"])
    - education: Образование (e.g., Высшее, Среднее специальное)
    - schedule: График работы (e.g., Полный день, Сменный)
    - additional_requirements: Дополнительные предпочтения / требования (list of strings)

    Vacancy content:
    {content}
    """
    structured_llm = _make_llm().with_structured_output(VacancyFeatures)
    return structured_llm.invoke(prompt)