"""Embedding storage and similarity search for vacancies and CVs in Qdrant.

Each vacancy/CV is stored as one Qdrant point holding one named vector per
entry of ``FEATURE_NAMES``. Matching is done by querying every named vector
separately and combining the per-feature scores with ``weights``.
"""

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from qdrant_client import QdrantClient, models
from qdrant_client.models import Filter, HasIdCondition

from vacancies.conf.settings import QDRANT_URL
from vacancies.main.models import RecommendedVacancy, VacancyFeatures

qdrant_client = QdrantClient(url=QDRANT_URL)

# Names of the per-feature vectors stored on every point.
FEATURE_NAMES = [
    "job_title",
    "employment_type",
    "work_format",
    "experience",
    "position_level",
    "industry",
    "tech_stack",
    "location",
    "salary_range",
    "languages",
    "education",
    "schedule",
    "additional_requirements",
]

# Per-feature multipliers for similarity scores; features not listed count as 1.
weights = {
    "job_title": 70,
    "tech_stack": 10,
    "salary_range": 10,
}

# A new point is skipped when an existing vacancy scores above this value.
DUPLICATE_SCORE_THRESHOLD = 80
# Number of nearest neighbours fetched per feature for the duplicate check.
DUPLICATE_SEARCH_LIMIT = 100
# Number of nearest neighbours fetched per feature when recommending.
SIMILARITY_SEARCH_LIMIT = 100000

vectors_config = {
    name: models.VectorParams(size=3072, distance=models.Distance.COSINE)
    for name in FEATURE_NAMES
}

# Create the collections on first import so the rest of the module can rely on them.
if not qdrant_client.collection_exists("vacancies"):
    qdrant_client.create_collection(
        collection_name="vacancies",
        vectors_config=vectors_config,
    )
    qdrant_client.create_payload_index(
        collection_name="vacancies",
        field_name="timestamp",
        field_schema="datetime",
    )

if not qdrant_client.collection_exists("cvs"):
    qdrant_client.create_collection(
        collection_name="cvs",
        vectors_config=vectors_config,
    )

embedding = OpenAIEmbeddings(model="text-embedding-3-large")


def _prepare_texts(features):
    """Return a ``{feature_name: text}`` dict covering every ``FEATURE_NAMES`` entry.

    List values are joined with spaces, other values are converted with
    ``str``; missing or falsy values become an empty string.
    """
    texts = {}
    for name in FEATURE_NAMES:
        value = features.get(name)
        if not value:
            texts[name] = ""
        elif isinstance(value, list):
            # Coerce items so that non-string list entries do not break the join.
            texts[name] = " ".join(map(str, value))
        else:
            texts[name] = str(value)
    return texts


def embed_features(features):
    """Embed every feature text and return a ``{feature_name: vector}`` dict.

    All names in ``FEATURE_NAMES`` are present in the result; features that
    are missing or falsy in ``features`` are embedded as an empty string.
    """
    texts_by_name = _prepare_texts(features)
    names = list(texts_by_name)
    # embed_documents expects a real list, not a dict view.
    embedded = embedding.embed_documents([texts_by_name[name] for name in names])
    return dict(zip(names, embedded))


def _weighted_score(feature_sims):
    """Sum per-feature similarity scores, each multiplied by its weight."""
    return sum(score * weights.get(feature, 1) for feature, score in feature_sims.items())


def add_vectors(collection_name: str, _id: int, features: dict, payload: dict, vectors):
    """Upsert a point unless a near-duplicate vacancy already exists.

    For each named vector the nearest vacancies are fetched and their scores
    combined with ``_weighted_score``. If the best combined score exceeds
    ``DUPLICATE_SCORE_THRESHOLD`` nothing is written.

    Args:
        collection_name: Collection the point is written to.
        _id: Point id.
        features: Unused here; kept for interface compatibility.
        payload: Payload stored with the point.
        vectors: ``{feature_name: vector}`` mapping, as built by ``embed_features``.
    """
    similarities = {}
    for name, vec in vectors.items():
        # NOTE(review): the duplicate check always queries "vacancies", even
        # when collection_name is different -- confirm this is intended.
        results = qdrant_client.query_points(
            collection_name="vacancies",
            query=vec,
            using=name,
            limit=DUPLICATE_SEARCH_LIMIT,
        )
        for point in results.points:
            similarities.setdefault(point.id, {})[name] = point.score

    best_score = max(map(_weighted_score, similarities.values()), default=None)
    if best_score is not None and best_score > DUPLICATE_SCORE_THRESHOLD:
        return

    qdrant_client.upsert(
        collection_name=collection_name,
        points=[models.PointStruct(id=_id, vector=vectors, payload=payload)],
    )


def search_similarities(query_filter: Filter, cv_id: int):
    """Find the vacancy that best matches the stored CV ``cv_id``.

    Every named vector of the CV is queried against the "vacancies"
    collection (restricted by ``query_filter``) and the per-feature scores
    are combined with ``_weighted_score``.

    Returns:
        ``(vacancy_id, content, link)`` for the highest-scoring vacancy, or
        ``None`` when the CV point does not exist or no vacancy matches.
    """
    records = qdrant_client.retrieve(collection_name="cvs", ids=[cv_id], with_vectors=True)
    if not records:
        return None
    cv = records[0]

    similarities, payloads = {}, {}
    for name, vec in cv.vector.items():
        results = qdrant_client.query_points(
            collection_name="vacancies",
            query=vec,
            using=name,
            limit=SIMILARITY_SEARCH_LIMIT,
            with_payload=True,
            query_filter=query_filter,
        )
        for point in results.points:
            similarities.setdefault(point.id, {})[name] = point.score
            payloads[point.id] = point.payload

    if not similarities:
        return None

    # max() keeps the first maximal id in insertion order, matching a stable
    # descending sort followed by taking element 0.
    best_id = max(similarities, key=lambda vid: _weighted_score(similarities[vid]))
    best_payload = payloads[best_id]
    return best_id, best_payload["content"], best_payload["link"]


def batch_extract_features(contents: list[str]) -> list[VacancyFeatures]:
    """Extract structured ``VacancyFeatures`` from each vacancy text via the LLM.

    One prompt is built per entry of ``contents`` and all prompts are sent
    in a single ``batch`` call; results are returned in the same order.
    """
    if not contents:
        return []

    prompts = [
        f"""
        Extract the following features from the job vacancy description. If a feature is not mentioned, set it to null.
        Features:
        - job_title: Должность (e.g., DevOps, Python программист)
        - employment_type: Тип занятости (e.g., Полная занятость, Частичная)
        - work_format: Формат работы (e.g., Офис, Удалённо, Гибрид)
        - experience: Опыт работы (e.g., 3-5 лет, Нет опыта)
        - position_level: Уровень позиции (e.g., Junior, Senior)
        - industry: Отрасль / Сфера деятельности (e.g., IT, Финансы)
        - tech_stack: Технологический стек / Ключевые навыки (list of strings)
        - location: География (e.g., Москва, Россия)
        - salary_range: Зарплатные ожидания / вилка (e.g., 100000-200000 руб)
        - languages: Языки (list of strings, e.g., ["Русский", "Английский"])
        - education: Образование (e.g., Высшее, Среднее специальное)
        - schedule: График работы (e.g., Полный день, Сменный)
        - additional_requirements: Дополнительные предпочтения / требования (list of strings)
        Vacancy content:
        {content}
        """
        for content in contents
    ]
    openai_client = ChatOpenAI(
        model_name="gpt-5-mini",
        reasoning_effort="minimal",
        temperature=0,
        seed=42,
        top_p=1,
    )
    structured_llm = openai_client.with_structured_output(VacancyFeatures)
    return structured_llm.batch(prompts)


def get_next_vacancy(customer_cv):
    """Recommend the best not-yet-recommended vacancy for ``customer_cv``.

    Vacancies already recorded in ``RecommendedVacancy`` for the customer are
    excluded from the search. When a match is found a new
    ``RecommendedVacancy`` row is created.

    Returns:
        ``(recommendation, vacancy_content, link)``, or ``None`` when the
        search yields nothing.
    """
    # Materialize the lazy queryset: the Qdrant filter model expects a list of ids.
    recommended_vacancy_ids = list(
        RecommendedVacancy.objects.filter(
            customer=customer_cv.customer,
        ).values_list('vacancy_id', flat=True)
    )
    query_filter = Filter(must_not=[HasIdCondition(has_id=recommended_vacancy_ids)])

    result = search_similarities(query_filter, customer_cv.id)
    if not result:
        return None

    search_result_id, vacancy_content, link = result
    recommendation = RecommendedVacancy.objects.create(
        customer=customer_cv.customer,
        vacancy_id=search_result_id,
    )
    return recommendation, vacancy_content, link