Compare commits

..

3 Commits

Author SHA1 Message Date
cf9f19a216 Fix job content sending 2025-10-29 01:01:09 +03:00
9cf4fa3083 Add existing vacancies filtering 2025-10-28 22:53:29 +03:00
618a102c98 Add vacancy search weights 2025-10-28 22:52:29 +03:00
3 changed files with 44 additions and 7 deletions

View File

@ -1,11 +1,27 @@
from django.core.management import BaseCommand
from vacancies.main.models import Vacancy
import clickhouse_connect
from vacancies.main.vector_store import add_vectors, extract_features
from vacancies.main.vector_store import add_vectors, extract_features, client as qdrant
# ClickHouse client pointed at the local instance (127.0.0.1, port 18123).
clickhouse_client = clickhouse_connect.create_client(host="127.0.0.1", port=18123)
query = """
# Gather the ids of every point already stored in the "vacancies" Qdrant
# collection by scrolling through it page by page (ids only: payloads and
# vectors are not requested).
next_page_offset = 0
exist_points_ids = []
while next_page_offset is not None:
    response = qdrant.scroll(
        collection_name="vacancies",
        limit=100_000,
        offset=next_page_offset,
        with_payload=False,
        with_vectors=False,
        timeout=30,
    )
    # The response is a pair: the points of this page and the offset of the
    # next page; the loop stops once that offset comes back as None.
    page_points, next_page_offset = response
    exist_points_ids += [point.id for point in page_points]

# Despite its name this is a tuple: it is interpolated verbatim into the SQL
# text below as the operand of `id NOT IN ...`.
# NOTE(review): an empty collection renders as `()` and a single id as
# `(id,)` - confirm ClickHouse accepts both forms.
exist_points_set = tuple(exist_points_ids)
print("qdrant vacancies points count:", len(exist_points_set))
query = f"""
SELECT id, chat_username, telegram_id, message, timestamp
FROM telegram_parser_chatmessage
WHERE timestamp >= now() - INTERVAL 30 DAY
@ -17,6 +33,7 @@ WHERE timestamp >= now() - INTERVAL 30 DAY
'удаленно', 'гибкий график', 'полный день', 'частичная занятость',
'резюме', 'собеседование', 'junior', 'middle', 'senior'
]) >= 5 AND position(message, 'О себе') = 0 AND position(message, 'Обо мне') = 0 AND position(message, '#ищу') = 0
AND id NOT IN {exist_points_set}
"""

View File

@ -1,6 +1,6 @@
import asyncio
from django.core.management import BaseCommand
from vacancies.main.vector_store import search_similarities
from vacancies.main.vector_store import search_similarities, client
from vacancies.main.models import CustomerCV, RecommendedVacancy
from vacancies.main.bot import application
from telegram import InlineKeyboardButton, InlineKeyboardMarkup
@ -21,6 +21,11 @@ class Command(BaseCommand):
query_filter = Filter(must_not=[HasIdCondition(has_id=recommended_vacancy_ids)])
search_result_id = search_similarities(query_filter, customer_cv.id)
vacancy_content = client.retrieve(
collection_name="vacancies",
ids=[search_result_id],
)[0].payload["content"]
recommendation = RecommendedVacancy.objects.create(
customer=customer_cv.customer,
vacancy_id=search_result_id,
@ -28,7 +33,7 @@ class Command(BaseCommand):
asyncio.run(application.bot.send_message(
chat_id=recommendation.customer.chat_id,
text=recommendation.vacancy.content,
text=vacancy_content,
reply_markup=InlineKeyboardMarkup([[
InlineKeyboardButton("Откликнуться", url=recommendation.vacancy.link),
]]),

View File

@ -5,14 +5,29 @@ from qdrant_client import QdrantClient
from qdrant_client.models import Filter
from vacancies.main.models import VacancyFeatures
client = QdrantClient(path="./embeddings")
#client = QdrantClient(url="http://localhost:6333")
# client = QdrantClient(path="./embeddings")
client = QdrantClient(url="http://localhost:6333")
# Names of the vacancy features; each one gets its own named vector in
# `vectors_config`.
FEATURE_NAMES = [
    "employment_type",
    "work_format",
    "experience",
    "position_level",
    "industry",
    "tech_stack",
    "location",
    "salary_range",
    "languages",
    "education",
    "schedule",
    "additional_requirements",
]

# Per-feature multipliers applied to the similarity of each feature when the
# total score of a vacancy is computed; a feature missing here counts as 1.
weights = dict(
    employment_type=2,
    work_format=2,
    experience=4,
    position_level=4,
    industry=4,
    tech_stack=5,
    location=2,
    salary_range=2,
    languages=2,
    education=1,
    schedule=1,
    additional_requirements=1,
)
# One named vector per feature: 3072 dimensions, compared by cosine distance.
vectors_config = {
    feature_name: models.VectorParams(size=3072, distance=models.Distance.COSINE)
    for feature_name in FEATURE_NAMES
}
@ -90,7 +105,7 @@ def search_similarities(query_filter: Filter, cv_id: int) -> list[dict]:
scored = []
for vid, feature_sims in max_similarities.items():
total = sum(feature_sims.values())
total = sum(feature_sims[feature] * weights.get(feature, 1) for feature in feature_sims)
scored.append({"id": vid, "score": total})
scored.sort(key=lambda x: x["score"], reverse=True)