Compare commits
3 Commits
42395de6d3
...
cf9f19a216
| Author | SHA1 | Date | |
|---|---|---|---|
| cf9f19a216 | |||
| 9cf4fa3083 | |||
| 618a102c98 |
@ -1,11 +1,27 @@
|
||||
from django.core.management import BaseCommand
|
||||
from vacancies.main.models import Vacancy
|
||||
import clickhouse_connect
|
||||
from vacancies.main.vector_store import add_vectors, extract_features
|
||||
from vacancies.main.vector_store import add_vectors, extract_features, client as qdrant
|
||||
|
||||
clickhouse_client = clickhouse_connect.create_client(host="127.0.0.1", port=18123)
|
||||
|
||||
query = """
|
||||
next_page_offset = 0
|
||||
exist_points_ids = []
|
||||
while next_page_offset is not None:
|
||||
response = qdrant.scroll(
|
||||
collection_name="vacancies",
|
||||
limit=100_000,
|
||||
offset=next_page_offset,
|
||||
with_payload=False,
|
||||
with_vectors=False,
|
||||
timeout=30,
|
||||
)
|
||||
exist_points_ids.extend([point.id for point in response[0]])
|
||||
next_page_offset = response[1]
|
||||
exist_points_set = tuple(exist_points_ids)
|
||||
print("qdrant vacancies points count:", len(exist_points_set))
|
||||
|
||||
query = f"""
|
||||
SELECT id, chat_username, telegram_id, message, timestamp
|
||||
FROM telegram_parser_chatmessage
|
||||
WHERE timestamp >= now() - INTERVAL 30 DAY
|
||||
@ -17,6 +33,7 @@ WHERE timestamp >= now() - INTERVAL 30 DAY
|
||||
'удаленно', 'гибкий график', 'полный день', 'частичная занятость',
|
||||
'резюме', 'собеседование', 'junior', 'middle', 'senior'
|
||||
]) >= 5 AND position(message, 'О себе') = 0 AND position(message, 'Обо мне') = 0 AND position(message, '#ищу') = 0
|
||||
AND id NOT IN {exist_points_set}
|
||||
"""
|
||||
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import asyncio
|
||||
from django.core.management import BaseCommand
|
||||
from vacancies.main.vector_store import search_similarities
|
||||
from vacancies.main.vector_store import search_similarities, client
|
||||
from vacancies.main.models import CustomerCV, RecommendedVacancy
|
||||
from vacancies.main.bot import application
|
||||
from telegram import InlineKeyboardButton, InlineKeyboardMarkup
|
||||
@ -21,6 +21,11 @@ class Command(BaseCommand):
|
||||
query_filter = Filter(must_not=[HasIdCondition(has_id=recommended_vacancy_ids)])
|
||||
search_result_id = search_similarities(query_filter, customer_cv.id)
|
||||
|
||||
vacancy_content = client.retrieve(
|
||||
collection_name="vacancies",
|
||||
ids=[search_result_id],
|
||||
)[0].payload["content"]
|
||||
|
||||
recommendation = RecommendedVacancy.objects.create(
|
||||
customer=customer_cv.customer,
|
||||
vacancy_id=search_result_id,
|
||||
@ -28,7 +33,7 @@ class Command(BaseCommand):
|
||||
|
||||
asyncio.run(application.bot.send_message(
|
||||
chat_id=recommendation.customer.chat_id,
|
||||
text=recommendation.vacancy.content,
|
||||
text=vacancy_content,
|
||||
reply_markup=InlineKeyboardMarkup([[
|
||||
InlineKeyboardButton("Откликнуться", url=recommendation.vacancy.link),
|
||||
]]),
|
||||
|
||||
@ -5,14 +5,29 @@ from qdrant_client import QdrantClient
|
||||
from qdrant_client.models import Filter
|
||||
from vacancies.main.models import VacancyFeatures
|
||||
|
||||
client = QdrantClient(path="./embeddings")
|
||||
#client = QdrantClient(url="http://localhost:6333")
|
||||
# client = QdrantClient(path="./embeddings")
|
||||
client = QdrantClient(url="http://localhost:6333")
|
||||
|
||||
FEATURE_NAMES = [
|
||||
"employment_type", "work_format", "experience", "position_level", "industry", "tech_stack",
|
||||
"location", "salary_range", "languages", "education", "schedule", "additional_requirements"
|
||||
]
|
||||
|
||||
weights = {
|
||||
"employment_type": 2,
|
||||
"work_format": 2,
|
||||
"experience": 4,
|
||||
"position_level": 4,
|
||||
"industry": 4,
|
||||
"tech_stack": 5,
|
||||
"location": 2,
|
||||
"salary_range": 2,
|
||||
"languages": 2,
|
||||
"education": 1,
|
||||
"schedule": 1,
|
||||
"additional_requirements": 1,
|
||||
}
|
||||
|
||||
vectors_config = {
|
||||
name: models.VectorParams(size=3072, distance=models.Distance.COSINE) for name in FEATURE_NAMES
|
||||
}
|
||||
@ -90,7 +105,7 @@ def search_similarities(query_filter: Filter, cv_id: int) -> list[dict]:
|
||||
|
||||
scored = []
|
||||
for vid, feature_sims in max_similarities.items():
|
||||
total = sum(feature_sims.values())
|
||||
total = sum(feature_sims[feature] * weights.get(feature, 1) for feature in feature_sims)
|
||||
scored.append({"id": vid, "score": total})
|
||||
|
||||
scored.sort(key=lambda x: x["score"], reverse=True)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user