Compare commits
No commits in common. "cf9f19a216a92255ab312dc703e9a045316448f6" and "42395de6d339d6d490f4beddc2f747be63993f59" have entirely different histories.
cf9f19a216
...
42395de6d3
@ -1,27 +1,11 @@
|
|||||||
from django.core.management import BaseCommand
|
from django.core.management import BaseCommand
|
||||||
from vacancies.main.models import Vacancy
|
from vacancies.main.models import Vacancy
|
||||||
import clickhouse_connect
|
import clickhouse_connect
|
||||||
from vacancies.main.vector_store import add_vectors, extract_features, client as qdrant
|
from vacancies.main.vector_store import add_vectors, extract_features
|
||||||
|
|
||||||
clickhouse_client = clickhouse_connect.create_client(host="127.0.0.1", port=18123)
|
clickhouse_client = clickhouse_connect.create_client(host="127.0.0.1", port=18123)
|
||||||
|
|
||||||
next_page_offset = 0
|
query = """
|
||||||
exist_points_ids = []
|
|
||||||
while next_page_offset is not None:
|
|
||||||
response = qdrant.scroll(
|
|
||||||
collection_name="vacancies",
|
|
||||||
limit=100_000,
|
|
||||||
offset=next_page_offset,
|
|
||||||
with_payload=False,
|
|
||||||
with_vectors=False,
|
|
||||||
timeout=30,
|
|
||||||
)
|
|
||||||
exist_points_ids.extend([point.id for point in response[0]])
|
|
||||||
next_page_offset = response[1]
|
|
||||||
exist_points_set = tuple(exist_points_ids)
|
|
||||||
print("qdrant vacancies points count:", len(exist_points_set))
|
|
||||||
|
|
||||||
query = f"""
|
|
||||||
SELECT id, chat_username, telegram_id, message, timestamp
|
SELECT id, chat_username, telegram_id, message, timestamp
|
||||||
FROM telegram_parser_chatmessage
|
FROM telegram_parser_chatmessage
|
||||||
WHERE timestamp >= now() - INTERVAL 30 DAY
|
WHERE timestamp >= now() - INTERVAL 30 DAY
|
||||||
@ -33,7 +17,6 @@ WHERE timestamp >= now() - INTERVAL 30 DAY
|
|||||||
'удаленно', 'гибкий график', 'полный день', 'частичная занятость',
|
'удаленно', 'гибкий график', 'полный день', 'частичная занятость',
|
||||||
'резюме', 'собеседование', 'junior', 'middle', 'senior'
|
'резюме', 'собеседование', 'junior', 'middle', 'senior'
|
||||||
]) >= 5 AND position(message, 'О себе') = 0 AND position(message, 'Обо мне') = 0 AND position(message, '#ищу') = 0
|
]) >= 5 AND position(message, 'О себе') = 0 AND position(message, 'Обо мне') = 0 AND position(message, '#ищу') = 0
|
||||||
AND id NOT IN {exist_points_set}
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
from django.core.management import BaseCommand
|
from django.core.management import BaseCommand
|
||||||
from vacancies.main.vector_store import search_similarities, client
|
from vacancies.main.vector_store import search_similarities
|
||||||
from vacancies.main.models import CustomerCV, RecommendedVacancy
|
from vacancies.main.models import CustomerCV, RecommendedVacancy
|
||||||
from vacancies.main.bot import application
|
from vacancies.main.bot import application
|
||||||
from telegram import InlineKeyboardButton, InlineKeyboardMarkup
|
from telegram import InlineKeyboardButton, InlineKeyboardMarkup
|
||||||
@ -21,11 +21,6 @@ class Command(BaseCommand):
|
|||||||
query_filter = Filter(must_not=[HasIdCondition(has_id=recommended_vacancy_ids)])
|
query_filter = Filter(must_not=[HasIdCondition(has_id=recommended_vacancy_ids)])
|
||||||
search_result_id = search_similarities(query_filter, customer_cv.id)
|
search_result_id = search_similarities(query_filter, customer_cv.id)
|
||||||
|
|
||||||
vacancy_content = client.retrieve(
|
|
||||||
collection_name="vacancies",
|
|
||||||
ids=[search_result_id],
|
|
||||||
)[0].payload["content"]
|
|
||||||
|
|
||||||
recommendation = RecommendedVacancy.objects.create(
|
recommendation = RecommendedVacancy.objects.create(
|
||||||
customer=customer_cv.customer,
|
customer=customer_cv.customer,
|
||||||
vacancy_id=search_result_id,
|
vacancy_id=search_result_id,
|
||||||
@ -33,7 +28,7 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
asyncio.run(application.bot.send_message(
|
asyncio.run(application.bot.send_message(
|
||||||
chat_id=recommendation.customer.chat_id,
|
chat_id=recommendation.customer.chat_id,
|
||||||
text=vacancy_content,
|
text=recommendation.vacancy.content,
|
||||||
reply_markup=InlineKeyboardMarkup([[
|
reply_markup=InlineKeyboardMarkup([[
|
||||||
InlineKeyboardButton("Откликнуться", url=recommendation.vacancy.link),
|
InlineKeyboardButton("Откликнуться", url=recommendation.vacancy.link),
|
||||||
]]),
|
]]),
|
||||||
|
|||||||
@ -5,29 +5,14 @@ from qdrant_client import QdrantClient
|
|||||||
from qdrant_client.models import Filter
|
from qdrant_client.models import Filter
|
||||||
from vacancies.main.models import VacancyFeatures
|
from vacancies.main.models import VacancyFeatures
|
||||||
|
|
||||||
# client = QdrantClient(path="./embeddings")
|
client = QdrantClient(path="./embeddings")
|
||||||
client = QdrantClient(url="http://localhost:6333")
|
#client = QdrantClient(url="http://localhost:6333")
|
||||||
|
|
||||||
FEATURE_NAMES = [
|
FEATURE_NAMES = [
|
||||||
"employment_type", "work_format", "experience", "position_level", "industry", "tech_stack",
|
"employment_type", "work_format", "experience", "position_level", "industry", "tech_stack",
|
||||||
"location", "salary_range", "languages", "education", "schedule", "additional_requirements"
|
"location", "salary_range", "languages", "education", "schedule", "additional_requirements"
|
||||||
]
|
]
|
||||||
|
|
||||||
weights = {
|
|
||||||
"employment_type": 2,
|
|
||||||
"work_format": 2,
|
|
||||||
"experience": 4,
|
|
||||||
"position_level": 4,
|
|
||||||
"industry": 4,
|
|
||||||
"tech_stack": 5,
|
|
||||||
"location": 2,
|
|
||||||
"salary_range": 2,
|
|
||||||
"languages": 2,
|
|
||||||
"education": 1,
|
|
||||||
"schedule": 1,
|
|
||||||
"additional_requirements": 1,
|
|
||||||
}
|
|
||||||
|
|
||||||
vectors_config = {
|
vectors_config = {
|
||||||
name: models.VectorParams(size=3072, distance=models.Distance.COSINE) for name in FEATURE_NAMES
|
name: models.VectorParams(size=3072, distance=models.Distance.COSINE) for name in FEATURE_NAMES
|
||||||
}
|
}
|
||||||
@ -105,7 +90,7 @@ def search_similarities(query_filter: Filter, cv_id: int) -> list[dict]:
|
|||||||
|
|
||||||
scored = []
|
scored = []
|
||||||
for vid, feature_sims in max_similarities.items():
|
for vid, feature_sims in max_similarities.items():
|
||||||
total = sum(feature_sims[feature] * weights.get(feature, 1) for feature in feature_sims)
|
total = sum(feature_sims.values())
|
||||||
scored.append({"id": vid, "score": total})
|
scored.append({"id": vid, "score": total})
|
||||||
|
|
||||||
scored.sort(key=lambda x: x["score"], reverse=True)
|
scored.sort(key=lambda x: x["score"], reverse=True)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user