Add existing vacancies filtering

This commit is contained in:
estromenko 2025-10-28 22:53:29 +03:00
parent 618a102c98
commit 9cf4fa3083

View File

@ -1,11 +1,27 @@
from django.core.management import BaseCommand
from vacancies.main.models import Vacancy
import clickhouse_connect
from vacancies.main.vector_store import add_vectors, extract_features
from vacancies.main.vector_store import add_vectors, extract_features, client as qdrant
# ClickHouse client for the instance at 127.0.0.1:18123 (address is hard-coded here).
clickhouse_client = clickhouse_connect.create_client(host="127.0.0.1", port=18123)
query = """
# Gather the ids of every point currently stored in the "vacancies" Qdrant
# collection by paging through qdrant.scroll() until it reports no next offset.
exist_points_ids = []
next_page_offset = 0
while next_page_offset is not None:
    response = qdrant.scroll(
        collection_name="vacancies",
        limit=100_000,
        offset=next_page_offset,
        # Only the ids are used below, so payloads and vectors are not requested.
        with_payload=False,
        with_vectors=False,
        timeout=30,
    )
    # The result is indexed positionally: [0] is this page's points,
    # [1] is the offset of the next page (None ends the loop).
    page_points = response[0]
    exist_points_ids.extend(point.id for point in page_points)
    next_page_offset = response[1]

# Despite the name, this is a tuple: its repr "(a, b, ...)" is what gets
# interpolated into the `NOT IN {exist_points_set}` clause of the query.
# NOTE(review): an empty or one-element tuple renders as "()" / "(x,)" —
# confirm ClickHouse accepts that, or switch to bound query parameters.
exist_points_set = (*exist_points_ids,)
print("qdrant vacancies points count:", len(exist_points_ids))
query = f"""
SELECT id, chat_username, telegram_id, message, timestamp
FROM telegram_parser_chatmessage
WHERE timestamp >= now() - INTERVAL 30 DAY
@ -17,6 +33,7 @@ WHERE timestamp >= now() - INTERVAL 30 DAY
'удаленно', 'гибкий график', 'полный день', 'частичная занятость',
'резюме', 'собеседование', 'junior', 'middle', 'senior'
]) >= 5 AND position(message, 'О себе') = 0 AND position(message, 'Обо мне') = 0 AND position(message, '#ищу') = 0
AND id NOT IN {exist_points_set}
"""