Add existing vacancies filtering
This commit is contained in:
parent
618a102c98
commit
9cf4fa3083
@ -1,11 +1,27 @@
|
||||
from django.core.management import BaseCommand
|
||||
from vacancies.main.models import Vacancy
|
||||
import clickhouse_connect
|
||||
from vacancies.main.vector_store import add_vectors, extract_features
|
||||
from vacancies.main.vector_store import add_vectors, extract_features, client as qdrant
|
||||
|
||||
# Module-level ClickHouse client; the endpoint is hard-coded to 127.0.0.1:18123.
# NOTE(review): presumably a local or port-forwarded instance — confirm before running elsewhere.
clickhouse_client = clickhouse_connect.create_client(host="127.0.0.1", port=18123)
|
||||
|
||||
query = """
|
||||
# Page through the Qdrant "vacancies" collection and gather the id of every
# stored point. Payloads and vectors are not requested; only ids are used.
next_page_offset = 0
exist_points_ids = []
while next_page_offset is not None:
    # scroll() yields a (points, next offset) pair; the loop ends once the
    # returned offset is None.
    page_points, next_page_offset = qdrant.scroll(
        collection_name="vacancies",
        limit=100_000,
        offset=next_page_offset,
        with_payload=False,
        with_vectors=False,
        timeout=30,
    )
    exist_points_ids.extend(record.id for record in page_points)

# Kept as a tuple (despite the name) because it is interpolated into the SQL
# text of the query that follows, where its repr forms the NOT IN (...) list.
exist_points_set = tuple(exist_points_ids)
print("qdrant vacancies points count:", len(exist_points_set))
|
||||
|
||||
query = f"""
|
||||
SELECT id, chat_username, telegram_id, message, timestamp
|
||||
FROM telegram_parser_chatmessage
|
||||
WHERE timestamp >= now() - INTERVAL 30 DAY
|
||||
@ -17,6 +33,7 @@ WHERE timestamp >= now() - INTERVAL 30 DAY
|
||||
'удаленно', 'гибкий график', 'полный день', 'частичная занятость',
|
||||
'резюме', 'собеседование', 'junior', 'middle', 'senior'
|
||||
]) >= 5 AND position(message, 'О себе') = 0 AND position(message, 'Обо мне') = 0 AND position(message, '#ищу') = 0
|
||||
AND id NOT IN {exist_points_set}
|
||||
"""
|
||||
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user