Add existing vacancies filtering
This commit is contained in:
parent
618a102c98
commit
9cf4fa3083
@ -1,11 +1,27 @@
|
|||||||
from django.core.management import BaseCommand
|
from django.core.management import BaseCommand
|
||||||
from vacancies.main.models import Vacancy
|
from vacancies.main.models import Vacancy
|
||||||
import clickhouse_connect
|
import clickhouse_connect
|
||||||
from vacancies.main.vector_store import add_vectors, extract_features
|
from vacancies.main.vector_store import add_vectors, extract_features, client as qdrant
|
||||||
|
|
||||||
clickhouse_client = clickhouse_connect.create_client(host="127.0.0.1", port=18123)
|
clickhouse_client = clickhouse_connect.create_client(host="127.0.0.1", port=18123)
|
||||||
|
|
||||||
query = """
|
next_page_offset = 0
|
||||||
|
exist_points_ids = []
|
||||||
|
while next_page_offset is not None:
|
||||||
|
response = qdrant.scroll(
|
||||||
|
collection_name="vacancies",
|
||||||
|
limit=100_000,
|
||||||
|
offset=next_page_offset,
|
||||||
|
with_payload=False,
|
||||||
|
with_vectors=False,
|
||||||
|
timeout=30,
|
||||||
|
)
|
||||||
|
exist_points_ids.extend([point.id for point in response[0]])
|
||||||
|
next_page_offset = response[1]
|
||||||
|
exist_points_set = tuple(exist_points_ids)
|
||||||
|
print("qdrant vacancies points count:", len(exist_points_set))
|
||||||
|
|
||||||
|
query = f"""
|
||||||
SELECT id, chat_username, telegram_id, message, timestamp
|
SELECT id, chat_username, telegram_id, message, timestamp
|
||||||
FROM telegram_parser_chatmessage
|
FROM telegram_parser_chatmessage
|
||||||
WHERE timestamp >= now() - INTERVAL 30 DAY
|
WHERE timestamp >= now() - INTERVAL 30 DAY
|
||||||
@ -17,6 +33,7 @@ WHERE timestamp >= now() - INTERVAL 30 DAY
|
|||||||
'удаленно', 'гибкий график', 'полный день', 'частичная занятость',
|
'удаленно', 'гибкий график', 'полный день', 'частичная занятость',
|
||||||
'резюме', 'собеседование', 'junior', 'middle', 'senior'
|
'резюме', 'собеседование', 'junior', 'middle', 'senior'
|
||||||
]) >= 5 AND position(message, 'О себе') = 0 AND position(message, 'Обо мне') = 0 AND position(message, '#ищу') = 0
|
]) >= 5 AND position(message, 'О себе') = 0 AND position(message, 'Обо мне') = 0 AND position(message, '#ищу') = 0
|
||||||
|
AND id NOT IN {exist_points_set}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user