This commit is contained in:
parent
9da00d5d1d
commit
d4b28b8e9f
@ -1,3 +1,7 @@
|
|||||||
|
import traceback
|
||||||
|
from itertools import batched
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
from django.core.management import BaseCommand
|
from django.core.management import BaseCommand
|
||||||
import clickhouse_connect
|
import clickhouse_connect
|
||||||
from vacancies.main.vector_store import add_vectors, extract_features, qdrant_client
|
from vacancies.main.vector_store import add_vectors, extract_features, qdrant_client
|
||||||
@ -45,17 +49,24 @@ class Command(BaseCommand):
|
|||||||
exist_points_set = tuple(set(exist_points_ids))
|
exist_points_set = tuple(set(exist_points_ids))
|
||||||
|
|
||||||
result_rows = clickhouse_client.query(query, parameters={"exist_points": exist_points_set}).result_rows
|
result_rows = clickhouse_client.query(query, parameters={"exist_points": exist_points_set}).result_rows
|
||||||
result_rows_len = len(result_rows)
|
batch_size = 10
|
||||||
for index, row in enumerate(result_rows):
|
with ThreadPoolExecutor(max_workers=batch_size) as pool:
|
||||||
(id, chat_username, telegram_id, message, timestamp) = row
|
pool.map(self._process_batch, batched(result_rows, batch_size))
|
||||||
|
|
||||||
link = f"https://t.me/{chat_username}/{telegram_id}"
|
def _process_batch(self, result_rows):
|
||||||
print(f"Processing {index+1}/{result_rows_len} link: {link}")
|
try:
|
||||||
features = extract_features(message)
|
for index, row in enumerate(result_rows):
|
||||||
|
(id, chat_username, telegram_id, message, timestamp) = row
|
||||||
|
|
||||||
add_vectors(
|
link = f"https://t.me/{chat_username}/{telegram_id}"
|
||||||
"vacancies",
|
print(f"Processing {index+1}/{len(result_rows)} link: {link}")
|
||||||
id,
|
features = extract_features(message)
|
||||||
features.model_dump(),
|
|
||||||
{'content': message, 'features_json': features.model_dump(), "link": link, "timestamp": timestamp},
|
add_vectors(
|
||||||
)
|
"vacancies",
|
||||||
|
id,
|
||||||
|
features.model_dump(),
|
||||||
|
{'content': message, 'features_json': features.model_dump(), "link": link, "timestamp": timestamp},
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
traceback.print_exception(exc)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user