Compare commits
3 Commits
09bf0ea580
...
d4b28b8e9f
| Author | SHA1 | Date | |
|---|---|---|---|
| d4b28b8e9f | |||
| 9da00d5d1d | |||
| f8ca003942 |
24
compose.yaml
24
compose.yaml
@ -1,9 +1,4 @@
|
|||||||
services:
|
services:
|
||||||
ofelia:
|
|
||||||
image: mcuadros/ofelia:latest
|
|
||||||
command: daemon --docker -f label=com.docker.compose.project=${COMPOSE_PROJECT_NAME}
|
|
||||||
volumes:
|
|
||||||
- /var/run/docker.sock:/var/run/docker.sock:ro
|
|
||||||
qdrant:
|
qdrant:
|
||||||
image: qdrant/qdrant:latest
|
image: qdrant/qdrant:latest
|
||||||
restart: always
|
restart: always
|
||||||
@ -21,22 +16,3 @@ services:
|
|||||||
- "127.0.0.1:5432:5432"
|
- "127.0.0.1:5432:5432"
|
||||||
volumes:
|
volumes:
|
||||||
- "/srv/vision-career/postgres:/var/lib/postgresql/data"
|
- "/srv/vision-career/postgres:/var/lib/postgresql/data"
|
||||||
bot:
|
|
||||||
image: vision-career:latest
|
|
||||||
build: .
|
|
||||||
command: [".venv/bin/python", "manage.py", "runbot"]
|
|
||||||
restart: always
|
|
||||||
init: true
|
|
||||||
network_mode: host
|
|
||||||
env_file:
|
|
||||||
- .env
|
|
||||||
labels:
|
|
||||||
ofelia.enabled: "true"
|
|
||||||
ofelia.job-exec.collect-vacancies-from-telegram-messages.schedule: "@every 1m"
|
|
||||||
ofelia.job-exec.collect-vacancies-from-telegram-messages.command: ".venv/bin/python manage.py collect_vacancies_from_telegram_messages"
|
|
||||||
ofelia.job-exec.generate-recommended-vacancies.schedule: "@every 1m"
|
|
||||||
ofelia.job-exec.generate-recommended-vacancies.command: ".venv/bin/python manage.py generate_recommended_vacancies"
|
|
||||||
develop:
|
|
||||||
watch:
|
|
||||||
- action: rebuild
|
|
||||||
path: .
|
|
||||||
|
|||||||
@ -1,3 +1,7 @@
|
|||||||
|
import traceback
|
||||||
|
from itertools import batched
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
from django.core.management import BaseCommand
|
from django.core.management import BaseCommand
|
||||||
import clickhouse_connect
|
import clickhouse_connect
|
||||||
from vacancies.main.vector_store import add_vectors, extract_features, qdrant_client
|
from vacancies.main.vector_store import add_vectors, extract_features, qdrant_client
|
||||||
@ -21,7 +25,6 @@ WHERE timestamp >= now() - INTERVAL 30 DAY
|
|||||||
AND arrayCount(x -> position(lower(message), x) > 0, [
|
AND arrayCount(x -> position(lower(message), x) > 0, [
|
||||||
'о себе', 'обо мне', 'умею', '#ищу'
|
'о себе', 'обо мне', 'умею', '#ищу'
|
||||||
]) = 0
|
]) = 0
|
||||||
AND position(message, 'Руководитель/тимлид группы ИБ АСУ ТП') > 0
|
|
||||||
AND id NOT IN %(exist_points)s
|
AND id NOT IN %(exist_points)s
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@ -46,12 +49,17 @@ class Command(BaseCommand):
|
|||||||
exist_points_set = tuple(set(exist_points_ids))
|
exist_points_set = tuple(set(exist_points_ids))
|
||||||
|
|
||||||
result_rows = clickhouse_client.query(query, parameters={"exist_points": exist_points_set}).result_rows
|
result_rows = clickhouse_client.query(query, parameters={"exist_points": exist_points_set}).result_rows
|
||||||
result_rows_len = len(result_rows)
|
batch_size = 10
|
||||||
|
with ThreadPoolExecutor(max_workers=batch_size) as pool:
|
||||||
|
pool.map(self._process_batch, batched(result_rows, batch_size))
|
||||||
|
|
||||||
|
def _process_batch(self, result_rows):
|
||||||
|
try:
|
||||||
for index, row in enumerate(result_rows):
|
for index, row in enumerate(result_rows):
|
||||||
(id, chat_username, telegram_id, message, timestamp) = row
|
(id, chat_username, telegram_id, message, timestamp) = row
|
||||||
|
|
||||||
link = f"https://t.me/{chat_username}/{telegram_id}"
|
link = f"https://t.me/{chat_username}/{telegram_id}"
|
||||||
print(f"Processing {index+1}/{result_rows_len} link: {link}")
|
print(f"Processing {index+1}/{len(result_rows)} link: {link}")
|
||||||
features = extract_features(message)
|
features = extract_features(message)
|
||||||
|
|
||||||
add_vectors(
|
add_vectors(
|
||||||
@ -60,3 +68,5 @@ class Command(BaseCommand):
|
|||||||
features.model_dump(),
|
features.model_dump(),
|
||||||
{'content': message, 'features_json': features.model_dump(), "link": link, "timestamp": timestamp},
|
{'content': message, 'features_json': features.model_dump(), "link": link, "timestamp": timestamp},
|
||||||
)
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
traceback.print_exception(exc)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user