Compare commits
1 Commits
d4b28b8e9f
...
09bf0ea580
| Author | SHA1 | Date | |
|---|---|---|---|
| 09bf0ea580 |
24
compose.yaml
24
compose.yaml
@ -1,4 +1,9 @@
|
|||||||
services:
|
services:
|
||||||
|
ofelia:
|
||||||
|
image: mcuadros/ofelia:latest
|
||||||
|
command: daemon --docker -f label=com.docker.compose.project=${COMPOSE_PROJECT_NAME}
|
||||||
|
volumes:
|
||||||
|
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||||
qdrant:
|
qdrant:
|
||||||
image: qdrant/qdrant:latest
|
image: qdrant/qdrant:latest
|
||||||
restart: always
|
restart: always
|
||||||
@ -16,3 +21,22 @@ services:
|
|||||||
- "127.0.0.1:5432:5432"
|
- "127.0.0.1:5432:5432"
|
||||||
volumes:
|
volumes:
|
||||||
- "/srv/vision-career/postgres:/var/lib/postgresql/data"
|
- "/srv/vision-career/postgres:/var/lib/postgresql/data"
|
||||||
|
bot:
|
||||||
|
image: vision-career:latest
|
||||||
|
build: .
|
||||||
|
command: [".venv/bin/python", "manage.py", "runbot"]
|
||||||
|
restart: always
|
||||||
|
init: true
|
||||||
|
network_mode: host
|
||||||
|
env_file:
|
||||||
|
- .env
|
||||||
|
labels:
|
||||||
|
ofelia.enabled: "true"
|
||||||
|
ofelia.job-exec.collect-vacancies-from-telegram-messages.schedule: "@every 1m"
|
||||||
|
ofelia.job-exec.collect-vacancies-from-telegram-messages.command: ".venv/bin/python manage.py collect_vacancies_from_telegram_messages"
|
||||||
|
ofelia.job-exec.generate-recommended-vacancies.schedule: "@every 1m"
|
||||||
|
ofelia.job-exec.generate-recommended-vacancies.command: ".venv/bin/python manage.py generate_recommended_vacancies"
|
||||||
|
develop:
|
||||||
|
watch:
|
||||||
|
- action: rebuild
|
||||||
|
path: .
|
||||||
|
|||||||
@ -1,7 +1,3 @@
|
|||||||
import traceback
|
|
||||||
from itertools import batched
|
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
|
||||||
|
|
||||||
from django.core.management import BaseCommand
|
from django.core.management import BaseCommand
|
||||||
import clickhouse_connect
|
import clickhouse_connect
|
||||||
from vacancies.main.vector_store import add_vectors, extract_features, qdrant_client
|
from vacancies.main.vector_store import add_vectors, extract_features, qdrant_client
|
||||||
@ -25,6 +21,7 @@ WHERE timestamp >= now() - INTERVAL 30 DAY
|
|||||||
AND arrayCount(x -> position(lower(message), x) > 0, [
|
AND arrayCount(x -> position(lower(message), x) > 0, [
|
||||||
'о себе', 'обо мне', 'умею', '#ищу'
|
'о себе', 'обо мне', 'умею', '#ищу'
|
||||||
]) = 0
|
]) = 0
|
||||||
|
AND position(message, 'Руководитель/тимлид группы ИБ АСУ ТП') > 0
|
||||||
AND id NOT IN %(exist_points)s
|
AND id NOT IN %(exist_points)s
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@ -49,24 +46,17 @@ class Command(BaseCommand):
|
|||||||
exist_points_set = tuple(set(exist_points_ids))
|
exist_points_set = tuple(set(exist_points_ids))
|
||||||
|
|
||||||
result_rows = clickhouse_client.query(query, parameters={"exist_points": exist_points_set}).result_rows
|
result_rows = clickhouse_client.query(query, parameters={"exist_points": exist_points_set}).result_rows
|
||||||
batch_size = 10
|
result_rows_len = len(result_rows)
|
||||||
with ThreadPoolExecutor(max_workers=batch_size) as pool:
|
for index, row in enumerate(result_rows):
|
||||||
pool.map(self._process_batch, batched(result_rows, batch_size))
|
(id, chat_username, telegram_id, message, timestamp) = row
|
||||||
|
|
||||||
def _process_batch(self, result_rows):
|
link = f"https://t.me/{chat_username}/{telegram_id}"
|
||||||
try:
|
print(f"Processing {index+1}/{result_rows_len} link: {link}")
|
||||||
for index, row in enumerate(result_rows):
|
features = extract_features(message)
|
||||||
(id, chat_username, telegram_id, message, timestamp) = row
|
|
||||||
|
|
||||||
link = f"https://t.me/{chat_username}/{telegram_id}"
|
add_vectors(
|
||||||
print(f"Processing {index+1}/{len(result_rows)} link: {link}")
|
"vacancies",
|
||||||
features = extract_features(message)
|
id,
|
||||||
|
features.model_dump(),
|
||||||
add_vectors(
|
{'content': message, 'features_json': features.model_dump(), "link": link, "timestamp": timestamp},
|
||||||
"vacancies",
|
)
|
||||||
id,
|
|
||||||
features.model_dump(),
|
|
||||||
{'content': message, 'features_json': features.model_dump(), "link": link, "timestamp": timestamp},
|
|
||||||
)
|
|
||||||
except Exception as exc:
|
|
||||||
traceback.print_exception(exc)
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user