Compare commits
3 Commits
09bf0ea580
...
d4b28b8e9f
| Author | SHA1 | Date | |
|---|---|---|---|
| | d4b28b8e9f | | |
| | 9da00d5d1d | | |
| | f8ca003942 | | |
24
compose.yaml
24
compose.yaml
@@ -1,9 +1,4 @@
|
|||||||
services:
|
services:
|
||||||
ofelia:
|
|
||||||
image: mcuadros/ofelia:latest
|
|
||||||
command: daemon --docker -f label=com.docker.compose.project=${COMPOSE_PROJECT_NAME}
|
|
||||||
volumes:
|
|
||||||
- /var/run/docker.sock:/var/run/docker.sock:ro
|
|
||||||
qdrant:
|
qdrant:
|
||||||
image: qdrant/qdrant:latest
|
image: qdrant/qdrant:latest
|
||||||
restart: always
|
restart: always
|
||||||
@@ -21,22 +16,3 @@ services:
|
|||||||
- "127.0.0.1:5432:5432"
|
- "127.0.0.1:5432:5432"
|
||||||
volumes:
|
volumes:
|
||||||
- "/srv/vision-career/postgres:/var/lib/postgresql/data"
|
- "/srv/vision-career/postgres:/var/lib/postgresql/data"
|
||||||
bot:
|
|
||||||
image: vision-career:latest
|
|
||||||
build: .
|
|
||||||
command: [".venv/bin/python", "manage.py", "runbot"]
|
|
||||||
restart: always
|
|
||||||
init: true
|
|
||||||
network_mode: host
|
|
||||||
env_file:
|
|
||||||
- .env
|
|
||||||
labels:
|
|
||||||
ofelia.enabled: "true"
|
|
||||||
ofelia.job-exec.collect-vacancies-from-telegram-messages.schedule: "@every 1m"
|
|
||||||
ofelia.job-exec.collect-vacancies-from-telegram-messages.command: ".venv/bin/python manage.py collect_vacancies_from_telegram_messages"
|
|
||||||
ofelia.job-exec.generate-recommended-vacancies.schedule: "@every 1m"
|
|
||||||
ofelia.job-exec.generate-recommended-vacancies.command: ".venv/bin/python manage.py generate_recommended_vacancies"
|
|
||||||
develop:
|
|
||||||
watch:
|
|
||||||
- action: rebuild
|
|
||||||
path: .
|
|
||||||
|
|||||||
@@ -1,3 +1,7 @@
|
|||||||
|
import traceback
|
||||||
|
from itertools import batched
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
from django.core.management import BaseCommand
|
from django.core.management import BaseCommand
|
||||||
import clickhouse_connect
|
import clickhouse_connect
|
||||||
from vacancies.main.vector_store import add_vectors, extract_features, qdrant_client
|
from vacancies.main.vector_store import add_vectors, extract_features, qdrant_client
|
||||||
@@ -11,13 +15,16 @@ FROM telegram_parser_chatmessage
|
|||||||
WHERE timestamp >= now() - INTERVAL 30 DAY
|
WHERE timestamp >= now() - INTERVAL 30 DAY
|
||||||
AND length(message) > 150
|
AND length(message) > 150
|
||||||
AND arrayCount(x -> position(message, x) > 0, [
|
AND arrayCount(x -> position(message, x) > 0, [
|
||||||
'вакансия', 'ищем', 'требуется', 'разработчик', 'будет плюсом',
|
'ваканси', 'ищем', 'требуется', 'разработчик', 'будет плюсом',
|
||||||
'зарплата', 'оклад', 'з/п', 'руб', 'опыт работы',
|
'зарплат', 'оклад', 'з/п', 'руб', 'опыт',
|
||||||
'требования', 'обязанности', 'условия', 'компания', 'офис',
|
'требовани', 'обязанности', 'условия', 'офис',
|
||||||
'удаленно', 'гибкий график', 'полный день', 'частичная занятость',
|
'удаленн', 'гибкий график', 'полный день', 'занятост',
|
||||||
'резюме', 'собеседование', 'junior', 'middle', 'senior'
|
'резюме', 'собеседовани', 'junior', 'middle', 'senior', 'ждем', 'компани',
|
||||||
]) >= 5 AND position(message, 'О себе') = 0 AND position(message, 'Обо мне') = 0
|
'заниматься', 'формат', 'занятость', 'вилка', 'должност', 'контакт'
|
||||||
AND position(message, '#ищу') = 0 AND position(message, 'умею') = 0
|
]) >= 5
|
||||||
|
AND arrayCount(x -> position(lower(message), x) > 0, [
|
||||||
|
'о себе', 'обо мне', 'умею', '#ищу'
|
||||||
|
]) = 0
|
||||||
AND id NOT IN %(exist_points)s
|
AND id NOT IN %(exist_points)s
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -42,17 +49,24 @@ class Command(BaseCommand):
|
|||||||
exist_points_set = tuple(set(exist_points_ids))
|
exist_points_set = tuple(set(exist_points_ids))
|
||||||
|
|
||||||
result_rows = clickhouse_client.query(query, parameters={"exist_points": exist_points_set}).result_rows
|
result_rows = clickhouse_client.query(query, parameters={"exist_points": exist_points_set}).result_rows
|
||||||
result_rows_len = len(result_rows)
|
batch_size = 10
|
||||||
for index, row in enumerate(result_rows):
|
with ThreadPoolExecutor(max_workers=batch_size) as pool:
|
||||||
(id, chat_username, telegram_id, message, timestamp) = row
|
pool.map(self._process_batch, batched(result_rows, batch_size))
|
||||||
|
|
||||||
link = f"https://t.me/{chat_username}/{telegram_id}"
|
def _process_batch(self, result_rows):
|
||||||
print(f"Processing {index+1}/{result_rows_len} link: {link}")
|
try:
|
||||||
features = extract_features(message)
|
for index, row in enumerate(result_rows):
|
||||||
|
(id, chat_username, telegram_id, message, timestamp) = row
|
||||||
|
|
||||||
add_vectors(
|
link = f"https://t.me/{chat_username}/{telegram_id}"
|
||||||
"vacancies",
|
print(f"Processing {index+1}/{len(result_rows)} link: {link}")
|
||||||
id,
|
features = extract_features(message)
|
||||||
features.model_dump(),
|
|
||||||
{'content': message, 'features_json': features.model_dump(), "link": link, "timestamp": timestamp},
|
add_vectors(
|
||||||
)
|
"vacancies",
|
||||||
|
id,
|
||||||
|
features.model_dump(),
|
||||||
|
{'content': message, 'features_json': features.model_dump(), "link": link, "timestamp": timestamp},
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
traceback.print_exception(exc)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user