diff --git a/manage.py b/manage.py index e282dc9..581401c 100755 --- a/manage.py +++ b/manage.py @@ -7,6 +7,7 @@ import sys def main(): """Run administrative tasks.""" os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'vacancies.conf.settings') + os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true" try: from django.core.management import execute_from_command_line except ImportError as exc: diff --git a/vacancies/conf/settings.py b/vacancies/conf/settings.py index 5090ecb..b9fb9d8 100644 --- a/vacancies/conf/settings.py +++ b/vacancies/conf/settings.py @@ -168,3 +168,5 @@ CLICKHOUSE_HOST = os.getenv("CLICKHOUSE_HOST", "127.0.0.1") CLICKHOUSE_PORT = int(os.getenv("CLICKHOUSE_PORT", "18123")) QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333") + +COLLECT_VACANCIES_BATCH_SIZE = int(os.getenv("COLLECT_VACANCIES_BATCH_SIZE", "50")) diff --git a/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py b/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py index 07f6fed..dfaaeb2 100644 --- a/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py +++ b/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py @@ -3,6 +3,7 @@ from itertools import batched from concurrent.futures import ThreadPoolExecutor from django.core.management import BaseCommand +from django.conf import settings import clickhouse_connect from vacancies.main.vector_store import add_vectors, extract_features, qdrant_client from vacancies.conf.settings import CLICKHOUSE_HOST, CLICKHOUSE_PORT @@ -23,7 +24,7 @@ WHERE timestamp >= now() - INTERVAL 30 DAY 'заниматься', 'формат', 'занятость', 'вилка', 'должност', 'контакт' ]) >= 5 AND arrayCount(x -> position(lower(message), x) > 0, [ - 'о себе', 'обо мне', 'умею', '#ищу', '#резюме' + 'о себе', 'обо мне', 'умею', '#ищу', '#резюме', 'университет', 'колледж' ]) = 0 AND id NOT IN %(exist_points)s """ @@ -49,9 +50,8 @@ class Command(BaseCommand): exist_points_set = tuple(set(exist_points_ids)) result_rows = clickhouse_client.query(query, parameters={"exist_points": exist_points_set}).result_rows - batch_size = 30 - with ThreadPoolExecutor(max_workers=batch_size) as pool: - pool.map(self._process_batch, batched(result_rows, batch_size)) + with ThreadPoolExecutor(max_workers=settings.COLLECT_VACANCIES_BATCH_SIZE) as pool: + pool.map(self._process_batch, batched(result_rows, settings.COLLECT_VACANCIES_BATCH_SIZE)) def _process_batch(self, result_rows): try: diff --git a/vacancies/main/management/commands/generate_recommended_vacancies.py b/vacancies/main/management/commands/generate_recommended_vacancies.py index d86c546..0679ab6 100644 --- a/vacancies/main/management/commands/generate_recommended_vacancies.py +++ b/vacancies/main/management/commands/generate_recommended_vacancies.py @@ -12,6 +12,9 @@ class Command(BaseCommand): help = "Generates new recommended vacancies" def handle(self, *args, **options): + asyncio.run(self.ahandle(*args, **options)) + + async def ahandle(self, *args, **options): customer_cvs = CustomerCV.objects.all() for customer_cv in customer_cvs: @@ -31,10 +34,10 @@ class Command(BaseCommand): vacancy_id=search_result_id, ) - asyncio.run(application.bot.send_message( + await application.bot.send_message( chat_id=recommendation.customer.chat_id, text=vacancy_content, reply_markup=InlineKeyboardMarkup([[ InlineKeyboardButton("Откликнуться", url=link), ]]), - )) + ) diff --git a/vacancies/main/vector_store.py b/vacancies/main/vector_store.py index 44dac9c..e1ac83e 100644 --- a/vacancies/main/vector_store.py +++ b/vacancies/main/vector_store.py @@ -147,8 +147,8 @@ def search_similarities(query_filter: Filter, cv_id: int): Резюме: {cv.payload['content']} Среди вакансий ниже выбери одну наиболее релевантную и выведи ее индекс(от 0 до 9). - Если среди вакансий нет подходящих, то верни -1. - В ответе выведи только число. + Иногда могут попадаться чужие резюме вместо вакансий, их отдавать нельзя. + В ответе выведи только число. Если среди вакансий нет подходящих, то верни -1. {scored[:10]} """ openai_client = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal", temperature=0, seed=42, top_p=1)