Improve vacancy indexing and exclude more CVs from the vacancy list
All checks were successful
release / docker (push) Successful in 36s

This commit is contained in:
estromenko 2025-11-03 18:09:11 +03:00
parent c5cba30997
commit d0131ab2a1
5 changed files with 14 additions and 8 deletions

View File

@ -7,6 +7,7 @@ import sys
def main():
"""Run administrative tasks."""
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'vacancies.conf.settings')
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
try:
from django.core.management import execute_from_command_line
except ImportError as exc:

View File

@ -168,3 +168,5 @@ CLICKHOUSE_HOST = os.getenv("CLICKHOUSE_HOST", "127.0.0.1")
CLICKHOUSE_PORT = int(os.getenv("CLICKHOUSE_PORT", "18123"))
QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
COLLECT_VACANCIES_BATCH_SIZE = int(os.getenv("COLLECT_VACANCIES_BATCH_SIZE", "50"))

View File

@ -3,6 +3,7 @@ from itertools import batched
from concurrent.futures import ThreadPoolExecutor
from django.core.management import BaseCommand
from django.conf import settings
import clickhouse_connect
from vacancies.main.vector_store import add_vectors, extract_features, qdrant_client
from vacancies.conf.settings import CLICKHOUSE_HOST, CLICKHOUSE_PORT
@ -23,7 +24,7 @@ WHERE timestamp >= now() - INTERVAL 30 DAY
'заниматься', 'формат', 'занятость', 'вилка', 'должност', 'контакт'
]) >= 5
AND arrayCount(x -> position(lower(message), x) > 0, [
'о себе', 'обо мне', 'умею', '#ищу', '#резюме'
'о себе', 'обо мне', 'умею', '#ищу', '#резюме', 'университет', 'колледж'
]) = 0
AND id NOT IN %(exist_points)s
"""
@ -49,9 +50,8 @@ class Command(BaseCommand):
exist_points_set = tuple(set(exist_points_ids))
result_rows = clickhouse_client.query(query, parameters={"exist_points": exist_points_set}).result_rows
batch_size = 30
with ThreadPoolExecutor(max_workers=batch_size) as pool:
pool.map(self._process_batch, batched(result_rows, batch_size))
with ThreadPoolExecutor(max_workers=settings.COLLECT_VACANCIES_BATCH_SIZE) as pool:
pool.map(self._process_batch, batched(result_rows, settings.COLLECT_VACANCIES_BATCH_SIZE))
def _process_batch(self, result_rows):
try:

View File

@ -12,6 +12,9 @@ class Command(BaseCommand):
help = "Generates new recommended vacancies"
def handle(self, *args, **options):
asyncio.run(self.ahandle(*args, **options))
async def ahandle(self, *args, **options):
customer_cvs = CustomerCV.objects.all()
for customer_cv in customer_cvs:
@ -31,10 +34,10 @@ class Command(BaseCommand):
vacancy_id=search_result_id,
)
asyncio.run(application.bot.send_message(
await application.bot.send_message(
chat_id=recommendation.customer.chat_id,
text=vacancy_content,
reply_markup=InlineKeyboardMarkup([[
InlineKeyboardButton("Откликнуться", url=link),
]]),
))
)

View File

@ -147,8 +147,8 @@ def search_similarities(query_filter: Filter, cv_id: int):
Резюме: {cv.payload['content']}
Среди вакансий ниже выбери одну наиболее релевантную и выведи ее индекс(от 0 до 9).
Если среди вакансий нет подходящих, то верни -1.
В ответе выведи только число.
Иногда могут попадаться чужие резюме вместо вакансий, их отдавать нельзя.
В ответе выведи только число. Если среди вакансий нет подходящих, то верни -1.
{scored[:10]}
"""
openai_client = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal", temperature=0, seed=42, top_p=1)