Improve vacancy indexing and exclude more CVs from the vacancy list
All checks were successful
release / docker (push) Successful in 36s
All checks were successful
release / docker (push) Successful in 36s
This commit is contained in:
parent
c5cba30997
commit
d0131ab2a1
@@ -7,6 +7,7 @@ import sys
|
||||
def main():
|
||||
"""Run administrative tasks."""
|
||||
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'vacancies.conf.settings')
|
||||
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
|
||||
try:
|
||||
from django.core.management import execute_from_command_line
|
||||
except ImportError as exc:
|
||||
|
||||
@@ -168,3 +168,5 @@ CLICKHOUSE_HOST = os.getenv("CLICKHOUSE_HOST", "127.0.0.1")
|
||||
CLICKHOUSE_PORT = int(os.getenv("CLICKHOUSE_PORT", "18123"))
|
||||
|
||||
QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
|
||||
|
||||
COLLECT_VACANCIES_BATCH_SIZE = int(os.getenv("COLLECT_VACANCIES_BATCH_SIZE", "50"))
|
||||
|
||||
@@ -3,6 +3,7 @@ from itertools import batched
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
from django.core.management import BaseCommand
|
||||
from django.conf import settings
|
||||
import clickhouse_connect
|
||||
from vacancies.main.vector_store import add_vectors, extract_features, qdrant_client
|
||||
from vacancies.conf.settings import CLICKHOUSE_HOST, CLICKHOUSE_PORT
|
||||
@@ -23,7 +24,7 @@ WHERE timestamp >= now() - INTERVAL 30 DAY
|
||||
'заниматься', 'формат', 'занятость', 'вилка', 'должност', 'контакт'
|
||||
]) >= 5
|
||||
AND arrayCount(x -> position(lower(message), x) > 0, [
|
||||
'о себе', 'обо мне', 'умею', '#ищу', '#резюме'
|
||||
'о себе', 'обо мне', 'умею', '#ищу', '#резюме', 'университет', 'колледж'
|
||||
]) = 0
|
||||
AND id NOT IN %(exist_points)s
|
||||
"""
|
||||
@@ -49,9 +50,8 @@ class Command(BaseCommand):
|
||||
exist_points_set = tuple(set(exist_points_ids))
|
||||
|
||||
result_rows = clickhouse_client.query(query, parameters={"exist_points": exist_points_set}).result_rows
|
||||
batch_size = 30
|
||||
with ThreadPoolExecutor(max_workers=batch_size) as pool:
|
||||
pool.map(self._process_batch, batched(result_rows, batch_size))
|
||||
with ThreadPoolExecutor(max_workers=settings.COLLECT_VACANCIES_BATCH_SIZE) as pool:
|
||||
pool.map(self._process_batch, batched(result_rows, settings.COLLECT_VACANCIES_BATCH_SIZE))
|
||||
|
||||
def _process_batch(self, result_rows):
|
||||
try:
|
||||
|
||||
@@ -12,6 +12,9 @@ class Command(BaseCommand):
|
||||
help = "Generates new recommended vacancies"
|
||||
|
||||
def handle(self, *args, **options):
|
||||
asyncio.run(self.ahandle(*args, **options))
|
||||
|
||||
async def ahandle(self, *args, **options):
|
||||
customer_cvs = CustomerCV.objects.all()
|
||||
|
||||
for customer_cv in customer_cvs:
|
||||
@@ -31,10 +34,10 @@ class Command(BaseCommand):
|
||||
vacancy_id=search_result_id,
|
||||
)
|
||||
|
||||
asyncio.run(application.bot.send_message(
|
||||
await application.bot.send_message(
|
||||
chat_id=recommendation.customer.chat_id,
|
||||
text=vacancy_content,
|
||||
reply_markup=InlineKeyboardMarkup([[
|
||||
InlineKeyboardButton("Откликнуться", url=link),
|
||||
]]),
|
||||
))
|
||||
)
|
||||
|
||||
@@ -147,8 +147,8 @@ def search_similarities(query_filter: Filter, cv_id: int):
|
||||
Резюме: {cv.payload['content']}
|
||||
|
||||
Среди вакансий ниже выбери одну наиболее релевантную и выведи ее индекс(от 0 до 9).
|
||||
Если среди вакансий нет подходящих, то верни -1.
|
||||
В ответе выведи только число.
|
||||
Иногда могут попадаться чужие резюме вместо вакансий, их отдавать нельзя.
|
||||
В ответе выведи только число. Если среди вакансий нет подходящих, то верни -1.
|
||||
{scored[:10]}
|
||||
"""
|
||||
openai_client = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal", temperature=0, seed=42, top_p=1)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user