Improve vacancy indexing and exclude more CVs from the vacancy list
All checks were successful
release / docker (push) Successful in 36s
All checks were successful
release / docker (push) Successful in 36s
This commit is contained in:
parent
c5cba30997
commit
d0131ab2a1
@ -7,6 +7,7 @@ import sys
|
|||||||
def main():
|
def main():
|
||||||
"""Run administrative tasks."""
|
"""Run administrative tasks."""
|
||||||
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'vacancies.conf.settings')
|
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'vacancies.conf.settings')
|
||||||
|
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
|
||||||
try:
|
try:
|
||||||
from django.core.management import execute_from_command_line
|
from django.core.management import execute_from_command_line
|
||||||
except ImportError as exc:
|
except ImportError as exc:
|
||||||
|
|||||||
@ -168,3 +168,5 @@ CLICKHOUSE_HOST = os.getenv("CLICKHOUSE_HOST", "127.0.0.1")
|
|||||||
CLICKHOUSE_PORT = int(os.getenv("CLICKHOUSE_PORT", "18123"))
|
CLICKHOUSE_PORT = int(os.getenv("CLICKHOUSE_PORT", "18123"))
|
||||||
|
|
||||||
QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
|
QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
|
||||||
|
|
||||||
|
COLLECT_VACANCIES_BATCH_SIZE = int(os.getenv("COLLECT_VACANCIES_BATCH_SIZE", "50"))
|
||||||
|
|||||||
@ -3,6 +3,7 @@ from itertools import batched
|
|||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
from django.core.management import BaseCommand
|
from django.core.management import BaseCommand
|
||||||
|
from django.conf import settings
|
||||||
import clickhouse_connect
|
import clickhouse_connect
|
||||||
from vacancies.main.vector_store import add_vectors, extract_features, qdrant_client
|
from vacancies.main.vector_store import add_vectors, extract_features, qdrant_client
|
||||||
from vacancies.conf.settings import CLICKHOUSE_HOST, CLICKHOUSE_PORT
|
from vacancies.conf.settings import CLICKHOUSE_HOST, CLICKHOUSE_PORT
|
||||||
@ -23,7 +24,7 @@ WHERE timestamp >= now() - INTERVAL 30 DAY
|
|||||||
'заниматься', 'формат', 'занятость', 'вилка', 'должност', 'контакт'
|
'заниматься', 'формат', 'занятость', 'вилка', 'должност', 'контакт'
|
||||||
]) >= 5
|
]) >= 5
|
||||||
AND arrayCount(x -> position(lower(message), x) > 0, [
|
AND arrayCount(x -> position(lower(message), x) > 0, [
|
||||||
'о себе', 'обо мне', 'умею', '#ищу', '#резюме'
|
'о себе', 'обо мне', 'умею', '#ищу', '#резюме', 'университет', 'колледж'
|
||||||
]) = 0
|
]) = 0
|
||||||
AND id NOT IN %(exist_points)s
|
AND id NOT IN %(exist_points)s
|
||||||
"""
|
"""
|
||||||
@ -49,9 +50,8 @@ class Command(BaseCommand):
|
|||||||
exist_points_set = tuple(set(exist_points_ids))
|
exist_points_set = tuple(set(exist_points_ids))
|
||||||
|
|
||||||
result_rows = clickhouse_client.query(query, parameters={"exist_points": exist_points_set}).result_rows
|
result_rows = clickhouse_client.query(query, parameters={"exist_points": exist_points_set}).result_rows
|
||||||
batch_size = 30
|
with ThreadPoolExecutor(max_workers=settings.COLLECT_VACANCIES_BATCH_SIZE) as pool:
|
||||||
with ThreadPoolExecutor(max_workers=batch_size) as pool:
|
pool.map(self._process_batch, batched(result_rows, settings.COLLECT_VACANCIES_BATCH_SIZE))
|
||||||
pool.map(self._process_batch, batched(result_rows, batch_size))
|
|
||||||
|
|
||||||
def _process_batch(self, result_rows):
|
def _process_batch(self, result_rows):
|
||||||
try:
|
try:
|
||||||
|
|||||||
@ -12,6 +12,9 @@ class Command(BaseCommand):
|
|||||||
help = "Generates new recommended vacancies"
|
help = "Generates new recommended vacancies"
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
|
asyncio.run(self.ahandle(*args, **options))
|
||||||
|
|
||||||
|
async def ahandle(self, *args, **options):
|
||||||
customer_cvs = CustomerCV.objects.all()
|
customer_cvs = CustomerCV.objects.all()
|
||||||
|
|
||||||
for customer_cv in customer_cvs:
|
for customer_cv in customer_cvs:
|
||||||
@ -31,10 +34,10 @@ class Command(BaseCommand):
|
|||||||
vacancy_id=search_result_id,
|
vacancy_id=search_result_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
asyncio.run(application.bot.send_message(
|
await application.bot.send_message(
|
||||||
chat_id=recommendation.customer.chat_id,
|
chat_id=recommendation.customer.chat_id,
|
||||||
text=vacancy_content,
|
text=vacancy_content,
|
||||||
reply_markup=InlineKeyboardMarkup([[
|
reply_markup=InlineKeyboardMarkup([[
|
||||||
InlineKeyboardButton("Откликнуться", url=link),
|
InlineKeyboardButton("Откликнуться", url=link),
|
||||||
]]),
|
]]),
|
||||||
))
|
)
|
||||||
|
|||||||
@ -147,8 +147,8 @@ def search_similarities(query_filter: Filter, cv_id: int):
|
|||||||
Резюме: {cv.payload['content']}
|
Резюме: {cv.payload['content']}
|
||||||
|
|
||||||
Среди вакансий ниже выбери одну наиболее релевантную и выведи ее индекс(от 0 до 9).
|
Среди вакансий ниже выбери одну наиболее релевантную и выведи ее индекс(от 0 до 9).
|
||||||
Если среди вакансий нет подходящих, то верни -1.
|
Иногда могут попадаться чужие резюме вместо вакансий, их отдавать нельзя.
|
||||||
В ответе выведи только число.
|
В ответе выведи только число. Если среди вакансий нет подходящих, то верни -1.
|
||||||
{scored[:10]}
|
{scored[:10]}
|
||||||
"""
|
"""
|
||||||
openai_client = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal", temperature=0, seed=42, top_p=1)
|
openai_client = ChatOpenAI(model_name="gpt-5-mini", reasoning_effort="minimal", temperature=0, seed=42, top_p=1)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user