vision-career/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py

39 lines
1.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from django.core.management import BaseCommand
from vacancies.main.models import Vacancy
import clickhouse_connect
from vacancies.main.vector_store import add_vacancy_vectors, extract_vacancy_features
# ClickHouse client used by the command below. It is created when this module
# is imported, with a hard-coded host and port.
# NOTE(review): consider moving host/port into settings and creating the
# client lazily, so importing the module does not open a connection — confirm.
clickhouse_client = clickhouse_connect.create_client(host="127.0.0.1", port=18123)
# Selects chat messages from the last 30 days whose `length` exceeds 150,
# that contain at least 5 of the listed keywords, and that contain none of
# the three excluded phrases in the final condition.
# NOTE(review): the excluded phrases look like markers of job-seeker posts
# rather than vacancies — confirm intent.
# NOTE(review): `position` is presumably case-sensitive and `length`
# presumably counts bytes rather than characters, so capitalized keywords
# would not match and 150 would be roughly 75 Cyrillic letters — verify
# against the ClickHouse documentation.
query = """
SELECT id, chat_username, telegram_id, message, timestamp
FROM telegram_parser_chatmessage
WHERE timestamp >= now() - INTERVAL 30 DAY
AND length(message) > 150
AND arrayCount(x -> position(message, x) > 0, [
'вакансия', 'ищем', 'требуется', 'разработчик', 'будет плюсом',
'зарплата', 'оклад', 'з/п', 'руб', 'опыт работы',
'требования', 'обязанности', 'условия', 'компания', 'офис',
'удаленно', 'гибкий график', 'полный день', 'частичная занятость',
'резюме', 'собеседование', 'junior', 'middle', 'senior'
]) >= 5 AND position(message, 'О себе') = 0 AND position(message, 'Обо мне') = 0 AND position(message, '#ищу') = 0
"""
class Command(BaseCommand):
    """Store Telegram messages returned by the ClickHouse query as vacancies.

    For every row returned by the module-level ``query`` the command builds a
    ``t.me`` link, extracts features from the message text, gets or creates a
    ``Vacancy`` keyed by that link, and passes the features to
    ``add_vacancy_vectors``.
    """

    help = "Collect vacancies from telegram messages"

    def handle(self, *args, **options):
        """Process every row returned by ``query``.

        Args:
            *args: Positional arguments supplied by Django; not used.
            **options: Parsed command-line options; not used.
        """
        rows = clickhouse_client.query(query).result_rows
        for index, row in enumerate(rows):
            # Row layout follows the SELECT list:
            # id, chat_username, telegram_id, message, timestamp.
            # Only the middle three columns are needed here.
            _, chat_username, telegram_id, message, _ = row
            link = f"https://t.me/{chat_username}/{telegram_id}"

            # NOTE(review): features are extracted and vectors are added even
            # when the vacancy already exists — confirm this is intended.
            features = extract_vacancy_features(message)
            # Dump once and reuse for both the DB defaults and the vectors.
            features_json = features.model_dump()

            vacancy, _ = Vacancy.objects.get_or_create(
                link=link,
                defaults={"content": message, "features_json": features_json},
            )

            # Write through the command's stdout wrapper so that output can be
            # captured or redirected by callers of the command.
            self.stdout.write(f"{index} {link}")
            add_vacancy_vectors(vacancy.id, features_json, {"link": link})