vision-career/backend/index.py
2025-10-21 20:07:42 +03:00

32 lines
1.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import sys
sys.path.append(".")
from backend.agent import vectorstore
from langchain_core.documents import Document
import clickhouse_connect
# Selects Telegram messages from the last 30 days whose `length(message)`
# exceeds 150 and that contain at least 5 distinct keywords from the list
# below (each keyword is counted once if it occurs as a substring).
# NOTE(review): ClickHouse `position` is case-sensitive, so capitalized
# keywords (e.g. at the start of a sentence) are not counted — confirm this
# is intended.
# NOTE(review): ClickHouse `length` on a String counts bytes, not characters,
# so the 150 threshold is roughly 75 Cyrillic characters — confirm.
query = """
SELECT id, chat_id, telegram_id, message, timestamp
FROM telegram_parser_chatmessage
WHERE timestamp >= now() - INTERVAL 30 DAY
AND length(message) > 150
AND arrayCount(x -> position(message, x) > 0, [
'вакансия', 'ищем', 'требуется', 'разработчик', 'будет плюсом',
'зарплата', 'оклад', 'з/п', 'руб', 'опыт работы',
'требования', 'обязанности', 'условия', 'компания', 'офис',
'удаленно', 'гибкий график', 'полный день', 'частичная занятость',
'резюме', 'собеседование', 'junior', 'middle', 'senior'
]) >= 5
"""

# All connection settings other than the port are left at the
# clickhouse_connect defaults.
client = clickhouse_connect.create_client(port=18123)
try:
    # Read the rows while the client is still open.
    result_rows = client.query(query).result_rows
finally:
    # Release the connection even if the query raises.
    client.close()

# One Document per matching message: the message text is the content that
# gets indexed, the remaining columns travel along as metadata.
documents = [
    Document(
        id=message_id,
        page_content=message,
        metadata={
            "chat_id": chat_id,
            "telegram_id": telegram_id,
            "timestamp": timestamp.isoformat(),
        },
    )
    for message_id, chat_id, telegram_id, message, timestamp in result_rows
]

# Skip the vector store call entirely when nothing matched.
if documents:
    vectorstore.add_documents(documents)