diff --git a/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py b/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py
index 7fce5b6..e2b373f 100644
--- a/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py
+++ b/vacancies/main/management/commands/collect_vacancies_from_telegram_messages.py
@@ -7,7 +7,7 @@ from vacancies.main.vector_store import vector_store
 clickhouse_client = clickhouse_connect.create_client(host="127.0.0.1", port=18123)
 
 query = """
-SELECT id, chat_id, telegram_id, message, timestamp
+SELECT id, chat_username, telegram_id, message, timestamp
 FROM telegram_parser_chatmessage
 WHERE timestamp >= now() - INTERVAL 30 DAY
 AND length(message) > 150
@@ -28,10 +28,15 @@ class Command(BaseCommand):
 
         documents = []
         for index, row in enumerate(clickhouse_client.query(query).result_rows):
-            (id, chat_id, telegram_id, message, timestamp) = row
+            (id, chat_username, telegram_id, message, timestamp) = row
 
-            link = f"https://t.me/c/{chat_id}/{telegram_id}"
-            vacancy = Vacancy.objects.create(name="test", content=message, link=link)
+            # Username-addressed chats use t.me/<username>/<message_id>; the
+            # "/c/" path segment is only valid with a numeric private-chat id.
+            link = f"https://t.me/{chat_username}/{telegram_id}"
+            vacancy, _ = Vacancy.objects.get_or_create(
+                link=link,
+                defaults={"content": message},
+            )
 
             metadata = {"link": link, "vacancy_id": vacancy.id}
             documents.append(Document(page_content=message, metadata=metadata))
diff --git a/vacancies/main/management/commands/generate_recommended_vacancies.py b/vacancies/main/management/commands/generate_recommended_vacancies.py
index 0cadccb..60391c8 100644
--- a/vacancies/main/management/commands/generate_recommended_vacancies.py
+++ b/vacancies/main/management/commands/generate_recommended_vacancies.py
@@ -12,7 +12,20 @@ class Command(BaseCommand):
     def handle(self, *args, **options):
         customer_cvs = CustomerCV.objects.all()
         for customer_cv in customer_cvs:
-            document = vector_store.similarity_search(customer_cv.content, k=1)[0]
+            recommended_vacancy_ids = RecommendedVacancy.objects.filter(
+                customer=customer_cv.customer
+            ).values_list("vacancy_id", flat=True)
+
+            documents = vector_store.similarity_search(
+                customer_cv.content,
+                k=1,
+                filter={"vacancy_id": {"$nin": list(recommended_vacancy_ids)}},
+            )
+            if not documents:
+                # The search returned nothing (e.g. every vacancy is already
+                # recommended); indexing [0] here would raise IndexError.
+                continue
+            document = documents[0]
             recommendation, _ = RecommendedVacancy.objects.get_or_create(
                 customer=customer_cv.customer,
                 vacancy_id=document.metadata["vacancy_id"],
diff --git a/vacancies/main/migrations/0003_remove_recommendedvacancy_is_shown_and_more.py b/vacancies/main/migrations/0003_remove_recommendedvacancy_is_shown_and_more.py
new file mode 100644
index 0000000..ab82c40
--- /dev/null
+++ b/vacancies/main/migrations/0003_remove_recommendedvacancy_is_shown_and_more.py
@@ -0,0 +1,26 @@
+# Generated by Django 5.2.7 on 2025-10-25 16:00
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('main', '0002_customer_created_at_customercv_content_and_more'),
+    ]
+
+    operations = [
+        migrations.RemoveField(
+            model_name='recommendedvacancy',
+            name='is_shown',
+        ),
+        migrations.RemoveField(
+            model_name='vacancy',
+            name='name',
+        ),
+        migrations.AlterField(
+            model_name='vacancy',
+            name='link',
+            field=models.URLField(unique=True),
+        ),
+    ]
diff --git a/vacancies/main/models.py b/vacancies/main/models.py
index 6c8ed8a..8b84257 100644
--- a/vacancies/main/models.py
+++ b/vacancies/main/models.py
@@ -32,7 +32,7 @@ class CustomerCV(models.Model):
 
 class Vacancy(models.Model):
     content = models.TextField()
-    link = models.URLField()
+    link = models.URLField(unique=True)
     created_at = models.DateTimeField(auto_now_add=True)
 
     objects = models.Manager()