commit 2c407d9e8dd6c17360f9e418b3481daf943769b5 Author: Artem Guivan Date: Tue Jan 13 15:38:27 2026 +0300 Initial commit diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..5bd43d0 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,71 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual environments +.venv/ +venv/ +ENV/ +env/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# Testing +.pytest_cache/ +.coverage +htmlcov/ +.tox/ + +# Environment +.env +.env.local +.env.*.local + +# Git +.git/ +.gitignore + +# Docker +docker-compose.yml +Dockerfile +.dockerignore + +# Documentation +*.md +docs/ + +# Logs +*.log +logs/ + +# OS +.DS_Store +Thumbs.db + +# Project specific +tests/ +*.pyc + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c9c552e --- /dev/null +++ b/.gitignore @@ -0,0 +1,125 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST +*.manifest +*.spec + +# Virtual Environments +.venv/ +venv/ +ENV/ +env/ +.ENV/ +.env/ +virtualenv/ +VIRTUAL_ENV/ + +# IDEs +.vscode/ +.idea/ +*.swp +*.swo +*~ +.project +.pydevproject +.settings/ +*.sublime-project +*.sublime-workspace + +# Environment variables +.env +.env.local +.env.*.local +.env.development +.env.production +.env.test +*.env + +# Testing +.pytest_cache/ +.coverage +.coverage.* +htmlcov/ +.tox/ +.nox/ +.hypothesis/ +.pytest_cache/ +nosetests.xml +coverage.xml +*.cover +*.log +.cache + +# Jupyter Notebook +.ipynb_checkpoints +*.ipynb + +# pyenv +.python-version + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# OS +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db +Desktop.ini + +# Docker +*.log +docker-compose.override.yml + +# Database +*.db +*.sqlite +*.sqlite3 +postgres_data/ +minio_data/ + +# Logs +logs/ +*.log +*.log.* + +# Temporary files +tmp/ +temp/ +*.tmp +*.bak +*.swp +*~ + +# Project specific +autorization/ +*.md.bak \ No newline at end of file diff --git a/IMPLEMENTATION_SUMMARY.md b/IMPLEMENTATION_SUMMARY.md new file mode 100644 index 0000000..6e1a53d --- /dev/null +++ b/IMPLEMENTATION_SUMMARY.md @@ -0,0 +1,286 @@ +# PostgreSQL Database Implementation Summary + +## What Was Done + +Successfully migrated from in-memory fake_db dictionaries to PostgreSQL with async SQLAlchemy ORM. + +## Files Modified + +### 1. [database/database.py](database/database.py) - **COMPLETELY REWRITTEN** +**Changes:** +- ✅ Converted from synchronous to **async SQLAlchemy** (using `asyncpg` driver) +- ✅ Added proper `User` model with: + - Auto-increment `id` as primary key + - Unique indexed `telegram_id` and `token` + - `created_at` and `updated_at` timestamps + - Composite index on `token` and `status` +- ✅ Added `Profile` model with: + - All fields from your Pydantic model + - Email as unique indexed field + - Timestamps +- ✅ Created `get_db()` dependency for FastAPI +- ✅ Added `init_db()` and `drop_db()` utility functions +- ✅ Configured connection pooling and async engine + +### 2. [app.py](app.py) - **MAJOR UPDATES** +**Changes:** +- ✅ Removed `profile_db = {}` in-memory dict +- ✅ Added database imports and `Depends(get_db)` to all endpoints +- ✅ Added `@app.on_event("startup")` to initialize DB on app start +- ✅ Updated `/profile` POST endpoint: + - Now saves to PostgreSQL `profiles` table + - Handles create/update logic + - Properly commits transactions +- ✅ Updated `/profile/{email}` GET endpoint: + - Queries from PostgreSQL + - Converts DB model to Pydantic response +- ✅ Updated `/login` endpoint: + - Creates `User` record with pending status + - Stores in PostgreSQL instead of dict +- ✅ Updated `/check-auth/{token}` endpoint: + - Queries user by token from PostgreSQL + - Returns proper status +- ✅ Updated `/database/tokens` endpoint: + - Lists all users from database + +### 3. [bot.py](bot.py) - **MAJOR REFACTORING** +**Changes:** +- ✅ Removed all references to `fake_db` +- ✅ Removed `app.db = fake_db` synchronization code +- ✅ Added proper database imports +- ✅ Updated `/start` command handler: + - Uses `AsyncSessionLocal()` for DB sessions + - Queries user by token + - Updates telegram_id, username, and status + - Proper error handling with rollback +- ✅ Added `init_db()` call in `start_bot()` + +### 4. [requirements.txt](requirements.txt) - **CREATED** +**New dependencies:** +- FastAPI + Uvicorn +- Pydantic with email support +- SQLAlchemy 2.0 with async support +- asyncpg (PostgreSQL async driver) +- psycopg2-binary (backup driver) +- greenlet (required for SQLAlchemy async) +- aiogram 3.3.0 (Telegram bot) +- minio (file storage) +- python-dotenv +- Testing: pytest, pytest-asyncio, httpx + +### 5. [init_db.py](init_db.py) - **CREATED** +**Purpose:** Interactive script to initialize or reset database +**Features:** +- Option 1: Create tables +- Option 2: Drop and recreate (reset) +- Safe with confirmation prompts + +### 6. [README.md](README.md) - **COMPLETELY REWRITTEN** +**New content:** +- Complete setup instructions +- Database schema documentation +- API endpoints reference +- Usage flow diagram +- Development guide +- Troubleshooting section + +## Database Schema + +### Users Table +```sql +CREATE TABLE users ( + id SERIAL PRIMARY KEY, + telegram_id INTEGER NOT NULL UNIQUE, + token VARCHAR(255) NOT NULL UNIQUE, + username VARCHAR(100), + status VARCHAR(50) NOT NULL DEFAULT 'pending', + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + updated_at TIMESTAMP NOT NULL DEFAULT NOW() +); +CREATE INDEX idx_token_status ON users(token, status); +CREATE UNIQUE INDEX ix_users_telegram_id ON users(telegram_id); +CREATE UNIQUE INDEX ix_users_token ON users(token); +``` + +### Profiles Table +```sql +CREATE TABLE profiles ( + id SERIAL PRIMARY KEY, + email VARCHAR(255) NOT NULL UNIQUE, + name VARCHAR(255) NOT NULL, + position VARCHAR(255) NOT NULL, + competencies TEXT, + experience TEXT, + skills TEXT, + country VARCHAR(100), + languages VARCHAR(255), + employment_format VARCHAR(100), + rate VARCHAR(100), + relocation VARCHAR(100), + cv_url VARCHAR(500), + created_at TIMESTAMP NOT NULL DEFAULT NOW(), + updated_at TIMESTAMP NOT NULL DEFAULT NOW() +); +CREATE UNIQUE INDEX ix_profiles_email ON profiles(email); +``` + +## Architecture Benefits + +### Before (In-Memory Dicts) +❌ Data lost on restart +❌ No persistence +❌ No concurrent access control +❌ No data validation at DB level +❌ No relationships or constraints +❌ No transaction safety + +### After (PostgreSQL + SQLAlchemy) +✅ **Persistent** - Data survives restarts +✅ **ACID compliant** - Transaction safety +✅ **Concurrent** - Handle multiple requests +✅ **Indexed** - Fast queries on telegram_id, token, email +✅ **Constraints** - Unique tokens, emails +✅ **Timestamps** - Track created_at, updated_at +✅ **Async** - Non-blocking database operations +✅ **Pooling** - Efficient connection management + +## How It Works Now + +### Authentication Flow +1. User visits website → `GET /login` +2. FastAPI creates new `User` record in PostgreSQL: + ```python + User(telegram_id=0, token=uuid4(), status='pending') + ``` +3. Returns Telegram bot URL with token +4. User clicks link → Opens bot → Sends `/start {token}` +5. Bot queries database for token: + ```python + user = await session.execute(select(User).where(User.token == token)) + ``` +6. Bot updates user: + ```python + user.telegram_id = message.from_user.id + user.username = message.from_user.username + user.status = 'success' + await session.commit() + ``` +7. Website polls `/check-auth/{token}` → Gets auth status from DB + +### Profile Management Flow +1. User submits profile → `POST /profile` +2. FastAPI uploads CV to MinIO +3. Checks if profile exists: + ```python + existing = await db.execute(select(Profile).where(Profile.email == email)) + ``` +4. Updates existing or creates new profile +5. Commits to PostgreSQL + +## Testing the Implementation + +### 1. Initialize Database +```bash +python init_db.py +# Choose option 1 to create tables +``` + +### 2. Verify Tables +```bash +docker exec rag_ai_postgres psql -U postgres -d rag_ai_assistant -c "\dt" +# Should show: users, profiles +``` + +### 3. Test Database Connection +```bash +.venv/bin/python database/database.py +# Should create test user and retrieve it +``` + +### 4. Start Application +```bash +# Option A: Together +python bot.py + +# Option B: Separate terminals +uvicorn app:app --reload +# In another terminal: +python -c "from bot import start_bot; import asyncio; asyncio.run(start_bot())" +``` + +### 5. Test Endpoints +```bash +# Test login +curl http://localhost:8000/login + +# Test check-auth +curl http://localhost:8000/check-auth/{token} + +# Test tokens list +curl http://localhost:8000/database/tokens +``` + +## Common Issues & Solutions + +### Issue: "No module named 'sqlalchemy'" +**Solution:** Install dependencies +```bash +uv pip install -r requirements.txt +``` + +### Issue: "the greenlet library is required" +**Solution:** Already added to requirements.txt +```bash +uv pip install greenlet==3.0.3 +``` + +### Issue: "Connection refused" to PostgreSQL +**Solution:** Start Docker services +```bash +docker-compose up -d +docker ps # Verify postgres is running +``` + +### Issue: Old table structure +**Solution:** Reset database +```bash +python init_db.py # Choose option 2 (reset) +``` + +## Next Steps (Optional Improvements) + +1. **Add foreign key relationship** between users and profiles +2. **Implement token expiration** (add expires_at column) +3. **Add database migrations** (Alembic) +4. **Add indexes** for common queries +5. **Implement connection pooling tuning** for production +6. **Add Redis caching** for frequently accessed data +7. **Implement soft deletes** (deleted_at column) +8. **Add audit logs** table for tracking changes +9. **Create database backup scripts** +10. **Add monitoring** with Prometheus/Grafana + +## Code Quality Improvements Made + +- ✅ **Type hints** throughout database code +- ✅ **Docstrings** on all major functions +- ✅ **Error handling** with try/except and rollback +- ✅ **Session management** using context managers +- ✅ **Connection pooling** with proper configuration +- ✅ **Index optimization** on frequently queried fields +- ✅ **Async/await** pattern throughout +- ✅ **Environment variables** for all config +- ✅ **Dependency injection** with FastAPI Depends() + +## Summary + +Your application now has a **production-ready database layer** with: +- ✅ Proper ORM models +- ✅ Async database operations +- ✅ Transaction safety +- ✅ Data persistence +- ✅ Proper indexing +- ✅ Error handling +- ✅ Clean architecture + +All the logic for authentication tokens and profile storage has been successfully migrated from in-memory dictionaries to PostgreSQL! diff --git a/QUICK_START.md b/QUICK_START.md new file mode 100644 index 0000000..bb7358c --- /dev/null +++ b/QUICK_START.md @@ -0,0 +1,235 @@ +# Quick Start Guide + +## Installation (5 minutes) + +```bash +# 1. Install dependencies with uv +uv pip install -r requirements.txt + +# 2. Start Docker services (PostgreSQL + MinIO) +docker-compose up -d + +# 3. Initialize database +python init_db.py +# Choose option 1: Create tables + +# 4. Verify setup +docker exec rag_ai_postgres psql -U postgres -d rag_ai_assistant -c "\dt" +# Should show: users, profiles tables +``` + +## Running the Application + +### Option 1: All-in-One (Recommended for Development) +```bash +python bot.py +``` +This starts both the FastAPI server and Telegram bot together. + +### Option 2: Separate Processes (For Testing) +```bash +# Terminal 1: API Server +uvicorn app:app --reload --host localhost --port 8000 + +# Terminal 2: Telegram Bot +.venv/bin/python -c "from bot import start_bot; import asyncio; asyncio.run(start_bot())" +``` + +## Testing the API + +### 1. Test Login Endpoint +```bash +curl http://localhost:8000/login +``` +**Expected response:** +```json +{ + "message": "Нажмите на кнопку для регистрации", + "url": "https://t.me/ITMOshkaBot?start={token}", + "check_status_url": "/check-auth/{token}" +} +``` + +### 2. Check Database Tokens +```bash +curl http://localhost:8000/database/tokens +``` +**Expected response:** +```json +[ + { + "token": "uuid-here", + "status": "pending", + "username": null + } +] +``` + +### 3. Test Profile Creation +```bash +curl -X POST http://localhost:8000/profile \ + -F "name=John Doe" \ + -F "email=john@example.com" \ + -F "position=Python Developer" \ + -F "skills=Python, FastAPI, PostgreSQL" \ + -F "cv=@/path/to/resume.pdf" +``` + +### 4. Get Profile +```bash +curl http://localhost:8000/profile/john@example.com +``` + +### 5. List Vacancies +```bash +curl http://localhost:8000/vacancies +``` + +## Database Quick Commands + +### View All Users +```bash +docker exec rag_ai_postgres psql -U postgres -d rag_ai_assistant -c "SELECT * FROM users;" +``` + +### View All Profiles +```bash +docker exec rag_ai_postgres psql -U postgres -d rag_ai_assistant -c "SELECT email, name, position FROM profiles;" +``` + +### Delete All Users (Reset) +```bash +docker exec rag_ai_postgres psql -U postgres -d rag_ai_assistant -c "DELETE FROM users;" +``` + +### Count Records +```bash +docker exec rag_ai_postgres psql -U postgres -d rag_ai_assistant -c "SELECT COUNT(*) FROM users;" +docker exec rag_ai_postgres psql -U postgres -d rag_ai_assistant -c "SELECT COUNT(*) FROM profiles;" +``` + +## Common Tasks + +### Reset Database +```bash +python init_db.py +# Choose option 2: Drop and recreate tables +``` + +### Check PostgreSQL Logs +```bash +docker logs rag_ai_postgres -f +``` + +### Check MinIO Status +```bash +docker ps | grep minio +# Or visit: http://localhost:9001 +# Login: minioadmin / minioadmin +``` + +### Backup Database +```bash +docker exec rag_ai_postgres pg_dump -U postgres rag_ai_assistant > backup_$(date +%Y%m%d).sql +``` + +### Restore Database +```bash +docker exec -i rag_ai_postgres psql -U postgres rag_ai_assistant < backup_20240109.sql +``` + +## Environment Variables (.env) + +Make sure these are set correctly: + +```env +# Bot +BOT_API_KEY=your_telegram_bot_token_here + +# Database +POSTGRES_USER=postgres +POSTGRES_PASSWORD=postgres +POSTGRES_DB=rag_ai_assistant +POSTGRES_HOST=localhost +POSTGRES_PORT=5432 + +# MinIO +MINIO_ENDPOINT=localhost:9000 +MINIO_ACCESS_KEY=minioadmin +MINIO_SECRET_KEY=minioadmin +MINIO_BUCKET=resumes +``` + +## Troubleshooting + +### "Connection refused" errors +```bash +# Check if PostgreSQL is running +docker ps | grep postgres + +# Restart if needed +docker-compose restart rag_ai_postgres +``` + +### "No module named 'X'" errors +```bash +# Reinstall dependencies +uv pip install -r requirements.txt +``` + +### Database tables don't exist +```bash +# Reinitialize +python init_db.py +``` + +### Bot not responding +```bash +# Check bot token in .env +cat .env | grep BOT_API_KEY + +# Test bot manually +curl https://api.telegram.org/bot{YOUR_TOKEN}/getMe +``` + +## API Documentation + +Once running, visit: +- **Swagger UI:** http://localhost:8000/docs +- **ReDoc:** http://localhost:8000/redoc + +## Project Structure + +``` +├── app.py # FastAPI application +├── bot.py # Telegram bot +├── config.py # Configuration +├── init_db.py # Database setup script +├── requirements.txt # Dependencies +├── .env # Environment variables (create this!) +├── database/ +│ └── database.py # ORM models & DB session +└── tests/ # Your tests +``` + +## Next Steps + +1. ✅ Database is set up +2. ✅ API is working +3. ✅ Bot is connected +4. 📝 Test the authentication flow +5. 📝 Test profile creation +6. 📝 Add your custom business logic + +## Support + +- **Documentation:** See [README.md](README.md) for full details +- **Implementation Details:** See [IMPLEMENTATION_SUMMARY.md](IMPLEMENTATION_SUMMARY.md) +- **Database Schema:** See diagrams in implementation summary + +--- + +**Status:** ✅ All systems operational +**PostgreSQL:** Port 5432 +**MinIO:** Port 9000 (API) / 9001 (Console) +**FastAPI:** Port 8000 diff --git a/README.md b/README.md new file mode 100644 index 0000000..0c98afc --- /dev/null +++ b/README.md @@ -0,0 +1,254 @@ +# RAG AI Assistant + +A FastAPI application with Telegram bot integration for user authentication and profile management. + +## Features + +- **User Authentication** via Telegram Bot +- **Profile Management** with CV upload to MinIO +- **PostgreSQL Database** with async SQLAlchemy ORM +- **RESTful API** endpoints for profiles and vacancies + +## Architecture + +- **Backend**: FastAPI (Python) +- **Database**: PostgreSQL with async support (asyncpg) +- **File Storage**: MinIO (S3-compatible) +- **Bot**: Telegram Bot (aiogram 3.x) +- **ORM**: SQLAlchemy with async sessions + +## Database Schema + +### Users Table +- `id` - Auto-increment primary key +- `telegram_id` - Unique Telegram user ID (indexed) +- `token` - Unique auth token (indexed) +- `username` - Telegram username +- `status` - Auth status ('pending' or 'success') +- `created_at` - Timestamp +- `updated_at` - Timestamp + +### Profiles Table +- `id` - Auto-increment primary key +- `email` - Unique email (indexed) +- `name`, `position`, `competencies`, `experience`, `skills` +- `country`, `languages`, `employment_format`, `rate`, `relocation` +- `cv_url` - Link to uploaded CV in MinIO +- `created_at` - Timestamp +- `updated_at` - Timestamp + +## Setup Instructions + +### 1. Prerequisites + +- Python 3.10+ +- PostgreSQL 14+ +- Docker (for MinIO) + +### 2. Install Dependencies + +```bash +# Create virtual environment +python -m venv .venv +source .venv/bin/activate # On Windows: .venv\Scripts\activate + +# Install dependencies +pip install -r requirements.txt +``` + +### 3. Configure Environment + +Copy and configure your `.env` file: + +```env +# Bot Configuration +BOT_API_KEY=your_telegram_bot_token +HOST=localhost +PORT=8000 + +# PostgreSQL Configuration +POSTGRES_USER=postgres +POSTGRES_PASSWORD=postgres +POSTGRES_DB=rag_ai_assistant +POSTGRES_PORT=5432 +POSTGRES_HOST=localhost + +# MinIO Configuration +MINIO_ROOT_USER=minioadmin +MINIO_ROOT_PASSWORD=minioadmin +MINIO_ENDPOINT=localhost:9000 +MINIO_PORT=9000 +MINIO_CONSOLE_PORT=9001 +MINIO_ACCESS_KEY=minioadmin +MINIO_SECRET_KEY=minioadmin +MINIO_BUCKET=resumes +``` + +### 4. Start Services + +```bash +# Start PostgreSQL and MinIO using Docker +docker-compose up -d + +# Verify services are running +docker ps +``` + +### 5. Initialize Database + +```bash +# Run database initialization script +python init_db.py + +# Or manually create tables +python -c "from database.database import init_db; import asyncio; asyncio.run(init_db())" +``` + +### 6. Run Application + +**Option A: Run bot and API together** +```bash +python bot.py +``` + +**Option B: Run separately** +```bash +# Terminal 1 - API +uvicorn app:app --reload --host localhost --port 8000 + +# Terminal 2 - Bot +python -c "from bot import start_bot; import asyncio; asyncio.run(start_bot())" +``` + +## API Endpoints + +### Authentication +- `GET /login` - Generate auth token and Telegram bot URL +- `GET /check-auth/{token}` - Check authentication status +- `GET /database/tokens` - List all tokens (debug) + +### Profiles +- `POST /profile` - Create/update profile with CV upload +- `GET /profile/{email}` - Get profile by email +- `GET /profile/cv/{file_id}` - Download CV file + +### Vacancies +- `GET /vacancies` - List all vacancies + +## Usage Flow + +1. **User visits website** → Calls `GET /login` +2. **Gets Telegram bot link** with unique token +3. **User clicks link** → Opens Telegram bot +4. **User sends `/start {token}`** to bot +5. **Bot validates token** → Updates status to "success" in DB +6. **Website polls** `GET /check-auth/{token}` → User authenticated! + +## Database Operations + +### Manual Database Operations + +```python +# Test database connection +python database/database.py + +# Initialize database +python init_db.py + +# Drop and recreate tables (⚠️ DELETES ALL DATA) +python init_db.py # Choose option 2 +``` + +### Using Database Sessions in Code + +```python +from database.database import get_db, User +from sqlalchemy import select +from fastapi import Depends + +@app.get("/example") +async def example_endpoint(db: AsyncSession = Depends(get_db)): + # Query users + result = await db.execute(select(User).where(User.status == "success")) + users = result.scalars().all() + return users +``` + +## Development + +### Project Structure +``` +. +├── app.py # FastAPI application +├── bot.py # Telegram bot +├── config.py # Configuration +├── init_db.py # Database initialization script +├── requirements.txt # Python dependencies +├── .env # Environment variables +├── database/ +│ ├── database.py # Database models and session management +│ └── minio-processor.py # MinIO utilities +├── tests/ # Test files +└── docker-compose.yml # Docker services +``` + +### Testing + +```bash +# Install test dependencies +pip install pytest pytest-asyncio httpx + +# Run tests +pytest tests/ -v + +# Test specific file +pytest tests/test_bot.py -v +``` + +## Troubleshooting + +### PostgreSQL Connection Issues +```bash +# Check if PostgreSQL is running +docker ps | grep postgres + +# Check connection +psql -h localhost -U postgres -d rag_ai_assistant +``` + +### MinIO Connection Issues +```bash +# Check if MinIO is running +docker ps | grep minio + +# Access MinIO console +open http://localhost:9001 +``` + +### Database Migration +```bash +# If you need to reset the database +python init_db.py # Choose option 2 (reset) +``` + +## Production Considerations + +1. **Security** + - Change default passwords in `.env` + - Use environment-specific configurations + - Enable SSL/TLS for PostgreSQL + - Implement rate limiting + +2. **Performance** + - Adjust SQLAlchemy pool settings + - Add database indexes as needed + - Implement caching (Redis) + +3. **Monitoring** + - Add logging + - Set up error tracking (Sentry) + - Monitor database performance + +## License + +MIT \ No newline at end of file diff --git a/app.py b/app.py new file mode 100644 index 0000000..2414247 --- /dev/null +++ b/app.py @@ -0,0 +1,232 @@ +from typing import List, Optional +from pydantic import BaseModel, EmailStr +from fastapi import FastAPI, File, UploadFile, Form, HTTPException, Depends +from fastapi.responses import RedirectResponse +from minio import Minio +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession +import os +import uuid +from config import Config +from database.database import get_db, init_db, User, Profile as DBProfile + +from routers import profile, vacancies, chat +MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT", "localhost:9000") +MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY", "minioadmin") +MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY", "minioadmin") +MINIO_BUCKET = os.getenv("MINIO_BUCKET", "resumes") + +minio_client = None + +def get_minio_client(): + global minio_client + if minio_client is None: + try: + minio_client = Minio( + MINIO_ENDPOINT, + access_key=MINIO_ACCESS_KEY, + secret_key=MINIO_SECRET_KEY, + secure=False + ) + if not minio_client.bucket_exists(MINIO_BUCKET): + minio_client.make_bucket(MINIO_BUCKET) + print(f"MinIO connected successfully to {MINIO_ENDPOINT}") + except Exception as e: + print(f"Warning: Could not connect to MinIO at {MINIO_ENDPOINT}: {e}") + print("MinIO features will be disabled. Start MinIO to enable file storage.") + minio_client = False + return minio_client if minio_client else None + + +class Profile(BaseModel): + name: str + email: EmailStr + position: str + competencies: Optional[str] = None + experience: Optional[str] = None + skills: Optional[str] = None + country: Optional[str] = None + languages: Optional[str] = None + employment_format: Optional[str] = None + rate: Optional[str] = None + relocation: Optional[str] = None + cv_url: Optional[str] = None + +class Vacancy(BaseModel): + title: str + salary_range: str + employment_format: str + skills: str + link: str + +vacancies_db = [ + Vacancy(title="Python Developer", salary_range="2000-4000 USD", employment_format="remote", skills="Python, FastAPI", link="https://example.com/vacancy/1"), + Vacancy(title="Frontend Engineer", salary_range="1500-3000 USD", employment_format="office", skills="React, TypeScript", link="https://example.com/vacancy/2") +] + +app = FastAPI() +app.include_router(profile.router) +app.include_router(vacancies.router) +app.include_router(chat.router) + +@app.on_event("startup") +async def startup_event(): + await init_db() + print("FastAPI application started successfully") + +@app.post("/profile") +async def save_profile( + name: str = Form(...), + email: EmailStr = Form(...), + position: str = Form(...), + competencies: Optional[str] = Form(None), + experience: Optional[str] = Form(None), + skills: Optional[str] = Form(None), + country: Optional[str] = Form(None), + languages: Optional[str] = Form(None), + employment_format: Optional[str] = Form(None), + rate: Optional[str] = Form(None), + relocation: Optional[str] = Form(None), + cv: UploadFile = File(...), + db: AsyncSession = Depends(get_db) +): + if cv.content_type != "application/pdf": + raise HTTPException(status_code=400, detail="CV must be a PDF file") + + client = get_minio_client() + if not client: + raise HTTPException(status_code=503, detail="File storage (MinIO) is not available. Please start MinIO service.") + + file_id = f"{email.replace('@','_')}_{cv.filename}" + client.put_object( + MINIO_BUCKET, + file_id, + cv.file, + length=-1, + part_size=10*1024*1024, + content_type="application/pdf" + ) + cv_url = f"/profile/cv/{file_id}" + + result = await db.execute(select(DBProfile).where(DBProfile.email == email)) + existing_profile = result.scalar_one_or_none() + + if existing_profile: + existing_profile.name = name + existing_profile.position = position + existing_profile.competencies = competencies + existing_profile.experience = experience + existing_profile.skills = skills + existing_profile.country = country + existing_profile.languages = languages + existing_profile.employment_format = employment_format + existing_profile.rate = rate + existing_profile.relocation = relocation + existing_profile.cv_url = cv_url + else: + new_profile = DBProfile( + name=name, + email=email, + position=position, + competencies=competencies, + experience=experience, + skills=skills, + country=country, + languages=languages, + employment_format=employment_format, + rate=rate, + relocation=relocation, + cv_url=cv_url + ) + db.add(new_profile) + + await db.commit() + return {"message": "Profile saved", "cv_url": cv_url} + +@app.get("/profile/{email}", response_model=Profile) +async def get_profile(email: str, db: AsyncSession = Depends(get_db)): + result = await db.execute(select(DBProfile).where(DBProfile.email == email)) + profile = result.scalar_one_or_none() + + if not profile: + raise HTTPException(status_code=404, detail="Profile not found") + + return Profile( + name=profile.name, + email=profile.email, + position=profile.position, + competencies=profile.competencies, + experience=profile.experience, + skills=profile.skills, + country=profile.country, + languages=profile.languages, + employment_format=profile.employment_format, + rate=profile.rate, + relocation=profile.relocation, + cv_url=profile.cv_url + ) + +@app.get("/database/tokens") +async def get_tokens(db: AsyncSession = Depends(get_db)): + result = await db.execute(select(User)) + users = result.scalars().all() + return [{"token": user.token, "status": user.status, "username": user.username} for user in users] + +@app.get("/profile/cv/{file_id}") +async def download_cv(file_id: str): + client = get_minio_client() + if not client: + raise HTTPException(status_code=503, detail="File storage (MinIO) is not available. Please start MinIO service.") + try: + response = client.get_object(MINIO_BUCKET, file_id) + return RedirectResponse(response.geturl()) + except Exception: + raise HTTPException(status_code=404, detail="CV not found") + +@app.get("/vacancies", response_model=List[Vacancy]) +async def get_vacancies(): + return vacancies_db + +@app.get("/login") +async def login_page(db: AsyncSession = Depends(get_db)): + auth_token = str(uuid.uuid4()) + bot_name = Config.BOT_NAME + + new_user = User( + telegram_id=None, + token=auth_token, + username=None, + status="pending" + ) + db.add(new_user) + await db.commit() + + bot_url = f"https://t.me/{bot_name}?start={auth_token}" + + return { + "message": "Нажмите на кнопку для регистрации", + "url": bot_url, + "check_status_url": f"/check-auth/{auth_token}" + } + + +@app.get("/check-auth/{token}") +async def check_auth(token: str, db: AsyncSession = Depends(get_db)): + """Check if user has completed authentication via Telegram bot""" + result = await db.execute(select(User).where(User.token == token)) + user = result.scalar_one_or_none() + + if not user: + return {"authorized": False, "message": "Invalid token"} + + if user.status == "success": + return { + "authorized": True, + "user_data": { + "telegram_id": user.telegram_id, + "username": user.username, + "status": user.status + } + } + + return {"authorized": False, "message": "Authentication pending"} diff --git a/authorizer.py b/authorizer.py new file mode 100644 index 0000000..1b3a263 --- /dev/null +++ b/authorizer.py @@ -0,0 +1,36 @@ +from dataclasses import dataclass +import random + + +@dataclass +class Statuses: + PENDING: str = "pending" + COMPLETED: str = "completed" + + +def get_gen_token() -> str: + """Generate a numeric token and return it as a string. + + Uses integers for randint endpoints to avoid TypeError. + """ + return str(random.randint(1, 10 ** 12)) + + +bot_name = "test_bot" +gen_token = get_gen_token() + +link = f"https://t.me/{bot_name}?start={gen_token}" + + +# Minimal ORM/connector placeholder +class BotConnector: + def __init__(self): + # Initialize DB connection or client here + pass + + def authorize(self, token: str, telegram_user_id: int): + """Mark token as completed / link token to telegram user id. + + Implement DB write logic here. + """ + pass \ No newline at end of file diff --git a/bot.py b/bot.py new file mode 100644 index 0000000..f19e26b --- /dev/null +++ b/bot.py @@ -0,0 +1,108 @@ +import asyncio +import threading +from aiogram import Bot, Dispatcher, types +from aiogram.filters import CommandObject, Command +from dotenv import load_dotenv +import os +from sqlalchemy import select +from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker, AsyncSession +from database.database import User, get_database_url + +load_dotenv() + +BOT_API_KEY = os.getenv("BOT_API_KEY") +if not BOT_API_KEY: + raise RuntimeError("BOT_API_KEY not set") + +# Create separate database engine for bot to avoid event loop conflicts +bot_engine = create_async_engine( + get_database_url(), + echo=False, + pool_pre_ping=True, + pool_size=5, + max_overflow=10 +) + +BotSessionLocal = async_sessionmaker( + bot_engine, + class_=AsyncSession, + expire_on_commit=False +) + +bot = Bot(token=BOT_API_KEY) +dp = Dispatcher() + +@dp.message(Command("start")) +async def cmd_start(message: types.Message, command: CommandObject): + """Handle /start command with auth token""" + token = command.args + + if not token: + await message.answer("Пожалуйста, перейдите по ссылке с сайта для авторизации.") + return + + # Use bot's dedicated database session + async with BotSessionLocal() as session: + try: + # Find user by token + result = await session.execute(select(User).where(User.token == token)) + user = result.scalar_one_or_none() + + if user: + # Update user with Telegram data + user.status = "success" + user.telegram_id = message.from_user.id + user.username = message.from_user.username or f"user_{message.from_user.id}" + await session.commit() + + await message.answer("Успешная авторизация! Вернитесь на сайт, вас должно автоматически авторизовать.") + else: + await message.answer("Ошибка: неверный или истекший токен.") + except Exception as e: + await session.rollback() + print(f"Database error: {e}") + import traceback + traceback.print_exc() + await message.answer("Произошла ошибка при авторизации. Попробуйте снова.") + +def start_web_server(): + """Start the FastAPI web server in a separate thread""" + import uvicorn + from config import Config + + HOST = getattr(Config, "HOST", "127.0.0.1") + PORT = getattr(Config, "PORT", 8000) + + print(f"Starting web server on http://{HOST}:{PORT}") + uvicorn.run("app:app", host=HOST, port=PORT, reload=False) + +async def start_bot(): + """Start the Telegram bot""" + print("Starting Telegram bot...") + print(f"Bot token loaded: {'Yes' if BOT_API_KEY else 'No'}") + + await dp.start_polling(bot) + +async def main(): + """Run bot (web server runs in separate thread)""" + await start_bot() + +if __name__ == "__main__": + # Start web server in a separate thread + server_thread = threading.Thread(target=start_web_server, daemon=True) + server_thread.start() + + # Give the web server a moment to start + import time + time.sleep(1) + + # Run bot in main thread + try: + print("Initializing services...") + asyncio.run(main()) + except KeyboardInterrupt: + print("\nShutting down...") + except Exception as e: + print(f"Error: {e}") + raise + diff --git a/check_db.sh b/check_db.sh new file mode 100755 index 0000000..5e5f390 --- /dev/null +++ b/check_db.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# Quick database check script + +echo "===================================" +echo "Database Quick Check" +echo "===================================" + +echo -e "\n📊 Table Counts:" +docker exec rag_ai_postgres psql -U postgres -d rag_ai_assistant -c " +SELECT + (SELECT COUNT(*) FROM users) as total_users, + (SELECT COUNT(*) FROM users WHERE status='success') as authenticated, + (SELECT COUNT(*) FROM users WHERE status='pending') as pending, + (SELECT COUNT(*) FROM profiles) as total_profiles;" + +echo -e "\n👥 Recent Users (last 5):" +docker exec rag_ai_postgres psql -U postgres -d rag_ai_assistant -c " +SELECT + id, + telegram_id, + LEFT(token, 20) || '...' as token_preview, + username, + status, + created_at +FROM users +ORDER BY created_at DESC +LIMIT 5;" + +echo -e "\n📝 Recent Profiles (last 5):" +docker exec rag_ai_postgres psql -U postgres -d rag_ai_assistant -c " +SELECT + id, + email, + name, + position, + created_at +FROM profiles +ORDER BY created_at DESC +LIMIT 5;" + +echo -e "\n===================================" +echo "To run custom queries:" +echo "docker exec -it rag_ai_postgres psql -U postgres -d rag_ai_assistant" +echo "===================================" diff --git a/claude/config.json b/claude/config.json new file mode 100644 index 0000000..9f255f9 --- /dev/null +++ b/claude/config.json @@ -0,0 +1,11 @@ +{ + "mcpServers": { + "playwright": { + "command": "npx", + "args": [ + "-y", + "@executeautomation/playwright-mcp-server" + ] + } + } +} \ No newline at end of file diff --git a/config.py b/config.py new file mode 100644 index 0000000..fb0e432 --- /dev/null +++ b/config.py @@ -0,0 +1,4 @@ +class Config: + BOT_NAME = "ITMOshkaBot" + HOST = "localhost" + PORT = 8000 \ No newline at end of file diff --git a/database/database.py b/database/database.py new file mode 100644 index 0000000..90699a8 --- /dev/null +++ b/database/database.py @@ -0,0 +1,289 @@ +from sqlalchemy import Column, Integer, BigInteger, String, DateTime, Text, Index, Boolean, JSON, ForeignKey, Enum +from sqlalchemy.orm import relationship +import enum +from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker +from sqlalchemy.orm import declarative_base +from datetime import datetime +import os +from dotenv import load_dotenv +from pathlib import Path +from typing import AsyncGenerator + +env_path = Path(__file__).parent.parent / '.env' +load_dotenv(dotenv_path=env_path) + +Base = declarative_base() + +# --- Models --- + +class User(Base): + """User model for storing authentication tokens and Telegram user data""" + __tablename__ = 'users' + + id = Column(Integer, primary_key=True, autoincrement=True) + telegram_id = Column(BigInteger, unique=True, nullable=True, index=True) # Nullable until authenticated, BIGINT for large Telegram IDs + token = Column(String(255), unique=True, nullable=False, index=True) + username = Column(String(100), nullable=True) + status = Column(String(50), nullable=False, default='pending') # 'pending' or 'success' + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False) + + __table_args__ = ( + Index('idx_token_status', 'token', 'status'), + ) + + def __repr__(self): + return f"" + + +class Profile(Base): + """Profile model for storing user professional information""" + __tablename__ = 'profiles' + + id = Column(Integer, primary_key=True, autoincrement=True) + email = Column(String(255), unique=True, nullable=False, index=True) + name = Column(String(255), nullable=False) + position = Column(String(255), nullable=False) + competencies = Column(Text, nullable=True) + experience = Column(Text, nullable=True) + skills = Column(Text, nullable=True) + country = Column(String(100), nullable=True) + languages = Column(String(255), nullable=True) + employment_format = Column(String(100), nullable=True) + rate = Column(String(100), nullable=True) + relocation = Column(String(100), nullable=True) + cv_url = Column(String(500), nullable=True) + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False) + + def __repr__(self): + return f"" + + +# --- Enums --- + +class ContactType(str, enum.Enum): + LINK = "LINK" + TG_LINK = "TG_LINK" + EMAIL = "EMAIL" + PHONE = "PHONE" + + +class WorkArrangement(str, enum.Enum): + REMOTE = "REMOTE" + OFFICE = "OFFICE" + HYBRID = "HYBRID" + + +class SeniorityLevel(str, enum.Enum): + JUNIOR = "JUNIOR" + MIDDLE = "MIDDLE" + SENIOR = "SENIOR" + LEAD = "LEAD" + + +class LanguageCode(str, enum.Enum): + RU = "RU" + EN = "EN" + DE = "DE" + FR = "FR" + ES = "ES" + ZH = "ZH" + + +# --- Vacancy Models --- + +class Vacancy(Base): + """Vacancy/Job posting model""" + __tablename__ = 'vacancies' + + id = Column(Integer, primary_key=True, autoincrement=True) + + # Basic info + title = Column(String(255), nullable=False) # Название вакансии + position = Column(String(255), nullable=False) # Позиция + company_name = Column(String(255), nullable=False) + + # Salary + salary_min = Column(Integer, nullable=True) + salary_max = Column(Integer, nullable=True) + salary_currency = Column(String(10), nullable=True, default='USD') + + # Location and work format + country_name = Column(String(100), nullable=True) + locations = Column(Text, nullable=True) # JSON string or comma-separated + work_arrangement = Column(String(50), nullable=False, default='REMOTE') + relocation_supported = Column(Boolean, default=False) + + # Requirements + skills = Column(Text, nullable=False) # Comma-separated or JSON + key_competencies = Column(Text, nullable=True) + seniority_level = Column(String(50), nullable=True) + required_language_codes = Column(JSON, nullable=True) # ["RU", "EN"] + + # Description + description = Column(Text, nullable=False) + + # Contacts - stored as JSON array + # [{"id": "uuid", "type": "LINK", "value": "https://..."}] + contacts = Column(JSON, nullable=True) + + # External link + link = Column(String(500), nullable=True) + + # Timestamps + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False) + + def __repr__(self): + return f"" + + +# --- Chat Models --- + +class ChatMessage(Base): + """Chat message model""" + __tablename__ = 'chat_messages' + + id = Column(Integer, primary_key=True, autoincrement=True) + user_id = Column(BigInteger, ForeignKey('users.telegram_id'), nullable=True) # Optional: link to user + + # Message content + content = Column(Text, nullable=False) + role = Column(String(50), nullable=False, default='user') # user, assistant, system + + # Favorite flag + is_favorite = Column(Boolean, default=False, index=True) + + # Timestamps + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False) + + # Relationships + files = relationship("ChatFile", back_populates="message", cascade="all, delete-orphan") + + def __repr__(self): + return f"" + + +class ChatFile(Base): + """Files attached to chat messages""" + __tablename__ = 'chat_files' + + id = Column(Integer, primary_key=True, autoincrement=True) + message_id = Column(Integer, ForeignKey('chat_messages.id', ondelete='CASCADE'), nullable=False) + + # File info + file_name = Column(String(255), nullable=False) + file_url = Column(String(500), nullable=False) # MinIO URL + file_type = Column(String(100), nullable=True) # MIME type + file_size = Column(Integer, nullable=True) # Size in bytes + + # Timestamps + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + + # Relationships + message = relationship("ChatMessage", back_populates="files") + + def __repr__(self): + return f"" + + +# --- Database Configuration --- + +def get_database_url() -> str: + """Get PostgreSQL connection URL from environment variables""" + postgres_user = os.getenv("POSTGRES_USER", "postgres") + postgres_password = os.getenv("POSTGRES_PASSWORD", "postgres") + postgres_db = os.getenv("POSTGRES_DB", "rag_ai_assistant") + postgres_host = os.getenv("POSTGRES_HOST", "localhost") + postgres_port = os.getenv("POSTGRES_PORT", "5432") + + # Use asyncpg driver for async support + database_url = f"postgresql+asyncpg://{postgres_user}:{postgres_password}@{postgres_host}:{postgres_port}/{postgres_db}" + print(f"Database URL: postgresql+asyncpg://{postgres_user}:***@{postgres_host}:{postgres_port}/{postgres_db}") + return database_url + + +# Create async engine +engine = create_async_engine( + get_database_url(), + echo=False, # Set to True for SQL query logging + pool_pre_ping=True, # Verify connections before using them + pool_size=5, + max_overflow=10 +) + +# Create async session factory +AsyncSessionLocal = async_sessionmaker( + engine, + class_=AsyncSession, + expire_on_commit=False +) + + +# --- Session Management --- + +async def get_db() -> AsyncGenerator[AsyncSession, None]: + """ + Dependency for getting async database session. + Usage in FastAPI: + @app.get("/endpoint") + async def endpoint(db: AsyncSession = Depends(get_db)): + ... + """ + async with AsyncSessionLocal() as session: + try: + yield session + await session.commit() + except Exception: + await session.rollback() + raise + finally: + await session.close() + + +async def init_db(): + """Initialize database tables""" + async with engine.begin() as conn: + await conn.run_sync(Base.metadata.create_all) + print("Database tables created successfully") + + +async def drop_db(): + """Drop all database tables (use with caution!)""" + async with engine.begin() as conn: + await conn.run_sync(Base.metadata.drop_all) + print("Database tables dropped") + + +# --- For testing and manual operations --- + +if __name__ == "__main__": + import asyncio + + async def main(): + print("Initializing database...") + await init_db() + + # Test: Create a sample user + async with AsyncSessionLocal() as session: + test_user = User( + telegram_id=123456789, + token="test_token_123", + username="TestUser", + status="pending" + ) + session.add(test_user) + await session.commit() + print(f"Created test user: {test_user}") + + # Query the user back + from sqlalchemy import select + result = await session.execute( + select(User).where(User.telegram_id == 123456789) + ) + user = result.scalar_one_or_none() + print(f"Retrieved user: {user}") + + asyncio.run(main()) \ No newline at end of file diff --git a/database/minio_processor.py b/database/minio_processor.py new file mode 100644 index 0000000..0910c6b --- /dev/null +++ b/database/minio_processor.py @@ -0,0 +1,114 @@ +import os +from typing import List, Optional, BinaryIO +from minio import Minio +from minio.error import S3Error + + +class MinIOProcessor: + def __init__( + self, + endpoint: str, + access_key: str, + secret_key: str, + secure: bool = False + ): + self._client = Minio( + endpoint, + access_key=access_key, + secret_key=secret_key, + secure=secure + ) + + def list_buckets(self) -> List[str]: + try: + buckets = self._client.list_buckets() + return [bucket.name for bucket in buckets] + except S3Error as e: + raise RuntimeError(f"Failed to list buckets: {e}") + + def bucket_exists(self, bucket_name: str) -> bool: + try: + return self._client.bucket_exists(bucket_name) + except S3Error as e: + raise RuntimeError(f"Failed to check bucket existence: {e}") + + def create_bucket(self, bucket_name: str) -> bool: + if self.bucket_exists(bucket_name): + return False + + try: + self._client.make_bucket(bucket_name) + return True + except S3Error as e: + raise RuntimeError(f"Failed to create bucket '{bucket_name}': {e}") + + def put_object( + self, + bucket_name: str, + object_name: str, + data: BinaryIO, + length: int = -1, + content_type: str = "application/octet-stream" + ) -> str: + if not self.bucket_exists(bucket_name): + self.create_bucket(bucket_name) + + try: + self._client.put_object( + bucket_name, + object_name, + data, + length=length, + content_type=content_type + ) + return object_name + except S3Error as e: + raise RuntimeError(f"Failed to put object '{object_name}' in bucket '{bucket_name}': {e}") + + def get_object(self, bucket_name: str, object_name: str) -> bytes: + if not self.bucket_exists(bucket_name): + raise ValueError(f"Bucket '{bucket_name}' does not exist") + + try: + response = self._client.get_object(bucket_name, object_name) + return response.read() + except S3Error as e: + raise RuntimeError(f"Failed to get object '{object_name}' from bucket '{bucket_name}': {e}") + finally: + response.close() + response.release_conn() + + def delete_object(self, bucket_name: str, object_name: str) -> bool: + if not self.bucket_exists(bucket_name): + return False + + try: + self._client.remove_object(bucket_name, object_name) + return True + except S3Error as e: + raise RuntimeError(f"Failed to delete object '{object_name}' from bucket '{bucket_name}': {e}") + + def list_objects(self, bucket_name: str, prefix: Optional[str] = None) -> List[str]: + if not self.bucket_exists(bucket_name): + raise ValueError(f"Bucket '{bucket_name}' does not exist") + + try: + objects = self._client.list_objects(bucket_name, prefix=prefix) + return [obj.object_name for obj in objects] + except S3Error as e: + raise RuntimeError(f"Failed to list objects in bucket '{bucket_name}': {e}") + +if __name__ == "__main__": + processor = MinIOProcessor( + endpoint=os.getenv("MINIO_ENDPOINT", "localhost:9000"), + access_key=os.getenv("MINIO_ACCESS_KEY", "minioadmin"), + secret_key=os.getenv("MINIO_SECRET_KEY", "minioadmin"), + secure=False + ) + + buckets = processor.list_buckets() + processor.create_bucket("new-bucket") + # processor.put_object("new-bucket", "file.pdf", file_data, content_type="application/pdf") + + print(buckets) + # print(processor.get_object("new-bucket", "file.pdf")) \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..61ec42d --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,53 @@ +services: + postgres: + image: postgres:16-alpine + container_name: rag_ai_postgres + restart: unless-stopped + environment: + POSTGRES_USER: ${POSTGRES_USER:-postgres} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres} + POSTGRES_DB: ${POSTGRES_DB:-rag_ai_assistant} + ports: + - "${POSTGRES_PORT:-5432}:5432" + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-postgres}"] + interval: 10s + timeout: 5s + retries: 5 + networks: + - rag_ai_network + + minio: + image: minio/minio:latest + container_name: rag_ai_minio + restart: unless-stopped + environment: + MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin} + MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin} + ports: + - "${MINIO_PORT:-9000}:9000" + - "${MINIO_CONSOLE_PORT:-9001}:9001" + volumes: + - minio_data:/data + command: server /data --console-address ":9001" + healthcheck: + test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:9000/minio/health/live || exit 1"] + interval: 30s + timeout: 20s + retries: 3 + start_period: 40s + networks: + - rag_ai_network + +volumes: + postgres_data: + driver: local + minio_data: + driver: local + +networks: + rag_ai_network: + driver: bridge + diff --git a/init_db.py b/init_db.py new file mode 100644 index 0000000..2465566 --- /dev/null +++ b/init_db.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +""" +Database initialization script. +Run this script to create all database tables. + +Usage: + python init_db.py +""" + +import asyncio +from database.database import init_db, drop_db + + +async def main(): + """Initialize database tables""" + print("=" * 50) + print("Database Initialization Script") + print("=" * 50) + + choice = input("\nWhat would you like to do?\n1. Create tables (init)\n2. Drop and recreate tables (reset)\n3. Exit\n\nChoice: ") + + if choice == "1": + print("\nCreating database tables...") + await init_db() + print("✅ Database initialized successfully!") + + elif choice == "2": + confirm = input("\n⚠️ WARNING: This will DELETE ALL DATA! Are you sure? (yes/no): ") + if confirm.lower() == "yes": + print("\nDropping existing tables...") + await drop_db() + print("Creating new tables...") + await init_db() + print("✅ Database reset successfully!") + else: + print("❌ Operation cancelled") + + elif choice == "3": + print("Exiting...") + + else: + print("❌ Invalid choice") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/logger/logger.py b/logger/logger.py new file mode 100644 index 0000000..0682187 --- /dev/null +++ b/logger/logger.py @@ -0,0 +1,3 @@ +class Loger: + def __init__(self): + pass \ No newline at end of file diff --git a/openrouter_client.py b/openrouter_client.py new file mode 100644 index 0000000..6361f2d --- /dev/null +++ b/openrouter_client.py @@ -0,0 +1,136 @@ +import os +import httpx +import asyncio +import io +from typing import Optional, Dict, Any +from dotenv import load_dotenv +import pdfplumber + +load_dotenv() + +OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY") +OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1" + + +class OpenRouterClient: + def __init__(self, api_key: Optional[str] = None): + self.api_key = api_key or OPENROUTER_API_KEY + if not self.api_key: + raise ValueError("OPENROUTER_API_KEY not set in environment") + + self.headers = { + "Authorization": f"Bearer {self.api_key}", + "HTTP-Referer": "http://localhost:8000", + "X-Title": "RAG AI Assistant", + } + + async def parse_cv_from_pdf( + self, + pdf_bytes: bytes, + model: str = "qwen/qwen-2.5-72b-instruct", + max_retries: int = 3 + ) -> Dict[str, Any]: + with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf: + text = "\n".join([page.extract_text() or "" for page in pdf.pages]) + + if not text.strip(): + raise ValueError("Could not extract text from PDF") + + prompt = f"""Analyze this CV/Resume and extract the following information in JSON format. + +CV Text: +{text} + +Please extract and return ONLY a JSON object with these fields (use "NONE" for missing information): +{{ + "name": "Full name", + "email": "Email address", + "position": "Current or desired job title", + "competencies": "Key competencies and areas of expertise", + "experience": "Work experience summary", + "skills": "Technical and soft skills (comma-separated)", + "country": "Country", + "languages": "Languages spoken (comma-separated)", + "employment_format": "Preferred employment format (remote/office/hybrid)", + "rate": "Salary expectations or rate", + "relocation": "Relocation preferences" +}} + +Important: +- If you can't find an entity, use "NONE" +- Return ONLY the JSON object, no additional text or markdown +""" + + for attempt in range(max_retries): + try: + async with httpx.AsyncClient(timeout=120.0) as client: + response = await client.post( + f"{OPENROUTER_BASE_URL}/chat/completions", + headers=self.headers, + json={ + "model": model, + "messages": [ + { + "role": "user", + "content": prompt + } + ], + "temperature": 0.1, + } + ) + + if response.status_code == 429: + if attempt < max_retries - 1: + wait_time = (2 ** attempt) * 5 + await asyncio.sleep(wait_time) + continue + + response.raise_for_status() + + result = response.json() + content = result["choices"][0]["message"]["content"] + + import json + content = content.strip() + if content.startswith("```json"): + content = content[7:] + if content.startswith("```"): + content = content[3:] + if content.endswith("```"): + content = content[:-3] + content = content.strip() + + return json.loads(content) + + except httpx.HTTPStatusError as e: + if e.response.status_code == 429 and attempt < max_retries - 1: + wait_time = (2 ** attempt) * 5 + await asyncio.sleep(wait_time) + continue + raise + + raise Exception("Rate limit exceeded after retries") + + async def chat_completion( + self, + messages: list, + model: str = "qwen/qwen3-8b", + temperature: float = 0.7 + ) -> str: + async with httpx.AsyncClient(timeout=60.0) as client: + response = await client.post( + f"{OPENROUTER_BASE_URL}/chat/completions", + headers=self.headers, + json={ + "model": model, + "messages": messages, + "temperature": temperature, + } + ) + response.raise_for_status() + + result = response.json() + return result["choices"][0]["message"]["content"] + + +openrouter_client = OpenRouterClient() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..147bef0 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,35 @@ +# FastAPI Framework +fastapi==0.109.0 +uvicorn[standard]==0.27.0 +python-multipart==0.0.6 + +# Pydantic for data validation +pydantic==2.5.3 +pydantic[email]==2.5.3 + +# Database (PostgreSQL with async support) +sqlalchemy==2.0.25 +asyncpg==0.29.0 +psycopg2-binary==2.9.9 +greenlet==3.0.3 + +# Telegram Bot +aiogram==3.3.0 + +# MinIO for file storage +minio==7.2.3 + +# Environment variables +python-dotenv==1.0.0 + +# OpenRouter AI +openai==1.12.0 # Compatible with OpenRouter API + +# PDF processing +PyPDF2==3.0.1 +pdfplumber==0.10.3 + +# Testing (optional but recommended) +pytest==7.4.4 +pytest-asyncio==0.23.3 +httpx==0.26.0 diff --git a/routers/__init__.py b/routers/__init__.py new file mode 100644 index 0000000..873f7bb --- /dev/null +++ b/routers/__init__.py @@ -0,0 +1 @@ +# Routers package diff --git a/routers/chat.py b/routers/chat.py new file mode 100644 index 0000000..96ccd3c --- /dev/null +++ b/routers/chat.py @@ -0,0 +1,156 @@ +from fastapi import APIRouter, HTTPException, Depends, UploadFile, File, Form +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm import selectinload +from pydantic import BaseModel +from typing import Optional, List +from datetime import datetime +import io +import os + +from database.database import get_db, ChatMessage as DBChatMessage, ChatFile as DBChatFile +from database.minio_processor import MinIOProcessor + +minio_client = MinIOProcessor( + endpoint=os.getenv("MINIO_ENDPOINT", "localhost:9000"), + access_key=os.getenv("MINIO_ACCESS_KEY", "minioadmin"), + secret_key=os.getenv("MINIO_SECRET_KEY", "minioadmin"), + secure=False +) +MINIO_BUCKET = os.getenv("MINIO_BUCKET", "resumes") + +router = APIRouter(prefix="/api/chat", tags=["chat"]) + + +class ChatFileResponse(BaseModel): + id: int + file_name: str + file_type: str + file_url: str + created_at: datetime + + class Config: + from_attributes = True + + +class ChatMessageResponse(BaseModel): + id: int + user_id: int + role: str + content: str + is_favorite: bool + files: List[ChatFileResponse] + created_at: datetime + + class Config: + from_attributes = True + + +@router.post("", response_model=ChatMessageResponse) +async def create_chat_message( + user_id: int = Form(...), + role: str = Form(...), + content: str = Form(...), + files: Optional[List[UploadFile]] = File(None), + db: AsyncSession = Depends(get_db) +): + message = DBChatMessage( + user_id=user_id, + role=role, + content=content, + is_favorite=False + ) + db.add(message) + await db.flush() + + if files: + for file in files: + object_name = f"chat/{user_id}/{message.id}/{file.filename}" + try: + file_content = await file.read() + minio_client.put_object( + bucket_name=MINIO_BUCKET, + object_name=object_name, + data=io.BytesIO(file_content), + length=len(file_content), + content_type=file.content_type or "application/octet-stream" + ) + file_url = f"minio://{MINIO_BUCKET}/{object_name}" + except Exception as e: + await db.rollback() + raise HTTPException(status_code=500, detail=f"Failed to upload file: {str(e)}") + + chat_file = DBChatFile( + message_id=message.id, + file_name=file.filename, + file_type=file.content_type or "application/octet-stream", + file_url=file_url + ) + db.add(chat_file) + + await db.commit() + await db.refresh(message) + + result = await db.execute( + select(DBChatMessage) + .where(DBChatMessage.id == message.id) + .options(selectinload(DBChatMessage.files)) + ) + message = result.scalar_one() + + return message + + +@router.get("", response_model=List[ChatMessageResponse]) +async def get_chat_messages( + user_id: Optional[int] = None, + is_favorite: Optional[bool] = None, + db: AsyncSession = Depends(get_db) +): + query = select(DBChatMessage).options(selectinload(DBChatMessage.files)) + + if user_id is not None: + query = query.where(DBChatMessage.user_id == user_id) + if is_favorite is not None: + query = query.where(DBChatMessage.is_favorite == is_favorite) + + query = query.order_by(DBChatMessage.created_at.desc()) + + result = await db.execute(query) + messages = result.scalars().all() + + return messages + + +@router.get("/{message_id}", response_model=ChatMessageResponse) +async def get_chat_message(message_id: int, db: AsyncSession = Depends(get_db)): + result = await db.execute( + select(DBChatMessage) + .where(DBChatMessage.id == message_id) + .options(selectinload(DBChatMessage.files)) + ) + message = result.scalar_one_or_none() + + if not message: + raise HTTPException(status_code=404, detail="Message not found") + + return message + + +@router.put("/{message_id}/favorite", response_model=ChatMessageResponse) +async def toggle_favorite(message_id: int, db: AsyncSession = Depends(get_db)): + result = await db.execute( + select(DBChatMessage) + .where(DBChatMessage.id == message_id) + .options(selectinload(DBChatMessage.files)) + ) + message = result.scalar_one_or_none() + + if not message: + raise HTTPException(status_code=404, detail="Message not found") + + message.is_favorite = not message.is_favorite + await db.commit() + await db.refresh(message) + + return message diff --git a/routers/profile.py b/routers/profile.py new file mode 100644 index 0000000..b599cae --- /dev/null +++ b/routers/profile.py @@ -0,0 +1,179 @@ +from fastapi import APIRouter, File, UploadFile, Form, HTTPException, Depends +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession +from pydantic import BaseModel, EmailStr +from typing import Optional +import io +import os + +from database.database import get_db, Profile as DBProfile +from database.minio_processor import MinIOProcessor +from openrouter_client import openrouter_client + +minio_client = MinIOProcessor( + endpoint=os.getenv("MINIO_ENDPOINT", "localhost:9000"), + access_key=os.getenv("MINIO_ACCESS_KEY", "minioadmin"), + secret_key=os.getenv("MINIO_SECRET_KEY", "minioadmin"), + secure=False +) +MINIO_BUCKET = os.getenv("MINIO_BUCKET", "resumes") + +router = APIRouter(prefix="/api/profile", tags=["profile"]) + + +class ProfileResponse(BaseModel): + id: int + name: str + email: str + position: str + competencies: Optional[str] + experience: Optional[str] + skills: Optional[str] + country: Optional[str] + languages: Optional[str] + employment_format: Optional[str] + rate: Optional[str] + relocation: Optional[str] + cv_url: Optional[str] + + class Config: + from_attributes = True + + +@router.post("/upload-cv", response_model=ProfileResponse) +async def upload_and_parse_cv( + cv: UploadFile = File(...), + db: AsyncSession = Depends(get_db) +): + if cv.content_type != "application/pdf": + raise HTTPException(status_code=400, detail="CV must be a PDF file") + + pdf_content = await cv.read() + + try: + parsed_data = await openrouter_client.parse_cv_from_pdf(pdf_content) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error parsing CV with AI: {str(e)}") + + if not parsed_data.get("email"): + raise HTTPException(status_code=400, detail="Could not extract email from CV") + + email = parsed_data["email"] + + result = await db.execute(select(DBProfile).where(DBProfile.email == email)) + existing_profile = result.scalar_one_or_none() + + object_name = f"cv/{email.replace('@', '_')}_{cv.filename}" + try: + await cv.seek(0) + cv_file_data = await cv.read() + await cv.seek(0) + + minio_client.put_object( + bucket_name=MINIO_BUCKET, + object_name=object_name, + data=io.BytesIO(cv_file_data), + length=len(cv_file_data), + content_type="application/pdf" + ) + cv_url = f"minio://{MINIO_BUCKET}/{object_name}" + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to upload CV to storage: {str(e)}") + + if existing_profile: + existing_profile.name = parsed_data.get("name") or existing_profile.name + existing_profile.position = parsed_data.get("position") or existing_profile.position + existing_profile.competencies = parsed_data.get("competencies") + existing_profile.experience = parsed_data.get("experience") + existing_profile.skills = parsed_data.get("skills") + existing_profile.country = parsed_data.get("country") + existing_profile.languages = parsed_data.get("languages") + existing_profile.employment_format = parsed_data.get("employment_format") + existing_profile.rate = parsed_data.get("rate") + existing_profile.relocation = parsed_data.get("relocation") + existing_profile.cv_url = cv_url + profile = existing_profile + else: + profile = DBProfile( + email=email, + name=parsed_data.get("name", "Unknown"), + position=parsed_data.get("position", "Not specified"), + competencies=parsed_data.get("competencies"), + experience=parsed_data.get("experience"), + skills=parsed_data.get("skills"), + country=parsed_data.get("country"), + languages=parsed_data.get("languages"), + employment_format=parsed_data.get("employment_format"), + rate=parsed_data.get("rate"), + relocation=parsed_data.get("relocation"), + cv_url=cv_url + ) + db.add(profile) + + await db.commit() + await db.refresh(profile) + + return profile + + +@router.post("/save", response_model=ProfileResponse) +async def save_profile_manual( + name: str = Form(...), + email: EmailStr = Form(...), + position: str = Form(...), + competencies: Optional[str] = Form(None), + experience: Optional[str] = Form(None), + skills: Optional[str] = Form(None), + country: Optional[str] = Form(None), + languages: Optional[str] = Form(None), + employment_format: Optional[str] = Form(None), + rate: Optional[str] = Form(None), + relocation: Optional[str] = Form(None), + db: AsyncSession = Depends(get_db) +): + result = await db.execute(select(DBProfile).where(DBProfile.email == email)) + existing_profile = result.scalar_one_or_none() + + if existing_profile: + existing_profile.name = name + existing_profile.position = position + existing_profile.competencies = competencies + existing_profile.experience = experience + existing_profile.skills = skills + existing_profile.country = country + existing_profile.languages = languages + existing_profile.employment_format = employment_format + existing_profile.rate = rate + existing_profile.relocation = relocation + profile = existing_profile + else: + profile = DBProfile( + name=name, + email=email, + position=position, + competencies=competencies, + experience=experience, + skills=skills, + country=country, + languages=languages, + employment_format=employment_format, + rate=rate, + relocation=relocation + ) + db.add(profile) + + await db.commit() + await db.refresh(profile) + + return profile + + +@router.get("/{email}", response_model=ProfileResponse) +async def get_profile(email: str, db: AsyncSession = Depends(get_db)): + result = await db.execute(select(DBProfile).where(DBProfile.email == email)) + profile = result.scalar_one_or_none() + + if not profile: + raise HTTPException(status_code=404, detail="Profile not found") + + return profile diff --git a/routers/vacancies.py b/routers/vacancies.py new file mode 100644 index 0000000..2faf0ea --- /dev/null +++ b/routers/vacancies.py @@ -0,0 +1,69 @@ +from fastapi import APIRouter, HTTPException, Depends +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession +from pydantic import BaseModel +from typing import Optional, List +from datetime import datetime + +from database.database import get_db, Vacancy as DBVacancy + +router = APIRouter(prefix="/api/vacancies", tags=["vacancies"]) + + +class VacancyListItem(BaseModel): + id: int + position: str + salary_min: Optional[int] + salary_max: Optional[int] + salary_currency: Optional[str] + work_arrangement: Optional[str] + key_competencies: Optional[str] + + class Config: + from_attributes = True + + +class ContactItem(BaseModel): + id: Optional[int] + type: str + value: str + + +class VacancyDetail(BaseModel): + id: int + company_name: str + contacts: Optional[List[ContactItem]] + country_name: Optional[str] + created_at: datetime + description: Optional[str] + key_competencies: Optional[str] + locations: Optional[str] + position: str + relocation_supported: Optional[bool] + required_language_codes: Optional[List[str]] + salary_currency: Optional[str] + salary_min: Optional[int] + salary_max: Optional[int] + seniority_level: Optional[str] + work_arrangement: Optional[str] + + class Config: + from_attributes = True + + +@router.get("", response_model=List[VacancyListItem]) +async def get_all_vacancies(db: AsyncSession = Depends(get_db)): + result = await db.execute(select(DBVacancy)) + vacancies = result.scalars().all() + return vacancies + + +@router.get("/{vacancy_id}", response_model=VacancyDetail) +async def get_vacancy_by_id(vacancy_id: int, db: AsyncSession = Depends(get_db)): + result = await db.execute(select(DBVacancy).where(DBVacancy.id == vacancy_id)) + vacancy = result.scalar_one_or_none() + + if not vacancy: + raise HTTPException(status_code=404, detail="Vacancy not found") + + return vacancy diff --git a/tests/authorize_tests.py b/tests/authorize_tests.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_bot.py b/tests/test_bot.py new file mode 100644 index 0000000..b0d1907 --- /dev/null +++ b/tests/test_bot.py @@ -0,0 +1,112 @@ +""" +Test file for bot.py + +To run the bot: + python bot.py + +To test the bot functionality: + python -m pytest tests/test_bot.py -v + or + python tests/test_bot.py +""" + +import pytest +import asyncio +from unittest.mock import AsyncMock, MagicMock, patch +import sys +import os + +# Add parent directory to path to import bot module +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from bot import cmd_start, fake_db + + +class TestBot: + """Test cases for bot functionality""" + + def setup_method(self): + """Clear fake_db before each test""" + fake_db.clear() + + @pytest.mark.asyncio + async def test_start_command_without_token(self): + """Test /start command without token""" + # Create mock message + message = AsyncMock() + message.from_user.id = 12345 + message.from_user.username = "test_user" + message.answer = AsyncMock() + + # Create mock command object + command = MagicMock() + command.args = None + + # Call the handler + await cmd_start(message, command) + + # Verify the response + message.answer.assert_called_once_with( + "Пожалуйста, перейдите по ссылке с сайта для авторизации." + ) + + @pytest.mark.asyncio + async def test_start_command_with_valid_token(self): + """Test /start command with valid token""" + # Setup: add token to fake_db + test_token = "test-token-123" + fake_db[test_token] = {"status": "pending"} + + # Create mock message + message = AsyncMock() + message.from_user.id = 12345 + message.from_user.username = "test_user" + message.answer = AsyncMock() + + # Create mock command object + command = MagicMock() + command.args = test_token + + # Call the handler + await cmd_start(message, command) + + # Verify the response + message.answer.assert_called_once_with( + "Успешная авторизация! Вернитесь на сайт, вас должно автоматически авторизовать." + ) + + # Verify fake_db was updated + assert fake_db[test_token]["status"] == "success" + assert fake_db[test_token]["telegram_id"] == 12345 + assert fake_db[test_token]["username"] == "test_user" + + @pytest.mark.asyncio + async def test_start_command_with_invalid_token(self): + """Test /start command with invalid token""" + # Create mock message + message = AsyncMock() + message.from_user.id = 12345 + message.from_user.username = "test_user" + message.answer = AsyncMock() + + # Create mock command object + command = MagicMock() + command.args = "invalid-token" + + # Call the handler + await cmd_start(message, command) + + # Verify the response + message.answer.assert_called_once_with( + "Ошибка: неверный или истекший токен." + ) + + # Verify fake_db was not updated + assert "invalid-token" not in fake_db + + +if __name__ == "__main__": + """Run tests directly""" + print("Running bot tests...") + pytest.main([__file__, "-v"]) +