"""Async client for the OpenRouter chat-completions API, plus a CV-parsing helper."""

import asyncio
import io
import json
import os
from typing import Any, Dict, Optional

import httpx
import pdfplumber
from dotenv import load_dotenv

load_dotenv()

OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"


def _extract_pdf_text(pdf_bytes: bytes) -> str:
    """Return the text of every page in the PDF, joined with newlines.

    Pages for which ``extract_text()`` yields nothing contribute an empty string.
    """
    with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
        return "\n".join(page.extract_text() or "" for page in pdf.pages)


def _build_cv_prompt(text: str) -> str:
    """Return the extraction prompt sent to the model for the given CV text."""
    return f"""Analyze this CV/Resume and extract the following information in JSON format.

CV Text:
{text}

Please extract and return ONLY a JSON object with these fields (use "NONE" for missing information):
{{
    "name": "Full name",
    "email": "Email address",
    "position": "Current or desired job title",
    "competencies": "Key competencies and areas of expertise",
    "experience": "Work experience summary",
    "skills": "Technical and soft skills (comma-separated)",
    "country": "Country",
    "languages": "Languages spoken (comma-separated)",
    "employment_format": "Preferred employment format (remote/office/hybrid)",
    "rate": "Salary expectations or rate",
    "relocation": "Relocation preferences"
}}

Important:
- If you can't find an entity, use "NONE"
- Return ONLY the JSON object, no additional text or markdown
"""


def _strip_code_fence(content: str) -> str:
    """Remove a leading ```json / ``` marker and a trailing ``` marker, if present."""
    content = content.strip()
    if content.startswith("```json"):
        content = content[7:]
    if content.startswith("```"):
        content = content[3:]
    if content.endswith("```"):
        content = content[:-3]
    return content.strip()


def _parse_json_reply(content: str) -> Any:
    """Decode the JSON document contained in a model reply.

    The reply is first parsed after stripping a surrounding Markdown code
    fence. If that fails, the span from the first ``{`` to the last ``}`` is
    tried, which tolerates prose placed around the object.

    Raises:
        json.JSONDecodeError: If neither attempt yields valid JSON. The error
            from the first attempt is the one raised.
    """
    cleaned = _strip_code_fence(content)
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError as first_error:
        start = cleaned.find("{")
        end = cleaned.rfind("}")
        if start == -1 or end < start:
            raise
        try:
            return json.loads(cleaned[start:end + 1])
        except json.JSONDecodeError:
            # Report the failure on the full reply, not on the guessed span.
            raise first_error from None


class OpenRouterClient:
    """Thin async wrapper around the OpenRouter ``/chat/completions`` endpoint."""

    def __init__(self, api_key: Optional[str] = None):
        """Store the API key and build the request headers.

        Args:
            api_key: Key to use; falls back to the ``OPENROUTER_API_KEY``
                environment variable when omitted or empty.

        Raises:
            ValueError: If no key is available from either source.
        """
        self.api_key = api_key or OPENROUTER_API_KEY
        if not self.api_key:
            raise ValueError("OPENROUTER_API_KEY not set in environment")

        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "HTTP-Referer": "http://localhost:8000",
            "X-Title": "RAG AI Assistant",
        }

    async def _post_chat(
        self, payload: Dict[str, Any], timeout: float
    ) -> httpx.Response:
        """POST *payload* to the chat-completions endpoint and return the response.

        A fresh ``httpx.AsyncClient`` is opened per call. The HTTP status is
        not checked here; callers decide how to treat error responses.
        """
        async with httpx.AsyncClient(timeout=timeout) as client:
            return await client.post(
                f"{OPENROUTER_BASE_URL}/chat/completions",
                headers=self.headers,
                json=payload,
            )

    async def parse_cv_from_pdf(
        self,
        pdf_bytes: bytes,
        model: str = "qwen/qwen-2.5-72b-instruct",
        max_retries: int = 3
    ) -> Dict[str, Any]:
        """Extract structured CV fields from a PDF by prompting a chat model.

        Args:
            pdf_bytes: Raw bytes of the PDF file.
            model: Model identifier sent in the request.
            max_retries: Maximum number of requests to attempt. Only HTTP 429
                responses are retried, sleeping 5s, 10s, 20s, ... between
                attempts.

        Returns:
            The JSON value decoded from the model's reply.

        Raises:
            ValueError: If no text could be extracted from the PDF.
            httpx.HTTPStatusError: On an error status, including a 429 on the
                final attempt.
            json.JSONDecodeError: If the reply does not contain valid JSON.
            RuntimeError: If ``max_retries`` is less than 1, so no request
                was attempted.
        """
        # NOTE(review): PDF parsing is synchronous and runs inside this
        # coroutine; consider offloading to an executor for large files.
        text = _extract_pdf_text(pdf_bytes)
        if not text.strip():
            raise ValueError("Could not extract text from PDF")

        payload = {
            "model": model,
            "messages": [
                {
                    "role": "user",
                    "content": _build_cv_prompt(text),
                }
            ],
            "temperature": 0.1,
        }

        for attempt in range(max_retries):
            response = await self._post_chat(payload, timeout=120.0)

            # Back off and retry on rate limiting, unless this was the last
            # attempt, in which case raise_for_status() surfaces the 429.
            if response.status_code == 429 and attempt < max_retries - 1:
                await asyncio.sleep((2 ** attempt) * 5)
                continue

            response.raise_for_status()
            content = response.json()["choices"][0]["message"]["content"]
            return _parse_json_reply(content)

        # Reached only when the loop body never ran (max_retries < 1).
        raise RuntimeError("Rate limit exceeded after retries")

    async def chat_completion(
        self,
        messages: list,
        model: str = "qwen/qwen3-8b",
        temperature: float = 0.7
    ) -> str:
        """Send *messages* to the chat model and return the reply text.

        Args:
            messages: Chat messages, passed through unchanged as the
                request's ``messages`` field.
            model: Model identifier sent in the request.
            temperature: Sampling temperature sent in the request.

        Returns:
            The ``content`` of the first choice's message.

        Raises:
            httpx.HTTPStatusError: On an error status. No retry is attempted.
        """
        payload = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
        }
        response = await self._post_chat(payload, timeout=60.0)
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]


# Shared instance created at import time; construction raises ValueError when
# OPENROUTER_API_KEY is not set.
openrouter_client = OpenRouterClient()