From 8faee3e6e1601efbc6043cd8c296046c5b40451f Mon Sep 17 00:00:00 2001 From: TomOPlomo Date: Thu, 7 Aug 2025 15:33:09 +0200 Subject: [PATCH] fix: remove some requirements --- .gitignore | 4 +++- extract_single.py | 6 +++--- requirements.txt | 36 ++++++++---------------------------- 3 files changed, 14 insertions(+), 32 deletions(-) diff --git a/.gitignore b/.gitignore index 2eea525..7ca967b 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ -.env \ No newline at end of file +.env +.DS_Store +output/* \ No newline at end of file diff --git a/extract_single.py b/extract_single.py index 2b16f8c..8598d02 100644 --- a/extract_single.py +++ b/extract_single.py @@ -263,7 +263,7 @@ TEXTE DE LA PAGE {page_num}: logger.warning(f"⚠️ Page {page_num} is empty") return [] - max_chars = 8000 # todo + max_chars = 8000 user_prompt = user_prompt_template.format( page_num=page_num, text=page_text[:max_chars] @@ -278,8 +278,8 @@ TEXTE DE LA PAGE {page_num}: {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt} ], - temperature=0.1, - max_tokens=4000, # todo + temperature=1.0, + max_tokens=4000, response_format={"type": "json_object"} ) diff --git a/requirements.txt b/requirements.txt index 16e98a7..fda054d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,36 +1,16 @@ -# Azure services +# Azure services (required) azure-ai-formrecognizer==3.3.0 azure-core>=1.29.0 openai>=1.0.0 -# PDF processing -pdf2image==1.17.0 -Pillow>=10.0.0 -pytesseract==0.3.13 # Fallback OCR -pdfplumber==0.10.3 # Enhanced table extraction +# Environment variables +python-dotenv==1.0.0 + +# PDF processing (required for page counting) PyPDF2>=3.0.1 -# CLI and utilities +# CLI interface click==8.1.7 -pathlib2==2.3.7.post1 -# Data handling -numpy>=1.24.0 -python-dateutil>=2.8.2 - -# For better logging -colorlog>=6.7.0 - -# Development tools (optional) -pytest>=7.4.0 -black>=23.0.0 -flake8>=6.0.0 - -# Type hints -typing-extensions>=4.0.0 -dataclasses>=0.6 - -# For text processing -nltk>=3.8.1 -textstat>=0.7.3 -python-dotenv==1.0.0 \ No newline at end of file +# Python built-in modules extensions (if needed for older Python versions) +typing-extensions>=4.0.0 # Only if Python < 3.8 \ No newline at end of file