fix: remove some requirements

This commit is contained in:
TomOPlomo
2025-08-07 15:33:09 +02:00
parent addb11fa93
commit 8faee3e6e1
3 changed files with 14 additions and 32 deletions

2
.gitignore vendored
View File

@@ -1 +1,3 @@
.env
.DS_Store
output/*

View File

@@ -263,7 +263,7 @@ TEXTE DE LA PAGE {page_num}:
logger.warning(f"⚠️ Page {page_num} is empty")
return []
max_chars = 8000 # todo
max_chars = 8000
user_prompt = user_prompt_template.format(
page_num=page_num,
text=page_text[:max_chars]
@@ -278,8 +278,8 @@ TEXTE DE LA PAGE {page_num}:
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
],
temperature=0.1,
max_tokens=4000, # todo
temperature=1.0,
max_tokens=4000,
response_format={"type": "json_object"}
)

View File

@@ -1,36 +1,16 @@
# Azure services
# Azure services (required)
azure-ai-formrecognizer==3.3.0
azure-core>=1.29.0
openai>=1.0.0
# PDF processing
pdf2image==1.17.0
Pillow>=10.0.0
pytesseract==0.3.13 # Fallback OCR
pdfplumber==0.10.3 # Enhanced table extraction
# Environment variables
python-dotenv==1.0.0
# PDF processing (required for page counting)
PyPDF2>=3.0.1
# CLI and utilities
# CLI interface
click==8.1.7
pathlib2==2.3.7.post1
# Data handling
numpy>=1.24.0
python-dateutil>=2.8.2
# For better logging
colorlog>=6.7.0
# Development tools (optional)
pytest>=7.4.0
black>=23.0.0
flake8>=6.0.0
# Type hints
typing-extensions>=4.0.0
dataclasses>=0.6
# For text processing
nltk>=3.8.1
textstat>=0.7.3
python-dotenv==1.0.0
# Backports of Python standard-library features (only needed on older Python versions)
typing-extensions>=4.0.0 # Only needed when running on Python < 3.8