fix: remove some requirements

This commit is contained in:
TomOPlomo
2025-08-07 15:33:09 +02:00
parent addb11fa93
commit 8faee3e6e1
3 changed files with 14 additions and 32 deletions

4
.gitignore vendored
View File

@@ -1 +1,3 @@
.env .env
.DS_Store
output/*

View File

@@ -263,7 +263,7 @@ TEXTE DE LA PAGE {page_num}:
logger.warning(f"⚠️ Page {page_num} is empty") logger.warning(f"⚠️ Page {page_num} is empty")
return [] return []
max_chars = 8000 # todo max_chars = 8000
user_prompt = user_prompt_template.format( user_prompt = user_prompt_template.format(
page_num=page_num, page_num=page_num,
text=page_text[:max_chars] text=page_text[:max_chars]
@@ -278,8 +278,8 @@ TEXTE DE LA PAGE {page_num}:
{"role": "system", "content": system_prompt}, {"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt} {"role": "user", "content": user_prompt}
], ],
temperature=0.1, temperature=1.0,
max_tokens=4000, # todo max_tokens=4000,
response_format={"type": "json_object"} response_format={"type": "json_object"}
) )

View File

@@ -1,36 +1,16 @@
# Azure services # Azure services (required)
azure-ai-formrecognizer==3.3.0 azure-ai-formrecognizer==3.3.0
azure-core>=1.29.0 azure-core>=1.29.0
openai>=1.0.0 openai>=1.0.0
# PDF processing # Environment variables
pdf2image==1.17.0 python-dotenv==1.0.0
Pillow>=10.0.0
pytesseract==0.3.13 # Fallback OCR # PDF processing (required for page counting)
pdfplumber==0.10.3 # Enhanced table extraction
PyPDF2>=3.0.1 PyPDF2>=3.0.1
# CLI and utilities # CLI interface
click==8.1.7 click==8.1.7
pathlib2==2.3.7.post1
# Data handling # Python built-in modules extensions (if needed for older Python versions)
numpy>=1.24.0 typing-extensions>=4.0.0 # Only if Python < 3.8
python-dateutil>=2.8.2
# For better logging
colorlog>=6.7.0
# Development tools (optional)
pytest>=7.4.0
black>=23.0.0
flake8>=6.0.0
# Type hints
typing-extensions>=4.0.0
dataclasses>=0.6
# For text processing
nltk>=3.8.1
textstat>=0.7.3
python-dotenv==1.0.0