fix: remove some requirements
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1 +1,3 @@
|
|||||||
.env
|
.env
|
||||||
|
.DS_Store
|
||||||
|
output/*
|
||||||
@@ -263,7 +263,7 @@ TEXTE DE LA PAGE {page_num}:
|
|||||||
logger.warning(f"⚠️ Page {page_num} is empty")
|
logger.warning(f"⚠️ Page {page_num} is empty")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
max_chars = 8000 # todo
|
max_chars = 8000
|
||||||
user_prompt = user_prompt_template.format(
|
user_prompt = user_prompt_template.format(
|
||||||
page_num=page_num,
|
page_num=page_num,
|
||||||
text=page_text[:max_chars]
|
text=page_text[:max_chars]
|
||||||
@@ -278,8 +278,8 @@ TEXTE DE LA PAGE {page_num}:
|
|||||||
{"role": "system", "content": system_prompt},
|
{"role": "system", "content": system_prompt},
|
||||||
{"role": "user", "content": user_prompt}
|
{"role": "user", "content": user_prompt}
|
||||||
],
|
],
|
||||||
temperature=0.1,
|
temperature=1.0,
|
||||||
max_tokens=4000, # todo
|
max_tokens=4000,
|
||||||
response_format={"type": "json_object"}
|
response_format={"type": "json_object"}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -1,36 +1,16 @@
|
|||||||
# Azure services
|
# Azure services (required)
|
||||||
azure-ai-formrecognizer==3.3.0
|
azure-ai-formrecognizer==3.3.0
|
||||||
azure-core>=1.29.0
|
azure-core>=1.29.0
|
||||||
openai>=1.0.0
|
openai>=1.0.0
|
||||||
|
|
||||||
# PDF processing
|
# Environment variables
|
||||||
pdf2image==1.17.0
|
python-dotenv==1.0.0
|
||||||
Pillow>=10.0.0
|
|
||||||
pytesseract==0.3.13 # Fallback OCR
|
# PDF processing (required for page counting)
|
||||||
pdfplumber==0.10.3 # Enhanced table extraction
|
|
||||||
PyPDF2>=3.0.1
|
PyPDF2>=3.0.1
|
||||||
|
|
||||||
# CLI and utilities
|
# CLI interface
|
||||||
click==8.1.7
|
click==8.1.7
|
||||||
pathlib2==2.3.7.post1
|
|
||||||
|
|
||||||
# Data handling
|
# Python built-in modules extensions (if needed for older Python versions)
|
||||||
numpy>=1.24.0
|
typing-extensions>=4.0.0 # Only if Python < 3.8
|
||||||
python-dateutil>=2.8.2
|
|
||||||
|
|
||||||
# For better logging
|
|
||||||
colorlog>=6.7.0
|
|
||||||
|
|
||||||
# Development tools (optional)
|
|
||||||
pytest>=7.4.0
|
|
||||||
black>=23.0.0
|
|
||||||
flake8>=6.0.0
|
|
||||||
|
|
||||||
# Type hints
|
|
||||||
typing-extensions>=4.0.0
|
|
||||||
dataclasses>=0.6
|
|
||||||
|
|
||||||
# For text processing
|
|
||||||
nltk>=3.8.1
|
|
||||||
textstat>=0.7.3
|
|
||||||
python-dotenv==1.0.0
|
|
||||||
Reference in New Issue
Block a user