fix: remove some requirements
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1 +1,3 @@
|
||||
.env
|
||||
.DS_Store
|
||||
output/*
|
||||
@@ -263,7 +263,7 @@ TEXTE DE LA PAGE {page_num}:
|
||||
logger.warning(f"⚠️ Page {page_num} is empty")
|
||||
return []
|
||||
|
||||
max_chars = 8000 # todo
|
||||
max_chars = 8000
|
||||
user_prompt = user_prompt_template.format(
|
||||
page_num=page_num,
|
||||
text=page_text[:max_chars]
|
||||
@@ -278,8 +278,8 @@ TEXTE DE LA PAGE {page_num}:
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": user_prompt}
|
||||
],
|
||||
temperature=0.1,
|
||||
max_tokens=4000, # todo
|
||||
temperature=1.0,
|
||||
max_tokens=4000,
|
||||
response_format={"type": "json_object"}
|
||||
)
|
||||
|
||||
|
||||
@@ -1,36 +1,16 @@
|
||||
# Azure services
|
||||
# Azure services (required)
|
||||
azure-ai-formrecognizer==3.3.0
|
||||
azure-core>=1.29.0
|
||||
openai>=1.0.0
|
||||
|
||||
# PDF processing
|
||||
pdf2image==1.17.0
|
||||
Pillow>=10.0.0
|
||||
pytesseract==0.3.13 # Fallback OCR
|
||||
pdfplumber==0.10.3 # Enhanced table extraction
|
||||
# Environment variables
|
||||
python-dotenv==1.0.0
|
||||
|
||||
# PDF processing (required for page counting)
|
||||
PyPDF2>=3.0.1
|
||||
|
||||
# CLI and utilities
|
||||
# CLI interface
|
||||
click==8.1.7
|
||||
pathlib2==2.3.7.post1
|
||||
|
||||
# Data handling
|
||||
numpy>=1.24.0
|
||||
python-dateutil>=2.8.2
|
||||
|
||||
# For better logging
|
||||
colorlog>=6.7.0
|
||||
|
||||
# Development tools (optional)
|
||||
pytest>=7.4.0
|
||||
black>=23.0.0
|
||||
flake8>=6.0.0
|
||||
|
||||
# Type hints
|
||||
typing-extensions>=4.0.0
|
||||
dataclasses>=0.6
|
||||
|
||||
# For text processing
|
||||
nltk>=3.8.1
|
||||
textstat>=0.7.3
|
||||
python-dotenv==1.0.0
|
||||
# Python built-in modules extensions (if needed for older Python versions)
|
||||
typing-extensions>=4.0.0 # Only if Python < 3.8
|
||||
Reference in New Issue
Block a user