diff --git a/.gitignore b/.gitignore index f8ab18c..e49f677 100644 --- a/.gitignore +++ b/.gitignore @@ -70,4 +70,4 @@ htmlcov/ # uv uv.lock -model/* +model/ diff --git a/app/api/v1/endpoints/convert.py b/app/api/v1/endpoints/convert.py index 0ffa29a..256c085 100644 --- a/app/api/v1/endpoints/convert.py +++ b/app/api/v1/endpoints/convert.py @@ -34,4 +34,3 @@ async def convert_markdown_to_docx( media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document", headers={"Content-Disposition": f'attachment; filename="{filename}"'}, ) - diff --git a/app/core/config.py b/app/core/config.py index 0a57ad1..af18a14 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -4,6 +4,7 @@ from functools import lru_cache from pathlib import Path from pydantic_settings import BaseSettings, SettingsConfigDict +import torch class Settings(BaseSettings): @@ -23,13 +24,15 @@ class Settings(BaseSettings): paddleocr_vl_url: str = "http://localhost:8080/v1" # Model Paths - doclayout_model_path: str = "app/model/DocLayout" - pp_doclayout_model_dir: str = "app/model/PP-DocLayout" + doclayout_model_path: str = "app/model/DocLayout/best.pt" + pp_doclayout_model_dir: str = "app/model/PP-DocLayout/PP-DocLayoutV2" # Image Processing max_image_size_mb: int = 10 image_padding_ratio: float = 0.15 # 15% on each side = 30% total expansion + device: torch.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # cuda:0 or cpu + # Server Settings host: str = "0.0.0.0" port: int = 8053 @@ -49,4 +52,3 @@ class Settings(BaseSettings): def get_settings() -> Settings: """Get cached settings instance.""" return Settings() - diff --git a/app/services/image_processor.py b/app/services/image_processor.py index e9c0e26..34a6419 100644 --- a/app/services/image_processor.py +++ b/app/services/image_processor.py @@ -136,4 +136,3 @@ class ImageProcessor: buffer.seek(0) return base64.b64encode(buffer.getvalue()).decode("utf-8") - diff --git a/app/services/layout_detector.py b/app/services/layout_detector.py index 03bb020..b7ed407 100644 --- a/app/services/layout_detector.py +++ b/app/services/layout_detector.py @@ -3,6 +3,9 @@ import numpy as np from app.schemas.image import LayoutInfo, LayoutRegion +from app.core.config import get_settings + +settings = get_settings() class LayoutDetector: @@ -73,7 +76,7 @@ class LayoutDetector: image, imgsz=image_size, conf=self.confidence_threshold, - device="cuda:0", + device=settings.device, ) regions: list[LayoutRegion] = []