feat add glm-ocr core
This commit is contained in:
@@ -3,9 +3,8 @@
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
import torch
|
||||
from typing import Optional
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
@@ -48,21 +47,25 @@ class Settings(BaseSettings):
|
||||
is_padding: bool = True
|
||||
padding_ratio: float = 0.1
|
||||
|
||||
max_tokens: int = 4096
|
||||
|
||||
# Model Paths
|
||||
pp_doclayout_model_dir: Optional[str] = "/home/yoge/.cache/modelscope/hub/models/PaddlePaddle/PP-DocLayoutV3"
|
||||
pp_doclayout_model_dir: str | None = (
|
||||
"/home/yoge/.cache/modelscope/hub/models/PaddlePaddle/PP-DocLayoutV3"
|
||||
)
|
||||
|
||||
# Image Processing
|
||||
max_image_size_mb: int = 10
|
||||
image_padding_ratio: float = 0.1 # 10% on each side = 20% total expansion
|
||||
|
||||
device: torch.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # cuda:0 or cpu
|
||||
device: torch.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
||||
|
||||
# Server Settings
|
||||
host: str = "0.0.0.0"
|
||||
port: int = 8053
|
||||
|
||||
# Logging Settings
|
||||
log_dir: Optional[str] = None # Defaults to /app/logs in container or ./logs locally
|
||||
log_dir: str | None = None # Defaults to /app/logs in container or ./logs locally
|
||||
log_level: str = "INFO" # DEBUG, INFO, WARNING, ERROR, CRITICAL
|
||||
|
||||
@property
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from app.services.image_processor import ImageProcessor
|
||||
from app.services.layout_detector import LayoutDetector
|
||||
from app.services.ocr_service import OCRService, MineruOCRService, GLMOCRService
|
||||
from app.services.ocr_service import GLMOCREndToEndService
|
||||
from app.services.converter import Converter
|
||||
from app.core.config import get_settings
|
||||
|
||||
@@ -31,40 +31,17 @@ def get_image_processor() -> ImageProcessor:
|
||||
return ImageProcessor()
|
||||
|
||||
|
||||
def get_ocr_service() -> OCRService:
|
||||
"""Get an OCR service instance."""
|
||||
return OCRService(
|
||||
vl_server_url=get_settings().paddleocr_vl_url,
|
||||
layout_detector=get_layout_detector(),
|
||||
image_processor=get_image_processor(),
|
||||
converter=get_converter(),
|
||||
)
|
||||
|
||||
|
||||
def get_converter() -> Converter:
|
||||
"""Get a DOCX converter instance."""
|
||||
return Converter()
|
||||
|
||||
|
||||
def get_mineru_ocr_service() -> MineruOCRService:
|
||||
"""Get a MinerOCR service instance."""
|
||||
def get_glmocr_endtoend_service() -> GLMOCREndToEndService:
|
||||
"""Get end-to-end GLM-OCR service (layout detection + per-region OCR)."""
|
||||
settings = get_settings()
|
||||
api_url = getattr(settings, "miner_ocr_api_url", "http://127.0.0.1:8000/file_parse")
|
||||
glm_ocr_url = getattr(settings, "glm_ocr_url", "http://localhost:8002/v1")
|
||||
return MineruOCRService(
|
||||
api_url=api_url,
|
||||
converter=get_converter(),
|
||||
image_processor=get_image_processor(),
|
||||
glm_ocr_url=glm_ocr_url,
|
||||
)
|
||||
|
||||
|
||||
def get_glmocr_service() -> GLMOCRService:
|
||||
"""Get a GLM OCR service instance."""
|
||||
settings = get_settings()
|
||||
glm_ocr_url = getattr(settings, "glm_ocr_url", "http://127.0.0.1:8002/v1")
|
||||
return GLMOCRService(
|
||||
vl_server_url=glm_ocr_url,
|
||||
return GLMOCREndToEndService(
|
||||
vl_server_url=settings.glm_ocr_url,
|
||||
image_processor=get_image_processor(),
|
||||
converter=get_converter(),
|
||||
layout_detector=get_layout_detector(),
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user