Compare commits
5 Commits
16399f0929
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
39e72a5743 | ||
| aee1a1bf3b | |||
| ff82021467 | |||
|
|
11e9ed780d | ||
|
|
d1050acbdc |
@@ -1,15 +1,16 @@
|
|||||||
"""Format conversion endpoints."""
|
"""Format conversion endpoints."""
|
||||||
|
|
||||||
import logging
|
from urllib.parse import quote
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, HTTPException
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
from fastapi.responses import Response
|
from fastapi.responses import Response
|
||||||
|
|
||||||
from app.core.dependencies import get_converter
|
from app.core.dependencies import get_converter
|
||||||
|
from app.core.logging_config import get_logger
|
||||||
from app.schemas.convert import LatexToOmmlRequest, LatexToOmmlResponse, MarkdownToDocxRequest
|
from app.schemas.convert import LatexToOmmlRequest, LatexToOmmlResponse, MarkdownToDocxRequest
|
||||||
from app.services.converter import Converter
|
from app.services.converter import Converter
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = get_logger()
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
@@ -35,10 +36,11 @@ async def convert_markdown_to_docx(
|
|||||||
request.filename,
|
request.filename,
|
||||||
len(docx_bytes),
|
len(docx_bytes),
|
||||||
)
|
)
|
||||||
|
encoded_name = quote(f"{request.filename}.docx")
|
||||||
return Response(
|
return Response(
|
||||||
content=docx_bytes,
|
content=docx_bytes,
|
||||||
media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
headers={"Content-Disposition": f'attachment; filename="{request.filename}.docx"'},
|
headers={"Content-Disposition": f"attachment; filename*=UTF-8''{encoded_name}"},
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception("DOCX conversion failed, filename=%s: %s", request.filename, e)
|
logger.exception("DOCX conversion failed, filename=%s: %s", request.filename, e)
|
||||||
|
|||||||
@@ -50,9 +50,7 @@ class Settings(BaseSettings):
|
|||||||
max_tokens: int = 4096
|
max_tokens: int = 4096
|
||||||
|
|
||||||
# Model Paths
|
# Model Paths
|
||||||
pp_doclayout_model_dir: str | None = (
|
pp_doclayout_model_dir: str | None = "/home/yoge/.cache/modelscope/hub/models/PaddlePaddle/PP-DocLayoutV3"
|
||||||
"/home/yoge/.cache/modelscope/hub/models/PaddlePaddle/PP-DocLayoutV3"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Image Processing
|
# Image Processing
|
||||||
max_image_size_mb: int = 10
|
max_image_size_mb: int = 10
|
||||||
|
|||||||
@@ -141,7 +141,7 @@ _logger: logging.Logger | None = None
|
|||||||
|
|
||||||
|
|
||||||
def get_logger() -> logging.Logger:
|
def get_logger() -> logging.Logger:
|
||||||
"""Get the global logger instance."""
|
"""Get the global logger instance, initializing if needed."""
|
||||||
global _logger
|
global _logger
|
||||||
if _logger is None:
|
if _logger is None:
|
||||||
_logger = setup_logging()
|
_logger = setup_logging()
|
||||||
|
|||||||
@@ -265,7 +265,7 @@ class GLMResultFormatter:
|
|||||||
# Formula wrapping
|
# Formula wrapping
|
||||||
if label == "formula":
|
if label == "formula":
|
||||||
content = content.strip()
|
content = content.strip()
|
||||||
for s, e in [("$$", "$$"), (r"\[", r"\]"), (r"\(", r"\)")]:
|
for s, e in [("$$", "$$"), (r"\[", r"\]"), (r"\(", r"\)"), ("$", "$")]:
|
||||||
if content.startswith(s):
|
if content.startswith(s):
|
||||||
content = content[len(s) :].strip()
|
content = content[len(s) :].strip()
|
||||||
if content.endswith(e):
|
if content.endswith(e):
|
||||||
|
|||||||
@@ -148,7 +148,7 @@ class LayoutDetector:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
mixed_recognition = any(region.type == "text" and region.score > 0.3 for region in regions)
|
mixed_recognition = any(region.type == "text" and region.score > 0.85 for region in regions)
|
||||||
|
|
||||||
return LayoutInfo(regions=regions, MixedRecognition=mixed_recognition)
|
return LayoutInfo(regions=regions, MixedRecognition=mixed_recognition)
|
||||||
|
|
||||||
|
|||||||
@@ -781,11 +781,11 @@ class MineruOCRService(OCRServiceBase):
|
|||||||
|
|
||||||
# Task-specific prompts (from GLM-OCR SDK config.yaml)
|
# Task-specific prompts (from GLM-OCR SDK config.yaml)
|
||||||
_TASK_PROMPTS: dict[str, str] = {
|
_TASK_PROMPTS: dict[str, str] = {
|
||||||
"text": "Text Recognition. If the content is a formula, please ouput latex code, else output text",
|
"text": "Text Recognition. If the content is a formula, please output display latex code, else output text",
|
||||||
"formula": "Formula Recognition:",
|
"formula": "Formula Recognition:",
|
||||||
"table": "Table Recognition:",
|
"table": "Table Recognition:",
|
||||||
}
|
}
|
||||||
_DEFAULT_PROMPT = "Text Recognition. If the content is a formula, please ouput latex code, else output text"
|
_DEFAULT_PROMPT = "Text Recognition. If the content is a formula, please output display latex code, else output text"
|
||||||
|
|
||||||
|
|
||||||
class GLMOCREndToEndService(OCRServiceBase):
|
class GLMOCREndToEndService(OCRServiceBase):
|
||||||
@@ -868,13 +868,14 @@ class GLMOCREndToEndService(OCRServiceBase):
|
|||||||
"""
|
"""
|
||||||
# 1. Layout detection
|
# 1. Layout detection
|
||||||
img_h, img_w = image.shape[:2]
|
img_h, img_w = image.shape[:2]
|
||||||
layout_info = self.layout_detector.detect(image)
|
padded_image = self.image_processor.add_padding(image)
|
||||||
|
layout_info = self.layout_detector.detect(padded_image)
|
||||||
|
|
||||||
# Sort regions in reading order: top-to-bottom, left-to-right
|
# Sort regions in reading order: top-to-bottom, left-to-right
|
||||||
layout_info.regions.sort(key=lambda r: (r.bbox[1], r.bbox[0]))
|
layout_info.regions.sort(key=lambda r: (r.bbox[1], r.bbox[0]))
|
||||||
|
|
||||||
# 3. OCR: per-region (parallel) or full-image fallback
|
# 3. OCR: per-region (parallel) or full-image fallback
|
||||||
if not layout_info.regions:
|
if not layout_info.regions or (len(layout_info.regions) == 1 and not layout_info.MixedRecognition):
|
||||||
# No layout detected → assume it's a formula, use formula recognition
|
# No layout detected → assume it's a formula, use formula recognition
|
||||||
logger.info("No layout regions detected, treating image as formula")
|
logger.info("No layout regions detected, treating image as formula")
|
||||||
raw_content = self._call_vllm(image, _TASK_PROMPTS["formula"])
|
raw_content = self._call_vllm(image, _TASK_PROMPTS["formula"])
|
||||||
@@ -890,7 +891,7 @@ class GLMOCREndToEndService(OCRServiceBase):
|
|||||||
if region.type == "figure":
|
if region.type == "figure":
|
||||||
continue
|
continue
|
||||||
x1, y1, x2, y2 = (int(c) for c in region.bbox)
|
x1, y1, x2, y2 = (int(c) for c in region.bbox)
|
||||||
cropped = image[y1:y2, x1:x2]
|
cropped = padded_image[y1:y2, x1:x2]
|
||||||
if cropped.size == 0 or cropped.shape[0] < 10 or cropped.shape[1] < 10:
|
if cropped.size == 0 or cropped.shape[0] < 10 or cropped.shape[1] < 10:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Skipping region idx=%d (label=%s): crop too small %s",
|
"Skipping region idx=%d (label=%s): crop too small %s",
|
||||||
|
|||||||
Reference in New Issue
Block a user