5 Commits

Author SHA1 Message Date
liuyuanchuang
39e72a5743 fix: encode non-ASCII filename in Content-Disposition header
Use RFC 5987 filename*=UTF-8'' percent-encoding to support Chinese and
other Unicode characters in DOCX download filenames.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-13 17:41:18 +08:00
aee1a1bf3b fix: single dollar symbol 2026-03-12 23:20:14 +08:00
ff82021467 optimize: formula is recognized as text 2026-03-12 22:30:27 +08:00
liuyuanchuang
11e9ed780d Merge branch 'main' of https://code.texpixel.com/YogeLiu/doc_processer 2026-03-12 12:41:43 +08:00
liuyuanchuang
d1050acbdc fix: logger path 2026-03-12 12:41:26 +08:00
6 changed files with 15 additions and 14 deletions

View File

@@ -1,15 +1,16 @@
"""Format conversion endpoints.""" """Format conversion endpoints."""
import logging from urllib.parse import quote
from fastapi import APIRouter, Depends, HTTPException from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import Response from fastapi.responses import Response
from app.core.dependencies import get_converter from app.core.dependencies import get_converter
from app.core.logging_config import get_logger
from app.schemas.convert import LatexToOmmlRequest, LatexToOmmlResponse, MarkdownToDocxRequest from app.schemas.convert import LatexToOmmlRequest, LatexToOmmlResponse, MarkdownToDocxRequest
from app.services.converter import Converter from app.services.converter import Converter
logger = logging.getLogger(__name__) logger = get_logger()
router = APIRouter() router = APIRouter()
@@ -35,10 +36,11 @@ async def convert_markdown_to_docx(
request.filename, request.filename,
len(docx_bytes), len(docx_bytes),
) )
encoded_name = quote(f"{request.filename}.docx")
return Response( return Response(
content=docx_bytes, content=docx_bytes,
media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document", media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
headers={"Content-Disposition": f'attachment; filename="{request.filename}.docx"'}, headers={"Content-Disposition": f"attachment; filename*=UTF-8''{encoded_name}"},
) )
except Exception as e: except Exception as e:
logger.exception("DOCX conversion failed, filename=%s: %s", request.filename, e) logger.exception("DOCX conversion failed, filename=%s: %s", request.filename, e)

View File

@@ -50,9 +50,7 @@ class Settings(BaseSettings):
max_tokens: int = 4096 max_tokens: int = 4096
# Model Paths # Model Paths
pp_doclayout_model_dir: str | None = ( pp_doclayout_model_dir: str | None = "/home/yoge/.cache/modelscope/hub/models/PaddlePaddle/PP-DocLayoutV3"
"/home/yoge/.cache/modelscope/hub/models/PaddlePaddle/PP-DocLayoutV3"
)
# Image Processing # Image Processing
max_image_size_mb: int = 10 max_image_size_mb: int = 10

View File

@@ -141,7 +141,7 @@ _logger: logging.Logger | None = None
def get_logger() -> logging.Logger: def get_logger() -> logging.Logger:
"""Get the global logger instance.""" """Get the global logger instance, initializing if needed."""
global _logger global _logger
if _logger is None: if _logger is None:
_logger = setup_logging() _logger = setup_logging()

View File

@@ -265,7 +265,7 @@ class GLMResultFormatter:
# Formula wrapping # Formula wrapping
if label == "formula": if label == "formula":
content = content.strip() content = content.strip()
for s, e in [("$$", "$$"), (r"\[", r"\]"), (r"\(", r"\)")]: for s, e in [("$$", "$$"), (r"\[", r"\]"), (r"\(", r"\)"), ("$", "$")]:
if content.startswith(s): if content.startswith(s):
content = content[len(s) :].strip() content = content[len(s) :].strip()
if content.endswith(e): if content.endswith(e):

View File

@@ -148,7 +148,7 @@ class LayoutDetector:
) )
) )
mixed_recognition = any(region.type == "text" and region.score > 0.3 for region in regions) mixed_recognition = any(region.type == "text" and region.score > 0.85 for region in regions)
return LayoutInfo(regions=regions, MixedRecognition=mixed_recognition) return LayoutInfo(regions=regions, MixedRecognition=mixed_recognition)

View File

@@ -781,11 +781,11 @@ class MineruOCRService(OCRServiceBase):
# Task-specific prompts (from GLM-OCR SDK config.yaml) # Task-specific prompts (from GLM-OCR SDK config.yaml)
_TASK_PROMPTS: dict[str, str] = { _TASK_PROMPTS: dict[str, str] = {
"text": "Text Recognition. If the content is a formula, please ouput latex code, else output text", "text": "Text Recognition. If the content is a formula, please output display latex code, else output text",
"formula": "Formula Recognition:", "formula": "Formula Recognition:",
"table": "Table Recognition:", "table": "Table Recognition:",
} }
_DEFAULT_PROMPT = "Text Recognition. If the content is a formula, please ouput latex code, else output text" _DEFAULT_PROMPT = "Text Recognition. If the content is a formula, please output display latex code, else output text"
class GLMOCREndToEndService(OCRServiceBase): class GLMOCREndToEndService(OCRServiceBase):
@@ -868,13 +868,14 @@ class GLMOCREndToEndService(OCRServiceBase):
""" """
# 1. Layout detection # 1. Layout detection
img_h, img_w = image.shape[:2] img_h, img_w = image.shape[:2]
layout_info = self.layout_detector.detect(image) padded_image = self.image_processor.add_padding(image)
layout_info = self.layout_detector.detect(padded_image)
# Sort regions in reading order: top-to-bottom, left-to-right # Sort regions in reading order: top-to-bottom, left-to-right
layout_info.regions.sort(key=lambda r: (r.bbox[1], r.bbox[0])) layout_info.regions.sort(key=lambda r: (r.bbox[1], r.bbox[0]))
# 3. OCR: per-region (parallel) or full-image fallback # 3. OCR: per-region (parallel) or full-image fallback
if not layout_info.regions: if not layout_info.regions or (len(layout_info.regions) == 1 and not layout_info.MixedRecognition):
# No layout detected → assume it's a formula, use formula recognition # No layout detected → assume it's a formula, use formula recognition
logger.info("No layout regions detected, treating image as formula") logger.info("No layout regions detected, treating image as formula")
raw_content = self._call_vllm(image, _TASK_PROMPTS["formula"]) raw_content = self._call_vllm(image, _TASK_PROMPTS["formula"])
@@ -890,7 +891,7 @@ class GLMOCREndToEndService(OCRServiceBase):
if region.type == "figure": if region.type == "figure":
continue continue
x1, y1, x2, y2 = (int(c) for c in region.bbox) x1, y1, x2, y2 = (int(c) for c in region.bbox)
cropped = image[y1:y2, x1:x2] cropped = padded_image[y1:y2, x1:x2]
if cropped.size == 0 or cropped.shape[0] < 10 or cropped.shape[1] < 10: if cropped.size == 0 or cropped.shape[0] < 10 or cropped.shape[1] < 10:
logger.warning( logger.warning(
"Skipping region idx=%d (label=%s): crop too small %s", "Skipping region idx=%d (label=%s): crop too small %s",