5 Commits

Author SHA1 Message Date
liuyuanchuang
39e72a5743 fix: encode non-ASCII filename in Content-Disposition header
Use RFC 5987 filename*=UTF-8'' percent-encoding to support Chinese and
other Unicode characters in DOCX download filenames.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-13 17:41:18 +08:00
aee1a1bf3b fix: single dollar symbol 2026-03-12 23:20:14 +08:00
ff82021467 optimize: formula is recognize text 2026-03-12 22:30:27 +08:00
liuyuanchuang
11e9ed780d Merge branch 'main' of https://code.texpixel.com/YogeLiu/doc_processer 2026-03-12 12:41:43 +08:00
liuyuanchuang
d1050acbdc fix: logger path 2026-03-12 12:41:26 +08:00
6 changed files with 15 additions and 14 deletions

View File

@@ -1,15 +1,16 @@
"""Format conversion endpoints."""
import logging
from urllib.parse import quote
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import Response
from app.core.dependencies import get_converter
from app.core.logging_config import get_logger
from app.schemas.convert import LatexToOmmlRequest, LatexToOmmlResponse, MarkdownToDocxRequest
from app.services.converter import Converter
logger = logging.getLogger(__name__)
logger = get_logger()
router = APIRouter()
@@ -35,10 +36,11 @@ async def convert_markdown_to_docx(
request.filename,
len(docx_bytes),
)
encoded_name = quote(f"{request.filename}.docx")
return Response(
content=docx_bytes,
media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
headers={"Content-Disposition": f'attachment; filename="{request.filename}.docx"'},
headers={"Content-Disposition": f"attachment; filename*=UTF-8''{encoded_name}"},
)
except Exception as e:
logger.exception("DOCX conversion failed, filename=%s: %s", request.filename, e)

View File

@@ -50,9 +50,7 @@ class Settings(BaseSettings):
max_tokens: int = 4096
# Model Paths
pp_doclayout_model_dir: str | None = (
"/home/yoge/.cache/modelscope/hub/models/PaddlePaddle/PP-DocLayoutV3"
)
pp_doclayout_model_dir: str | None = "/home/yoge/.cache/modelscope/hub/models/PaddlePaddle/PP-DocLayoutV3"
# Image Processing
max_image_size_mb: int = 10

View File

@@ -141,7 +141,7 @@ _logger: logging.Logger | None = None
def get_logger() -> logging.Logger:
"""Get the global logger instance."""
"""Get the global logger instance, initializing if needed."""
global _logger
if _logger is None:
_logger = setup_logging()

View File

@@ -265,7 +265,7 @@ class GLMResultFormatter:
# Formula wrapping
if label == "formula":
content = content.strip()
for s, e in [("$$", "$$"), (r"\[", r"\]"), (r"\(", r"\)")]:
for s, e in [("$$", "$$"), (r"\[", r"\]"), (r"\(", r"\)"), ("$", "$")]:
if content.startswith(s):
content = content[len(s) :].strip()
if content.endswith(e):

View File

@@ -148,7 +148,7 @@ class LayoutDetector:
)
)
mixed_recognition = any(region.type == "text" and region.score > 0.3 for region in regions)
mixed_recognition = any(region.type == "text" and region.score > 0.85 for region in regions)
return LayoutInfo(regions=regions, MixedRecognition=mixed_recognition)

View File

@@ -781,11 +781,11 @@ class MineruOCRService(OCRServiceBase):
# Task-specific prompts (from GLM-OCR SDK config.yaml)
_TASK_PROMPTS: dict[str, str] = {
"text": "Text Recognition. If the content is a formula, please ouput latex code, else output text",
"text": "Text Recognition. If the content is a formula, please output display latex code, else output text",
"formula": "Formula Recognition:",
"table": "Table Recognition:",
}
_DEFAULT_PROMPT = "Text Recognition. If the content is a formula, please ouput latex code, else output text"
_DEFAULT_PROMPT = "Text Recognition. If the content is a formula, please output display latex code, else output text"
class GLMOCREndToEndService(OCRServiceBase):
@@ -868,13 +868,14 @@ class GLMOCREndToEndService(OCRServiceBase):
"""
# 1. Layout detection
img_h, img_w = image.shape[:2]
layout_info = self.layout_detector.detect(image)
padded_image = self.image_processor.add_padding(image)
layout_info = self.layout_detector.detect(padded_image)
# Sort regions in reading order: top-to-bottom, left-to-right
layout_info.regions.sort(key=lambda r: (r.bbox[1], r.bbox[0]))
# 3. OCR: per-region (parallel) or full-image fallback
if not layout_info.regions:
if not layout_info.regions or (len(layout_info.regions) == 1 and not layout_info.MixedRecognition):
# No layout detected → assume it's a formula, use formula recognition
logger.info("No layout regions detected, treating image as formula")
raw_content = self._call_vllm(image, _TASK_PROMPTS["formula"])
@@ -890,7 +891,7 @@ class GLMOCREndToEndService(OCRServiceBase):
if region.type == "figure":
continue
x1, y1, x2, y2 = (int(c) for c in region.bbox)
cropped = image[y1:y2, x1:x2]
cropped = padded_image[y1:y2, x1:x2]
if cropped.size == 0 or cropped.shape[0] < 10 or cropped.shape[1] < 10:
logger.warning(
"Skipping region idx=%d (label=%s): crop too small %s",