Compare commits
3 Commits
11e9ed780d
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
39e72a5743 | ||
| aee1a1bf3b | |||
| ff82021467 |
@@ -1,5 +1,7 @@
|
||||
"""Format conversion endpoints."""
|
||||
|
||||
from urllib.parse import quote
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import Response
|
||||
|
||||
@@ -34,10 +36,11 @@ async def convert_markdown_to_docx(
|
||||
request.filename,
|
||||
len(docx_bytes),
|
||||
)
|
||||
encoded_name = quote(f"{request.filename}.docx")
|
||||
return Response(
|
||||
content=docx_bytes,
|
||||
media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
headers={"Content-Disposition": f'attachment; filename="{request.filename}.docx"'},
|
||||
headers={"Content-Disposition": f"attachment; filename*=UTF-8''{encoded_name}"},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.exception("DOCX conversion failed, filename=%s: %s", request.filename, e)
|
||||
|
||||
@@ -50,9 +50,7 @@ class Settings(BaseSettings):
|
||||
max_tokens: int = 4096
|
||||
|
||||
# Model Paths
|
||||
pp_doclayout_model_dir: str | None = (
|
||||
"/home/yoge/.cache/modelscope/hub/models/PaddlePaddle/PP-DocLayoutV3"
|
||||
)
|
||||
pp_doclayout_model_dir: str | None = "/home/yoge/.cache/modelscope/hub/models/PaddlePaddle/PP-DocLayoutV3"
|
||||
|
||||
# Image Processing
|
||||
max_image_size_mb: int = 10
|
||||
|
||||
@@ -265,7 +265,7 @@ class GLMResultFormatter:
|
||||
# Formula wrapping
|
||||
if label == "formula":
|
||||
content = content.strip()
|
||||
for s, e in [("$$", "$$"), (r"\[", r"\]"), (r"\(", r"\)")]:
|
||||
for s, e in [("$$", "$$"), (r"\[", r"\]"), (r"\(", r"\)"), ("$", "$")]:
|
||||
if content.startswith(s):
|
||||
content = content[len(s) :].strip()
|
||||
if content.endswith(e):
|
||||
|
||||
@@ -148,7 +148,7 @@ class LayoutDetector:
|
||||
)
|
||||
)
|
||||
|
||||
mixed_recognition = any(region.type == "text" and region.score > 0.3 for region in regions)
|
||||
mixed_recognition = any(region.type == "text" and region.score > 0.85 for region in regions)
|
||||
|
||||
return LayoutInfo(regions=regions, MixedRecognition=mixed_recognition)
|
||||
|
||||
|
||||
@@ -781,11 +781,11 @@ class MineruOCRService(OCRServiceBase):
|
||||
|
||||
# Task-specific prompts (from GLM-OCR SDK config.yaml)
|
||||
_TASK_PROMPTS: dict[str, str] = {
|
||||
"text": "Text Recognition. If the content is a formula, please ouput latex code, else output text",
|
||||
"text": "Text Recognition. If the content is a formula, please output display latex code, else output text",
|
||||
"formula": "Formula Recognition:",
|
||||
"table": "Table Recognition:",
|
||||
}
|
||||
_DEFAULT_PROMPT = "Text Recognition. If the content is a formula, please ouput latex code, else output text"
|
||||
_DEFAULT_PROMPT = "Text Recognition. If the content is a formula, please output display latex code, else output text"
|
||||
|
||||
|
||||
class GLMOCREndToEndService(OCRServiceBase):
|
||||
@@ -868,13 +868,14 @@ class GLMOCREndToEndService(OCRServiceBase):
|
||||
"""
|
||||
# 1. Layout detection
|
||||
img_h, img_w = image.shape[:2]
|
||||
layout_info = self.layout_detector.detect(image)
|
||||
padded_image = self.image_processor.add_padding(image)
|
||||
layout_info = self.layout_detector.detect(padded_image)
|
||||
|
||||
# Sort regions in reading order: top-to-bottom, left-to-right
|
||||
layout_info.regions.sort(key=lambda r: (r.bbox[1], r.bbox[0]))
|
||||
|
||||
# 3. OCR: per-region (parallel) or full-image fallback
|
||||
if not layout_info.regions:
|
||||
if not layout_info.regions or (len(layout_info.regions) == 1 and not layout_info.MixedRecognition):
|
||||
# No layout detected, or only a single region with no confident text region (MixedRecognition is false) → assume it's a formula, use formula recognition
|
||||
logger.info("No layout regions detected, treating image as formula")
|
||||
raw_content = self._call_vllm(image, _TASK_PROMPTS["formula"])
|
||||
@@ -890,7 +891,7 @@ class GLMOCREndToEndService(OCRServiceBase):
|
||||
if region.type == "figure":
|
||||
continue
|
||||
x1, y1, x2, y2 = (int(c) for c in region.bbox)
|
||||
cropped = image[y1:y2, x1:x2]
|
||||
cropped = padded_image[y1:y2, x1:x2]
|
||||
if cropped.size == 0 or cropped.shape[0] < 10 or cropped.shape[1] < 10:
|
||||
logger.warning(
|
||||
"Skipping region idx=%d (label=%s): crop too small %s",
|
||||
|
||||
Reference in New Issue
Block a user