From d86107976a6731e39543d1e71de480d1143f3b3e Mon Sep 17 00:00:00 2001 From: liuyuanchuang Date: Sat, 7 Feb 2026 13:26:57 +0800 Subject: [PATCH] feat: lower mixed-recognition text score threshold from 0.6 to 0.3 and remove formula image re-recognition from MineruOCRService --- app/services/layout_detector.py | 2 +- app/services/ocr_service.py | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/app/services/layout_detector.py b/app/services/layout_detector.py index d03ea6e..0cdf75b 100644 --- a/app/services/layout_detector.py +++ b/app/services/layout_detector.py @@ -134,7 +134,7 @@ class LayoutDetector: ) ) - mixed_recognition = any(region.type == "text" and region.score > 0.6 for region in regions) + mixed_recognition = any(region.type == "text" and region.score > 0.3 for region in regions) return LayoutInfo(regions=regions, MixedRecognition=mixed_recognition) diff --git a/app/services/ocr_service.py b/app/services/ocr_service.py index 6734695..78e38e6 100644 --- a/app/services/ocr_service.py +++ b/app/services/ocr_service.py @@ -701,11 +701,6 @@ class MineruOCRService(OCRServiceBase): if "results" in result and "image" in result["results"]: markdown_content = result["results"]["image"].get("md_content", "") - # Check if markdown contains formula image references - if "![](images/" in markdown_content: - # Use PaddleOCR-VL to recognize the formula - markdown_content = self._extract_and_recognize_formulas(markdown_content, image) - # Apply postprocessing to fix OCR errors markdown_content = _postprocess_markdown(markdown_content)