From c93eba2839f4d23b90ee8b86861c6f567623baa6 Mon Sep 17 00:00:00 2001 From: liuyuanchuang Date: Thu, 5 Feb 2026 20:50:04 +0800 Subject: [PATCH] refact: add log --- app/core/config.py | 2 +- app/core/dependencies.py | 5 +-- app/services/ocr_service.py | 21 ++++++++++--- test_paddleocr_vl_integration.py | 53 ++++++++++++++++++++++++++++++++ 4 files changed, 74 insertions(+), 7 deletions(-) create mode 100644 test_paddleocr_vl_integration.py diff --git a/app/core/config.py b/app/core/config.py index ab3e21e..e767b7c 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -22,7 +22,7 @@ class Settings(BaseSettings): debug: bool = False # PaddleOCR-VL Settings - paddleocr_vl_url: str = "http://127.0.0.1:8000/v1" + paddleocr_vl_url: str = "http://127.0.0.1:8001/v1" # MinerOCR Settings miner_ocr_api_url: str = "http://127.0.0.1:8000/file_parse" diff --git a/app/core/dependencies.py b/app/core/dependencies.py index 7e45829..3eb0f52 100644 --- a/app/core/dependencies.py +++ b/app/core/dependencies.py @@ -49,10 +49,11 @@ def get_converter() -> Converter: def get_mineru_ocr_service() -> MineruOCRService: """Get a MinerOCR service instance.""" settings = get_settings() - api_url = getattr(settings, 'miner_ocr_api_url', 'http://127.0.0.1:8000/file_parse') + api_url = getattr(settings, "miner_ocr_api_url", "http://127.0.0.1:8000/file_parse") + paddleocr_vl_url = getattr(settings, "paddleocr_vl_url", "http://localhost:8001/v1") return MineruOCRService( api_url=api_url, converter=get_converter(), image_processor=get_image_processor(), + paddleocr_vl_url=paddleocr_vl_url, ) - diff --git a/app/services/ocr_service.py b/app/services/ocr_service.py index 19641be..7502da6 100644 --- a/app/services/ocr_service.py +++ b/app/services/ocr_service.py @@ -547,27 +547,35 @@ class MineruOCRService(OCRServiceBase): Returns: Markdown content with formulas recognized by PaddleOCR-VL. """ - # Pattern to match image references: ![](images/xxx.png) + # Pattern to match image references: ![](images/xxx.png) or ![](images/xxx.jpg) image_pattern = re.compile(r"!\[\]\(images/[^)]+\)") if not image_pattern.search(markdown_content): return markdown_content + print(f"[DEBUG] Found image reference in markdown, triggering PaddleOCR-VL recognition") + try: # For now, use the entire image for formula recognition # TODO: Extract specific regions if image paths contain coordinates formula_text = self._recognize_formula_with_paddleocr_vl(original_image) + print(f"[DEBUG] PaddleOCR-VL recognized formula: {formula_text[:100] if formula_text else 'Empty'}...") + # Replace image references with recognized formulas # Wrap in display math delimiters if not already wrapped - if not formula_text.startswith("$$"): + if formula_text and not formula_text.startswith("$$"): formula_text = f"$${formula_text}$$" markdown_content = image_pattern.sub(formula_text, markdown_content) + print(f"[DEBUG] Formula recognition successful, updated markdown") except Exception as e: - # If formula recognition fails, keep original content - print(f"Warning: Formula recognition failed: {e}") + # If formula recognition fails, keep original content and log error + import traceback + + print(f"[ERROR] Formula recognition failed: {e}") + print(f"[ERROR] Traceback: {traceback.format_exc()}") return markdown_content @@ -622,10 +630,15 @@ class MineruOCRService(OCRServiceBase): if "results" in result and "image" in result["results"]: markdown_content = result["results"]["image"].get("md_content", "") + print(f"[DEBUG] Markdown content from Mineru: {markdown_content[:200]}...") + # Check if markdown contains formula image references if "![](images/" in markdown_content: + print(f"[DEBUG] Detected image reference, calling PaddleOCR-VL...") # Use PaddleOCR-VL to recognize the formula markdown_content = self._extract_and_recognize_formulas(markdown_content, image) + else: + print(f"[DEBUG] No image reference found in markdown") # Apply postprocessing to fix OCR errors markdown_content = _postprocess_markdown(markdown_content) diff --git a/test_paddleocr_vl_integration.py b/test_paddleocr_vl_integration.py new file mode 100644 index 0000000..7d5eadc --- /dev/null +++ b/test_paddleocr_vl_integration.py @@ -0,0 +1,53 @@ +"""Test script for PaddleOCR-VL integration in MineruOCRService.""" + +import cv2 +import numpy as np +from app.services.ocr_service import MineruOCRService +from app.services.converter import Converter +from app.services.image_processor import ImageProcessor + +def test_paddleocr_vl_integration(): + """Test that PaddleOCR-VL is called when image references are found.""" + + # Create a simple test image (white background with black text) + test_image = np.ones((100, 300, 3), dtype=np.uint8) * 255 + cv2.putText(test_image, "x^2 + y^2 = 1", (50, 50), + cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2) + + # Initialize service + service = MineruOCRService( + api_url="http://127.0.0.1:8000/file_parse", + converter=Converter(), + image_processor=ImageProcessor(), + paddleocr_vl_url="http://localhost:8000/v1" # Your PaddleOCR-VL server + ) + + # Simulate markdown with image reference (this is what Mineru returns) + test_markdown = "![](images/af7f211f671f16f57d346e8e17611e68e0f4671bd1ae52ed59013c10eecef589.jpg)" + + print("Testing formula extraction...") + result = service._extract_and_recognize_formulas(test_markdown, test_image) + + print(f"\nOriginal markdown: {test_markdown}") + print(f"Processed markdown: {result}") + + # Check if the image reference was replaced + if "![](images/" in result: + print("\n❌ FAILED: Image reference was not replaced") + else: + print("\n✅ SUCCESS: Image reference was replaced with formula") + +if __name__ == "__main__": + print("=" * 60) + print("PaddleOCR-VL Integration Test") + print("=" * 60) + print("\nMake sure your PaddleOCR-VL server is running at:") + print("http://localhost:8000/v1") + print("\n" + "=" * 60 + "\n") + + try: + test_paddleocr_vl_integration() + except Exception as e: + print(f"\n❌ Test failed with error: {e}") + import traceback + traceback.print_exc()