From c93eba2839f4d23b90ee8b86861c6f567623baa6 Mon Sep 17 00:00:00 2001
From: liuyuanchuang <yuanchuang_liu@qingsongchou.com>
Date: Thu, 5 Feb 2026 20:50:04 +0800
Subject: [PATCH] refactor: add PaddleOCR-VL debug logging and integration test

---
 app/core/config.py               |  2 +-
 app/core/dependencies.py         |  5 +--
 app/services/ocr_service.py      | 21 ++++++++++---
 test_paddleocr_vl_integration.py | 53 ++++++++++++++++++++++++++++++++
 4 files changed, 74 insertions(+), 7 deletions(-)
 create mode 100644 test_paddleocr_vl_integration.py

diff --git a/app/core/config.py b/app/core/config.py
index ab3e21e..e767b7c 100644
--- a/app/core/config.py
+++ b/app/core/config.py
@@ -22,7 +22,7 @@ class Settings(BaseSettings):
     debug: bool = False
 
     # PaddleOCR-VL Settings
-    paddleocr_vl_url: str = "http://127.0.0.1:8000/v1"
+    paddleocr_vl_url: str = "http://127.0.0.1:8001/v1"
 
     # MinerOCR Settings
     miner_ocr_api_url: str = "http://127.0.0.1:8000/file_parse"
diff --git a/app/core/dependencies.py b/app/core/dependencies.py
index 7e45829..3eb0f52 100644
--- a/app/core/dependencies.py
+++ b/app/core/dependencies.py
@@ -49,10 +49,11 @@ def get_converter() -> Converter:
 def get_mineru_ocr_service() -> MineruOCRService:
     """Get a MinerOCR service instance."""
     settings = get_settings()
-    api_url = getattr(settings, 'miner_ocr_api_url', 'http://127.0.0.1:8000/file_parse')
+    api_url = getattr(settings, "miner_ocr_api_url", "http://127.0.0.1:8000/file_parse")
+    paddleocr_vl_url = getattr(settings, "paddleocr_vl_url", "http://127.0.0.1:8001/v1")
     return MineruOCRService(
         api_url=api_url,
         converter=get_converter(),
         image_processor=get_image_processor(),
+        paddleocr_vl_url=paddleocr_vl_url,
     )
-
diff --git a/app/services/ocr_service.py b/app/services/ocr_service.py
index 19641be..7502da6 100644
--- a/app/services/ocr_service.py
+++ b/app/services/ocr_service.py
@@ -547,27 +547,35 @@ class MineruOCRService(OCRServiceBase):
         Returns:
             Markdown content with formulas recognized by PaddleOCR-VL.
         """
-        # Pattern to match image references: ![](images/xxx.png)
+        # Pattern to match image references: ![](images/xxx.png) or ![](images/xxx.jpg)
         image_pattern = re.compile(r"!\[\]\(images/[^)]+\)")
 
         if not image_pattern.search(markdown_content):
             return markdown_content
 
+        print(f"[DEBUG] Found image reference in markdown, triggering PaddleOCR-VL recognition")
+
         try:
             # For now, use the entire image for formula recognition
             # TODO: Extract specific regions if image paths contain coordinates
             formula_text = self._recognize_formula_with_paddleocr_vl(original_image)
 
+            print(f"[DEBUG] PaddleOCR-VL recognized formula: {formula_text[:100] if formula_text else 'Empty'}...")
+
             # Replace image references with recognized formulas
             # Wrap in display math delimiters if not already wrapped
-            if not formula_text.startswith("$$"):
+            if formula_text and not formula_text.startswith("$$"):
                 formula_text = f"$${formula_text}$$"
 
             markdown_content = image_pattern.sub(formula_text, markdown_content)
+            print(f"[DEBUG] Formula recognition successful, updated markdown")
 
         except Exception as e:
-            # If formula recognition fails, keep original content
-            print(f"Warning: Formula recognition failed: {e}")
+            # If formula recognition fails, keep original content and log error
+            import traceback
+
+            print(f"[ERROR] Formula recognition failed: {e}")
+            print(f"[ERROR] Traceback: {traceback.format_exc()}")
 
         return markdown_content
 
@@ -622,10 +630,15 @@ class MineruOCRService(OCRServiceBase):
             if "results" in result and "image" in result["results"]:
                 markdown_content = result["results"]["image"].get("md_content", "")
 
+            print(f"[DEBUG] Markdown content from Mineru: {markdown_content[:200]}...")
+
             # Check if markdown contains formula image references
             if "![](images/" in markdown_content:
+                print(f"[DEBUG] Detected image reference, calling PaddleOCR-VL...")
                 # Use PaddleOCR-VL to recognize the formula
                 markdown_content = self._extract_and_recognize_formulas(markdown_content, image)
+            else:
+                print(f"[DEBUG] No image reference found in markdown")
 
             # Apply postprocessing to fix OCR errors
             markdown_content = _postprocess_markdown(markdown_content)
diff --git a/test_paddleocr_vl_integration.py b/test_paddleocr_vl_integration.py
new file mode 100644
index 0000000..7d5eadc
--- /dev/null
+++ b/test_paddleocr_vl_integration.py
@@ -0,0 +1,53 @@
+"""Test script for PaddleOCR-VL integration in MineruOCRService."""
+
+import cv2
+import numpy as np
+from app.services.ocr_service import MineruOCRService
+from app.services.converter import Converter
+from app.services.image_processor import ImageProcessor
+
+def test_paddleocr_vl_integration():
+    """Check that PaddleOCR-VL replaces image references (needs a live server)."""
+    # Create a simple test image (white background with black text)
+    test_image = np.ones((100, 300, 3), dtype=np.uint8) * 255
+    cv2.putText(test_image, "x^2 + y^2 = 1", (50, 50),
+                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)
+
+    # Initialize service
+    service = MineruOCRService(
+        api_url="http://127.0.0.1:8000/file_parse",
+        converter=Converter(),
+        image_processor=ImageProcessor(),
+        paddleocr_vl_url="http://127.0.0.1:8001/v1",  # matches the app/core/config.py default
+    )
+
+    # Simulate markdown with image reference (this is what Mineru returns)
+    test_markdown = "![](images/af7f211f671f16f57d346e8e17611e68e0f4671bd1ae52ed59013c10eecef589.jpg)"
+
+    print("Testing formula extraction...")
+    result = service._extract_and_recognize_formulas(test_markdown, test_image)
+
+    print(f"\nOriginal markdown: {test_markdown}")
+    print(f"Processed markdown: {result}")
+
+    # Assert as well as print, so pytest actually fails when the reference survives
+    replaced = "![](images/" not in result
+    print("\n✅ SUCCESS: Image reference was replaced with formula" if replaced
+          else "\n❌ FAILED: Image reference was not replaced")
+    assert replaced, "image reference was not replaced with a formula"
+
+
+if __name__ == "__main__":
+    print("=" * 60)
+    print("PaddleOCR-VL Integration Test")
+    print("=" * 60)
+    print("\nMake sure your PaddleOCR-VL server is running at:\nhttp://127.0.0.1:8001/v1")
+    print("\n" + "=" * 60 + "\n")
+
+    try:
+        test_paddleocr_vl_integration()
+    except Exception as e:
+        print(f"\n❌ Test failed with error: {e}")
+        import traceback
+        traceback.print_exc()
+        raise SystemExit(1) from e  # report the failure through the exit status