From eb68843e2c0f27fa938e9fb808fa4c4ee0bd8f7b Mon Sep 17 00:00:00 2001
From: liuyuanchuang
Date: Thu, 5 Feb 2026 21:26:23 +0800
Subject: [PATCH] feat: update model name

---
 app/services/ocr_service.py | 32 +++++--------
 test_vllm_connection.py     | 62 +++++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+), 24 deletions(-)
 create mode 100644 test_vllm_connection.py

diff --git a/app/services/ocr_service.py b/app/services/ocr_service.py
index 7502da6..7b928ea 100644
--- a/app/services/ocr_service.py
+++ b/app/services/ocr_service.py
@@ -527,7 +527,7 @@ class MineruOCRService(OCRServiceBase):
         messages = [{"role": "user", "content": [{"type": "image_url", "image_url": {"url": image_url}}, {"type": "text", "text": prompt}]}]
 
         response = self.openai_client.chat.completions.create(
-            model="PaddlePaddle/PaddleOCR-VL",
+            model="PaddleOCR-VL-0.9B",  # Use exact model name from vLLM server
             messages=messages,
             temperature=0.0,
         )
@@ -553,31 +553,15 @@
         if not image_pattern.search(markdown_content):
             return markdown_content
 
-        print(f"[DEBUG] Found image reference in markdown, triggering PaddleOCR-VL recognition")
+        formula_text = self._recognize_formula_with_paddleocr_vl(original_image)
 
-        try:
-            # For now, use the entire image for formula recognition
-            # TODO: Extract specific regions if image paths contain coordinates
-            formula_text = self._recognize_formula_with_paddleocr_vl(original_image)
+        if formula_text.startswith("\\[") or formula_text.startswith("\\("):
+            formula_text = formula_text.replace("\\[", "$$").replace("\\(", "$$")
+            formula_text = formula_text.replace("\\]", "$$").replace("\\)", "$$")
+        else:
+            formula_text = f"$${formula_text}$$"
 
-            print(f"[DEBUG] PaddleOCR-VL recognized formula: {formula_text[:100] if formula_text else 'Empty'}...")
-
-            # Replace image references with recognized formulas
-            # Wrap in display math delimiters if not already wrapped
-            if formula_text and not formula_text.startswith("$$"):
-                formula_text = f"$${formula_text}$$"
-
-            markdown_content = image_pattern.sub(formula_text, markdown_content)
-            print(f"[DEBUG] Formula recognition successful, updated markdown")
-
-        except Exception as e:
-            # If formula recognition fails, keep original content and log error
-            import traceback
-
-            print(f"[ERROR] Formula recognition failed: {e}")
-            print(f"[ERROR] Traceback: {traceback.format_exc()}")
-
-        return markdown_content
+        return formula_text
 
     def recognize(self, image: np.ndarray) -> dict:
         """Recognize content using local file_parse API.
diff --git a/test_vllm_connection.py b/test_vllm_connection.py
new file mode 100644
index 0000000..8ac9035
--- /dev/null
+++ b/test_vllm_connection.py
@@ -0,0 +1,62 @@
+"""Quick test to verify PaddleOCR-VL connection."""
+
+from openai import OpenAI
+import base64
+import cv2
+import numpy as np
+
+# Create test image
+test_image = np.ones((100, 300, 3), dtype=np.uint8) * 255
+cv2.putText(test_image, "x^2 = 4", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)
+
+# Encode to base64
+success, encoded_image = cv2.imencode(".png", test_image)
+if not success:
+    print("Failed to encode image")
+    exit(1)
+
+image_base64 = base64.b64encode(encoded_image.tobytes()).decode("utf-8")
+image_url = f"data:image/png;base64,{image_base64}"
+
+# Test connection
+client = OpenAI(
+    api_key="EMPTY",
+    base_url="http://100.115.184.74:8001/v1",
+    timeout=3600
+)
+
+print("Testing PaddleOCR-VL connection...")
+print(f"Server: http://100.115.184.74:8001/v1")
+print(f"Model: PaddleOCR-VL-0.9B")
+print("-" * 60)
+
+try:
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {"url": image_url}
+                },
+                {
+                    "type": "text",
+                    "text": "Formula Recognition:"
+                }
+            ]
+        }
+    ]
+
+    response = client.chat.completions.create(
+        model="PaddleOCR-VL-0.9B",
+        messages=messages,
+        temperature=0.0,
+    )
+
+    print("✅ SUCCESS!")
+    print(f"Response: {response.choices[0].message.content}")
+
+except Exception as e:
+    print(f"❌ FAILED: {e}")
+    import traceback
+    traceback.print_exc()