From eb68843e2c0f27fa938e9fb808fa4c4ee0bd8f7b Mon Sep 17 00:00:00 2001
From: liuyuanchuang
Date: Thu, 5 Feb 2026 21:26:23 +0800
Subject: [PATCH] feat: update model name

---
 app/services/ocr_service.py | 32 +++++--------
 test_vllm_connection.py     | 62 +++++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+), 24 deletions(-)
 create mode 100644 test_vllm_connection.py

diff --git a/app/services/ocr_service.py b/app/services/ocr_service.py
index 7502da6..7b928ea 100644
--- a/app/services/ocr_service.py
+++ b/app/services/ocr_service.py
@@ -527,7 +527,7 @@ class MineruOCRService(OCRServiceBase):
         messages = [{"role": "user", "content": [{"type": "image_url", "image_url": {"url": image_url}}, {"type": "text", "text": prompt}]}]
 
         response = self.openai_client.chat.completions.create(
-            model="PaddlePaddle/PaddleOCR-VL",
+            model="PaddleOCR-VL-0.9B",  # Use exact model name from vLLM server
             messages=messages,
             temperature=0.0,
         )
@@ -553,31 +553,15 @@
         if not image_pattern.search(markdown_content):
             return markdown_content
 
-        print(f"[DEBUG] Found image reference in markdown, triggering PaddleOCR-VL recognition")
+        formula_text = self._recognize_formula_with_paddleocr_vl(original_image)
 
-        try:
-            # For now, use the entire image for formula recognition
-            # TODO: Extract specific regions if image paths contain coordinates
-            formula_text = self._recognize_formula_with_paddleocr_vl(original_image)
+        if formula_text.startswith("\\[") or formula_text.startswith("\\("):
+            formula_text = formula_text.replace("\\[", "$$").replace("\\(", "$$")
+            formula_text = formula_text.replace("\\]", "$$").replace("\\)", "$$")
+        else:
+            formula_text = f"$${formula_text}$$"
 
-            print(f"[DEBUG] PaddleOCR-VL recognized formula: {formula_text[:100] if formula_text else 'Empty'}...")
-
-            # Replace image references with recognized formulas
-            # Wrap in display math delimiters if not already wrapped
-            if formula_text and not formula_text.startswith("$$"):
-                formula_text = f"$${formula_text}$$"
-
-            markdown_content = image_pattern.sub(formula_text, markdown_content)
-            print(f"[DEBUG] Formula recognition successful, updated markdown")
-
-        except Exception as e:
-            # If formula recognition fails, keep original content and log error
-            import traceback
-
-            print(f"[ERROR] Formula recognition failed: {e}")
-            print(f"[ERROR] Traceback: {traceback.format_exc()}")
-
-        return markdown_content
+        return formula_text
 
     def recognize(self, image: np.ndarray) -> dict:
         """Recognize content using local file_parse API.
diff --git a/test_vllm_connection.py b/test_vllm_connection.py
new file mode 100644
index 0000000..8ac9035
--- /dev/null
+++ b/test_vllm_connection.py
@@ -0,0 +1,62 @@
+"""Quick test to verify PaddleOCR-VL connection."""
+
+from openai import OpenAI
+import base64
+import cv2
+import numpy as np
+
+# Create test image
+test_image = np.ones((100, 300, 3), dtype=np.uint8) * 255
+cv2.putText(test_image, "x^2 = 4", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)
+
+# Encode to base64
+success, encoded_image = cv2.imencode(".png", test_image)
+if not success:
+    print("Failed to encode image")
+    exit(1)
+
+image_base64 = base64.b64encode(encoded_image.tobytes()).decode("utf-8")
+image_url = f"data:image/png;base64,{image_base64}"
+
+# Test connection
+client = OpenAI(
+    api_key="EMPTY",
+    base_url="http://100.115.184.74:8001/v1",
+    timeout=3600
+)
+
+print("Testing PaddleOCR-VL connection...")
+print(f"Server: http://100.115.184.74:8001/v1")
+print(f"Model: PaddleOCR-VL-0.9B")
+print("-" * 60)
+
+try:
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {"url": image_url}
+                },
+                {
+                    "type": "text",
+                    "text": "Formula Recognition:"
+                }
+            ]
+        }
+    ]
+
+    response = client.chat.completions.create(
+        model="PaddleOCR-VL-0.9B",
+        messages=messages,
+        temperature=0.0,
+    )
+
+    print("✅ SUCCESS!")
+    print(f"Response: {response.choices[0].message.content}")
+
+except Exception as e:
+    print(f"❌ FAILED: {e}")
+    import traceback
+    traceback.print_exc()