feat: update model name
This commit is contained in:
@@ -527,7 +527,7 @@ class MineruOCRService(OCRServiceBase):
|
|||||||
messages = [{"role": "user", "content": [{"type": "image_url", "image_url": {"url": image_url}}, {"type": "text", "text": prompt}]}]
|
messages = [{"role": "user", "content": [{"type": "image_url", "image_url": {"url": image_url}}, {"type": "text", "text": prompt}]}]
|
||||||
|
|
||||||
response = self.openai_client.chat.completions.create(
|
response = self.openai_client.chat.completions.create(
|
||||||
model="PaddlePaddle/PaddleOCR-VL",
|
model="PaddleOCR-VL-0.9B", # Use exact model name from vLLM server
|
||||||
messages=messages,
|
messages=messages,
|
||||||
temperature=0.0,
|
temperature=0.0,
|
||||||
)
|
)
|
||||||
@@ -553,31 +553,15 @@ class MineruOCRService(OCRServiceBase):
|
|||||||
if not image_pattern.search(markdown_content):
|
if not image_pattern.search(markdown_content):
|
||||||
return markdown_content
|
return markdown_content
|
||||||
|
|
||||||
print(f"[DEBUG] Found image reference in markdown, triggering PaddleOCR-VL recognition")
|
formula_text = self._recognize_formula_with_paddleocr_vl(original_image)
|
||||||
|
|
||||||
try:
|
if formula_text.startswith("\[") or formula_text.startswith("\("):
|
||||||
# For now, use the entire image for formula recognition
|
formula_text = formula_text.replace("\[", "$$").replace("\(", "$$")
|
||||||
# TODO: Extract specific regions if image paths contain coordinates
|
formula_text = formula_text.replace("\]", "$$").replace("\)", "$$")
|
||||||
formula_text = self._recognize_formula_with_paddleocr_vl(original_image)
|
else:
|
||||||
|
formula_text = f"$${formula_text}$$"
|
||||||
|
|
||||||
print(f"[DEBUG] PaddleOCR-VL recognized formula: {formula_text[:100] if formula_text else 'Empty'}...")
|
return formula_text
|
||||||
|
|
||||||
# Replace image references with recognized formulas
|
|
||||||
# Wrap in display math delimiters if not already wrapped
|
|
||||||
if formula_text and not formula_text.startswith("$$"):
|
|
||||||
formula_text = f"$${formula_text}$$"
|
|
||||||
|
|
||||||
markdown_content = image_pattern.sub(formula_text, markdown_content)
|
|
||||||
print(f"[DEBUG] Formula recognition successful, updated markdown")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
# If formula recognition fails, keep original content and log error
|
|
||||||
import traceback
|
|
||||||
|
|
||||||
print(f"[ERROR] Formula recognition failed: {e}")
|
|
||||||
print(f"[ERROR] Traceback: {traceback.format_exc()}")
|
|
||||||
|
|
||||||
return markdown_content
|
|
||||||
|
|
||||||
def recognize(self, image: np.ndarray) -> dict:
|
def recognize(self, image: np.ndarray) -> dict:
|
||||||
"""Recognize content using local file_parse API.
|
"""Recognize content using local file_parse API.
|
||||||
|
|||||||
62
test_vllm_connection.py
Normal file
62
test_vllm_connection.py
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
"""Quick test to verify PaddleOCR-VL connection."""
|
||||||
|
|
||||||
|
from openai import OpenAI
|
||||||
|
import base64
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# Connection settings, defined once so the banner printed by main() always
# matches what the client and the request actually use.
SERVER_URL = "http://100.115.184.74:8001/v1"
MODEL_NAME = "PaddleOCR-VL-0.9B"  # must be the exact model name served by vLLM
REQUEST_TIMEOUT = 3600  # seconds; forwarded unchanged to the OpenAI client


def make_test_image_url():
    """Render "x^2 = 4" on a white canvas and return it as a PNG data URL.

    Returns:
        A ``data:image/png;base64,...`` string suitable for an ``image_url``
        message part.

    Raises:
        RuntimeError: if OpenCV reports that PNG encoding failed.
    """
    # White 100x300 BGR canvas with the formula drawn in black.
    test_image = np.ones((100, 300, 3), dtype=np.uint8) * 255
    cv2.putText(test_image, "x^2 = 4", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)

    success, encoded_image = cv2.imencode(".png", test_image)
    if not success:
        raise RuntimeError("Failed to encode image")

    image_base64 = base64.b64encode(encoded_image.tobytes()).decode("utf-8")
    return f"data:image/png;base64,{image_base64}"


def build_formula_messages(image_url):
    """Build the single-turn chat payload: the image followed by the prompt.

    Args:
        image_url: URL (here a base64 data URL) of the image to recognize.

    Returns:
        A one-element list holding a user message whose content is the image
        part followed by the "Formula Recognition:" text part.
    """
    return [
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {"url": image_url},
                },
                {
                    "type": "text",
                    "text": "Formula Recognition:",
                },
            ],
        }
    ]


def main():
    """Send one formula-recognition request and report the outcome.

    Returns:
        0 when the server answered, 1 when the image could not be encoded or
        the request failed, so callers and shells can tell the two apart.
    """
    import traceback

    try:
        image_url = make_test_image_url()
    except RuntimeError as e:
        print(e)
        return 1

    client = OpenAI(
        api_key="EMPTY",
        base_url=SERVER_URL,
        timeout=REQUEST_TIMEOUT,
    )

    print("Testing PaddleOCR-VL connection...")
    print(f"Server: {SERVER_URL}")
    print(f"Model: {MODEL_NAME}")
    print("-" * 60)

    try:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=build_formula_messages(image_url),
            temperature=0.0,
        )

        print("✅ SUCCESS!")
        # Kept inside the try so a malformed/empty response is reported as a
        # failure instead of escaping as an unhandled exception.
        print(f"Response: {response.choices[0].message.content}")
    except Exception as e:
        # Top-level boundary of a diagnostic script: report everything.
        print(f"❌ FAILED: {e}")
        traceback.print_exc()
        return 1

    return 0


# Guarded so that importing this module (e.g. pytest collecting a file named
# test_*.py) does not fire a network request or terminate the interpreter.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
Reference in New Issue
Block a user