fix: update port
This commit is contained in:
@@ -489,7 +489,7 @@ class MineruOCRService(OCRServiceBase):
|
|||||||
api_url: str = "http://127.0.0.1:8000/file_parse",
|
api_url: str = "http://127.0.0.1:8000/file_parse",
|
||||||
image_processor: Optional[ImageProcessor] = None,
|
image_processor: Optional[ImageProcessor] = None,
|
||||||
converter: Optional[Converter] = None,
|
converter: Optional[Converter] = None,
|
||||||
paddleocr_vl_url: str = "http://localhost:8001/v1",
|
paddleocr_vl_url: str = "http://localhost:8000/v1",
|
||||||
):
|
):
|
||||||
"""Initialize Local API service.
|
"""Initialize Local API service.
|
||||||
|
|
||||||
|
|||||||
@@ -1,53 +0,0 @@
|
|||||||
"""Test script for PaddleOCR-VL integration in MineruOCRService."""
|
|
||||||
|
|
||||||
import cv2
|
|
||||||
import numpy as np
|
|
||||||
from app.services.ocr_service import MineruOCRService
|
|
||||||
from app.services.converter import Converter
|
|
||||||
from app.services.image_processor import ImageProcessor
|
|
||||||
|
|
||||||
def test_paddleocr_vl_integration():
    """Run formula extraction on a Markdown image reference and report the result.

    Draws the text ``x^2 + y^2 = 1`` on a white canvas, builds a
    ``MineruOCRService`` configured with local endpoints, and passes a Markdown
    snippet together with the image to ``_extract_and_recognize_formulas``.
    The outcome is printed, not asserted: the processed Markdown is shown and
    a success or warning line is emitted depending on whether an image
    reference is still present in the result.

    NOTE(review): presumably the services at the configured URLs must be
    running for this to do anything useful -- confirm against the service code.
    """
    # Create a simple test image (white background with black text).
    test_image = np.full((100, 300, 3), 255, dtype=np.uint8)
    cv2.putText(
        test_image,
        "x^2 + y^2 = 1",
        (50, 50),
        cv2.FONT_HERSHEY_SIMPLEX,
        1,
        (0, 0, 0),
        2,
    )

    # Initialize service.
    service = MineruOCRService(
        api_url="http://127.0.0.1:8000/file_parse",
        converter=Converter(),
        image_processor=ImageProcessor(),
        paddleocr_vl_url="http://localhost:8000/v1",  # Your PaddleOCR-VL server
    )

    # Simulate markdown with image reference (this is what Mineru returns).
    # NOTE(review): the original literal was lost when this page was
    # extracted (it rendered as an empty string). The path below is a
    # placeholder image reference -- confirm against the original file.
    test_markdown = "![](images/formula.png)"

    print("Testing formula extraction...")
    result = service._extract_and_recognize_formulas(test_markdown, test_image)

    print(f"\nOriginal markdown: {test_markdown}")
    print(f"Processed markdown: {result}")

    # Check if the image reference was replaced.
    # NOTE(review): the original condition and warning text were truncated in
    # extraction; this checks for any remaining Markdown image marker.
    if "![" in result:
        print("\n⚠️ WARNING: Image reference was not replaced")
    else:
        print("\n✅ SUCCESS: Image reference was replaced with formula")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Script entry point: print a banner, run the integration check, and
    # report failure through the process exit status.
    banner = "=" * 60
    print(banner)
    print("PaddleOCR-VL Integration Test")
    print(banner)
    print("\nMake sure your PaddleOCR-VL server is running at:")
    print("http://localhost:8000/v1")
    print("\n" + banner + "\n")

    try:
        test_paddleocr_vl_integration()
    except Exception as e:
        print(f"\n❌ Test failed with error: {e}")
        import traceback

        traceback.print_exc()
        # Exit non-zero so shells and CI see the failure; previously the
        # exception was swallowed and the script exited with status 0.
        raise SystemExit(1)
|
|
||||||
@@ -1,62 +0,0 @@
|
|||||||
"""Quick test to verify PaddleOCR-VL connection."""
|
|
||||||
|
|
||||||
from openai import OpenAI
|
|
||||||
import base64
|
|
||||||
import cv2
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
# Endpoint and model under test. Defined once so the printed banner can never
# disagree with what the client actually calls.
SERVER_URL = "http://100.115.184.74:8001/v1"
MODEL_NAME = "PaddleOCR-VL-0.9B"

# Create test image: white canvas with a short formula drawn in black.
test_image = np.full((100, 300, 3), 255, dtype=np.uint8)
cv2.putText(
    test_image, "x^2 = 4", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2
)

# Encode to base64 so the image can be sent inline as a data URL.
success, encoded_image = cv2.imencode(".png", test_image)
if not success:
    print("Failed to encode image")
    # SystemExit instead of the `exit` helper, which is only installed by the
    # `site` module and is intended for interactive use.
    raise SystemExit(1)

image_base64 = base64.b64encode(encoded_image.tobytes()).decode("utf-8")
image_url = f"data:image/png;base64,{image_base64}"

# Test connection.
client = OpenAI(
    api_key="EMPTY",
    base_url=SERVER_URL,
    timeout=3600,
)

print("Testing PaddleOCR-VL connection...")
print(f"Server: {SERVER_URL}")
print(f"Model: {MODEL_NAME}")
print("-" * 60)

try:
    # One user turn carrying the image followed by the task prompt.
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {"url": image_url},
                },
                {
                    "type": "text",
                    "text": "Formula Recognition:",
                },
            ],
        }
    ]

    response = client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
        temperature=0.0,
    )

    print("✅ SUCCESS!")
    print(f"Response: {response.choices[0].message.content}")

except Exception as e:
    print(f"❌ FAILED: {e}")
    import traceback

    traceback.print_exc()
    # Exit non-zero so a failed connection check is visible to the caller;
    # previously the error was printed and the script still exited with 0.
    raise SystemExit(1)
|
|
||||||
Reference in New Issue
Block a user