54 lines
1.9 KiB
Python
54 lines
1.9 KiB
Python
|
|
"""Test script for PaddleOCR-VL integration in MineruOCRService."""
|
||
|
|
|
||
|
|
import cv2
|
||
|
|
import numpy as np
|
||
|
|
from app.services.ocr_service import MineruOCRService
|
||
|
|
from app.services.converter import Converter
|
||
|
|
from app.services.image_processor import ImageProcessor
|
||
|
|
|
||
|
|
def test_paddleocr_vl_integration():
    """Test that PaddleOCR-VL is called when image references are found.

    Draws the text ``x^2 + y^2 = 1`` on a white 300x100 image, builds a
    ``MineruOCRService`` pointed at locally running endpoints, and passes
    markdown containing an image reference through
    ``service._extract_and_recognize_formulas``. The original and processed
    markdown are printed, followed by a verdict line.

    This is a manual integration check: it needs the configured servers to
    be reachable and reports its outcome via ``print`` rather than asserts.
    """
    # Create a simple test image (white background with black text)
    test_image = np.ones((100, 300, 3), dtype=np.uint8) * 255
    cv2.putText(test_image, "x^2 + y^2 = 1", (50, 50),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)

    # Initialize service
    # NOTE(review): both URLs use port 8000 on the local host -- confirm the
    # Mineru and PaddleOCR-VL servers are really meant to share it.
    service = MineruOCRService(
        api_url="http://127.0.0.1:8000/file_parse",
        converter=Converter(),
        image_processor=ImageProcessor(),
        paddleocr_vl_url="http://localhost:8000/v1",  # Your PaddleOCR-VL server
    )

    # Simulate markdown with image reference (this is what Mineru returns).
    # NOTE(review): the literal here was empty, which leaves nothing for the
    # formula step to replace; the path below is a placeholder -- confirm the
    # exact reference format the service expects.
    test_markdown = "![](images/formula.png)"

    print("Testing formula extraction...")
    result = service._extract_and_recognize_formulas(test_markdown, test_image)

    print(f"\nOriginal markdown: {test_markdown}")
    print(f"Processed markdown: {result}")

    # Check if the image reference was replaced.
    # NOTE(review): assumes `result` is a string and that any surviving
    # markdown image marker ("![") means the replacement did not happen.
    if "![" in result:
        print("\n❌ FAILED: Image reference was not replaced")
    else:
        print("\n✅ SUCCESS: Image reference was replaced with formula")
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Banner: tell the operator which server must be up before the test runs.
    rule = "=" * 60
    print(rule)
    print("PaddleOCR-VL Integration Test")
    print(rule)
    print("\nMake sure your PaddleOCR-VL server is running at:")
    print("http://localhost:8000/v1")
    print("\n" + rule + "\n")

    # Top-level boundary: report any failure with its full traceback instead
    # of letting the script die with an unformatted error.
    try:
        test_paddleocr_vl_integration()
    except Exception as err:
        import traceback

        print(f"\n❌ Test failed with error: {err}")
        traceback.print_exc()
|