99 lines
3.2 KiB
Python
99 lines
3.2 KiB
Python
|
|
import numpy as np
|
||
|
|
import pytest
|
||
|
|
from fastapi import FastAPI
|
||
|
|
from fastapi.testclient import TestClient
|
||
|
|
|
||
|
|
from app.api.v1.endpoints.image import router
|
||
|
|
from app.core.dependencies import get_glmocr_endtoend_service, get_image_processor
|
||
|
|
|
||
|
|
|
||
|
|
class _FakeImageProcessor:
    """Stub image processor used by the endpoint tests; it ignores its inputs."""

    def preprocess(self, image_url=None, image_base64=None):
        """Return an all-zero 8x8x3 ``uint8`` array, whatever arguments are given."""
        blank_shape = (8, 8, 3)
        return np.zeros(blank_shape, dtype=np.uint8)
|
||
|
|
|
||
|
|
|
||
|
|
class _FakeOCRService:
    """Test double for the OCR service: returns a canned payload or raises.

    Args:
        result: Payload returned by ``recognize``. When ``None``, a default
            payload with ``markdown``, ``latex``, ``mathml`` and ``mml`` keys
            is used. Falsy-but-explicit payloads such as ``{}`` are kept as
            given.
        error: Exception instance that ``recognize`` raises instead of
            returning a payload. ``None`` means no error.
    """

    def __init__(self, result=None, error=None):
        # Compare against None rather than relying on truthiness, so that an
        # explicitly supplied empty payload is not silently swapped for the
        # default one.
        if result is None:
            result = {"markdown": "md", "latex": "tex", "mathml": "mml", "mml": "xml"}
        self._result = result
        self._error = error

    def recognize(self, image):
        """Raise the configured error if there is one, else return the payload.

        ``image`` is not used.
        """
        if self._error is not None:
            raise self._error
        return self._result
|
||
|
|
|
||
|
|
|
||
|
|
def _build_client(image_processor=None, ocr_service=None):
    """Return a ``TestClient`` for a fresh app that mounts only the image router.

    Both dependencies are overridden: each override hands back the supplied
    object when it is truthy, otherwise a newly created fake.
    """

    def _provide_image_processor():
        # Evaluated on every dependency resolution, like the lambda it replaces.
        return image_processor or _FakeImageProcessor()

    def _provide_ocr_service():
        return ocr_service or _FakeOCRService()

    test_app = FastAPI()
    test_app.include_router(router)

    overrides = test_app.dependency_overrides
    overrides[get_image_processor] = _provide_image_processor
    overrides[get_glmocr_endtoend_service] = _provide_ocr_service

    return TestClient(test_app)
|
||
|
|
|
||
|
|
|
||
|
|
def test_image_endpoint_requires_exactly_one_of_image_url_or_image_base64():
    """Sending neither image field, or both at once, is answered with 422."""
    client = _build_client()
    request_bodies = {
        "missing": {},
        "both": {"image_url": "https://example.com/a.png", "image_base64": "abc"},
    }

    # Issue both requests first, then check the outcomes.
    responses = {
        label: client.post("/ocr", json=body)
        for label, body in request_bodies.items()
    }

    assert responses["missing"].status_code == 422
    assert responses["both"].status_code == 422
|
||
|
|
|
||
|
|
|
||
|
|
def test_image_endpoint_returns_503_for_runtime_error():
    """A ``RuntimeError`` raised by the OCR service yields 503 with its message."""
    failing_service = _FakeOCRService(error=RuntimeError("backend unavailable"))
    client = _build_client(ocr_service=failing_service)
    request_body = {"image_url": "https://example.com/a.png"}

    response = client.post("/ocr", json=request_body)

    assert response.status_code == 503
    body = response.json()
    assert body["detail"] == "backend unavailable"
|
||
|
|
|
||
|
|
|
||
|
|
def test_image_endpoint_returns_500_for_unexpected_error():
    """A ``ValueError`` raised by the OCR service yields 500 with a generic detail."""
    exploding_service = _FakeOCRService(error=ValueError("boom"))
    client = _build_client(ocr_service=exploding_service)
    request_body = {"image_url": "https://example.com/a.png"}

    response = client.post("/ocr", json=request_body)

    assert response.status_code == 500
    body = response.json()
    # The original exception text ("boom") is not what the test expects back.
    assert body["detail"] == "Internal server error"
|
||
|
|
|
||
|
|
|
||
|
|
def test_image_endpoint_returns_ocr_payload():
    """A base64 request against the default fakes returns the full OCR payload."""
    expected_payload = {
        "markdown": "md",
        "latex": "tex",
        "mathml": "mml",
        "mml": "xml",
        "layout_info": {"regions": [], "MixedRecognition": False},
        "recognition_mode": "",
    }
    client = _build_client()

    response = client.post("/ocr", json={"image_base64": "ZmFrZQ=="})

    assert response.status_code == 200
    assert response.json() == expected_payload
|
||
|
|
|
||
|
|
|
||
|
|
def test_image_endpoint_real_e2e_with_env_services():
    """Exercise the real application end to end against a remote image.

    The image URL is taken from the ``IMAGE_OCR_E2E_URL`` environment variable
    when it is set and non-empty; otherwise the built-in URL below is used.
    If the chosen URL has an all-digit ``Expires`` query parameter whose value
    is already in the past, the test is skipped instead of failing.
    """
    import os
    import time
    from urllib.parse import parse_qs, urlsplit

    default_image_url = (
        "https://static.texpixel.com/formula/012dab3e-fb31-4ecd-90fc-6957458ee309.png"
        "?Expires=1773049821&OSSAccessKeyId=TMP.3KnrJUz7aXHoU9rLTAih4MAyPGd9zyGRHiqg9AyH6TY6NKtzqT2yr4qo7Vwf8fMRFCBrWXiCFrbBwC3vn7U6mspV2NeU1K"
        "&Signature=oynhP0OLIgFI0Sv3z2CWeHPT2Ck%3D"
    )
    image_url = os.environ.get("IMAGE_OCR_E2E_URL") or default_image_url

    # NOTE(review): the default looks like a pre-signed URL whose `Expires`
    # value is a Unix timestamp -- confirm. Once it has passed, the download
    # would fail for reasons unrelated to the code under test, so skip rather
    # than report a spurious failure.
    expires = parse_qs(urlsplit(image_url).query).get("Expires", [""])[0]
    if expires.isdigit() and int(expires) < time.time():
        pytest.skip(
            "image URL has expired; set IMAGE_OCR_E2E_URL to a fresh one"
        )

    # Imported only after the skip check so a skipped run does not load the app.
    from app.main import app

    with TestClient(app) as client:
        response = client.post(
            "/doc_process/v1/image/ocr",
            json={"image_url": image_url},
            headers={"x-request-id": "test-e2e"},
        )

    assert response.status_code == 200, response.text
    payload = response.json()
    # Check the key set first so a missing key fails with a readable assertion
    # rather than a KeyError on the lookups below.
    assert set(payload) >= {"markdown", "latex", "mathml", "mml"}
    assert isinstance(payload["markdown"], str)
    assert payload["markdown"].strip()
|