diff --git a/app/services/ocr_service.py b/app/services/ocr_service.py index 4a69a03..0124ba0 100644 --- a/app/services/ocr_service.py +++ b/app/services/ocr_service.py @@ -150,9 +150,7 @@ def _clean_latex_syntax_spaces(expr: str) -> str: # Strategy: remove spaces before \ and between non-command chars, # but preserve the space after \command when followed by a non-\ char cleaned = re.sub(r"\s+(?=\\)", "", content) # remove space before \cmd - cleaned = re.sub( - r"(? str: + def _recognize_formula_with_paddleocr_vl(self, image: np.ndarray, prompt: str = "Formula Recognition:") -> str: """Recognize formula using PaddleOCR-VL API. Args: @@ -673,9 +669,7 @@ class MineruOCRService(OCRServiceBase): except Exception as e: raise RuntimeError(f"PaddleOCR-VL formula recognition failed: {e}") from e - def _extract_and_recognize_formulas( - self, markdown_content: str, original_image: np.ndarray - ) -> str: + def _extract_and_recognize_formulas(self, markdown_content: str, original_image: np.ndarray) -> str: """Extract image references from markdown and recognize formulas. Args: @@ -757,9 +751,7 @@ class MineruOCRService(OCRServiceBase): markdown_content = result["results"]["image"].get("md_content", "") if "![](images/" in markdown_content: - markdown_content = self._extract_and_recognize_formulas( - markdown_content, original_image - ) + markdown_content = self._extract_and_recognize_formulas(markdown_content, original_image) # Apply postprocessing to fix OCR errors markdown_content = _postprocess_markdown(markdown_content) @@ -789,15 +781,11 @@ class MineruOCRService(OCRServiceBase): # Task-specific prompts (from GLM-OCR SDK config.yaml) _TASK_PROMPTS: dict[str, str] = { - "text": "Text Recognition:", + "text": "Text Recognition. If the content is a formula, please ouput latex code, else output text", "formula": "Formula Recognition:", "table": "Table Recognition:", } -_DEFAULT_PROMPT = ( - "Recognize the text in the image and output in Markdown format. " - "Preserve the original layout (headings/paragraphs/tables/formulas). " - "Do not fabricate content that does not exist in the image." -) +_DEFAULT_PROMPT = "Text Recognition. If the content is a formula, please ouput latex code, else output text" class GLMOCREndToEndService(OCRServiceBase): @@ -921,10 +909,7 @@ class GLMOCREndToEndService(OCRServiceBase): # Parallel OCR calls raw_results: dict[int, str] = {} with ThreadPoolExecutor(max_workers=min(self.max_workers, len(tasks))) as ex: - future_map = { - ex.submit(self._call_vllm, cropped, prompt): idx - for idx, region, cropped, prompt in tasks - } + future_map = {ex.submit(self._call_vllm, cropped, prompt): idx for idx, region, cropped, prompt in tasks} for future in as_completed(future_map): idx = future_map[future] try: