chore: optimize prompt

This commit is contained in:
2026-03-10 21:36:35 +08:00
parent d98fa7237c
commit a9d3a35dd7

View File

@@ -150,9 +150,7 @@ def _clean_latex_syntax_spaces(expr: str) -> str:
# Strategy: remove spaces before \ and between non-command chars,
# but preserve the space after \command when followed by a non-\ char
cleaned = re.sub(r"\s+(?=\\)", "", content) # remove space before \cmd
cleaned = re.sub(
r"(?<!\\)(?<![a-zA-Z])\s+", "", cleaned
) # remove space after non-letter non-\
cleaned = re.sub(r"(?<!\\)(?<![a-zA-Z])\s+", "", cleaned) # remove space after non-letter non-\
return f"{operator}{{{cleaned}}}"
# Match _{ ... } or ^{ ... }
@@ -630,9 +628,7 @@ class MineruOCRService(OCRServiceBase):
self.glm_ocr_url = glm_ocr_url
self.openai_client = OpenAI(api_key="EMPTY", base_url=glm_ocr_url, timeout=3600)
def _recognize_formula_with_paddleocr_vl(
self, image: np.ndarray, prompt: str = "Formula Recognition:"
) -> str:
def _recognize_formula_with_paddleocr_vl(self, image: np.ndarray, prompt: str = "Formula Recognition:") -> str:
"""Recognize formula using PaddleOCR-VL API.
Args:
@@ -673,9 +669,7 @@ class MineruOCRService(OCRServiceBase):
except Exception as e:
raise RuntimeError(f"PaddleOCR-VL formula recognition failed: {e}") from e
def _extract_and_recognize_formulas(
self, markdown_content: str, original_image: np.ndarray
) -> str:
def _extract_and_recognize_formulas(self, markdown_content: str, original_image: np.ndarray) -> str:
"""Extract image references from markdown and recognize formulas.
Args:
@@ -757,9 +751,7 @@ class MineruOCRService(OCRServiceBase):
markdown_content = result["results"]["image"].get("md_content", "")
if "![](images/" in markdown_content:
markdown_content = self._extract_and_recognize_formulas(
markdown_content, original_image
)
markdown_content = self._extract_and_recognize_formulas(markdown_content, original_image)
# Apply postprocessing to fix OCR errors
markdown_content = _postprocess_markdown(markdown_content)
@@ -789,15 +781,11 @@ class MineruOCRService(OCRServiceBase):
# Task-specific prompts (from GLM-OCR SDK config.yaml)
# Prompt sent to the OCR model for each task type, keyed by task name.
# Each key appears exactly once: a repeated key in a dict literal is silently
# overridden by the last occurrence, which hides the stale entry from readers.
# NOTE(review): the "text" prompt asks for LaTeX when the region is a formula;
# it differs from the short "<Task> Recognition:" form used by the other tasks —
# confirm the model handles this free-form instruction as intended.
_TASK_PROMPTS: dict[str, str] = {
    "text": "Text Recognition. If the content is a formula, please output latex code, else output text",
    "formula": "Formula Recognition:",
    "table": "Table Recognition:",
}
# Fallback prompt, presumably used when a region's task type has no dedicated
# prompt — verify against the call sites.
# Assigned exactly once: an earlier assignment that is immediately overwritten
# is dead code and misleads readers about which prompt is actually sent.
_DEFAULT_PROMPT = (
    "Text Recognition. If the content is a formula, "
    "please output latex code, else output text"
)
class GLMOCREndToEndService(OCRServiceBase):
@@ -921,10 +909,7 @@ class GLMOCREndToEndService(OCRServiceBase):
# Parallel OCR calls
raw_results: dict[int, str] = {}
with ThreadPoolExecutor(max_workers=min(self.max_workers, len(tasks))) as ex:
future_map = {
ex.submit(self._call_vllm, cropped, prompt): idx
for idx, region, cropped, prompt in tasks
}
future_map = {ex.submit(self._call_vllm, cropped, prompt): idx for idx, region, cropped, prompt in tasks}
for future in as_completed(future_map):
idx = future_map[future]
try: