chore: optimize prompt
This commit is contained in:
@@ -150,9 +150,7 @@ def _clean_latex_syntax_spaces(expr: str) -> str:
|
|||||||
# Strategy: remove spaces before \ and between non-command chars,
|
# Strategy: remove spaces before \ and between non-command chars,
|
||||||
# but preserve the space after \command when followed by a non-\ char
|
# but preserve the space after \command when followed by a non-\ char
|
||||||
cleaned = re.sub(r"\s+(?=\\)", "", content) # remove space before \cmd
|
cleaned = re.sub(r"\s+(?=\\)", "", content) # remove space before \cmd
|
||||||
cleaned = re.sub(
|
cleaned = re.sub(r"(?<!\\)(?<![a-zA-Z])\s+", "", cleaned) # remove space after non-letter non-\
|
||||||
r"(?<!\\)(?<![a-zA-Z])\s+", "", cleaned
|
|
||||||
) # remove space after non-letter non-\
|
|
||||||
return f"{operator}{{{cleaned}}}"
|
return f"{operator}{{{cleaned}}}"
|
||||||
|
|
||||||
# Match _{ ... } or ^{ ... }
|
# Match _{ ... } or ^{ ... }
|
||||||
@@ -630,9 +628,7 @@ class MineruOCRService(OCRServiceBase):
|
|||||||
self.glm_ocr_url = glm_ocr_url
|
self.glm_ocr_url = glm_ocr_url
|
||||||
self.openai_client = OpenAI(api_key="EMPTY", base_url=glm_ocr_url, timeout=3600)
|
self.openai_client = OpenAI(api_key="EMPTY", base_url=glm_ocr_url, timeout=3600)
|
||||||
|
|
||||||
def _recognize_formula_with_paddleocr_vl(
|
def _recognize_formula_with_paddleocr_vl(self, image: np.ndarray, prompt: str = "Formula Recognition:") -> str:
|
||||||
self, image: np.ndarray, prompt: str = "Formula Recognition:"
|
|
||||||
) -> str:
|
|
||||||
"""Recognize formula using PaddleOCR-VL API.
|
"""Recognize formula using PaddleOCR-VL API.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@@ -673,9 +669,7 @@ class MineruOCRService(OCRServiceBase):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise RuntimeError(f"PaddleOCR-VL formula recognition failed: {e}") from e
|
raise RuntimeError(f"PaddleOCR-VL formula recognition failed: {e}") from e
|
||||||
|
|
||||||
def _extract_and_recognize_formulas(
|
def _extract_and_recognize_formulas(self, markdown_content: str, original_image: np.ndarray) -> str:
|
||||||
self, markdown_content: str, original_image: np.ndarray
|
|
||||||
) -> str:
|
|
||||||
"""Extract image references from markdown and recognize formulas.
|
"""Extract image references from markdown and recognize formulas.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@@ -757,9 +751,7 @@ class MineruOCRService(OCRServiceBase):
|
|||||||
markdown_content = result["results"]["image"].get("md_content", "")
|
markdown_content = result["results"]["image"].get("md_content", "")
|
||||||
|
|
||||||
if "
|
||||||
markdown_content, original_image
|
|
||||||
)
|
|
||||||
|
|
||||||
# Apply postprocessing to fix OCR errors
|
# Apply postprocessing to fix OCR errors
|
||||||
markdown_content = _postprocess_markdown(markdown_content)
|
markdown_content = _postprocess_markdown(markdown_content)
|
||||||
@@ -789,15 +781,11 @@ class MineruOCRService(OCRServiceBase):
|
|||||||
|
|
||||||
# Task-specific prompts (from GLM-OCR SDK config.yaml)
|
# Task-specific prompts (from GLM-OCR SDK config.yaml)
|
||||||
_TASK_PROMPTS: dict[str, str] = {
|
_TASK_PROMPTS: dict[str, str] = {
|
||||||
"text": "Text Recognition:",
|
"text": "Text Recognition. If the content is a formula, please ouput latex code, else output text",
|
||||||
"formula": "Formula Recognition:",
|
"formula": "Formula Recognition:",
|
||||||
"table": "Table Recognition:",
|
"table": "Table Recognition:",
|
||||||
}
|
}
|
||||||
_DEFAULT_PROMPT = (
|
_DEFAULT_PROMPT = "Text Recognition. If the content is a formula, please ouput latex code, else output text"
|
||||||
"Recognize the text in the image and output in Markdown format. "
|
|
||||||
"Preserve the original layout (headings/paragraphs/tables/formulas). "
|
|
||||||
"Do not fabricate content that does not exist in the image."
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class GLMOCREndToEndService(OCRServiceBase):
|
class GLMOCREndToEndService(OCRServiceBase):
|
||||||
@@ -921,10 +909,7 @@ class GLMOCREndToEndService(OCRServiceBase):
|
|||||||
# Parallel OCR calls
|
# Parallel OCR calls
|
||||||
raw_results: dict[int, str] = {}
|
raw_results: dict[int, str] = {}
|
||||||
with ThreadPoolExecutor(max_workers=min(self.max_workers, len(tasks))) as ex:
|
with ThreadPoolExecutor(max_workers=min(self.max_workers, len(tasks))) as ex:
|
||||||
future_map = {
|
future_map = {ex.submit(self._call_vllm, cropped, prompt): idx for idx, region, cropped, prompt in tasks}
|
||||||
ex.submit(self._call_vllm, cropped, prompt): idx
|
|
||||||
for idx, region, cropped, prompt in tasks
|
|
||||||
}
|
|
||||||
for future in as_completed(future_map):
|
for future in as_completed(future_map):
|
||||||
idx = future_map[future]
|
idx = future_map[future]
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user