chore: optimize prompt
This commit is contained in:
@@ -150,9 +150,7 @@ def _clean_latex_syntax_spaces(expr: str) -> str:
|
||||
# Strategy: remove spaces before \ and between non-command chars,
|
||||
# but preserve the space after \command when followed by a non-\ char
|
||||
cleaned = re.sub(r"\s+(?=\\)", "", content) # remove space before \cmd
|
||||
cleaned = re.sub(
|
||||
r"(?<!\\)(?<![a-zA-Z])\s+", "", cleaned
|
||||
) # remove space after non-letter non-\
|
||||
cleaned = re.sub(r"(?<!\\)(?<![a-zA-Z])\s+", "", cleaned) # remove space after non-letter non-\
|
||||
return f"{operator}{{{cleaned}}}"
|
||||
|
||||
# Match _{ ... } or ^{ ... }
|
||||
@@ -630,9 +628,7 @@ class MineruOCRService(OCRServiceBase):
|
||||
self.glm_ocr_url = glm_ocr_url
|
||||
self.openai_client = OpenAI(api_key="EMPTY", base_url=glm_ocr_url, timeout=3600)
|
||||
|
||||
def _recognize_formula_with_paddleocr_vl(
|
||||
self, image: np.ndarray, prompt: str = "Formula Recognition:"
|
||||
) -> str:
|
||||
def _recognize_formula_with_paddleocr_vl(self, image: np.ndarray, prompt: str = "Formula Recognition:") -> str:
|
||||
"""Recognize formula using PaddleOCR-VL API.
|
||||
|
||||
Args:
|
||||
@@ -673,9 +669,7 @@ class MineruOCRService(OCRServiceBase):
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"PaddleOCR-VL formula recognition failed: {e}") from e
|
||||
|
||||
def _extract_and_recognize_formulas(
|
||||
self, markdown_content: str, original_image: np.ndarray
|
||||
) -> str:
|
||||
def _extract_and_recognize_formulas(self, markdown_content: str, original_image: np.ndarray) -> str:
|
||||
"""Extract image references from markdown and recognize formulas.
|
||||
|
||||
Args:
|
||||
@@ -757,9 +751,7 @@ class MineruOCRService(OCRServiceBase):
|
||||
markdown_content = result["results"]["image"].get("md_content", "")
|
||||
|
||||
if "
|
||||
markdown_content = self._extract_and_recognize_formulas(markdown_content, original_image)
|
||||
|
||||
# Apply postprocessing to fix OCR errors
|
||||
markdown_content = _postprocess_markdown(markdown_content)
|
||||
@@ -789,15 +781,11 @@ class MineruOCRService(OCRServiceBase):
|
||||
|
||||
# Task-specific prompts (from GLM-OCR SDK config.yaml)
|
||||
_TASK_PROMPTS: dict[str, str] = {
|
||||
"text": "Text Recognition:",
|
||||
"text": "Text Recognition. If the content is a formula, please ouput latex code, else output text",
|
||||
"formula": "Formula Recognition:",
|
||||
"table": "Table Recognition:",
|
||||
}
|
||||
_DEFAULT_PROMPT = (
|
||||
"Recognize the text in the image and output in Markdown format. "
|
||||
"Preserve the original layout (headings/paragraphs/tables/formulas). "
|
||||
"Do not fabricate content that does not exist in the image."
|
||||
)
|
||||
_DEFAULT_PROMPT = "Text Recognition. If the content is a formula, please ouput latex code, else output text"
|
||||
|
||||
|
||||
class GLMOCREndToEndService(OCRServiceBase):
|
||||
@@ -921,10 +909,7 @@ class GLMOCREndToEndService(OCRServiceBase):
|
||||
# Parallel OCR calls
|
||||
raw_results: dict[int, str] = {}
|
||||
with ThreadPoolExecutor(max_workers=min(self.max_workers, len(tasks))) as ex:
|
||||
future_map = {
|
||||
ex.submit(self._call_vllm, cropped, prompt): idx
|
||||
for idx, region, cropped, prompt in tasks
|
||||
}
|
||||
future_map = {ex.submit(self._call_vllm, cropped, prompt): idx for idx, region, cropped, prompt in tasks}
|
||||
for future in as_completed(future_map):
|
||||
idx = future_map[future]
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user