From fd91819af0ba1685d96336a48a6efa97a6d5c90a Mon Sep 17 00:00:00 2001 From: liuyuanchuang Date: Wed, 25 Feb 2026 09:52:45 +0800 Subject: [PATCH] feat: no padding image --- app/core/config.py | 6 +- app/services/ocr_service.py | 2 +- diagnose_latex_rendering.py | 202 ------------------------------------ 3 files changed, 4 insertions(+), 206 deletions(-) delete mode 100644 diagnose_latex_rendering.py diff --git a/app/core/config.py b/app/core/config.py index 07bb04a..a31e309 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -45,15 +45,15 @@ class Settings(BaseSettings): return f"http://{self.base_host}:8002/v1" # padding ratio - is_padding: bool = True - padding_ratio: float = 0.15 + is_padding: bool = False + padding_ratio: float = 0.1 # Model Paths pp_doclayout_model_dir: Optional[str] = "/home/yoge/.cache/modelscope/hub/models/PaddlePaddle/PP-DocLayoutV3" # Image Processing max_image_size_mb: int = 10 - image_padding_ratio: float = 0.15 # 15% on each side = 30% total expansion + image_padding_ratio: float = 0.1 # 10% on each side = 20% total expansion device: torch.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # cuda:0 or cpu diff --git a/app/services/ocr_service.py b/app/services/ocr_service.py index 28e571a..1465c93 100644 --- a/app/services/ocr_service.py +++ b/app/services/ocr_service.py @@ -668,7 +668,7 @@ class MineruOCRService(OCRServiceBase): if formula_text.startswith(r"\[") or formula_text.startswith(r"\("): formula_text = formula_text.replace(r"\[", "$$").replace(r"\(", "$$") formula_text = formula_text.replace(r"\]", "$$").replace(r"\)", "$$") - else: + elif not formula_text.startswith("$$") and not formula_text.startswith("$"): formula_text = f"$${formula_text}$$" return formula_text diff --git a/diagnose_latex_rendering.py b/diagnose_latex_rendering.py deleted file mode 100644 index 07e7700..0000000 --- a/diagnose_latex_rendering.py +++ /dev/null @@ -1,202 +0,0 @@ -"""Diagnostic tool for LaTeX rendering issues. - -Usage: - python diagnose_latex_rendering.py "\\lambda + \\vdots" - python diagnose_latex_rendering.py "$\\lambda_1, \\lambda_2, \\vdots, \\lambda_n$" -""" - -import sys -import re -from typing import Dict, Any - -# Simulate the OCR postprocessing pipeline -_COMMANDS_NEED_SPACE = { - "cdot", - "times", - "div", - "pm", - "mp", - "int", - "iint", - "iiint", - "oint", - "sum", - "prod", - "lim", - "sin", - "cos", - "tan", - "cot", - "sec", - "csc", - "log", - "ln", - "exp", - "partial", - "nabla", -} - -_COMMAND_TOKEN_PATTERN = re.compile(r"\\[a-zA-Z]+") -_DIFFERENTIAL_UPPER_PATTERN = re.compile(r"(? str: - """Split OCR-glued LaTeX command token by whitelist longest-prefix.""" - if not token.startswith("\\"): - return token - - body = token[1:] - if len(body) < 2: - return token - - best = None - for i in range(1, len(body)): - prefix = body[:i] - if prefix in _COMMANDS_NEED_SPACE: - best = prefix - - if not best: - return token - - suffix = body[len(best) :] - if not suffix: - return token - - return f"\\{best} {suffix}" - - -def _fix_ocr_number_errors(expr: str) -> str: - """Fix common OCR errors in LaTeX math expressions.""" - expr = re.sub(r"(\d)\s+(\d+)\.\s*(\d+)", r"\1\2.\3", expr) - expr = re.sub(r"(\d+)\.\s+(\d+)", r"\1.\2", expr) - expr = re.sub(r"(\d)\s+(\d)(?=\s*[,\)]|$)", r"\1\2", expr) - expr = re.sub(r"(\d)\s+(\d)(?=\s*\.)", r"\1\2", expr) - return expr - - -def _postprocess_math(expr: str) -> str: - """Postprocess a *math* expression (already inside $...$ or $$...$$).""" - original = expr - - # Stage 0: fix OCR number errors - expr = _fix_ocr_number_errors(expr) - stage0 = expr - - # Stage 1: split glued command tokens - expr = _COMMAND_TOKEN_PATTERN.sub(lambda m: _split_glued_command_token(m.group(0)), expr) - stage1 = expr - - # Stage 2: normalize differentials - expr = _DIFFERENTIAL_UPPER_PATTERN.sub(r"\\mathrm{d} \1", expr) - expr = _DIFFERENTIAL_LOWER_PATTERN.sub(r"d \1", expr) - stage2 = expr - - return {"original": original, "after_stage0_numbers": stage0, "after_stage1_commands": stage1, "after_stage2_differentials": stage2, "final": expr} - - -def _postprocess_markdown(markdown_content: str) -> Dict[str, Any]: - """Apply LaTeX postprocessing to markdown segments.""" - if not markdown_content: - return {"original": markdown_content, "final": markdown_content, "segments": []} - - segments = [] - - def _fix_segment(m: re.Match) -> str: - seg = m.group(0) - inner = None - - if seg.startswith("$$") and seg.endswith("$$"): - inner = seg[2:-2] - result = _postprocess_math(inner) - segments.append({"type": "display", "original": seg, "processing": result}) - return f"$${result['final']}$$" - elif seg.startswith("$") and seg.endswith("$"): - inner = seg[1:-1] - result = _postprocess_math(inner) - segments.append({"type": "inline", "original": seg, "processing": result}) - return f"${result['final']}$" - - return seg - - final = _MATH_SEGMENT_PATTERN.sub(_fix_segment, markdown_content) - - return {"original": markdown_content, "final": final, "segments": segments, "changed": markdown_content != final} - - -def diagnose(latex_input: str) -> None: - """Run diagnostic on LaTeX input.""" - print("=" * 80) - print("LaTeX Rendering Diagnostic Tool") - print("=" * 80) - print(f"\nInput: {latex_input}") - print("-" * 80) - - # Check if input contains problematic characters - print("\n1. Character Detection:") - if "\\lambda" in latex_input: - print(" ✅ Found \\lambda") - if "\\vdots" in latex_input: - print(" ✅ Found \\vdots") - if "\\cdots" in latex_input: - print(" ℹ️ Found \\cdots (similar to \\vdots)") - if "\\ldots" in latex_input: - print(" ℹ️ Found \\ldots (similar to \\vdots)") - - # Run postprocessing pipeline - print("\n2. Postprocessing Pipeline:") - result = _postprocess_markdown(latex_input) - - if result["segments"]: - for i, seg in enumerate(result["segments"], 1): - print(f"\n Segment {i} ({seg['type']}):") - print(f" Original: {seg['original']}") - - proc = seg["processing"] - - # Check each stage for changes - if proc["original"] != proc["after_stage0_numbers"]: - print(f" ⚠️ Stage 0 (numbers): {proc['after_stage0_numbers']}") - else: - print(f" ✅ Stage 0 (numbers): No change") - - if proc["after_stage0_numbers"] != proc["after_stage1_commands"]: - print(f" ⚠️ Stage 1 (commands): {proc['after_stage1_commands']}") - else: - print(f" ✅ Stage 1 (commands): No change") - - if proc["after_stage1_commands"] != proc["after_stage2_differentials"]: - print(f" ⚠️ Stage 2 (differentials): {proc['after_stage2_differentials']}") - else: - print(f" ✅ Stage 2 (differentials): No change") - - print(f" Final: {proc['final']}") - else: - print(" ℹ️ No math segments found (not wrapped in $ or $$)") - - print("\n3. Final Output:") - print(f" {result['final']}") - - if result["changed"]: - print("\n ⚠️ WARNING: The input was modified during postprocessing!") - print(" This could be the cause of rendering issues.") - else: - print("\n ✅ No changes made during postprocessing.") - print(" If rendering fails, the issue is likely in:") - print(" - Pandoc conversion (LaTeX → MathML)") - print(" - Frontend rendering (MathJax/KaTeX)") - - print("\n" + "=" * 80) - - -if __name__ == "__main__": - if len(sys.argv) < 2: - print('Usage: python diagnose_latex_rendering.py ""') - print("\nExamples:") - print(' python diagnose_latex_rendering.py "$\\lambda + \\vdots$"') - print(' python diagnose_latex_rendering.py "$$\\lambda_1, \\lambda_2, \\vdots, \\lambda_n$$"') - sys.exit(1) - - latex_input = sys.argv[1] - diagnose(latex_input)