optimize: avoid recognizing formulas as plain text
This commit is contained in:
@@ -148,7 +148,7 @@ class LayoutDetector:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
mixed_recognition = any(region.type == "text" and region.score > 0.3 for region in regions)
|
mixed_recognition = any(region.type == "text" and region.score > 0.85 for region in regions)
|
||||||
|
|
||||||
return LayoutInfo(regions=regions, MixedRecognition=mixed_recognition)
|
return LayoutInfo(regions=regions, MixedRecognition=mixed_recognition)
|
||||||
|
|
||||||
|
|||||||
@@ -781,11 +781,11 @@ class MineruOCRService(OCRServiceBase):
|
|||||||
|
|
||||||
# Task-specific prompts (from GLM-OCR SDK config.yaml)
|
# Task-specific prompts (from GLM-OCR SDK config.yaml)
|
||||||
_TASK_PROMPTS: dict[str, str] = {
|
_TASK_PROMPTS: dict[str, str] = {
|
||||||
"text": "Text Recognition. If the content is a formula, please ouput latex code, else output text",
|
"text": "Text Recognition. If the content is a formula, please ouput display latex code, else output text",
|
||||||
"formula": "Formula Recognition:",
|
"formula": "Formula Recognition:",
|
||||||
"table": "Table Recognition:",
|
"table": "Table Recognition:",
|
||||||
}
|
}
|
||||||
_DEFAULT_PROMPT = "Text Recognition. If the content is a formula, please ouput latex code, else output text"
|
_DEFAULT_PROMPT = "Text Recognition. If the content is a formula, please ouput display latex code, else output text"
|
||||||
|
|
||||||
|
|
||||||
class GLMOCREndToEndService(OCRServiceBase):
|
class GLMOCREndToEndService(OCRServiceBase):
|
||||||
@@ -874,7 +874,7 @@ class GLMOCREndToEndService(OCRServiceBase):
|
|||||||
layout_info.regions.sort(key=lambda r: (r.bbox[1], r.bbox[0]))
|
layout_info.regions.sort(key=lambda r: (r.bbox[1], r.bbox[0]))
|
||||||
|
|
||||||
# 3. OCR: per-region (parallel) or full-image fallback
|
# 3. OCR: per-region (parallel) or full-image fallback
|
||||||
if not layout_info.regions:
|
if not layout_info.regions or (len(layout_info.regions) == 1 and not layout_info.MixedRecognition):
|
||||||
# No layout detected → assume it's a formula, use formula recognition
|
# No layout detected → assume it's a formula, use formula recognition
|
||||||
logger.info("No layout regions detected, treating image as formula")
|
logger.info("No layout regions detected, treating image as formula")
|
||||||
raw_content = self._call_vllm(image, _TASK_PROMPTS["formula"])
|
raw_content = self._call_vllm(image, _TASK_PROMPTS["formula"])
|
||||||
|
|||||||
Reference in New Issue
Block a user