fix: glm max tokens
This commit is contained in:
@@ -532,50 +532,51 @@ class GLMOCRService(OCRServiceBase):
|
||||
|
||||
Returns:
|
||||
Dict with 'latex', 'markdown', 'mathml', 'mml' keys.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If recognition fails (preserves original exception for fallback handling).
|
||||
"""
|
||||
try:
|
||||
# Add padding to image
|
||||
padded_image = self.image_processor.add_padding(image)
|
||||
# Add padding to image
|
||||
padded_image = self.image_processor.add_padding(image)
|
||||
|
||||
# Encode image to base64
|
||||
success, encoded_image = cv2.imencode(".png", padded_image)
|
||||
if not success:
|
||||
raise RuntimeError("Failed to encode image")
|
||||
# Encode image to base64
|
||||
success, encoded_image = cv2.imencode(".png", padded_image)
|
||||
if not success:
|
||||
raise RuntimeError("Failed to encode image")
|
||||
|
||||
image_base64 = base64.b64encode(encoded_image.tobytes()).decode("utf-8")
|
||||
image_url = f"data:image/png;base64,{image_base64}"
|
||||
image_base64 = base64.b64encode(encoded_image.tobytes()).decode("utf-8")
|
||||
image_url = f"data:image/png;base64,{image_base64}"
|
||||
|
||||
# Call OpenAI-compatible API with formula recognition prompt
|
||||
prompt = "Formula Recognition:"
|
||||
messages = [{"role": "user", "content": [{"type": "image_url", "image_url": {"url": image_url}}, {"type": "text", "text": prompt}]}]
|
||||
# Call OpenAI-compatible API with formula recognition prompt
|
||||
prompt = "Formula Recognition:"
|
||||
messages = [{"role": "user", "content": [{"type": "image_url", "image_url": {"url": image_url}}, {"type": "text", "text": prompt}]}]
|
||||
|
||||
response = self.openai_client.chat.completions.create(
|
||||
model="glm-ocr",
|
||||
messages=messages,
|
||||
temperature=0.0,
|
||||
)
|
||||
# Don't catch exceptions here - let them propagate for fallback handling
|
||||
response = self.openai_client.chat.completions.create(
|
||||
model="glm-ocr",
|
||||
messages=messages,
|
||||
temperature=0.0,
|
||||
)
|
||||
|
||||
markdown_content = response.choices[0].message.content
|
||||
markdown_content = response.choices[0].message.content
|
||||
|
||||
# Process LaTeX delimiters
|
||||
if markdown_content.startswith(r"\[") or markdown_content.startswith(r"\("):
|
||||
markdown_content = markdown_content.replace(r"\[", "$$").replace(r"\(", "$$")
|
||||
markdown_content = markdown_content.replace(r"\]", "$$").replace(r"\)", "$$")
|
||||
elif not markdown_content.startswith("$$") and not markdown_content.startswith("$"):
|
||||
markdown_content = f"$${markdown_content}$$"
|
||||
# Process LaTeX delimiters
|
||||
if markdown_content.startswith(r"\[") or markdown_content.startswith(r"\("):
|
||||
markdown_content = markdown_content.replace(r"\[", "$$").replace(r"\(", "$$")
|
||||
markdown_content = markdown_content.replace(r"\]", "$$").replace(r"\)", "$$")
|
||||
elif not markdown_content.startswith("$$") and not markdown_content.startswith("$"):
|
||||
markdown_content = f"$${markdown_content}$$"
|
||||
|
||||
# Apply postprocessing
|
||||
markdown_content = _postprocess_markdown(markdown_content)
|
||||
convert_result = self.converter.convert_to_formats(markdown_content)
|
||||
# Apply postprocessing
|
||||
markdown_content = _postprocess_markdown(markdown_content)
|
||||
convert_result = self.converter.convert_to_formats(markdown_content)
|
||||
|
||||
return {
|
||||
"latex": convert_result.latex,
|
||||
"mathml": convert_result.mathml,
|
||||
"mml": convert_result.mml,
|
||||
"markdown": markdown_content,
|
||||
}
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"GLM formula recognition failed: {e}") from e
|
||||
return {
|
||||
"latex": convert_result.latex,
|
||||
"mathml": convert_result.mathml,
|
||||
"mml": convert_result.mml,
|
||||
"markdown": markdown_content,
|
||||
}
|
||||
|
||||
def recognize(self, image: np.ndarray) -> dict:
|
||||
"""Recognize content using GLM-4V.
|
||||
|
||||
Reference in New Issue
Block a user