fix: image as element
This commit is contained in:
@@ -637,7 +637,7 @@ class MineruOCRService(OCRServiceBase):
|
||||
messages = [{"role": "user", "content": [{"type": "image_url", "image_url": {"url": image_url}}, {"type": "text", "text": prompt}]}]
|
||||
|
||||
response = self.openai_client.chat.completions.create(
|
||||
model="PaddleOCR-VL-0.9B",
|
||||
model="glm-ocr",
|
||||
messages=messages,
|
||||
temperature=0.0,
|
||||
)
|
||||
@@ -714,6 +714,9 @@ class MineruOCRService(OCRServiceBase):
|
||||
if "results" in result and "image" in result["results"]:
|
||||
markdown_content = result["results"]["image"].get("md_content", "")
|
||||
|
||||
if "
|
||||
|
||||
# Apply postprocessing to fix OCR errors
|
||||
markdown_content = _postprocess_markdown(markdown_content)
|
||||
|
||||
|
||||
100
create_table.py
Normal file
100
create_table.py
Normal file
@@ -0,0 +1,100 @@
|
||||
"""Build 数据表.xlsx with five categories laid out side by side.

Each category occupies two adjacent columns: the subtraction expression as
text and its recorded result as a number. Categories have different lengths,
so shorter ones simply leave their lower cells empty.
"""
from openpyxl import Workbook
from openpyxl.styles import Font, Alignment, Border, Side
from openpyxl.utils import get_column_letter

wb = Workbook()
sheet = wb.active
sheet.title = "数据表"

# Headers: one (category, result) pair per data table below, in the same order.
headers = ["类别:电动", "结果", "类别:电工", "结果", "类别:黑板", "结果", "类别:小", "结果", "类别:大", "结果"]

for col, header in enumerate(headers, start=1):
    cell = sheet.cell(row=1, column=col, value=header)
    cell.font = Font(bold=True)
    cell.alignment = Alignment(horizontal="center")

# Data for each category: [expression text, result].
data_电动 = [
    ["2615 - 243", 2372],
    ["2633 - 244", 2389],
    ["2542 - 243", 2299],
    ["2453 - 369", 2084],
    ["2670 - 244", 2426],
    ["2416 - 357", 2059],
    ["2574 - 239", 2335],
    ["2641 - 243", 2398],
    ["2640 - 242", 2398],
    ["2257 - 244", 2013],
    ["3114 - 410", 2704],
    ["2815 - 412", 2403],
    ["2545 - 243", 2302],
    ["2299 - 243", 2056],
]

data_电工 = [
    ["2443 - 20", 2423],
    ["2460 - 13", 2447],
    ["2339 - 17", 2322],
    ["2405 - 13", 2392],
    ["2661 - 16", 2645],
    ["3065 - 15", 3050],
    ["2912 - 14", 2898],
    ["2883 - 14", 2869],
]

data_黑板 = [
    ["1902 - 14", 1888],
    ["2319 - 14", 2305],
    ["2041 - 16", 2025],
    ["2451 - 13", 2438],
    ["1993 - 14", 1979],
    ["1841 - 18", 1823],
    ["2083 - 16", 2067],
    ["2009 - 18", 1991],
    ["2001 - 16", 1985],
    ["2014 - 14", 2000],
    ["2073 - 17", 2056],
    ["2008 - 15", 1993],
    ["2030 - 16", 2014],
    ["1928 - 22", 1906],
    ["1991 - 19", 1972],
    ["2235 - 12", 2223],
    ["2335 - 19", 2316],
    ["1920 - 16", 1904],
    ["1942 - 16", 1926],
    ["1974 - 19", 1955],
    ["1863 - 16", 1847],
    ["512 - 15", 497],
]

data_小 = [
    ["1881 - 20", 1861],
    ["2055 - 22", 2033],
    ["2034 - 20", 2014],
    ["1981 - 17", 1964],
    ["1629 - 12", 1617],
    ["913 - 18", 895],
    ["1842 - 19", 1823],
    ["1891 - 20", 1871],
]

data_大 = [
    ["1931 - 20", 1911],
    ["1775 - 11", 1764],
    ["1885 - 21", 1864],
    ["120 - 18", 102],
    ["1675 - 13", 1662],
]

# Write data starting from row 2 (row 1 holds the headers).
row = 2

# Table k (0-based) fills columns 2k+1 and 2k+2, matching the header order.
# Iterating each table over its own length leaves the cells below a shorter
# table untouched, so no explicit bounds check is needed.
for table_index, table in enumerate([data_电动, data_电工, data_黑板, data_小, data_大]):
    first_col = 2 * table_index + 1
    for offset, (expression, result) in enumerate(table):
        sheet.cell(row=row + offset, column=first_col, value=expression)
        sheet.cell(row=row + offset, column=first_col + 1, value=result)

# Set column widths for every header column. get_column_letter is used rather
# than chr(64 + col) so the letters stay valid past column Z.
for col in range(1, len(headers) + 1):
    sheet.column_dimensions[get_column_letter(col)].width = 15

wb.save("数据表.xlsx")
print("Excel文件已创建: 数据表.xlsx")
|
||||
91
create_table_pandas.py
Normal file
91
create_table_pandas.py
Normal file
@@ -0,0 +1,91 @@
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, "/opt/homebrew/lib/python3.13/site-packages")
|
||||
|
||||
import pandas as pd
|
||||
|
||||
# Define data for each category. Every entry is [expression text, result].
data_电动 = [
    ["2615 - 243", 2372],
    ["2633 - 244", 2389],
    ["2542 - 243", 2299],
    ["2453 - 369", 2084],
    ["2670 - 244", 2426],
    ["2416 - 357", 2059],
    ["2574 - 239", 2335],
    ["2641 - 243", 2398],
    ["2640 - 242", 2398],
    ["2257 - 244", 2013],
    ["3114 - 410", 2704],
    ["2815 - 412", 2403],
    ["2545 - 243", 2302],
    ["2299 - 243", 2056],
]

data_电工 = [
    ["2443 - 20", 2423],
    ["2460 - 13", 2447],
    ["2339 - 17", 2322],
    ["2405 - 13", 2392],
    ["2661 - 16", 2645],
    ["3065 - 15", 3050],
    ["2912 - 14", 2898],
    ["2883 - 14", 2869],
]

data_黑板 = [
    ["1902 - 14", 1888],
    ["2319 - 14", 2305],
    ["2041 - 16", 2025],
    ["2451 - 13", 2438],
    ["1993 - 14", 1979],
    ["1841 - 18", 1823],
    ["2083 - 16", 2067],
    ["2009 - 18", 1991],
    ["2001 - 16", 1985],
    ["2014 - 14", 2000],
    ["2073 - 17", 2056],
    ["2008 - 15", 1993],
    ["2030 - 16", 2014],
    ["1928 - 22", 1906],
    ["1991 - 19", 1972],
    ["2235 - 12", 2223],
    ["2335 - 19", 2316],
    ["1920 - 16", 1904],
    ["1942 - 16", 1926],
    ["1974 - 19", 1955],
    ["1863 - 16", 1847],
    ["512 - 15", 497],
]

data_小 = [
    ["1881 - 20", 1861],
    ["2055 - 22", 2033],
    ["2034 - 20", 2014],
    ["1981 - 17", 1964],
    ["1629 - 12", 1617],
    ["913 - 18", 895],
    ["1842 - 19", 1823],
    ["1891 - 20", 1871],
]

data_大 = [
    ["1931 - 20", 1911],
    ["1775 - 11", 1764],
    ["1885 - 21", 1864],
    ["120 - 18", 102],
    ["1675 - 13", 1662],
]

# Length of the longest table; the shorter tables are padded up to this
# length further down so that every column ends up with the same row count.
max_len = max(len(table) for table in (data_电动, data_电工, data_黑板, data_小, data_大))
|
||||
|
||||
|
||||
def pad_list(lst, length):
    """Return a new list holding *lst* followed by blank rows up to *length*.

    Args:
        lst: List of two-item rows. It is not modified.
        length: Desired number of rows in the result.

    Returns:
        A new list. When *lst* already has *length* rows or more, the result
        is a shallow copy of *lst* with nothing appended.
    """
    # Build a fresh ["", ""] for every missing row. Multiplying a one-element
    # list would repeat a reference to a single inner list, so editing one
    # padding row would silently edit all of them.
    return lst + [["", ""] for _ in range(length - len(lst))]
|
||||
|
||||
|
||||
# Pad every table to max_len rows; DataFrame columns built from them must all
# have the same length.
data_电动 = pad_list(data_电动, max_len)
data_电工 = pad_list(data_电工, max_len)
data_黑板 = pad_list(data_黑板, max_len)
data_小 = pad_list(data_小, max_len)
data_大 = pad_list(data_大, max_len)

# Create DataFrame: one expression column and one result column per category,
# in this order. The result columns carry a numeric suffix, which keeps the
# dict keys distinct. The last tuple item selects the row field (0 =
# expression text, 1 = result).
df = pd.DataFrame(
    {
        column_name: [row[field] for row in table]
        for column_name, table, field in (
            ("类别:电动", data_电动, 0),
            ("结果_1", data_电动, 1),
            ("类别:电工", data_电工, 0),
            ("结果_2", data_电工, 1),
            ("类别:黑板", data_黑板, 0),
            ("结果_3", data_黑板, 1),
            ("类别:小", data_小, 0),
            ("结果_4", data_小, 1),
            ("类别:大", data_大, 0),
            ("结果_5", data_大, 1),
        )
    }
)

# Replace the "" padding values with NA for cleaner Excel output.
df = df.replace("", pd.NA)

# Save to Excel
df.to_excel("数据表.xlsx", index=False, sheet_name="数据表")
print("Excel文件已创建: 数据表.xlsx")
|
||||
88
csv_to_xlsx.py
Normal file
88
csv_to_xlsx.py
Normal file
@@ -0,0 +1,88 @@
|
||||
#!/usr/bin/env python3
"""Convert 数据表.csv into 数据表.xlsx.

xlsxwriter is used when it can be imported; otherwise the script falls back
to openpyxl. If neither is importable it prints a hint and leaves the CSV
untouched. The first CSV row is treated as the header row.
"""
import csv

csv_file = "数据表.csv"
xlsx_file = "数据表.xlsx"


def _read_rows(path):
    """Return every row of the CSV at *path* as a list of string lists."""
    # newline="" is what the csv module asks for so that quoted fields with
    # embedded line breaks are parsed correctly.
    with open(path, "r", encoding="utf-8", newline="") as f:
        return list(csv.reader(f))


def _coerce(value):
    """Return *value* as an int if it parses as one, otherwise unchanged."""
    if not value:
        return value
    try:
        return int(value)
    except ValueError:
        return value


# Only the import itself is guarded. A failure during the conversion must
# surface as an error instead of silently triggering the fallback path.
try:
    import xlsxwriter
except ImportError:
    print("xlsxwriter not found, trying openpyxl...")
    try:
        from openpyxl import Workbook
        from openpyxl.styles import Font, Alignment
        from openpyxl.utils import get_column_letter
    except ImportError:
        print("Neither xlsxwriter nor openpyxl is available.")
        print("CSV file has been created: 数据表.csv")
        print("You can manually convert it to Excel format.")
    else:
        rows = _read_rows(csv_file)

        wb = Workbook()
        ws = wb.active
        ws.title = "数据表"

        for row_idx, row in enumerate(rows, start=1):
            for col_idx, value in enumerate(row, start=1):
                cell = ws.cell(row=row_idx, column=col_idx)
                cell.value = _coerce(value)

                # Every cell is centered; the header row is also bold.
                if row_idx == 1:
                    cell.font = Font(bold=True)
                cell.alignment = Alignment(horizontal="center")

        # Set column widths for as many columns as the widest CSV row has.
        for col in range(1, max(map(len, rows), default=0) + 1):
            ws.column_dimensions[get_column_letter(col)].width = 15

        wb.save(xlsx_file)
        print(f"Excel file created: {xlsx_file}")
else:
    # Read the CSV before creating the workbook: leaving the `with` block
    # closes the workbook, which writes the file, so a missing CSV must fail
    # before any output file can be produced.
    rows = _read_rows(csv_file)

    with xlsxwriter.Workbook(xlsx_file) as workbook:
        worksheet = workbook.add_worksheet("数据表")

        # Bold + centered for headers, centered for data cells.
        bold = workbook.add_format({"bold": True, "align": "center"})
        center = workbook.add_format({"align": "center"})

        for row_idx, row in enumerate(rows):
            for col_idx, value in enumerate(row):
                if row_idx == 0:  # Header row
                    worksheet.write(row_idx, col_idx, value, bold)
                    continue

                cell_value = _coerce(value)
                if isinstance(cell_value, int):
                    worksheet.write_number(row_idx, col_idx, cell_value, center)
                elif cell_value:
                    worksheet.write(row_idx, col_idx, cell_value, center)
                else:
                    # Empty cells are written without a format.
                    worksheet.write(row_idx, col_idx, cell_value)

        # Set column widths for as many columns as the widest CSV row has.
        for col in range(max(map(len, rows), default=0)):
            worksheet.set_column(col, col, 15)

    print(f"Excel file created: {xlsx_file}")
|
||||
23
数据表.csv
Normal file
23
数据表.csv
Normal file
@@ -0,0 +1,23 @@
|
||||
类别:电动,结果,类别:电工,结果,类别:黑板,结果,类别:小,结果,类别:大,结果
|
||||
2615 - 243,2372,2443 - 20,2423,1902 - 14,1888,1881 - 20,1861,1931 - 20,1911
|
||||
2633 - 244,2389,2460 - 13,2447,2319 - 14,2305,2055 - 22,2033,1775 - 11,1764
|
||||
2542 - 243,2299,2339 - 17,2322,2041 - 16,2025,2034 - 20,2014,1885 - 21,1864
|
||||
2453 - 369,2084,2405 - 13,2392,2451 - 13,2438,1981 - 17,1964,120 - 18,102
|
||||
2670 - 244,2426,2661 - 16,2645,1993 - 14,1979,1629 - 12,1617,1675 - 13,1662
|
||||
2416 - 357,2059,3065 - 15,3050,1841 - 18,1823,913 - 18,895,,
|
||||
2574 - 239,2335,2912 - 14,2898,2083 - 16,2067,1842 - 19,1823,,
|
||||
2641 - 243,2398,2883 - 14,2869,2009 - 18,1991,1891 - 20,1871,,
|
||||
2640 - 242,2398,,,2001 - 16,1985,,,,
|
||||
2257 - 244,2013,,,2014 - 14,2000,,,,
|
||||
3114 - 410,2704,,,2073 - 17,2056,,,,
|
||||
2815 - 412,2403,,,2008 - 15,1993,,,,
|
||||
2545 - 243,2302,,,2030 - 16,2014,,,,
|
||||
2299 - 243,2056,,,1928 - 22,1906,,,,
|
||||
,,,,1991 - 19,1972,,,,
|
||||
,,,,2235 - 12,2223,,,,
|
||||
,,,,2335 - 19,2316,,,,
|
||||
,,,,1920 - 16,1904,,,,
|
||||
,,,,1942 - 16,1926,,,,
|
||||
,,,,1974 - 19,1955,,,,
|
||||
,,,,1863 - 16,1847,,,,
|
||||
,,,,512 - 15,497,,,,
|
||||
|
Reference in New Issue
Block a user