初步修改完成,但仍然有问题
This commit is contained in:
BIN
src/models/ocr_model/train/fonts/JINKY.ttf
Normal file
BIN
src/models/ocr_model/train/fonts/JINKY.ttf
Normal file
Binary file not shown.
14
src/models/ocr_model/train/fonts/Rotodesign License.txt
Normal file
14
src/models/ocr_model/train/fonts/Rotodesign License.txt
Normal file
@@ -0,0 +1,14 @@
|
||||
Congratulations on your download of this fine Rotodesign brand font product. We hope it will bring you many hours of typesetting pleasure and riches beyond your wildest dreams. We DO NOT, however, guarantee either of these things. Your mileage may vary.
|
||||
|
||||
This font is freeware, and is provided with no warranties as to its quality or its utility. After all, how much did you pay? Anyway, this font can be copied and used as you wish provided all copies include this readme file. Don't lie to your friends and tell 'em you made it yourself. You only cheat yourself when you do that. In the unlikely event you use this font to design something really cool or that makes you a ton of cash money, that's okay with me, just send me a copy or two of the finished item, and remember me when you get rich and famous. Enjoy!
|
||||
|
||||
©2006
|
||||
Patrick Broderick
|
||||
Rotodesign
|
||||
|
||||
http://www.rotodesign.com
|
||||
roto@rotodesign.net
|
||||
|
||||
Rotodesign
|
||||
1288 Columbus Ave. #176
|
||||
San Francisco, CA 94133
|
||||
BIN
src/models/ocr_model/train/fonts/font_type.zip
Normal file
BIN
src/models/ocr_model/train/fonts/font_type.zip
Normal file
Binary file not shown.
BIN
src/models/ocr_model/train/foo.png
Normal file
BIN
src/models/ocr_model/train/foo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 16 KiB |
256
src/models/ocr_model/utils/ocr_aug.py
Normal file
256
src/models/ocr_model/utils/ocr_aug.py
Normal file
@@ -0,0 +1,256 @@
|
||||
from augraphy import *
|
||||
import random
|
||||
|
||||
def ocr_augmentation_pipeline():
    """Build the Augraphy pipeline used to augment OCR training images.

    The pipeline is assembled from four lists of augmentations that are
    handed to ``AugraphyPipeline`` as ``pre_phase``, ``ink_phase``,
    ``paper_phase`` and ``post_phase``. ``pre_phase`` is currently empty
    (its only entry, ``Rescale``, is commented out). Every enabled
    augmentation or ``OneOf`` group is given a small ``p`` (0.05-0.2).

    Returns:
        The ``AugraphyPipeline`` instance, constructed with ``log=False``.

    Notes:
        * Arguments written as ``random.choice(...)``, ``random.uniform(...)``
          or ``random.randint(...)`` are evaluated once, while this function
          runs. Each returned pipeline therefore carries one fixed value for
          those arguments; they are not re-drawn per image by this code.
        * The ``# Nms`` comments in front of each entry look like the
          author's measured cost per augmentation. The entries tagged 90ms,
          50ms and 65ms are exactly the ones commented out -- presumably
          disabled for speed; TODO confirm.
    """
    # Pre phase: nothing enabled.
    pre_phase = [
        # Rescale(scale="optimal", target_dpi = 300, p = 1.0),
    ]

    # Ink phase augmentations.
    ink_phase = [
        # 6ms
        InkColorSwap(
            ink_swap_color="random",
            ink_swap_sequence_number_range=(5, 10),
            ink_swap_min_width_range=(2, 3),
            ink_swap_max_width_range=(100, 120),
            ink_swap_min_height_range=(2, 3),
            ink_swap_max_height_range=(100, 120),
            ink_swap_min_area_range=(10, 20),
            ink_swap_max_area_range=(400, 500),
            p=0.1
        ),
        # 10ms
        Dithering(
            # Drawn once at construction time (see Notes in the docstring).
            dither=random.choice(["ordered", "floyd-steinberg"]),
            order=(3, 5),
            p=0.05
        ),
        # 10ms
        InkBleed(
            intensity_range=(0.1, 0.2),
            # Drawn once at construction time.
            kernel_size=random.choice([(7, 7), (5, 5), (3, 3)]),
            severity=(0.4, 0.6),
            p=0.2,
        ),
        # 40ms
        InkShifter(
            text_shift_scale_range=(18, 27),
            text_shift_factor_range=(1, 4),
            text_fade_range=(0, 2),
            blur_kernel_size=(5, 5),
            blur_sigma=0,
            noise_type="random",
            p=0.1
        ),
        # 90ms
        # Letterpress(
        #     n_samples=(100, 400),
        #     n_clusters=(200, 400),
        #     std_range=(500, 3000),
        #     value_range=(150, 224),
        #     value_threshold_range=(96, 128),
        #     blur=1,
        #     p=0.1
        # ),
    ]

    # Paper phase augmentations: only BrightnessTexturize is enabled.
    paper_phase = [
        # 50ms
        # OneOf(
        #     [
        #         ColorPaper(
        #             hue_range=(0, 255),
        #             saturation_range=(10, 40),
        #         ),
        #         PatternGenerator(
        #             imgx=random.randint(256, 512),
        #             imgy=random.randint(256, 512),
        #             n_rotation_range=(10, 15),
        #             color="random",
        #             alpha_range=(0.25, 0.5),
        #         ),
        #         NoiseTexturize(
        #             sigma_range=(3, 10),
        #             turbulence_range=(2, 5),
        #             texture_width_range=(300, 500),
        #             texture_height_range=(300, 500),
        #         ),
        #     ],
        #     p=0.05
        # ),
        # 10ms
        BrightnessTexturize(
            texturize_range=(0.9, 0.99),
            deviation=0.03,
            p=0.1
        )
    ]

    # Post phase augmentations.
    post_phase = [
        # 13ms
        ColorShift(
            color_shift_offset_x_range=(3, 5),
            color_shift_offset_y_range=(3, 5),
            color_shift_iterations=(2, 3),
            color_shift_brightness_range=(0.9, 1.1),
            color_shift_gaussian_kernel_range=(3, 3),
            p=0.05
        ),
        # 13ms
        DirtyDrum(
            line_width_range=(1, 6),
            # The four random.* arguments below are drawn once at
            # construction time.
            line_concentration=random.uniform(0.05, 0.15),
            direction=random.randint(0, 2),
            noise_intensity=random.uniform(0.6, 0.95),
            noise_value=(64, 224),
            ksize=random.choice([(3, 3), (5, 5), (7, 7)]),
            sigmaX=0,
            p=0.05,
        ),
        # 10ms
        # Lighting group: LightingGradient, Brightness or Gamma.
        OneOf(
            [
                LightingGradient(
                    light_position=None,
                    direction=None,
                    max_brightness=255,
                    min_brightness=0,
                    mode="gaussian",
                    linear_decay_rate=None,
                    transparency=None,
                ),
                Brightness(
                    brightness_range=(0.9, 1.1),
                    min_brightness=0,
                    min_brightness_value=(120, 150),
                ),
                Gamma(
                    gamma_range=(0.9, 1.1),
                ),
            ],
            p=0.05
        ),
        # 6ms
        Jpeg(
            quality_range=(25, 95),
            p=0.1
        ),
        # 12ms
        Markup(
            num_lines_range=(2, 7),
            markup_length_range=(0.5, 1),
            markup_thickness_range=(1, 2),
            # Drawn once at construction time.
            markup_type=random.choice(["strikethrough", "crossed", "highlight", "underline"]),
            markup_color="random",
            single_word_mode=False,
            repetitions=1,
            p=0.05
        ),
        # 65ms
        # OneOf(
        #     [
        #         BadPhotoCopy(
        #             noise_mask=None,
        #             noise_type=-1,
        #             noise_side="random",
        #             noise_iteration=(1, 2),
        #             noise_size=(1, 3),
        #             noise_value=(128, 196),
        #             noise_sparsity=(0.3, 0.6),
        #             noise_concentration=(0.1, 0.6),
        #             blur_noise=random.choice([True, False]),
        #             blur_noise_kernel=random.choice([(3, 3), (5, 5), (7, 7)]),
        #             wave_pattern=random.choice([True, False]),
        #             edge_effect=random.choice([True, False]),
        #         ),
        #         ShadowCast(
        #             shadow_side="random",
        #             shadow_vertices_range=(1, 20),
        #             shadow_width_range=(0.3, 0.8),
        #             shadow_height_range=(0.3, 0.8),
        #             shadow_color=(0, 0, 0),
        #             shadow_opacity_range=(0.2, 0.9),
        #             shadow_iterations_range=(1, 2),
        #             shadow_blur_kernel_range=(101, 301),
        #         ),
        #         LowLightNoise(
        #             num_photons_range=(50, 100),
        #             alpha_range=(0.7, 1.0),
        #             beta_range=(10, 30),
        #             gamma_range=(1, 1.8),
        #             bias_range=(20, 40),
        #             dark_current_value=1.0,
        #             exposure_time=0.2,
        #             gain=0.1,
        #         ),
        #     ],
        #     p=0.05,
        # ),
        # 10ms
        # Overlay group: NoisyLines or BindingsAndFasteners.
        OneOf(
            [
                NoisyLines(
                    noisy_lines_direction="random",
                    noisy_lines_location="random",
                    noisy_lines_number_range=(5, 20),
                    noisy_lines_color=(0, 0, 0),
                    noisy_lines_thickness_range=(1, 2),
                    noisy_lines_random_noise_intensity_range=(0.01, 0.1),
                    noisy_lines_length_interval_range=(0, 100),
                    noisy_lines_gaussian_kernel_value_range=(3, 5),
                    noisy_lines_overlay_method="ink_to_paper",
                ),
                BindingsAndFasteners(
                    overlay_types="darken",
                    foreground=None,
                    effect_type="random",
                    width_range="random",
                    height_range="random",
                    angle_range=(-30, 30),
                    ntimes=(2, 6),
                    nscales=(0.9, 1.0),
                    edge="random",
                    edge_offset=(10, 50),
                    use_figshare_library=0,
                ),
            ],
            p=0.05,
        ),
        # 20ms
        # Page-geometry group: PageBorder or Folding.
        OneOf(
            [
                PageBorder(
                    page_border_width_height="random",
                    page_border_color=(0, 0, 0),
                    page_border_background_color=(0, 0, 0),
                    page_numbers="random",
                    page_rotation_angle_range=(-3, 3),
                    curve_frequency=(2, 8),
                    curve_height=(2, 4),
                    curve_length_one_side=(50, 100),
                    # Drawn once at construction time.
                    same_page_border=random.choice([0, 1]),
                ),
                Folding(
                    fold_x=None,
                    fold_deviation=(0, 0),
                    # Drawn once at construction time.
                    fold_count=random.randint(2, 8),
                    fold_noise=0.01,
                    fold_angle_range=(-360, 360),
                    gradient_width=(0.1, 0.2),
                    gradient_height=(0.01, 0.02),
                    backdrop_color=(0, 0, 0),
                ),
            ],
            p=0.05
        ),
    ]

    # Assemble the four phases into one pipeline object; logging is disabled.
    pipeline = AugraphyPipeline(
        ink_phase=ink_phase,
        paper_phase=paper_phase,
        post_phase=post_phase,
        pre_phase=pre_phase,
        log=False,
    )

    return pipeline
|
||||
@@ -131,6 +131,14 @@ def random_resize(
|
||||
]
|
||||
|
||||
|
||||
def ocr_aug(image: np.ndarray) -> np.ndarray:
    """Pad one image with a white border, then run it through ``train_pipeline``.

    Steps, in order:
      1. ``add_white_border(image, max_size=35)`` adds the border.
      2. The result is reordered with ``permute(1, 2, 0)`` (first axis moved
         to the end) and converted with ``.numpy()``.
      3. The array is passed to ``train_pipeline`` and its result returned.

    NOTE(review): step 2 implies ``add_white_border`` returns a tensor-like
    object, presumably channel-first (C, H, W) -> (H, W, C); confirm against
    ``add_white_border`` and check that the ``np.ndarray`` annotation on
    ``image`` matches what the caller actually passes.
    """
    # Add the white border.
    bordered = add_white_border(image, max_size=35)
    # Move the leading axis to the end and convert to a NumPy array.
    bordered_array = bordered.permute(1, 2, 0).numpy()
    # Data augmentation.
    return train_pipeline(bordered_array)
|
||||
|
||||
|
||||
def train_transform(images: List[Image.Image]) -> List[torch.Tensor]:
|
||||
assert OCR_IMG_CHANNELS == 1 , "Only support grayscale images for now"
|
||||
assert OCR_FIX_SIZE == True, "Only support fixed size images for now"
|
||||
@@ -140,13 +148,15 @@ def train_transform(images: List[Image.Image]) -> List[torch.Tensor]:
|
||||
images = random_resize(images, MIN_RESIZE_RATIO, MAX_RESIZE_RATIO)
|
||||
# 裁剪掉白边
|
||||
images = [trim_white_border(image) for image in images]
|
||||
|
||||
# 增加白边
|
||||
# images = [add_white_border(image, max_size=35) for image in images]
|
||||
# 数据增强
|
||||
# images = [train_pipeline(image.permute(1, 2, 0).numpy()) for image in images]
|
||||
images = [ocr_aug(image) for image in images]
|
||||
|
||||
# general transform pipeline
|
||||
images = general_transform_pipeline(images)
|
||||
# images = [general_transform_pipeline(image) for image in images]
|
||||
images = [general_transform_pipeline(image) for image in images]
|
||||
# padding to fixed size
|
||||
images = padding(images, OCR_IMG_SIZE)
|
||||
return images
|
||||
|
||||
Reference in New Issue
Block a user