初步修改完成,但仍然有问题

This commit is contained in:
三洋三洋
2024-03-27 04:55:00 +00:00
parent dbf35fe9c4
commit fb2ab8230d
6 changed files with 282 additions and 2 deletions

Binary file not shown.

View File

@@ -0,0 +1,14 @@
Congratulations on your download of this fine Rotodesign brand font product. We hope it will bring you many hours of typesetting pleasure and riches beyond your wildest dreams. We DO NOT, however, guarantee either of these things. Your mileage may vary.
This font is freeware, and is provided with no warranties as to its quality or its utility. After all, how much did you pay? Anyway, this font can be copied and used as you wish provided all copies include this readme file. Don't lie to your friends and tell 'em you made it yourself. You only cheat yourself when you do that. In the unlikely event you use this font to design something really cool or that makes you a ton of cash money, that's okay with me, just send me a copy or two of the finished item, and remember me when you get rich and famous. Enjoy!
©2006
Patrick Broderick
Rotodesign
http://www.rotodesign.com
roto@rotodesign.net
Rotodesign
1288 Columbus Ave. #176
San Francisco, CA 94133

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

View File

@@ -0,0 +1,256 @@
from augraphy import *
import random
def ocr_augmentation_pipeline():
pre_phase = [
# Rescale(scale="optimal", target_dpi = 300, p = 1.0),
]
ink_phase = [
# 6ms
InkColorSwap(
ink_swap_color="random",
ink_swap_sequence_number_range=(5, 10),
ink_swap_min_width_range=(2, 3),
ink_swap_max_width_range=(100, 120),
ink_swap_min_height_range=(2, 3),
ink_swap_max_height_range=(100, 120),
ink_swap_min_area_range=(10, 20),
ink_swap_max_area_range=(400, 500),
p=0.1
),
# 10ms
Dithering(
dither=random.choice(["ordered", "floyd-steinberg"]),
order=(3, 5),
p=0.05
),
# 10ms
InkBleed(
intensity_range=(0.1, 0.2),
kernel_size=random.choice([(7, 7), (5, 5), (3, 3)]),
severity=(0.4, 0.6),
p=0.2,
),
# 40ms
InkShifter(
text_shift_scale_range=(18, 27),
text_shift_factor_range=(1, 4),
text_fade_range=(0, 2),
blur_kernel_size=(5, 5),
blur_sigma=0,
noise_type="random",
p=0.1
),
# 90ms
# Letterpress(
# n_samples=(100, 400),
# n_clusters=(200, 400),
# std_range=(500, 3000),
# value_range=(150, 224),
# value_threshold_range=(96, 128),
# blur=1,
# p=0.1
# ),
]
paper_phase = [
# 50ms
# OneOf(
# [
# ColorPaper(
# hue_range=(0, 255),
# saturation_range=(10, 40),
# ),
# PatternGenerator(
# imgx=random.randint(256, 512),
# imgy=random.randint(256, 512),
# n_rotation_range=(10, 15),
# color="random",
# alpha_range=(0.25, 0.5),
# ),
# NoiseTexturize(
# sigma_range=(3, 10),
# turbulence_range=(2, 5),
# texture_width_range=(300, 500),
# texture_height_range=(300, 500),
# ),
# ],
# p=0.05
# ),
# 10ms
BrightnessTexturize(
texturize_range=(0.9, 0.99),
deviation=0.03,
p=0.1
)
]
post_phase = [
# 13ms
ColorShift(
color_shift_offset_x_range=(3, 5),
color_shift_offset_y_range=(3, 5),
color_shift_iterations=(2, 3),
color_shift_brightness_range=(0.9, 1.1),
color_shift_gaussian_kernel_range=(3, 3),
p=0.05
),
# 13ms
DirtyDrum(
line_width_range=(1, 6),
line_concentration=random.uniform(0.05, 0.15),
direction=random.randint(0, 2),
noise_intensity=random.uniform(0.6, 0.95),
noise_value=(64, 224),
ksize=random.choice([(3, 3), (5, 5), (7, 7)]),
sigmaX=0,
p=0.05,
),
# 10ms
OneOf(
[
LightingGradient(
light_position=None,
direction=None,
max_brightness=255,
min_brightness=0,
mode="gaussian",
linear_decay_rate=None,
transparency=None,
),
Brightness(
brightness_range=(0.9, 1.1),
min_brightness=0,
min_brightness_value=(120, 150),
),
Gamma(
gamma_range=(0.9, 1.1),
),
],
p=0.05
),
# 6ms
Jpeg(
quality_range=(25, 95),
p=0.1
),
# 12ms
Markup(
num_lines_range=(2, 7),
markup_length_range=(0.5, 1),
markup_thickness_range=(1, 2),
markup_type=random.choice(["strikethrough", "crossed", "highlight", "underline"]),
markup_color="random",
single_word_mode=False,
repetitions=1,
p=0.05
),
# 65ms
# OneOf(
# [
# BadPhotoCopy(
# noise_mask=None,
# noise_type=-1,
# noise_side="random",
# noise_iteration=(1, 2),
# noise_size=(1, 3),
# noise_value=(128, 196),
# noise_sparsity=(0.3, 0.6),
# noise_concentration=(0.1, 0.6),
# blur_noise=random.choice([True, False]),
# blur_noise_kernel=random.choice([(3, 3), (5, 5), (7, 7)]),
# wave_pattern=random.choice([True, False]),
# edge_effect=random.choice([True, False]),
# ),
# ShadowCast(
# shadow_side="random",
# shadow_vertices_range=(1, 20),
# shadow_width_range=(0.3, 0.8),
# shadow_height_range=(0.3, 0.8),
# shadow_color=(0, 0, 0),
# shadow_opacity_range=(0.2, 0.9),
# shadow_iterations_range=(1, 2),
# shadow_blur_kernel_range=(101, 301),
# ),
# LowLightNoise(
# num_photons_range=(50, 100),
# alpha_range=(0.7, 1.0),
# beta_range=(10, 30),
# gamma_range=(1, 1.8),
# bias_range=(20, 40),
# dark_current_value=1.0,
# exposure_time=0.2,
# gain=0.1,
# ),
# ],
# p=0.05,
# ),
# 10ms
OneOf(
[
NoisyLines(
noisy_lines_direction="random",
noisy_lines_location="random",
noisy_lines_number_range=(5, 20),
noisy_lines_color=(0, 0, 0),
noisy_lines_thickness_range=(1, 2),
noisy_lines_random_noise_intensity_range=(0.01, 0.1),
noisy_lines_length_interval_range=(0, 100),
noisy_lines_gaussian_kernel_value_range=(3, 5),
noisy_lines_overlay_method="ink_to_paper",
),
BindingsAndFasteners(
overlay_types="darken",
foreground=None,
effect_type="random",
width_range="random",
height_range="random",
angle_range=(-30, 30),
ntimes=(2, 6),
nscales=(0.9, 1.0),
edge="random",
edge_offset=(10, 50),
use_figshare_library=0,
),
],
p=0.05,
),
# 20ms
OneOf(
[
PageBorder(
page_border_width_height="random",
page_border_color=(0, 0, 0),
page_border_background_color=(0, 0, 0),
page_numbers="random",
page_rotation_angle_range=(-3, 3),
curve_frequency=(2, 8),
curve_height=(2, 4),
curve_length_one_side=(50, 100),
same_page_border=random.choice([0, 1]),
),
Folding(
fold_x=None,
fold_deviation=(0, 0),
fold_count=random.randint(2, 8),
fold_noise=0.01,
fold_angle_range=(-360, 360),
gradient_width=(0.1, 0.2),
gradient_height=(0.01, 0.02),
backdrop_color=(0, 0, 0),
),
],
p=0.05
),
]
pipeline = AugraphyPipeline(
ink_phase=ink_phase,
paper_phase=paper_phase,
post_phase=post_phase,
pre_phase=pre_phase,
log=False,
)
return pipeline

View File

@@ -131,6 +131,14 @@ def random_resize(
]
def ocr_aug(image: np.ndarray) -> np.ndarray:
# 增加白边
image = add_white_border(image, max_size=35).permute(1, 2, 0).numpy()
# 数据增强
image = train_pipeline(image)
return image
def train_transform(images: List[Image.Image]) -> List[torch.Tensor]:
assert OCR_IMG_CHANNELS == 1 , "Only support grayscale images for now"
assert OCR_FIX_SIZE == True, "Only support fixed size images for now"
@@ -140,13 +148,15 @@ def train_transform(images: List[Image.Image]) -> List[torch.Tensor]:
images = random_resize(images, MIN_RESIZE_RATIO, MAX_RESIZE_RATIO)
# 裁剪掉白边
images = [trim_white_border(image) for image in images]
# 增加白边
# images = [add_white_border(image, max_size=35) for image in images]
# 数据增强
# images = [train_pipeline(image.permute(1, 2, 0).numpy()) for image in images]
images = [ocr_aug(image) for image in images]
# general transform pipeline
images = general_transform_pipeline(images)
# images = [general_transform_pipeline(image) for image in images]
images = [general_transform_pipeline(image) for image in images]
# padding to fixed size
images = padding(images, OCR_IMG_SIZE)
return images