From 511f69555c20056f87c35c16082c1028217a8a1a Mon Sep 17 00:00:00 2001 From: OleehyO <1258009915@qq.com> Date: Mon, 21 Apr 2025 13:52:16 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=A7=20Fix=20all=20ruff=20typo=20errors?= =?UTF-8?q?=20&=20test=20CI/CD=20workflow=20(#109)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [chore] Fix ruff typo * [robot] Fix welcome robot --- .github/workflows/pr-welcome.yml | 4 + assets/fire.svg | 2 +- docs/source/conf.py | 66 +++++++-------- examples/client_demo.py | 4 +- texteller/api/format.py | 26 +++--- texteller/api/katex.py | 134 +++++++++++++++---------------- texteller/models/__init__.py | 2 +- texteller/utils/image.py | 4 +- texteller/utils/latex.py | 8 +- 9 files changed, 127 insertions(+), 123 deletions(-) diff --git a/.github/workflows/pr-welcome.yml b/.github/workflows/pr-welcome.yml index 16841ed..5e981ac 100644 --- a/.github/workflows/pr-welcome.yml +++ b/.github/workflows/pr-welcome.yml @@ -4,6 +4,10 @@ on: pull_request: types: [opened] +permissions: + pull-requests: write + issues: write + jobs: welcome: runs-on: ubuntu-latest diff --git a/assets/fire.svg b/assets/fire.svg index 8f9f7eb..522dff7 100644 --- a/assets/fire.svg +++ b/assets/fire.svg @@ -457,4 +457,4 @@ - \ No newline at end of file + diff --git a/docs/source/conf.py b/docs/source/conf.py index 33d39e4..7a246ae 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -12,64 +12,64 @@ import os import sys -sys.path.insert(0, os.path.abspath('../..')) +sys.path.insert(0, os.path.abspath("../..")) # -- Project information ----------------------------------------------------- -project = 'TexTeller' -copyright = '2025, TexTeller Team' -author = 'TexTeller Team' +project = "TexTeller" +copyright = "2025, TexTeller Team" +author = "TexTeller Team" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration extensions = [ - 'myst_parser', - 'sphinx.ext.duration', - 'sphinx.ext.intersphinx', - 'sphinx.ext.autosectionlabel', - 'sphinx.ext.autodoc', - 'sphinx.ext.viewcode', - 'sphinx.ext.napoleon', - 'sphinx.ext.autosummary', - 'sphinx_copybutton', + "myst_parser", + "sphinx.ext.duration", + "sphinx.ext.intersphinx", + "sphinx.ext.autosectionlabel", + "sphinx.ext.autodoc", + "sphinx.ext.viewcode", + "sphinx.ext.napoleon", + "sphinx.ext.autosummary", + "sphinx_copybutton", # 'sphinx.ext.linkcode', # 'sphinxarg.ext', - 'sphinx_design', - 'nbsphinx', + "sphinx_design", + "nbsphinx", ] -templates_path = ['_templates'] +templates_path = ["_templates"] exclude_patterns = [] # Autodoc settings -autodoc_member_order = 'bysource' +autodoc_member_order = "bysource" add_module_names = False -autoclass_content = 'both' +autoclass_content = "both" autodoc_default_options = { - 'members': True, - 'member-order': 'bysource', - 'undoc-members': True, - 'show-inheritance': True, - 'imported-members': True, + "members": True, + "member-order": "bysource", + "undoc-members": True, + "show-inheritance": True, + "imported-members": True, } # Intersphinx settings intersphinx_mapping = { - 'python': ('https://docs.python.org/3', None), - 'numpy': ('https://numpy.org/doc/stable', None), - 'torch': ('https://pytorch.org/docs/stable', None), - 'transformers': ('https://huggingface.co/docs/transformers/main/en', None), + "python": ("https://docs.python.org/3", None), + "numpy": ("https://numpy.org/doc/stable", None), + "torch": ("https://pytorch.org/docs/stable", None), + "transformers": ("https://huggingface.co/docs/transformers/main/en", None), } -html_theme = 'sphinx_book_theme' +html_theme = "sphinx_book_theme" html_theme_options = { - 'repository_url': 'https://github.com/OleehyO/TexTeller', - 'use_repository_button': True, - 'use_issues_button': True, - 'use_edit_page_button': True, - 'use_download_button': True, + "repository_url": "https://github.com/OleehyO/TexTeller", + "use_repository_button": True, + "use_issues_button": True, + "use_edit_page_button": True, + "use_download_button": True, } html_logo = "../../assets/logo.svg" diff --git a/examples/client_demo.py b/examples/client_demo.py index a6445ad..cb3b76f 100644 --- a/examples/client_demo.py +++ b/examples/client_demo.py @@ -3,8 +3,8 @@ import requests server_url = "http://127.0.0.1:8000/predict" img_path = "/path/to/your/image" -with open(img_path, 'rb') as img: - files = {'img': img} +with open(img_path, "rb") as img: + files = {"img": img} response = requests.post(server_url, files=files) print(response.text) diff --git a/texteller/api/format.py b/texteller/api/format.py index 0b76bc8..5e88981 100644 --- a/texteller/api/format.py +++ b/texteller/api/format.py @@ -19,8 +19,8 @@ TEXT_LINE_START = "" COMMENT_LINE_START = "% " # Opening and closing delimiters -OPENS = ['{', '(', '['] -CLOSES = ['}', ')', ']'] +OPENS = ["{", "(", "["] +CLOSES = ["}", ")", "]"] # Names of LaTeX verbatim environments VERBATIMS = ["verbatim", "Verbatim", "lstlisting", "minted", "comment"] @@ -138,7 +138,7 @@ class Pattern: contains_env_end=ENV_END in s, contains_item=ITEM in s, contains_splitting=True, - contains_comment='%' in s, + contains_comment="%" in s, ) else: return cls( @@ -146,7 +146,7 @@ class Pattern: contains_env_end=False, contains_item=False, contains_splitting=False, - contains_comment='%' in s, + contains_comment="%" in s, ) @@ -169,11 +169,11 @@ def find_comment_index(line: str, pattern: Pattern) -> Optional[int]: in_command = False for i, c in enumerate(line): - if c == '\\': + if c == "\\": in_command = True elif in_command and not c.isalpha(): in_command = False - elif c == '%' and not in_command: + elif c == "%" and not in_command: return i return None @@ -390,10 +390,10 @@ def find_wrap_point(line: str, indent_length: int, args: Args) -> Optional[int]: line_width += 1 if line_width > wrap_boundary and wrap_point is not None: break - if c == ' ' and prev_char != '\\': + if c == " " and prev_char != "\\": if after_char: wrap_point = i - elif c != '%': + elif c != "%": after_char = True prev_char = c @@ -483,8 +483,8 @@ def split_line(line: str, state: State, file: str, args: Args, logs: List[Log]) if not match: return line, "" - prev = match.group('prev') - rest = match.group('env') + prev = match.group("prev") + rest = match.group("env") if args.verbosity >= 3: # Trace level logs.append( @@ -517,8 +517,8 @@ def clean_text(text: str, args: Args) -> str: text = RE_NEWLINES.sub(f"{LINE_END}{LINE_END}", text) # Remove tabs if they shouldn't be used - if args.tabchar != '\t': - text = text.replace('\t', ' ' * args.tabsize) + if args.tabchar != "\t": + text = text.replace("\t", " " * args.tabsize) # Remove trailing spaces text = RE_TRAIL.sub(LINE_END, text) @@ -577,7 +577,7 @@ def _format_latex(old_text: str, file: str, args: Args) -> Tuple[str, List[Log]] new_text = "" # Select the character used for indentation - indent_char = '\t' if args.tabchar == '\t' else ' ' + indent_char = "\t" if args.tabchar == "\t" else " " # Get any extra environments to be indented as lists lists_begin = [f"\\begin{{{l}}}" for l in args.lists] diff --git a/texteller/api/katex.py b/texteller/api/katex.py index 83eefdf..81c350c 100644 --- a/texteller/api/katex.py +++ b/texteller/api/katex.py @@ -5,13 +5,13 @@ from .format import format_latex def _rm_dollar_surr(content): - pattern = re.compile(r'\\[a-zA-Z]+\$.*?\$|\$.*?\$') + pattern = re.compile(r"\\[a-zA-Z]+\$.*?\$|\$.*?\$") matches = pattern.findall(content) for match in matches: - if not re.match(r'\\[a-zA-Z]+', match): - new_match = match.strip('$') - content = content.replace(match, ' ' + new_match + ' ') + if not re.match(r"\\[a-zA-Z]+", match): + new_match = match.strip("$") + content = content.replace(match, " " + new_match + " ") return content @@ -33,97 +33,97 @@ def to_katex(formula: str) -> str: """ res = formula # remove mbox surrounding - res = change_all(res, r'\mbox ', r' ', r'{', r'}', r'', r'') - res = change_all(res, r'\mbox', r' ', r'{', r'}', r'', r'') + res = change_all(res, r"\mbox ", r" ", r"{", r"}", r"", r"") + res = change_all(res, r"\mbox", r" ", r"{", r"}", r"", r"") # remove hbox surrounding - res = re.sub(r'\\hbox to ?-? ?\d+\.\d+(pt)?\{', r'\\hbox{', res) - res = change_all(res, r'\hbox', r' ', r'{', r'}', r'', r' ') + res = re.sub(r"\\hbox to ?-? ?\d+\.\d+(pt)?\{", r"\\hbox{", res) + res = change_all(res, r"\hbox", r" ", r"{", r"}", r"", r" ") # remove raise surrounding - res = re.sub(r'\\raise ?-? ?\d+\.\d+(pt)?', r' ', res) + res = re.sub(r"\\raise ?-? ?\d+\.\d+(pt)?", r" ", res) # remove makebox - res = re.sub(r'\\makebox ?\[\d+\.\d+(pt)?\]\{', r'\\makebox{', res) - res = change_all(res, r'\makebox', r' ', r'{', r'}', r'', r' ') + res = re.sub(r"\\makebox ?\[\d+\.\d+(pt)?\]\{", r"\\makebox{", res) + res = change_all(res, r"\makebox", r" ", r"{", r"}", r"", r" ") # remove vbox surrounding, scalebox surrounding - res = re.sub(r'\\raisebox\{-? ?\d+\.\d+(pt)?\}\{', r'\\raisebox{', res) - res = re.sub(r'\\scalebox\{-? ?\d+\.\d+(pt)?\}\{', r'\\scalebox{', res) - res = change_all(res, r'\scalebox', r' ', r'{', r'}', r'', r' ') - res = change_all(res, r'\raisebox', r' ', r'{', r'}', r'', r' ') - res = change_all(res, r'\vbox', r' ', r'{', r'}', r'', r' ') + res = re.sub(r"\\raisebox\{-? ?\d+\.\d+(pt)?\}\{", r"\\raisebox{", res) + res = re.sub(r"\\scalebox\{-? ?\d+\.\d+(pt)?\}\{", r"\\scalebox{", res) + res = change_all(res, r"\scalebox", r" ", r"{", r"}", r"", r" ") + res = change_all(res, r"\raisebox", r" ", r"{", r"}", r"", r" ") + res = change_all(res, r"\vbox", r" ", r"{", r"}", r"", r" ") origin_instructions = [ - r'\Huge', - r'\huge', - r'\LARGE', - r'\Large', - r'\large', - r'\normalsize', - r'\small', - r'\footnotesize', - r'\tiny', + r"\Huge", + r"\huge", + r"\LARGE", + r"\Large", + r"\large", + r"\normalsize", + r"\small", + r"\footnotesize", + r"\tiny", ] for old_ins, new_ins in zip(origin_instructions, origin_instructions): - res = change_all(res, old_ins, new_ins, r'$', r'$', '{', '}') - res = change_all(res, r'\mathbf', r'\bm', r'{', r'}', r'{', r'}') - res = change_all(res, r'\boldmath ', r'\bm', r'{', r'}', r'{', r'}') - res = change_all(res, r'\boldmath', r'\bm', r'{', r'}', r'{', r'}') - res = change_all(res, r'\boldmath ', r'\bm', r'$', r'$', r'{', r'}') - res = change_all(res, r'\boldmath', r'\bm', r'$', r'$', r'{', r'}') - res = change_all(res, r'\scriptsize', r'\scriptsize', r'$', r'$', r'{', r'}') - res = change_all(res, r'\emph', r'\textit', r'{', r'}', r'{', r'}') - res = change_all(res, r'\emph ', r'\textit', r'{', r'}', r'{', r'}') + res = change_all(res, old_ins, new_ins, r"$", r"$", "{", "}") + res = change_all(res, r"\mathbf", r"\bm", r"{", r"}", r"{", r"}") + res = change_all(res, r"\boldmath ", r"\bm", r"{", r"}", r"{", r"}") + res = change_all(res, r"\boldmath", r"\bm", r"{", r"}", r"{", r"}") + res = change_all(res, r"\boldmath ", r"\bm", r"$", r"$", r"{", r"}") + res = change_all(res, r"\boldmath", r"\bm", r"$", r"$", r"{", r"}") + res = change_all(res, r"\scriptsize", r"\scriptsize", r"$", r"$", r"{", r"}") + res = change_all(res, r"\emph", r"\textit", r"{", r"}", r"{", r"}") + res = change_all(res, r"\emph ", r"\textit", r"{", r"}", r"{", r"}") # remove bold command - res = change_all(res, r'\bm', r' ', r'{', r'}', r'', r'') + res = change_all(res, r"\bm", r" ", r"{", r"}", r"", r"") origin_instructions = [ - r'\left', - r'\middle', - r'\right', - r'\big', - r'\Big', - r'\bigg', - r'\Bigg', - r'\bigl', - r'\Bigl', - r'\biggl', - r'\Biggl', - r'\bigm', - r'\Bigm', - r'\biggm', - r'\Biggm', - r'\bigr', - r'\Bigr', - r'\biggr', - r'\Biggr', + r"\left", + r"\middle", + r"\right", + r"\big", + r"\Big", + r"\bigg", + r"\Bigg", + r"\bigl", + r"\Bigl", + r"\biggl", + r"\Biggl", + r"\bigm", + r"\Bigm", + r"\biggm", + r"\Biggm", + r"\bigr", + r"\Bigr", + r"\biggr", + r"\Biggr", ] for origin_ins in origin_instructions: - res = change_all(res, origin_ins, origin_ins, r'{', r'}', r'', r'') + res = change_all(res, origin_ins, origin_ins, r"{", r"}", r"", r"") - res = re.sub(r'\\\[(.*?)\\\]', r'\1\\newline', res) + res = re.sub(r"\\\[(.*?)\\\]", r"\1\\newline", res) - if res.endswith(r'\newline'): + if res.endswith(r"\newline"): res = res[:-8] # remove multiple spaces - res = re.sub(r'(\\,){1,}', ' ', res) - res = re.sub(r'(\\!){1,}', ' ', res) - res = re.sub(r'(\\;){1,}', ' ', res) - res = re.sub(r'(\\:){1,}', ' ', res) - res = re.sub(r'\\vspace\{.*?}', '', res) + res = re.sub(r"(\\,){1,}", " ", res) + res = re.sub(r"(\\!){1,}", " ", res) + res = re.sub(r"(\\;){1,}", " ", res) + res = re.sub(r"(\\:){1,}", " ", res) + res = re.sub(r"\\vspace\{.*?}", "", res) # merge consecutive text def merge_texts(match): texts = match.group(0) - merged_content = ''.join(re.findall(r'\\text\{([^}]*)\}', texts)) - return f'\\text{{{merged_content}}}' + merged_content = "".join(re.findall(r"\\text\{([^}]*)\}", texts)) + return f"\\text{{{merged_content}}}" - res = re.sub(r'(\\text\{[^}]*\}\s*){2,}', merge_texts, res) + res = re.sub(r"(\\text\{[^}]*\}\s*){2,}", merge_texts, res) - res = res.replace(r'\bf ', '') + res = res.replace(r"\bf ", "") res = _rm_dollar_surr(res) # remove extra spaces (keeping only one) - res = re.sub(r' +', ' ', res) + res = re.sub(r" +", " ", res) # format latex res = res.strip() diff --git a/texteller/models/__init__.py b/texteller/models/__init__.py index 1beda4e..f0cf366 100644 --- a/texteller/models/__init__.py +++ b/texteller/models/__init__.py @@ -1,3 +1,3 @@ from .texteller import TexTeller -__all__ = ['TexTeller'] +__all__ = ["TexTeller"] diff --git a/texteller/utils/image.py b/texteller/utils/image.py index cc50a3c..3af5000 100644 --- a/texteller/utils/image.py +++ b/texteller/utils/image.py @@ -41,7 +41,7 @@ def readimgs(image_paths: list[str]) -> list[np.ndarray]: if image is None: raise ValueError(f"Image at {path} could not be read.") if image.dtype == np.uint16: - _logger.warning(f'Converting {path} to 8-bit, image may be lossy.') + _logger.warning(f"Converting {path} to 8-bit, image may be lossy.") image = cv2.convertScaleAbs(image, alpha=(255.0 / 65535.0)) channels = 1 if len(image.shape) == 2 else image.shape[2] @@ -112,7 +112,7 @@ def transform(images: List[Union[np.ndarray, Image.Image]]) -> List[torch.Tensor assert IMG_CHANNELS == 1, "Only support grayscale images for now" images = [ - np.array(img.convert('RGB')) if isinstance(img, Image.Image) else img for img in images + np.array(img.convert("RGB")) if isinstance(img, Image.Image) else img for img in images ] images = [trim_white_border(image) for image in images] images = [general_transform_pipeline(image) for image in images] diff --git a/texteller/utils/latex.py b/texteller/utils/latex.py index d778924..85c729a 100644 --- a/texteller/utils/latex.py +++ b/texteller/utils/latex.py @@ -21,7 +21,7 @@ def _change(input_str, old_inst, new_inst, old_surr_l, old_surr_r, new_surr_l, n j = start + 1 escaped = False while j < n and count > 0: - if input_str[j] == '\\' and not escaped: + if input_str[j] == "\\" and not escaped: escaped = True j += 1 continue @@ -71,10 +71,10 @@ def change_all(input_str, old_inst, new_inst, old_surr_l, old_surr_r, new_surr_l for p in pos[::-1]: res[p:] = list( _change( - ''.join(res[p:]), old_inst, new_inst, old_surr_l, old_surr_r, new_surr_l, new_surr_r + "".join(res[p:]), old_inst, new_inst, old_surr_l, old_surr_r, new_surr_l, new_surr_r ) ) - res = ''.join(res) + res = "".join(res) return res @@ -121,7 +121,7 @@ def add_newlines(latex_str: str) -> str: # 4. Cleanup: Collapse multiple consecutive newlines into a single newline. # This handles cases where the replacements above might have created \n\n. - processed_str = re.sub(r'\n{2,}', '\n', processed_str) + processed_str = re.sub(r"\n{2,}", "\n", processed_str) # Remove leading/trailing whitespace (including potential single newlines # at the very start/end resulting from the replacements) from the entire result.