diff --git a/.gitignore b/.gitignore index 62d5992..e639ee4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,28 +1,172 @@ -**/.DS_Store -**/__pycache__ -**/.vscode -**/pyrightconfig.json +# Created by https://www.toptal.com/developers/gitignore/api/macos,visualstudiocode,pycharm +# Edit at https://www.toptal.com/developers/gitignore?templates=macos,visualstudiocode,pycharm -**/dist -**/build -*.egg-info +### macOS ### +# General +.DS_Store +.AppleDouble +.LSOverride +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### macOS Patch ### +# iCloud generated files +*.icloud + +### PyCharm ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. +# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# SonarLint plugin +.idea/sonarlint/ + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### PyCharm Patch ### +# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 + +# *.iml +# modules.xml +# .idea/misc.xml +# *.ipr + +# Sonarlint plugin +# https://plugins.jetbrains.com/plugin/7973-sonarlint +.idea/**/sonarlint/ + +# SonarQube Plugin +# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin +.idea/**/sonarIssues.xml + +# Markdown Navigator plugin +# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced +.idea/**/markdown-navigator.xml +.idea/**/markdown-navigator-enh.xml +.idea/**/markdown-navigator/ + +# Cache file creation bug +# See https://youtrack.jetbrains.com/issue/JBR-2257 +.idea/$CACHE_FILE$ + +# CodeStream plugin +# https://plugins.jetbrains.com/plugin/12206-codestream +.idea/codestream.xml + +# Azure Toolkit for IntelliJ plugin +# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij +.idea/**/azureSettings.xml + +### VisualStudioCode ### +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +!.vscode/*.code-snippets + +# Local History for Visual Studio Code +.history/ + +# Built Visual Studio Code Extensions +*.vsix + +### VisualStudioCode Patch ### +# Ignore all local history of files +.history +.ionide + +# End of https://www.toptal.com/developers/gitignore/api/macos,visualstudiocode,pycharm + +uv.lock **/train_result -**/ckpt -**/ckpts -**/*.safetensor -**/trocr-* -**/large*.onnx -**/rtdetr_r50vd_6x_coco.onnx - -**/*cache -**/.cache - -**/tmp -**/tmp* -**/log -**/logs - -**/data - -**/*.bin \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..ac95090 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,20 @@ +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.4.5 + hooks: + - id: ruff + args: [--fix, --respect-gitignore, --config=pyproject.toml] + - id: ruff-format + args: [--config=pyproject.toml] + + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-toml + - id: check-added-large-files + - id: check-case-conflict + - id: check-merge-conflict + - id: debug-statements diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..c8cfe39 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.10 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..be171e6 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,51 @@ +[project] +name = "texteller" +authors = [ + { name="OleehyO", email="leehy0357@gmail.com" } +] +version = "0.1.2" +description = "Texteller is a tool for converting latex image to text" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "accelerate>=1.4.0", + "augraphy>=8.2.6", + "datasets>=3.3.2", + "evaluate>=0.4.3", + "onnx>=1.17.0", + "onnxruntime>=1.20.1", + "onnxruntime-gpu>=1.20.1 ; sys_platform == 'linux'", + "opencv-python>=4.11.0.86", + "optimum>=1.24.0", + "pyclipper>=1.3.0.post6", + "shapely>=2.0.7", + "streamlit>=1.42.2", + "streamlit-paste-button>=0.1.2", + "tensorboardx>=2.6.2.2", + "torch>=2.6.0", + "torchvision>=0.21.0", + "transformers==4.45.2", +] + +[tool.ruff] +exclude = ['.git', '.mypy_cache', '.ruff_cache', '.venv', 'dist'] +target-version = 'py310' +line-length = 100 + +[tool.ruff.format] +line-ending = 'lf' +quote-style = 'preserve' + +[tool.ruff.lint] +select = ["E", "W"] +ignore = [ + "E999", + "EXE001", + "UP009", + "F401", + "TID252", + "F403", + "F841", + "E501", + "W293", +] diff --git a/setup_platform.py b/setup_platform.py new file mode 100644 index 0000000..03adc33 --- /dev/null +++ b/setup_platform.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 +""" +Platform-specific setup script for TexTeller. +This script detects the platform and installs the appropriate version of onnxruntime. +""" + +import platform +import subprocess +import sys + + +def install_platform_specific_dependencies(): + """Install the appropriate version of onnxruntime based on the platform.""" + system = platform.system().lower() + + if system == "linux": + print("Linux detected. Installing onnxruntime-gpu...") + subprocess.check_call([sys.executable, "-m", "pip", "install", "onnxruntime-gpu>=1.20.0"]) + else: + print(f"{system.capitalize()} detected. Installing onnxruntime...") + subprocess.check_call([sys.executable, "-m", "pip", "install", "onnxruntime>=1.20.0"]) + + print("Platform-specific dependencies installed successfully.") + + +if __name__ == "__main__": + install_platform_specific_dependencies()