fix: markdown post handling
This commit is contained in:
@@ -419,6 +419,7 @@ class Converter:
|
||||
|
||||
# Step 7: Decode common Unicode entities to actual characters (Word prefers this)
|
||||
unicode_map = {
|
||||
# Basic operators
|
||||
'+': '+',
|
||||
'-': '-',
|
||||
'*': '*',
|
||||
@@ -431,30 +432,177 @@ class Converter:
|
||||
',': ',',
|
||||
'.': '.',
|
||||
'|': '|',
|
||||
'…': '⋯',
|
||||
'⋮': '⋮',
|
||||
'⋯': '⋯',
|
||||
'°': '°',
|
||||
'γ': 'γ',
|
||||
'φ': 'φ',
|
||||
'ϕ': 'ϕ',
|
||||
'α': 'α',
|
||||
'β': 'β',
|
||||
'δ': 'δ',
|
||||
'ε': 'ε',
|
||||
'θ': 'θ',
|
||||
'λ': 'λ',
|
||||
'μ': 'μ',
|
||||
'π': 'π',
|
||||
'ρ': 'ρ',
|
||||
'σ': 'σ',
|
||||
'τ': 'τ',
|
||||
'ω': 'ω',
|
||||
'×': '×', # times
|
||||
'÷': '÷', # div
|
||||
'±': '±', # pm
|
||||
'∓': '∓', # mp
|
||||
|
||||
# Ellipsis symbols
|
||||
'…': '…', # ldots (horizontal)
|
||||
'⋮': '⋮', # vdots (vertical)
|
||||
'⋯': '⋯', # cdots (centered)
|
||||
'⋰': '⋰', # iddots (diagonal up)
|
||||
'⋱': '⋱', # ddots (diagonal down)
|
||||
|
||||
# Greek letters (lowercase)
|
||||
'α': 'α', # alpha
|
||||
'β': 'β', # beta
|
||||
'γ': 'γ', # gamma
|
||||
'δ': 'δ', # delta
|
||||
'ε': 'ε', # epsilon
|
||||
'ζ': 'ζ', # zeta
|
||||
'η': 'η', # eta
|
||||
'θ': 'θ', # theta
|
||||
'ι': 'ι', # iota
|
||||
'κ': 'κ', # kappa
|
||||
'λ': 'λ', # lambda
|
||||
'μ': 'μ', # mu
|
||||
'ν': 'ν', # nu
|
||||
'ξ': 'ξ', # xi
|
||||
'ο': 'ο', # omicron
|
||||
'π': 'π', # pi
|
||||
'ρ': 'ρ', # rho
|
||||
'ς': 'ς', # final sigma
|
||||
'σ': 'σ', # sigma
|
||||
'τ': 'τ', # tau
|
||||
'υ': 'υ', # upsilon
|
||||
'φ': 'φ', # phi
|
||||
'χ': 'χ', # chi
|
||||
'ψ': 'ψ', # psi
|
||||
'ω': 'ω', # omega
|
||||
'ϕ': 'ϕ', # phi variant
|
||||
|
||||
# Greek letters (uppercase)
|
||||
'Α': 'Α', # Alpha
|
||||
'Β': 'Β', # Beta
|
||||
'Γ': 'Γ', # Gamma
|
||||
'Δ': 'Δ', # Delta
|
||||
'Ε': 'Ε', # Epsilon
|
||||
'Ζ': 'Ζ', # Zeta
|
||||
'Η': 'Η', # Eta
|
||||
'Θ': 'Θ', # Theta
|
||||
'Ι': 'Ι', # Iota
|
||||
'Κ': 'Κ', # Kappa
|
||||
'Λ': 'Λ', # Lambda
|
||||
'Μ': 'Μ', # Mu
|
||||
'Ν': 'Ν', # Nu
|
||||
'Ξ': 'Ξ', # Xi
|
||||
'Ο': 'Ο', # Omicron
|
||||
'Π': 'Π', # Pi
|
||||
'Ρ': 'Ρ', # Rho
|
||||
'Σ': 'Σ', # Sigma
|
||||
'Τ': 'Τ', # Tau
|
||||
'Υ': 'Υ', # Upsilon
|
||||
'Φ': 'Φ', # Phi
|
||||
'Χ': 'Χ', # Chi
|
||||
'Ψ': 'Ψ', # Psi
|
||||
'Ω': 'Ω', # Omega
|
||||
|
||||
# Math symbols
|
||||
'∅': '∅', # emptyset
|
||||
'∈': '∈', # in
|
||||
'∉': '∉', # notin
|
||||
'∋': '∋', # ni
|
||||
'∌': '∌', # nni
|
||||
'∑': '∑', # sum
|
||||
'∏': '∏', # prod
|
||||
'√': '√', # sqrt
|
||||
'∛': '∛', # cbrt
|
||||
'∜': '∜', # fourthroot
|
||||
'∞': '∞', # infty
|
||||
'∩': '∩', # cap
|
||||
'∪': '∪', # cup
|
||||
'∫': '∫', # int
|
||||
'∬': '∬', # iint
|
||||
'∭': '∭', # iiint
|
||||
'∮': '∮', # oint
|
||||
'⊂': '⊂', # subset
|
||||
'⊃': '⊃', # supset
|
||||
'⊄': '⊄', # nsubset
|
||||
'⊅': '⊅', # nsupset
|
||||
'⊆': '⊆', # subseteq
|
||||
'⊇': '⊇', # supseteq
|
||||
'⊈': '⊈', # nsubseteq
|
||||
'⊉': '⊉', # nsupseteq
|
||||
'≤': '≤', # leq
|
||||
'≥': '≥', # geq
|
||||
'≠': '≠', # neq
|
||||
'≡': '≡', # equiv
|
||||
'≈': '≈', # approx
|
||||
'≃': '≃', # simeq
|
||||
'≅': '≅', # cong
|
||||
'∂': '∂', # partial
|
||||
'∇': '∇', # nabla
|
||||
'∀': '∀', # forall
|
||||
'∃': '∃', # exists
|
||||
'∄': '∄', # nexists
|
||||
'¬': '¬', # neg/lnot
|
||||
'∧': '∧', # wedge/land
|
||||
'∨': '∨', # vee/lor
|
||||
'→': '→', # to/rightarrow
|
||||
'←': '←', # leftarrow
|
||||
'↔': '↔', # leftrightarrow
|
||||
'⇒': '⇒', # Rightarrow
|
||||
'⇐': '⇐', # Leftarrow
|
||||
'⇔': '⇔', # Leftrightarrow
|
||||
'↑': '↑', # uparrow
|
||||
'↓': '↓', # downarrow
|
||||
'⇑': '⇑', # Uparrow
|
||||
'⇓': '⇓', # Downarrow
|
||||
'↕': '↕', # updownarrow
|
||||
'⇕': '⇕', # Updownarrow
|
||||
'≠': '≠', # ne
|
||||
'≪': '≪', # ll
|
||||
'≫': '≫', # gg
|
||||
'⩽': '⩽', # leqslant
|
||||
'⩾': '⩾', # geqslant
|
||||
'⊥': '⊥', # perp
|
||||
'∥': '∥', # parallel
|
||||
'∠': '∠', # angle
|
||||
'△': '△', # triangle
|
||||
'□': '□', # square
|
||||
'◊': '◊', # diamond
|
||||
'♠': '♠', # spadesuit
|
||||
'♡': '♡', # heartsuit
|
||||
'♢': '♢', # diamondsuit
|
||||
'♣': '♣', # clubsuit
|
||||
'ℓ': 'ℓ', # ell
|
||||
'℘': '℘', # wp (Weierstrass p)
|
||||
'ℜ': 'ℜ', # Re (real part)
|
||||
'ℑ': 'ℑ', # Im (imaginary part)
|
||||
'ℵ': 'ℵ', # aleph
|
||||
'ℶ': 'ℶ', # beth
|
||||
}
|
||||
|
||||
for entity, char in unicode_map.items():
|
||||
mathml = mathml.replace(entity, char)
|
||||
|
||||
# Also handle decimal entity format (&#NNNN;) for common characters
|
||||
# Convert decimal to hex-based lookup
|
||||
decimal_patterns = [
|
||||
(r'λ', 'λ'), # lambda (decimal 955 = hex 03BB)
|
||||
(r'⋮', '⋮'), # vdots (decimal 8942 = hex 22EE)
|
||||
(r'⋯', '⋯'), # cdots (decimal 8943 = hex 22EF)
|
||||
(r'…', '…'), # ldots (decimal 8230 = hex 2026)
|
||||
(r'∞', '∞'), # infty (decimal 8734 = hex 221E)
|
||||
(r'∑', '∑'), # sum (decimal 8721 = hex 2211)
|
||||
(r'∏', '∏'), # prod (decimal 8719 = hex 220F)
|
||||
(r'√', '√'), # sqrt (decimal 8730 = hex 221A)
|
||||
(r'∈', '∈'), # in (decimal 8712 = hex 2208)
|
||||
(r'∉', '∉'), # notin (decimal 8713 = hex 2209)
|
||||
(r'∩', '∩'), # cap (decimal 8745 = hex 2229)
|
||||
(r'∪', '∪'), # cup (decimal 8746 = hex 222A)
|
||||
(r'≤', '≤'), # leq (decimal 8804 = hex 2264)
|
||||
(r'≥', '≥'), # geq (decimal 8805 = hex 2265)
|
||||
(r'≠', '≠'), # neq (decimal 8800 = hex 2260)
|
||||
(r'≈', '≈'), # approx (decimal 8776 = hex 2248)
|
||||
(r'≡', '≡'), # equiv (decimal 8801 = hex 2261)
|
||||
]
|
||||
|
||||
for pattern, char in decimal_patterns:
|
||||
mathml = mathml.replace(pattern, char)
|
||||
|
||||
# Step 8: Clean up extra whitespace
|
||||
mathml = re.sub(r'>\s+<', '><', mathml)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user