diff --git a/html4docx/h4d.py b/html4docx/h4d.py
index 85b992a..603dc8f 100644
--- a/html4docx/h4d.py
+++ b/html4docx/h4d.py
@@ -2,30 +2,30 @@
import logging
import os
import re
-from io import BytesIO
+from functools import lru_cache
from html.parser import HTMLParser
-from typing import Dict, Any
+from io import BytesIO
+from typing import Any, Dict
import docx
from bs4 import BeautifulSoup
from docx import Document
-from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_ALIGN_VERTICAL
+from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import RGBColor
-from functools import lru_cache
-
-from html4docx import constants
-from html4docx import utils
+from html4docx import constants, utils
from html4docx.metadata import Metadata
+
class HtmlToDocx(HTMLParser):
"""
- Class to convert HTML to Docx
- source: https://docs.python.org/3/library/html.parser.html
+ Class to convert HTML to Docx
+ source: https://docs.python.org/3/library/html.parser.html
"""
+
def __init__(self, style_map=None, tag_style_overrides=None, default_paragraph_style="Normal"):
super().__init__()
self.options = dict(constants.DEFAULT_OPTIONS)
@@ -38,11 +38,11 @@ def __init__(self, style_map=None, tag_style_overrides=None, default_paragraph_s
def set_initial_attrs(self, document=None):
self.tags = {
- 'span': [],
- 'list': [],
+ "span": [],
+ "list": [],
}
self.doc = document if document else Document()
- self.bs = self.options['fix-html'] # whether or not to clean with BeautifulSoup
+ self.bs = self.options["fix-html"] # whether or not to clean with BeautifulSoup
self.paragraph = None
self.run = None
self.skip = False
@@ -66,25 +66,25 @@ def set_initial_attrs(self, document=None):
@property
def metadata(self) -> Dict[str, Any]:
- if not hasattr(self, '_metadata'):
+ if not hasattr(self, "_metadata"):
self._metadata = Metadata(self.doc)
return self._metadata
@property
def include_tables(self) -> bool:
- return self.options.get('tables', True)
+ return self.options.get("tables", True)
@property
def include_images(self) -> bool:
- return self.options.get('images', True)
+ return self.options.get("images", True)
@property
def include_styles(self) -> bool:
- return self.options.get('styles', True)
+ return self.options.get("styles", True)
@property
def include_html_comments(self) -> bool:
- return self.options.get('html-comments', False)
+ return self.options.get("html-comments", False)
@property
def include_stylemap(self) -> bool:
@@ -98,11 +98,11 @@ def save(self, destination) -> None:
"""Save the document to a file path or BytesIO object."""
if isinstance(destination, str):
destination, _ = os.path.splitext(destination)
- self.doc.save(f'{destination}.docx')
+ self.doc.save(f"{destination}.docx")
elif isinstance(destination, BytesIO):
self.doc.save(destination)
else:
- raise TypeError('destination must be a str path or BytesIO object')
+ raise TypeError("destination must be a str path or BytesIO object")
def copy_settings_from(self, other):
"""Copy settings from another instance of HtmlToDocx"""
@@ -160,9 +160,7 @@ def apply_style_to_paragraph(self, paragraph, style_name):
return True
except KeyError:
# Style doesn't exist in document
- print(
- f"Warning: Style '{style_name}' not found in document. Using default."
- )
+ print(f"Warning: Style '{style_name}' not found in document. Using default.")
return False
def apply_style_to_run(self, style_name):
@@ -184,12 +182,8 @@ def apply_style_to_run(self, style_name):
return False
except ValueError as e:
if "need type CHARACTER" in str(e):
- print(
- f"Warning: '{style_name}' is a paragraph style, not a character style."
- )
- print(
- "For inline elements like <span>, please create a character style in Word."
- )
+ print(f"Warning: '{style_name}' is a paragraph style, not a character style.")
+ print("For inline elements like <span>, please create a character style in Word.")
return False
def parse_inline_styles(self, style_string):
@@ -271,9 +265,7 @@ def apply_inline_styles_to_run(self, styles_dict):
# Apply font-family
if "font-family" in styles_dict:
- font_family = (
- styles_dict["font-family"].split(",")[0].strip().strip('"').strip("'")
- )
+ font_family = styles_dict["font-family"].split(",")[0].strip().strip('"').strip("'")
self.run.font.name = font_family
def get_cell_html(self, soup):
@@ -282,14 +274,14 @@ def get_cell_html(self, soup):
Cannot use find_all as it only finds element tags and does not find text which
is not inside an element
"""
- return ' '.join([str(i) for i in soup.contents])
+ return " ".join([str(i) for i in soup.contents])
def set_cell_background(self, cell, color):
"""Set the background color of a table cell."""
tc = cell._tc
tcPr = tc.get_or_add_tcPr()
- shd = OxmlElement('w:shd')
- shd.set(qn('w:fill'), color.lstrip('#'))
+ shd = OxmlElement("w:shd")
+ shd.set(qn("w:fill"), color.lstrip("#"))
tcPr.append(shd)
def set_cell_borders(self, cell, styles):
@@ -314,11 +306,11 @@ def set_cell_borders(self, cell, styles):
border_styles = constants.BORDER_STYLES
keywords = constants.BORDER_KEYWORDS
border_sides = ("top", "right", "bottom", "left")
- border_width_pattern = re.compile(r'^[0-9]*\.?[0-9]+(px|pt|cm|in|rem|em|%)$')
+ border_width_pattern = re.compile(r"^[0-9]*\.?[0-9]+(px|pt|cm|in|rem|em|%)$")
def parse_border_style(value: str) -> str:
"""Parses border styles to match word standart"""
- return constants.BORDER_STYLES[value] if value in constants.BORDER_STYLES.keys() else 'none'
+ return constants.BORDER_STYLES[value] if value in constants.BORDER_STYLES.keys() else "none"
def check_unit_keywords(value: str) -> str:
"""Convert medium, thin, thick keywords to numeric values (px)"""
@@ -332,23 +324,23 @@ def border_unit_converter(unit_value: str):
unit_value = check_unit_keywords(unit_value)
# Return default if no value or empty
- if not unit_value or unit_value == '':
+ if not unit_value or unit_value == "":
return default_size
- unit = re.sub(r'[0-9\.]+', '', unit_value)
- value = float(re.sub(r'[a-zA-Z\!\%]+', '', unit_value)) # Allow float values
+ unit = re.sub(r"[0-9\.]+", "", unit_value)
+ value = float(re.sub(r"[a-zA-Z\!\%]+", "", unit_value)) # Allow float values
- if unit == 'px':
+ if unit == "px":
result = value * 0.75 # 1 px = 0.75 pt
- elif unit == 'cm':
+ elif unit == "cm":
result = value * 28.35 # 1 cm = 28.35 pt
- elif unit == 'in':
+ elif unit == "in":
result = value * 72 # 1 inch = 72 pt
- elif unit == 'pt':
- result = value # default is pt
- elif unit == 'rem' or unit == 'em':
+ elif unit == "pt":
+ result = value # default is pt
+ elif unit == "rem" or unit == "em":
result = value * 12 # Assuming 1rem/em = 16px, converted to pt
- elif unit == '%':
+ elif unit == "%":
result = constants.MAX_INDENT * (value / 100)
else:
return None # Unsupported units return None
@@ -363,7 +355,7 @@ def parse_border_value(value: str):
parts = value.split()
# Return all default if there is only 'none' or empty
- if (len(parts) == 1 and parts[0] == 'none') or (not value or value.strip() == ''):
+ if (len(parts) == 1 and parts[0] == "none") or (not value or value.strip() == ""):
return default_size, default_style, default_color
size = None
@@ -452,9 +444,9 @@ def parse_border_value(value: str):
borders[side].update({"size": size, "style": style, "color": color})
# Check if w:tcBorders exists, otherwise create it
- tcBorders = tcPr.first_child_found_in('w:tcBorders')
+ tcBorders = tcPr.first_child_found_in("w:tcBorders")
if tcBorders is None:
- tcBorders = OxmlElement('w:tcBorders')
+ tcBorders = OxmlElement("w:tcBorders")
tcPr.append(tcBorders)
# Apply borders to the cell
@@ -463,17 +455,17 @@ def parse_border_value(value: str):
border = OxmlElement(f"w:{side}")
border.set(qn("w:val"), border_info["style"]) # Set border style
border.set(qn("w:sz"), str(border_info["size"] * 8)) # Word uses eighths of a point
- border.set(qn("w:color"), border_info["color"].replace('#', '')) # Set border color
+ border.set(qn("w:color"), border_info["color"].replace("#", "")) # Set border color
tcBorders.append(border)
def add_bookmark(self, bookmark_name):
"""Adds a word bookmark to an existing paragraph"""
- bookmark_start = OxmlElement('w:bookmarkStart')
- bookmark_start.set(qn('w:id'), str(self.bookmark_id))
- bookmark_start.set(qn('w:name'), bookmark_name)
+ bookmark_start = OxmlElement("w:bookmarkStart")
+ bookmark_start.set(qn("w:id"), str(self.bookmark_id))
+ bookmark_start.set(qn("w:name"), bookmark_name)
- bookmark_end = OxmlElement('w:bookmarkEnd')
- bookmark_end.set(qn('w:id'), str(self.bookmark_id))
+ bookmark_end = OxmlElement("w:bookmarkEnd")
+ bookmark_end.set(qn("w:id"), str(self.bookmark_id))
if not self.paragraph:
self.paragraph = self.doc.add_paragraph()
@@ -492,18 +484,14 @@ def apply_styles_to_run(self, run, style, isCustom=False):
return
except ValueError as e:
if "need type CHARACTER" in str(e):
- print(
- f"Warning: '{style}' is a paragraph style, not a character style."
- )
- print(
- "For inline elements like <span>, please create a character style in Word."
- )
-
- if not style or not hasattr(run, 'font'):
+ print(f"Warning: '{style}' is a paragraph style, not a character style.")
+ print("For inline elements like <span>, please create a character style in Word.")
+
+ if not style or not hasattr(run, "font"):
return
# Find current paragraph and run position
- if not hasattr(self, 'paragraph') or self.paragraph is None:
+ if not hasattr(self, "paragraph") or self.paragraph is None:
return
paragraph_id = id(self.paragraph)
@@ -518,17 +506,17 @@ def apply_styles_to_run(self, run, style, isCustom=False):
for style_name, style_value in style.items():
if style_name in constants.RUN_STYLES:
- if style_name.startswith('background-color') and style_value in ('inherit', 'initial'):
+ if style_name.startswith("background-color") and style_value in ("inherit", "initial"):
continue
self.paragraph_span_styles[paragraph_id][run_index].add(style_name)
- if style_name == 'text-decoration':
+ if style_name == "text-decoration":
# If span sets text-decoration shorthand, it conflicts with all text-decoration-* properties
- self.paragraph_span_styles[paragraph_id][run_index].add('text-decoration-line')
- self.paragraph_span_styles[paragraph_id][run_index].add('text-decoration-style')
- self.paragraph_span_styles[paragraph_id][run_index].add('text-decoration-color')
- elif style_name.startswith('text-decoration-'):
+ self.paragraph_span_styles[paragraph_id][run_index].add("text-decoration-line")
+ self.paragraph_span_styles[paragraph_id][run_index].add("text-decoration-style")
+ self.paragraph_span_styles[paragraph_id][run_index].add("text-decoration-color")
+ elif style_name.startswith("text-decoration-"):
pass
for style_name, style_value in style.items():
@@ -536,7 +524,7 @@ def apply_styles_to_run(self, run, style, isCustom=False):
continue
elif style_name in constants.RUN_STYLES:
handler = getattr(self, constants.RUN_STYLES[style_name])
- param_name = style_name.replace('-', '_')
+ param_name = style_name.replace("-", "_")
handler(run=run, **{param_name: style_value})
else:
logging.warning(f"Warning: Unrecognized style '{style_name}', will be skipped.")
@@ -550,7 +538,7 @@ def apply_styles_to_paragraph(self, paragraph, style, isCustom=False):
print(f"Warning: Style '{style}' not found in document. Using default.")
return
- if not style or not hasattr(paragraph, 'paragraph_format'):
+ if not style or not hasattr(paragraph, "paragraph_format"):
return
for style_name, style_value in style.items():
@@ -562,40 +550,35 @@ def apply_styles_to_paragraph(self, paragraph, style, isCustom=False):
logging.warning(f"Warning: Unrecognized paragraph style '{style_name}', will be skipped.")
continue
- handler(
- paragraph=paragraph,
- style_name=style_name,
- value=style_value,
- all_styles=style
- )
+ handler(paragraph=paragraph, style_name=style_name, value=style_value, all_styles=style)
def _apply_alignment_paragraph(self, **kwargs):
- paragraph = kwargs['paragraph']
- value = kwargs['value']
+ paragraph = kwargs["paragraph"]
+ value = kwargs["value"]
align = utils.remove_important_from_style(value)
- if 'center' in align:
+ if "center" in align:
paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
- elif 'left' in align:
+ elif "left" in align:
paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
- elif 'right' in align:
+ elif "right" in align:
paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
- elif 'justify' in align:
+ elif "justify" in align:
paragraph.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
def _apply_line_height_paragraph(self, **kwargs):
- paragraph = kwargs['paragraph']
- value = kwargs['value']
+ paragraph = kwargs["paragraph"]
+ value = kwargs["value"]
line_height = utils.remove_important_from_style(value)
- if line_height in ('normal', 'inherit'):
+ if line_height in ("normal", "inherit"):
paragraph.paragraph_format.line_spacing = None
else:
try:
- if line_height.replace('.', '').replace('%', '').isdigit():
- multiplier = float(line_height[:-1]) / 100.0 if line_height.endswith('%') else float(line_height)
+ if line_height.replace(".", "").replace("%", "").isdigit():
+ multiplier = float(line_height[:-1]) / 100.0 if line_height.endswith("%") else float(line_height)
paragraph.paragraph_format.line_spacing = multiplier
else:
converted = utils.unit_converter(line_height, target_unit="pt")
@@ -605,35 +588,35 @@ def _apply_line_height_paragraph(self, **kwargs):
paragraph.paragraph_format.line_spacing = None
def _apply_margins_paragraph(self, **kwargs):
- paragraph = kwargs['paragraph']
- style_name = kwargs['style_name']
- all_styles = kwargs['all_styles']
+ paragraph = kwargs["paragraph"]
+ style_name = kwargs["style_name"]
+ all_styles = kwargs["all_styles"]
- margin_left = all_styles.get('margin-left')
- margin_right = all_styles.get('margin-right')
+ margin_left = all_styles.get("margin-left")
+ margin_right = all_styles.get("margin-right")
if margin_left and margin_right:
- if 'auto' in margin_left and 'auto' in margin_right:
+ if "auto" in margin_left and "auto" in margin_right:
paragraph.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
return
- if style_name == 'margin-left' and margin_left and 'auto' not in margin_left:
+ if style_name == "margin-left" and margin_left and "auto" not in margin_left:
paragraph.paragraph_format.left_indent = utils.unit_converter(margin_left)
- if style_name == 'margin-right' and margin_right and 'auto' not in margin_right:
+ if style_name == "margin-right" and margin_right and "auto" not in margin_right:
paragraph.paragraph_format.right_indent = utils.unit_converter(margin_right)
def _apply_text_indent_paragraph(self, **kwargs):
- paragraph = kwargs['paragraph']
- value = kwargs['value']
+ paragraph = kwargs["paragraph"]
+ value = kwargs["value"]
indent_value = utils.remove_important_from_style(value)
paragraph.paragraph_format.first_line_indent = utils.unit_converter(indent_value, target_unit="pt")
def _apply_font_weight_paragraph(self, **kwargs):
- paragraph = kwargs['paragraph']
- value = kwargs['value']
+ paragraph = kwargs["paragraph"]
+ value = kwargs["value"]
font_weight = utils.remove_important_from_style(value).lower()
@@ -641,7 +624,7 @@ def _apply_font_weight_paragraph(self, **kwargs):
paragraph_spans = self.paragraph_span_styles.get(paragraph_id, {})
for i, run in enumerate(paragraph.runs):
- if i in paragraph_spans and 'font-weight' in paragraph_spans[i]:
+ if i in paragraph_spans and "font-weight" in paragraph_spans[i]:
continue
self._apply_font_weight_to_run(
@@ -650,11 +633,11 @@ def _apply_font_weight_paragraph(self, **kwargs):
)
def _apply_font_weight_to_run(self, **kwargs):
- font_weight = kwargs['font_weight']
- run = kwargs['run']
- if font_weight in ('bold', 'bolder', '700', '800', '900'):
+ font_weight = kwargs["font_weight"]
+ run = kwargs["run"]
+ if font_weight in ("bold", "bolder", "700", "800", "900"):
run.font.bold = True
- elif font_weight in ('normal', 'lighter', '400', '300', '100'):
+ elif font_weight in ("normal", "lighter", "400", "300", "100"):
run.font.bold = False
# Note: Decide what to do for values between 400-700
elif font_weight.isdigit():
@@ -662,8 +645,8 @@ def _apply_font_weight_to_run(self, **kwargs):
run.font.bold = weight >= 700
def _apply_font_style_paragraph(self, **kwargs):
- paragraph = kwargs['paragraph']
- value = kwargs['value']
+ paragraph = kwargs["paragraph"]
+ value = kwargs["value"]
font_style = utils.remove_important_from_style(value).lower()
@@ -671,7 +654,7 @@ def _apply_font_style_paragraph(self, **kwargs):
paragraph_spans = self.paragraph_span_styles.get(paragraph_id, {})
for i, run in enumerate(paragraph.runs):
- if i in paragraph_spans and 'font-style' in paragraph_spans[i]:
+ if i in paragraph_spans and "font-style" in paragraph_spans[i]:
continue
self._apply_font_style_to_run(
@@ -680,17 +663,17 @@ def _apply_font_style_paragraph(self, **kwargs):
)
def _apply_font_style_to_run(self, **kwargs):
- font_style = kwargs['font_style']
- run = kwargs['run']
+ font_style = kwargs["font_style"]
+ run = kwargs["run"]
- if font_style in ('italic', 'oblique'):
+ if font_style in ("italic", "oblique"):
run.font.italic = True
- elif font_style == 'normal':
+ elif font_style == "normal":
run.font.italic = False
def _apply_font_size_paragraph(self, **kwargs):
- paragraph = kwargs['paragraph']
- value = kwargs['value']
+ paragraph = kwargs["paragraph"]
+ value = kwargs["value"]
font_size = utils.remove_important_from_style(value).lower()
@@ -701,7 +684,7 @@ def _apply_font_size_paragraph(self, **kwargs):
paragraph_spans = self.paragraph_span_styles.get(paragraph_id, {})
for i, run in enumerate(paragraph.runs):
- if i in paragraph_spans and 'font-size' in paragraph_spans[i]:
+ if i in paragraph_spans and "font-size" in paragraph_spans[i]:
continue
self._apply_font_size_to_run(
@@ -710,14 +693,14 @@ def _apply_font_size_paragraph(self, **kwargs):
)
def _apply_font_size_to_run(self, **kwargs):
- run = kwargs['run']
- font_size = kwargs['font_size']
+ run = kwargs["run"]
+ font_size = kwargs["font_size"]
font_size = utils.remove_important_from_style(font_size).lower()
font_size = utils.adapt_font_size(font_size)
try:
- if font_size in ('normal', 'initial', 'inherit'):
+ if font_size in ("normal", "initial", "inherit"):
run.font.size = None
else:
converted_size = utils.unit_converter(font_size, target_unit="pt")
@@ -728,8 +711,8 @@ def _apply_font_size_to_run(self, **kwargs):
logging.warning(f"Warning: Could not parse font-size '{font_size}': {e}")
def _apply_font_family_paragraph(self, **kwargs):
- paragraph = kwargs['paragraph']
- value = kwargs['value']
+ paragraph = kwargs["paragraph"]
+ value = kwargs["value"]
font_family = utils.remove_important_from_style(value).strip()
@@ -737,7 +720,7 @@ def _apply_font_family_paragraph(self, **kwargs):
paragraph_spans = self.paragraph_span_styles.get(paragraph_id, {})
for i, run in enumerate(paragraph.runs):
- if i in paragraph_spans and 'font-family' in paragraph_spans[i]:
+ if i in paragraph_spans and "font-family" in paragraph_spans[i]:
continue
self._apply_font_family_to_run(
@@ -746,21 +729,30 @@ def _apply_font_family_paragraph(self, **kwargs):
)
def _apply_font_family_to_run(self, **kwargs):
- run = kwargs['run']
- font_family = kwargs['font_family']
+ run = kwargs["run"]
+ font_family = kwargs["font_family"]
- if not font_family or font_family in ('inherit', 'initial', 'unset'):
+ if not font_family or font_family in ("inherit", "initial", "unset"):
return
try:
- font_families = [f.strip().strip('"\'') for f in font_family.split(',')]
+ font_families = [f.strip().strip("\"'") for f in font_family.split(",")]
for font_name in font_families:
- if font_name and font_name not in ('inherit', 'initial', 'unset', 'serif', 'sans-serif', 'monospace',
- 'cursive', 'fantasy', 'system-ui'):
+ if font_name and font_name not in (
+ "inherit",
+ "initial",
+ "unset",
+ "serif",
+ "sans-serif",
+ "monospace",
+ "cursive",
+ "fantasy",
+ "system-ui",
+ ):
run.font.name = font_name
break
- elif font_name in ('serif', 'sans-serif', 'monospace'):
+ elif font_name in ("serif", "sans-serif", "monospace"):
run.font.name = constants.GENERIC_FONT_STYLES[font_name]
break
@@ -768,17 +760,17 @@ def _apply_font_family_to_run(self, **kwargs):
logging.warning(f"Warning: Could not apply font-family '{font_family}': {e}")
def _apply_color_paragraph(self, **kwargs):
- paragraph = kwargs['paragraph']
- all_styles = kwargs['all_styles']
- color_value = utils.remove_important_from_style(all_styles.get('color', '')).lower().strip()
- if color_value in ('inherit', 'initial', 'transparent', 'currentcolor'):
+ paragraph = kwargs["paragraph"]
+ all_styles = kwargs["all_styles"]
+ color_value = utils.remove_important_from_style(all_styles.get("color", "")).lower().strip()
+ if color_value in ("inherit", "initial", "transparent", "currentcolor"):
return
paragraph_id = id(paragraph)
paragraph_spans = self.paragraph_span_styles.get(paragraph_id, {})
for i, run in enumerate(paragraph.runs):
- if i in paragraph_spans and 'color' in paragraph_spans[i]:
+ if i in paragraph_spans and "color" in paragraph_spans[i]:
continue
self._apply_color_to_run(
run=run,
@@ -786,8 +778,8 @@ def _apply_color_paragraph(self, **kwargs):
)
def _apply_color_to_run(self, **kwargs):
- run = kwargs['run']
- color_value = kwargs['color']
+ run = kwargs["run"]
+ color_value = kwargs["color"]
try:
colors = utils.parse_color(color_value)
run.font.color.rgb = RGBColor(*colors)
@@ -795,8 +787,8 @@ def _apply_color_to_run(self, **kwargs):
logging.warning(f"Could not apply color '{color_value}': {e}")
def _apply_text_transform_paragraph(self, **kwargs):
- paragraph = kwargs['paragraph']
- value = kwargs['value']
+ paragraph = kwargs["paragraph"]
+ value = kwargs["value"]
text_transform = utils.remove_important_from_style(value).lower()
@@ -804,7 +796,7 @@ def _apply_text_transform_paragraph(self, **kwargs):
paragraph_spans = self.paragraph_span_styles.get(paragraph_id, {})
for i, run in enumerate(paragraph.runs):
- if i in paragraph_spans and 'text-transform' in paragraph_spans[i]:
+ if i in paragraph_spans and "text-transform" in paragraph_spans[i]:
continue
self._apply_text_transform_to_run(
@@ -813,54 +805,50 @@ def _apply_text_transform_paragraph(self, **kwargs):
)
def _apply_text_transform_to_run(self, **kwargs):
- run = kwargs['run']
- text_transform = kwargs['text_transform']
+ run = kwargs["run"]
+ text_transform = kwargs["text_transform"]
if not run.text:
return
try:
- if text_transform == 'uppercase':
+ if text_transform == "uppercase":
run.text = run.text.upper()
- elif text_transform == 'lowercase':
+ elif text_transform == "lowercase":
run.text = run.text.lower()
- elif text_transform == 'capitalize':
+ elif text_transform == "capitalize":
run.text = run.text.title()
- elif text_transform in ('none', 'initial', 'inherit'):
+ elif text_transform in ("none", "initial", "inherit"):
# No transformation needed
pass
- elif text_transform in ('full-width', 'math-auto', 'full-size-kana'):
+ elif text_transform in ("full-width", "math-auto", "full-size-kana"):
logging.warning(f"Warning: Unsupported text transform '{text_transform}'")
except (AttributeError, Exception) as e:
logging.warning(f"Warning: Could not apply text-transform '{text_transform}': {e}")
def _apply_text_decoration_paragraph(self, **kwargs):
- paragraph = kwargs['paragraph']
- all_styles = kwargs['all_styles']
+ paragraph = kwargs["paragraph"]
+ all_styles = kwargs["all_styles"]
# Initialize decorations
- decorations = {
- 'line_type': None,
- 'line_style': None,
- 'color': None
- }
+ decorations = {"line_type": None, "line_style": None, "color": None}
- if 'text-decoration' in all_styles:
- text_decoration_value = utils.remove_important_from_style(all_styles['text-decoration']).lower()
+ if "text-decoration" in all_styles:
+ text_decoration_value = utils.remove_important_from_style(all_styles["text-decoration"]).lower()
decorations = utils.parse_text_decoration(text_decoration_value)
- if 'text-decoration-line' in all_styles:
- line_value = utils.remove_important_from_style(all_styles['text-decoration-line']).lower()
- decorations['line_type'] = line_value
+ if "text-decoration-line" in all_styles:
+ line_value = utils.remove_important_from_style(all_styles["text-decoration-line"]).lower()
+ decorations["line_type"] = line_value
- if 'text-decoration-style' in all_styles:
- style_value = utils.remove_important_from_style(all_styles['text-decoration-style']).lower()
- decorations['line_style'] = style_value
+ if "text-decoration-style" in all_styles:
+ style_value = utils.remove_important_from_style(all_styles["text-decoration-style"]).lower()
+ decorations["line_style"] = style_value
- if 'text-decoration-color' in all_styles:
- color_value = utils.remove_important_from_style(all_styles['text-decoration-color']).lower()
- decorations['color'] = color_value
+ if "text-decoration-color" in all_styles:
+ color_value = utils.remove_important_from_style(all_styles["text-decoration-color"]).lower()
+ decorations["color"] = color_value
paragraph_id = id(paragraph)
paragraph_spans = self.paragraph_span_styles.get(paragraph_id, {})
@@ -869,82 +857,82 @@ def _apply_text_decoration_paragraph(self, **kwargs):
span_styles = paragraph_spans.get(i, set())
# If span has text-decoration shorthand, skip entirely
- if 'text-decoration' in span_styles:
+ if "text-decoration" in span_styles:
continue
- if decorations['line_type'] and 'text-decoration-line' not in span_styles:
+ if decorations["line_type"] and "text-decoration-line" not in span_styles:
self._apply_text_decoration_line_to_run(
run=run,
- text_decoration_line=decorations['line_type'],
+ text_decoration_line=decorations["line_type"],
)
- if decorations['line_style'] and 'text-decoration-style' not in span_styles:
+ if decorations["line_style"] and "text-decoration-style" not in span_styles:
self._apply_text_decoration_style_to_run(
run=run,
- text_decoration_style=decorations['line_style'],
+ text_decoration_style=decorations["line_style"],
)
- if decorations['color'] and 'text-decoration-color' not in span_styles:
+ if decorations["color"] and "text-decoration-color" not in span_styles:
self._apply_text_decoration_color_to_run(
run=run,
- text_decoration_color=decorations['color'],
+ text_decoration_color=decorations["color"],
)
def _apply_text_decoration_to_run(self, **kwargs):
- run = kwargs['run']
- text_decoration = kwargs['text_decoration']
+ run = kwargs["run"]
+ text_decoration = kwargs["text_decoration"]
if not text_decoration:
return
decorations = utils.parse_text_decoration(text_decoration)
- if decorations['line_type']:
+ if decorations["line_type"]:
self._apply_text_decoration_line_to_run(
run=run,
- text_decoration_line=decorations['line_type'],
+ text_decoration_line=decorations["line_type"],
)
- if decorations['line_style']:
+ if decorations["line_style"]:
self._apply_text_decoration_style_to_run(
run=run,
- text_decoration_style=decorations['line_style'],
+ text_decoration_style=decorations["line_style"],
)
- if decorations['color']:
+ if decorations["color"]:
self._apply_text_decoration_color_to_run(
run=run,
- text_decoration_color=decorations['color'],
+ text_decoration_color=decorations["color"],
)
def _apply_text_decoration_line_to_run(self, **kwargs):
- run = kwargs['run']
- text_decoration_line = kwargs['text_decoration_line']
+ run = kwargs["run"]
+ text_decoration_line = kwargs["text_decoration_line"]
if text_decoration_line in constants.FONT_UNDERLINE:
- if text_decoration_line == 'underline':
+ if text_decoration_line == "underline":
run.font.underline = True
run.font.strike = False
- elif text_decoration_line == 'line-through':
+ elif text_decoration_line == "line-through":
run.font.strike = True
run.font.underline = False
- elif text_decoration_line == 'none':
+ elif text_decoration_line == "none":
run.font.underline = False
run.font.strike = False
else:
logging.warning(f"Warning: Unsupported text decoration '{text_decoration_line}'")
def _apply_text_decoration_style_to_run(self, **kwargs):
- run = kwargs['run']
- text_decoration_style = kwargs['text_decoration_style']
+ run = kwargs["run"]
+ text_decoration_style = kwargs["text_decoration_style"]
if not text_decoration_style or run.font.underline is False:
return False
should_apply = False
if run.font.underline:
should_apply = True
- elif hasattr(self.paragraph, '_pending_styles'):
+ elif hasattr(self.paragraph, "_pending_styles"):
for pending_style in self.paragraph._pending_styles:
- if 'text-decoration' in pending_style or 'text-decoration-line' in pending_style:
+ if "text-decoration" in pending_style or "text-decoration-line" in pending_style:
should_apply = True
break
@@ -959,31 +947,31 @@ def _apply_text_decoration_style_to_run(self, **kwargs):
paragraph_id = id(self.paragraph)
run_index = len(self.paragraph.runs) - 1
if paragraph_id in self.paragraph_span_styles and run_index in self.paragraph_span_styles[paragraph_id]:
- self.paragraph_span_styles[paragraph_id][run_index].add('text-decoration-line')
+ self.paragraph_span_styles[paragraph_id][run_index].add("text-decoration-line")
return True
def _apply_text_decoration_color_to_run(self, **kwargs):
- run = kwargs['run']
- text_decoration_color = kwargs['text_decoration_color']
+ run = kwargs["run"]
+ text_decoration_color = kwargs["text_decoration_color"]
if not text_decoration_color or not utils.is_color(text_decoration_color):
return
color_hex = utils.parse_color(text_decoration_color, return_hex=True)
rPr = run._r.get_or_add_rPr()
- u = rPr.find(qn('w:u'))
+ u = rPr.find(qn("w:u"))
if u is not None:
- u.set(qn('w:color'), color_hex.upper().lstrip('#'))
+ u.set(qn("w:color"), color_hex.upper().lstrip("#"))
def _apply_background_color_paragraph(self, **kwargs):
- paragraph = kwargs['paragraph']
- value = kwargs['value']
+ paragraph = kwargs["paragraph"]
+ value = kwargs["value"]
background_color = utils.remove_important_from_style(value).lower().strip()
- if background_color in ('inherit', 'initial'):
+ if background_color in ("inherit", "initial"):
return
- elif background_color in ('transparent', 'none'):
+ elif background_color in ("transparent", "none"):
logging.warning(f"Warning: Unsupported background color '{background_color}'")
return
@@ -996,7 +984,7 @@ def _apply_background_color_paragraph(self, **kwargs):
paragraph_spans = self.paragraph_span_styles.get(paragraph_id, {})
for i, run in enumerate(paragraph.runs):
- if i in paragraph_spans and 'background-color' in paragraph_spans[i]:
+ if i in paragraph_spans and "background-color" in paragraph_spans[i]:
continue
self._apply_background_color_to_run(
run=run,
@@ -1007,12 +995,12 @@ def _apply_background_color_paragraph(self, **kwargs):
logging.warning(f"Could not apply background-color to paragraph: {e}")
def _apply_background_color_to_run(self, **kwargs):
- run = kwargs['run']
- background_color = kwargs['background_color']
+ run = kwargs["run"]
+ background_color = kwargs["background_color"]
try:
- if background_color in ('inherit', 'initial'):
+ if background_color in ("inherit", "initial"):
return
- elif background_color in ('transparent', 'none'):
+ elif background_color in ("transparent", "none"):
logging.warning(f"Warning: Unsupported background color '{background_color}'")
return
@@ -1020,15 +1008,15 @@ def _apply_background_color_to_run(self, **kwargs):
if not color_hex:
return
- shd = OxmlElement('w:shd')
- shd.set(qn('w:val'), 'clear')
- shd.set(qn('w:color'), 'auto')
- shd.set(qn('w:fill'), color_hex.lstrip('#'))
+ shd = OxmlElement("w:shd")
+ shd.set(qn("w:val"), "clear")
+ shd.set(qn("w:color"), "auto")
+ shd.set(qn("w:fill"), color_hex.lstrip("#"))
r_pr = run._element.get_or_add_rPr()
# Remove existing shading
- existing_shd = r_pr.find(qn('w:shd'))
+ existing_shd = r_pr.find(qn("w:shd"))
if existing_shd is not None:
r_pr.remove(existing_shd)
@@ -1039,66 +1027,66 @@ def _apply_background_color_to_run(self, **kwargs):
def add_text_align_or_margin_to(self, obj, style):
"""Styles that can be applied on multiple objects"""
- if 'text-align' in style:
- align = utils.remove_important_from_style(style['text-align'])
+ if "text-align" in style:
+ align = utils.remove_important_from_style(style["text-align"])
- if 'center' in align:
+ if "center" in align:
obj.alignment = WD_ALIGN_PARAGRAPH.CENTER
- elif 'left' in align:
+ elif "left" in align:
obj.alignment = WD_ALIGN_PARAGRAPH.LEFT
- elif 'right' in align:
+ elif "right" in align:
obj.alignment = WD_ALIGN_PARAGRAPH.RIGHT
- elif 'justify' in align:
+ elif "justify" in align:
obj.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
- if 'margin-left' in style and 'margin-right' in style:
- if 'auto' in style['margin-left'] and 'auto' in style['margin-right']:
+ if "margin-left" in style and "margin-right" in style:
+ if "auto" in style["margin-left"] and "auto" in style["margin-right"]:
obj.alignment = WD_ALIGN_PARAGRAPH.CENTER
- elif 'margin-left' in style:
- obj.left_indent = utils.unit_converter(style['margin-left'])
+ elif "margin-left" in style:
+ obj.left_indent = utils.unit_converter(style["margin-left"])
def add_styles_to_table_cell(self, styles, doc_cell, cell_row):
"""Styles that must be applied specifically in a _Cell object"""
# Set background color
- if 'background-color' in styles:
- self.set_cell_background(doc_cell, styles['background-color'])
+ if "background-color" in styles:
+ self.set_cell_background(doc_cell, styles["background-color"])
# Set width (approximate, since DOCX uses different units)
- if 'width' in styles:
- doc_cell.width = utils.unit_converter(styles['width'])
+ if "width" in styles:
+ doc_cell.width = utils.unit_converter(styles["width"])
# Set height (due word limitations, cannot set individually cell height, only whole row)
- if 'height' in styles:
- cell_row.height = utils.unit_converter(styles['height'])
+ if "height" in styles:
+ cell_row.height = utils.unit_converter(styles["height"])
# Set text color
- if 'color' in styles:
- color = utils.parse_color(styles['color'])
+ if "color" in styles:
+ color = utils.parse_color(styles["color"])
if color:
for paragraph in doc_cell.paragraphs:
for run in paragraph.runs:
run.font.color.rgb = color
# Set vertical align (for individual cells)
- if 'vertical-align' in styles:
- align = utils.remove_important_from_style(styles['vertical-align'])
+ if "vertical-align" in styles:
+ align = utils.remove_important_from_style(styles["vertical-align"])
- if 'top' in align:
+ if "top" in align:
doc_cell.vertical_alignment = WD_ALIGN_VERTICAL.TOP
- elif 'middle' in align:
+ elif "middle" in align:
doc_cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
- elif 'bottom' in align:
+ elif "bottom" in align:
doc_cell.vertical_alignment = WD_ALIGN_VERTICAL.BOTTOM
# Set borders
- if any('border' in style for style in styles.keys()):
+ if any("border" in style for style in styles.keys()):
self.set_cell_borders(doc_cell, styles)
self.add_text_align_or_margin_to(doc_cell.paragraphs[0], styles)
def add_styles_to_run(self, style):
- if 'font-size' in style:
- font_size = utils.remove_important_from_style(style['font-size'])
+ if "font-size" in style:
+ font_size = utils.remove_important_from_style(style["font-size"])
# Adapt font_size when text, ex.: small, medium, etc.
font_size = utils.adapt_font_size(font_size)
@@ -1106,21 +1094,21 @@ def add_styles_to_run(self, style):
for run in self.paragraph.runs:
run.font.size = utils.unit_converter(font_size)
- if 'color' in style:
- colors = utils.parse_color(style['color'])
+ if "color" in style:
+ colors = utils.parse_color(style["color"])
self.run.font.color.rgb = RGBColor(*colors)
- if 'background-color' in style:
+ if "background-color" in style:
# This should stay here for div.
# Little trick to apply background-color to paragraph
# because `self.run.font.highlight_color`
# has a very limited amount of colors
- color = utils.parse_color(style['background-color'], return_hex=True)
+ color = utils.parse_color(style["background-color"], return_hex=True)
- shd = OxmlElement('w:shd')
- shd.set(qn('w:val'), 'clear')
- shd.set(qn('w:color'), 'auto')
- shd.set(qn('w:fill'), color.lstrip('#'))
+ shd = OxmlElement("w:shd")
+ shd.set(qn("w:val"), "clear")
+ shd.set(qn("w:color"), "auto")
+ shd.set(qn("w:fill"), color.lstrip("#"))
# Make sure the paragraph styling element exists
self.paragraph.paragraph_format.element.get_or_add_pPr()
@@ -1129,15 +1117,15 @@ def add_styles_to_run(self, style):
self.paragraph.paragraph_format.element.pPr.append(shd)
def handle_li(self):
- '''
- Handle li tags
- source: https://stackoverflow.com/a/78685353/17274446
- '''
- list_depth = len(self.tags['list']) or 1
- list_type = self.tags['list'][-1] if self.tags['list'] else 'ul'
+ """
+ Handle li tags
+ source: https://stackoverflow.com/a/78685353/17274446
+ """
+ list_depth = len(self.tags["list"]) or 1
+ list_type = self.tags["list"][-1] if self.tags["list"] else "ul"
level = min(list_depth, 3)
style_key = list_type if level <= 1 else f"{list_type}{level}"
- list_style = constants.STYLES.get(style_key, 'List Number' if list_type == 'ol' else 'List Bullet')
+ list_style = constants.STYLES.get(style_key, "List Number" if list_type == "ol" else "List Bullet")
self.paragraph = self.doc.add_paragraph(style=list_style)
self.in_li = True
@@ -1152,7 +1140,7 @@ def handle_li(self):
style_obj = self.paragraph.style
num_id_style = None
- if hasattr(style_obj._element.pPr, 'numPr'):
+ if hasattr(style_obj._element.pPr, "numPr"):
num_id_style = style_obj._element.pPr.numPr.numId.val
if num_id_style is not None:
@@ -1176,13 +1164,13 @@ def handle_li(self):
# Assign this numId to the paragraph
pPr = self.paragraph._p.get_or_add_pPr()
- numPr = OxmlElement('w:numPr')
+ numPr = OxmlElement("w:numPr")
- numId_elem = OxmlElement('w:numId')
- numId_elem.set(qn('w:val'), str(new_num_id))
+ numId_elem = OxmlElement("w:numId")
+ numId_elem.set(qn("w:val"), str(new_num_id))
- ilvl = OxmlElement('w:ilvl')
- ilvl.set(qn('w:val'), str(level - 1))
+ ilvl = OxmlElement("w:ilvl")
+ ilvl.set(qn("w:val"), str(level - 1))
numPr.append(ilvl)
numPr.append(numId_elem)
@@ -1197,18 +1185,18 @@ def add_image_to_cell(self, cell, image, width=None, height=None):
def handle_img(self, current_attrs):
if not self.include_images:
self.skip = True
- self.skip_tag = 'img'
+ self.skip_tag = "img"
return
- if 'src' not in current_attrs:
+ if "src" not in current_attrs:
self.doc.add_paragraph("some code elements that " \
- "should appear as code.some code elements that should appear as code.
This is a code block.
@@ -290,10 +261,7 @@ def test_code_block(self):
self.parser.add_html_to_document(html, self.document)
def test_pre_block(self):
- self.document.add_heading(
- 'Test: pre block',
- level=1
- )
+ self.document.add_heading("Test: pre block", level=1)
html = """
This is a pre-formatted block.
@@ -306,25 +274,19 @@ def test_pre_block(self):
self.parser.add_html_to_document(html, self.document)
def test_handling_hr(self):
- hr_html_example = 'Introduction Header
Introduction Header
Click here: Link to intro
' ) - self.document.add_heading( - 'Test: Handling internal hyperlink', - level=1 - ) + self.document.add_heading("Test: Handling internal hyperlink", level=1) self.parser.add_html_to_document(hyperlink_html_example, self.document) document = self.parser.parse_html_string(hyperlink_html_example) @@ -358,8 +317,8 @@ def test_internal_hyperlink(self): def test_internal_hyperlink_without_paragraph(self): hyperlink_html_example = ( - "Click here: Link to intro
" + 'Click here: Link to intro
' ) document = self.parser.parse_html_string(hyperlink_html_example) @@ -370,9 +329,7 @@ def test_internal_hyperlink_without_paragraph(self): assert 'Click here: Link to intro
" - ) + hyperlink_html_example = 'Click here: Link to intro
' document = self.parser.parse_html_string(hyperlink_html_example) document_body = document._body._body.xml @@ -382,18 +339,15 @@ def test_internal_hyperlink_without_anchor(self): assert '
'
self.parser.add_html_to_document(html_local_img, self.document)
document = self.parser.parse_html_string(html_local_img)
@@ -411,33 +365,50 @@ def test_local_img(self):
assert image_found, "No image was found in the document"
- def test_inline_images(self):
- self.document.add_heading(
- 'Test: Handling inline images',
- level=1
+ def test_img_with_dimensions(self):
+ self.document.add_heading("Test: Image With Dimensions", level=1)
+
+ html = '
'
+ self.parser.add_html_to_document(html, self.document)
+ document = self.parser.parse_html_string(html)
+
+ # Ensure at least one image exists
+ assert len(document.inline_shapes) > 0, "No image was found in the document"
+
+ shape = document.inline_shapes[-1]
+
+ # Convert expected px → inches (assuming 96 DPI)
+ expected_width_in = 520 / 96
+ expected_height_in = 306 / 96
+
+ actual_width_in = shape.width.inches
+ actual_height_in = shape.height.inches
+
+ # Allow tolerance (floating point + conversion differences)
+ tolerance = 0.05 # ~1.27 mm
+
+ assert abs(actual_width_in - expected_width_in) < tolerance, (
+ f"Width mismatch: expected ~{expected_width_in:.2f}in, got {actual_width_in:.2f}in"
)
- test_img_src = 'https://github.com/dfop02/html4docx/blob/main/tests/assets/images/test_img.png?raw=true'
- html_example = (
- f""
- f"
"
- f"
This text has Bold Words.
" @@ -500,8 +462,7 @@ def test_bold_italic_underline_and_strike(self): self.assertIn("Marked Words", paragraphs[6].text) self.assertEqual( - self.get_run_shading_fill(paragraphs[6].runs[1]), 'FFFF00', - " should apply yellow shading (FFFF00)" + self.get_run_shading_fill(paragraphs[6].runs[1]), "FFFF00", " should apply yellow shading (FFFF00)" ) self.assertIn("Bold, Italic, Underline and Strike Words", paragraphs[7].text) @@ -513,64 +474,58 @@ def test_bold_italic_underline_and_strike(self): def test_font_size(self): font_size_html_example = ( - "paragraph 8px
" - "paragraph 1cm
" - "paragraph 6em
" - "paragraph 12cm
" - "paragraph 12vh not supported
" - "paragraph 5pc
" - "paragraph 14pt
" - "paragraph 16pt
" - "paragraph 2mm
" - "paragraph small
" - ) - - self.document.add_heading( - 'Test: Font-Size', - level=1 - ) + 'paragraph 8px
' + 'paragraph 1cm
' + 'paragraph 6em
' + 'paragraph 12cm
' + 'paragraph 12vh not supported
' + 'paragraph 5pc
' + 'paragraph 14pt
' + 'paragraph 16pt
' + 'paragraph 2mm
' + 'paragraph small
' + ) + + self.document.add_heading("Test: Font-Size", level=1) # Add on document for human validation self.parser.add_html_to_document(font_size_html_example, self.document) document = self.parser.parse_html_string(font_size_html_example) font_sizes = [str(p.runs[0].font.size) for p in document.paragraphs] - assert ['76200', '355600', '914400', '431800', 'None', '762000', '177800', '203200', '69850', '120650'] == font_sizes + assert ["76200", "355600", "914400", "431800", "None", "762000", "177800", "203200", "69850", "120650"] == font_sizes def test_font_size_paragraph(self): font_size_html_example = ( - "paragraph 8px
" - "paragraph 1cm
" - "paragraph 6em
" - "paragraph 12cm
" - "paragraph 12vh not supported
" - "paragraph 5pc
" - "paragraph 14pt
" - "paragraph 16pt
" - "paragraph 2mm
" - "paragraph small
" - ) - - self.document.add_heading( - 'Test: Font-Size on', - level=1 - ) + '
paragraph 8px
' + 'paragraph 1cm
' + 'paragraph 6em
' + 'paragraph 12cm
' + 'paragraph 12vh not supported
' + 'paragraph 5pc
' + 'paragraph 14pt
' + 'paragraph 16pt
' + 'paragraph 2mm
' + 'paragraph small
' + ) + + self.document.add_heading("Test: Font-Size on", level=1) self.parser.add_html_to_document(font_size_html_example, self.document) document = self.parser.parse_html_string(font_size_html_example) font_sizes = [str(p.runs[0].font.size) for p in document.paragraphs] - assert ['76200', '355600', '914400', '431800', 'None', '762000', '177800', '203200', '69850', '120650'] == font_sizes + assert ["76200", "355600", "914400", "431800", "None", "762000", "177800", "203200", "69850", "120650"] == font_sizes def test_font_weight_paragraph(self): - self.document.add_heading('Test: font weight on
', level=1) + self.document.add_heading("Test: font weight on
", level=1) font_weight_html_example = ( - "
bold text
" - "bolder text
" - "700 weight
" - "900 weight
" - "normal text
" - "lighter text
" - "400 weight
" - "100 weight
" + 'bold text
' + 'bolder text
' + '700 weight
' + '900 weight
' + 'normal text
' + 'lighter text
' + '400 weight
' + '100 weight
' ) self.parser.add_html_to_document(font_weight_html_example, self.document) @@ -580,10 +535,10 @@ def test_font_weight_paragraph(self): font_weights = [p.runs[0].font.bold for p in document.paragraphs] expected_weights = [ - True, # bold - True, # bolder - True, # 700 - True, # 900 + True, # bold + True, # bolder + True, # 700 + True, # 900 False, # normal False, # lighter False, # 400 @@ -593,11 +548,11 @@ def test_font_weight_paragraph(self): self.assertEqual(font_weights, expected_weights) def test_font_style_paragraph(self): - self.document.add_heading('Test: font style on', level=1) + self.document.add_heading("Test: font style on
", level=1) font_style_html_example = ( - "
italic text
" - "oblique text
" - "normal text
" + 'italic text
' + 'oblique text
' + 'normal text
' ) self.parser.add_html_to_document(font_style_html_example, self.document) @@ -607,76 +562,70 @@ def test_font_style_paragraph(self): font_styles = [p.runs[0].font.italic for p in document.paragraphs] expected_styles = [ - True, # italic - True, # oblique (should be treated as italic) + True, # italic + True, # oblique (should be treated as italic) False, # normal ] self.assertEqual(font_styles, expected_styles) def test_font_family_paragraph(self): - self.document.add_heading('Test: font family on', level=1) + self.document.add_heading("Test: font family on
", level=1) font_family_html_example = ( - "
Arial font text
" + 'Arial font text
' "Helvetica font text
" "Noto Sans font text
" "Times New Roman font text
" - "Generic serif font text
" - "Generic sans-serif font text
" - "Generic monospace font text
" + 'Generic serif font text
' + 'Generic sans-serif font text
' + 'Generic monospace font text
' "Courier New font text
" - "Inherit font text
" + 'Inherit font text
' ) self.parser.add_html_to_document(font_family_html_example, self.document) def test_text_transform_paragraph(self): - self.document.add_heading('Test: text-transform on', level=1) + self.document.add_heading("Test: text-transform on
", level=1) text_transform_html_example = ( - "
uppercase text
" - "LOWERCASE TEXT
" - "capitalize each word
" - "normal text
" + 'uppercase text
' + 'LOWERCASE TEXT
' + 'capitalize each word
' + 'normal text
' "default text
" ) self.parser.add_html_to_document(text_transform_html_example, self.document) def test_text_decoration_span(self): - self.document.add_heading('Test: text-decoration on ', level=1) + self.document.add_heading("Test: text-decoration on ", level=1) text_decoration_html_example = ( # Standalone spans - "underlined span (red)" - "no decoration span (rgb(0, 0, 0))" - "strikethrough span (gray) (not supported)" - "underline+line-through span (orange)\ - (should be strike)" - + 'underlined span (red)' + 'no decoration span (rgb(0, 0, 0))' + 'strikethrough span (gray) (not supported)' + 'underline+line-through span (orange)\ + (should be strike)' # Spans inside paragraphs - "Normal text wavy underlined span (blue) continues
" - "Normal text dotted underlined span (purple) continues
" - "Normal text strikethrough span (red) continues
" - + 'Normal text wavy underlined span (blue) continues
' + 'Normal text dotted underlined span (purple) continues
' + 'Normal text strikethrough span (red) continues
' # Multiple spans with different decorations in same paragraph - "Start underlined " - "strikethrough " - "dashed underline end
" - + 'Start underlined ' + 'strikethrough ' + 'dashed underline end
' # Span with no decoration inside decorated paragraph - "Underlined paragraph with " - "normal span inside
" - + 'Underlined paragraph with ' + 'normal span inside
' # Span with decoration inside decorated paragraph (should override) - "Strikethrough paragraph with " - "underlined red span inside
" - + 'Strikethrough paragraph with ' + 'underlined red span inside
' # Override behavior with individual properties - "Blue underlined paragraph with " - "strikethrough span inside
" - + 'Blue underlined paragraph with ' + 'strikethrough span inside
' # Check if equal - shorthand vs individual properties - "Blue underlined paragraph
" - "Blue underlined paragraph
" + 'Blue underlined paragraph
' + 'Blue underlined paragraph
' ) self.parser.add_html_to_document(text_decoration_html_example, self.document) @@ -802,24 +751,24 @@ def test_text_decoration_span(self): assert self.get_underline_color(p9.runs[0]) == self.hexcolor("blue") def test_text_decoration_paragraph(self): - self.document.add_heading('Test: text-decoration on', level=1) + self.document.add_heading("Test: text-decoration on
", level=1) text_decoration_html_example = ( - "
underlined text (red)
" - "no decoration text (rgb(0, 0, 0))
" - "strikethrough text (gray) (color not supported)
" - "underline+line-through (orange)\ - (should be strike)
" - "wavy underline (blue)
" - "dotted underline (rgb(0, 128, 0))
" - "dotted underline (rgb(0, 255, 0))
" - "dashed underline (purple)
" - "double underline (rgb(255, 69, 0))
" - "overline text (hotpink) (not supported)
" - "blink text (hotpink) (not supported)
" + 'underlined text (red)
' + 'no decoration text (rgb(0, 0, 0))
' + 'strikethrough text (gray) (color not supported)
' + 'underline+line-through (orange)\ + (should be strike)
' + 'wavy underline (blue)
' + 'dotted underline (rgb(0, 128, 0))
' + 'dotted underline (rgb(0, 255, 0))
' + 'dashed underline (purple)
' + 'double underline (rgb(255, 69, 0))
' + 'overline text (hotpink) (not supported)
' + 'blink text (hotpink) (not supported)
' ) self.parser.add_html_to_document(text_decoration_html_example, self.document) - with self.assertLogs(level='WARNING') as log: + with self.assertLogs(level="WARNING") as log: document = self.parser.parse_html_string(text_decoration_html_example) underline_states = [] @@ -856,45 +805,45 @@ def test_text_decoration_paragraph(self): strike_states.append(strike) expected_underline_states = [ - True, # underline (default single) - explicitly True + True, # underline (default single) - explicitly True False, # none - explicitly False for both underline and strike False, # line-through - explicitly False for underline when strike is True False, # underline + line-through - line-through wins, underline explicitly False - WD_UNDERLINE.WAVY, # wavy underline - explicitly set to wavy - WD_UNDERLINE.DOTTED, # dotted underline - explicitly set to dotted - WD_UNDERLINE.DOTTED, # dotted underline - explicitly set to dotted - WD_UNDERLINE.DASH, # dashed underline - explicitly set to dash - WD_UNDERLINE.DOUBLE, # double underline - explicitly set to double + WD_UNDERLINE.WAVY, # wavy underline - explicitly set to wavy + WD_UNDERLINE.DOTTED, # dotted underline - explicitly set to dotted + WD_UNDERLINE.DOTTED, # dotted underline - explicitly set to dotted + WD_UNDERLINE.DASH, # dashed underline - explicitly set to dash + WD_UNDERLINE.DOUBLE, # double underline - explicitly set to double None, # overline (not supported) - remains None/unchanged None, # blink (not supported) - remains None/unchanged ] expected_underline_colors = [ - self.hexcolor("red"), # underline red - None, # none rgb(0,0,0) - None, # line-through gray (strike only, but color captured) - None, # underline + line-through (color should be orange) - self.hexcolor("blue"), # wavy underline blue - self.hexcolor("rgb(0,128,0)"), # dotted underline rgb(0,128,0) - self.hexcolor("rgb(0,255,0)"), # dotted underline rgb(0,255,0) - self.hexcolor("purple"), # dashed underline purple - self.hexcolor("rgb(255,69,0)"), # double 
underline rgb(255,69,0) - None, # overline hotpink (unsupported → underline None, but color still parsed) - None, # blink hotpink (unsupported) + self.hexcolor("red"), # underline red + None, # none rgb(0,0,0) + None, # line-through gray (strike only, but color captured) + None, # underline + line-through (color should be orange) + self.hexcolor("blue"), # wavy underline blue + self.hexcolor("rgb(0,128,0)"), # dotted underline rgb(0,128,0) + self.hexcolor("rgb(0,255,0)"), # dotted underline rgb(0,255,0) + self.hexcolor("purple"), # dashed underline purple + self.hexcolor("rgb(255,69,0)"), # double underline rgb(255,69,0) + None, # overline hotpink (unsupported → underline None, but color still parsed) + None, # blink hotpink (unsupported) ] expected_strike_states = [ False, # underline only - explicitly False for strike when underline is True False, # none - explicitly False for both underline and strike - True, # line-through - explicitly True - True, # underline + line-through - line-through wins, strike explicitly True + True, # line-through - explicitly True + True, # underline + line-through - line-through wins, strike explicitly True False, # wavy underline only - explicitly False for strike when underline is set False, # dotted underline only - explicitly False for strike when underline is set False, # dotted underline only - explicitly False for strike when underline is set False, # dashed underline only - explicitly False for strike when underline is set False, # double underline only - explicitly False for strike when underline is set - None, # overline (not supported) - remains None/unchanged - None, # blink (not supported) - remains None/unchanged + None, # overline (not supported) - remains None/unchanged + None, # blink (not supported) - remains None/unchanged ] # Test that the underline states, colors, and strike states are correct @@ -904,13 +853,17 @@ def test_text_decoration_paragraph(self): # Test that the correct warnings are logged 
self.assertEqual(len(log.records), 4) - self.assertIn('Word does not support colored strike-through. Color \'gray\' will be ignored for line-through.', log.output[0]) - self.assertIn('Word does not support colored strike-through. Color \'orange\' will be ignored for line-through.', log.output[1]) - self.assertIn('Blink or overline not supported.', log.output[2]) - self.assertIn('Blink or overline not supported.', log.output[3]) + self.assertIn( + "Word does not support colored strike-through. Color 'gray' will be ignored for line-through.", log.output[0] + ) + self.assertIn( + "Word does not support colored strike-through. Color 'orange' will be ignored for line-through.", log.output[1] + ) + self.assertIn("Blink or overline not supported.", log.output[2]) + self.assertIn("Blink or overline not supported.", log.output[3]) def test_first_line_paragraph(self): - self.document.add_heading('Test text-indent ontags', level=1) + self.document.add_heading("Test text-indent on
tags", level=1) self.parser.add_html_to_document(self.paragraph_first_line_indent, self.document) document = self.parser.parse_html_string(self.paragraph_first_line_indent) @@ -922,26 +875,26 @@ def test_first_line_paragraph(self): indent_values.append(indent_pt) expected_values = [ - 1080000, # 3cm - 254000, # 20pt - 381000, # 40px - 1260000, # 35mm - None, # Word does not support negative values here + 1080000, # 3cm + 254000, # 20pt + 381000, # 40px + 1260000, # 35mm + None, # Word does not support negative values here ] for actual, expected in zip(indent_values, expected_values): self.assertAlmostEqual(actual, expected, delta=634) def test_color_paragraph(self): - self.document.add_heading('Test: color on p tags', level=1) + self.document.add_heading("Test: color on p tags", level=1) color_html_example = ( - "
red text
" - "green hex text
" - "blue rgb text
" - "inherit color text
" - "transparent color text
" - "current color text
" - "red with other styles
" + 'red text
' + 'green hex text
' + 'blue rgb text
' + 'inherit color text
' + 'transparent color text
' + 'current color text
' + 'red with other styles
' "default text
" ) @@ -974,7 +927,7 @@ def test_color_paragraph(self): self.assertEqual(color_states, expected_colors) def test_line_height_paragraph(self): - self.document.add_heading('Test: line-height on', level=1) + self.document.add_heading("Test: line-height on
", level=1) self.parser.add_html_to_document(self.paragraph_line_height, self.document) document = self.parser.parse_html_string(self.paragraph_line_height) @@ -984,64 +937,67 @@ def test_line_height_paragraph(self): for p in document.paragraphs: line_spacing = p.paragraph_format.line_spacing line_rule = p.paragraph_format.line_spacing_rule - line_heights.append(str(line_spacing) if line_spacing is not None else 'None') - line_rules.append(str(line_rule) if line_rule is not None else 'None') + line_heights.append(str(line_spacing) if line_spacing is not None else "None") + line_rules.append(str(line_rule) if line_rule is not None else "None") expected_line_heights = [ - '1.0', - '1.15', - '1.5', - '2.0', - '190500', # line-height: 20px - '182880', # line-height: 1.2em - '228600', # line-height: 1.5em - '304800', # line-height: 2em - '182880', # line-height: 1.2rem - '228600', # line-height: 1.5rem - '304800', # line-height: 2rem - '1.5', # line-height: 150% - '2.0', # line-height: 200% + "1.0", + "1.15", + "1.5", + "2.0", + "190500", # line-height: 20px + "182880", # line-height: 1.2em + "228600", # line-height: 1.5em + "304800", # line-height: 2em + "182880", # line-height: 1.2rem + "228600", # line-height: 1.5rem + "304800", # line-height: 2rem + "1.5", # line-height: 150% + "2.0", # line-height: 200% ] - self.assertEqual(line_heights, expected_line_heights, - f"Line heights don't match expected values. Got {line_heights}, expected {expected_line_heights}") + self.assertEqual( + line_heights, + expected_line_heights, + f"Line heights don't match expected values. Got {line_heights}, expected {expected_line_heights}", + ) def test_margins_paragraph(self): margins_html_example = ( - "
centered paragraph
" - "left margin 20px
" - "right margin 1.5cm
" - "left margin 1cm
" - "both margins set
" - "only left auto
" - "only right auto
" - "zero margins
" - "left margin 2in
" - ) - - self.document.add_heading('Test margins on', level=1) + '
centered paragraph
' + 'left margin 20px
' + 'right margin 1.5cm
' + 'left margin 1cm
' + 'both margins set
' + 'only left auto
' + 'only right auto
' + 'zero margins
' + 'left margin 2in
' + ) + + self.document.add_heading("Test margins on", level=1) self.parser.add_html_to_document(margins_html_example, self.document) document = self.parser.parse_html_string(margins_html_example) expected_margins = [ # Paragraph 1: "centered paragraph" - auto margins (None values) - {'left': None, 'right': None}, + {"left": None, "right": None}, # Paragraph 2: "left margin 20px" - 20px = 20 * 9525 = 190500 EMU - {'left': 190500, 'right': None}, + {"left": 190500, "right": None}, # Paragraph 3: "right margin 1.5cm" - 1.5cm = 1.5 * 360000 = 540000 EMU - {'left': None, 'right': 540000}, + {"left": None, "right": 540000}, # Paragraph 4: "left margin 1cm" - 1cm = 360000 EMU - {'left': 360000, 'right': None}, + {"left": 360000, "right": None}, # Paragraph 5: "both margins set" - 10px=95250 EMU, 15px=142875 EMU - {'left': 95250, 'right': 142875}, + {"left": 95250, "right": 142875}, # Paragraph 6: "only left auto" - auto margin - {'left': None, 'right': None}, + {"left": None, "right": None}, # Paragraph 7: "only right auto" - auto margin - {'left': None, 'right': None}, + {"left": None, "right": None}, # Paragraph 8: "zero margins" - 0px = 0 EMU - {'left': 0, 'right': 0}, + {"left": 0, "right": 0}, # Paragraph 9: "left margin 2in" - 2in = 2 * 914400 = 1828800 EMU - {'left': 1828800, 'right': None}, + {"left": 1828800, "right": None}, ] self.assertEqual(len(document.paragraphs), len(expected_margins)) @@ -1052,23 +1008,27 @@ def test_margins_paragraph(self): actual_right = paragraph.paragraph_format.right_indent # Check left margin - if expected['left'] is None: + if expected["left"] is None: self.assertIsNone(actual_left, f"Paragraph {i} left margin should be None") else: self.assertIsNotNone(actual_left, f"Paragraph {i} left margin should not be None") - self.assertTrue(abs(actual_left - expected['left']) <= 634, - f"Paragraph {i} left margin: expected {expected['left']} EMU, got {actual_left} EMU") + self.assertTrue( + abs(actual_left - expected["left"]) <= 634, + 
f"Paragraph {i} left margin: expected {expected['left']} EMU, got {actual_left} EMU", + ) # Check right margin - if expected['right'] is None: + if expected["right"] is None: self.assertIsNone(actual_right, f"Paragraph {i} right margin should be None") else: self.assertIsNotNone(actual_right, f"Paragraph {i} right margin should not be None") - self.assertTrue(abs(actual_right - expected['right']) <= 634, - f"Paragraph {i} right margin: expected {expected['right']} EMU, got {actual_right} EMU") + self.assertTrue( + abs(actual_right - expected["right"]) <= 634, + f"Paragraph {i} right margin: expected {expected['right']} EMU, got {actual_right} EMU", + ) def test_background_color_styles(self): - self.document.add_heading('Test background color on
, multiple cases', level=1) + self.document.add_heading("Test background color on
, multiple cases", level=1) html_example2 = """
Start of paragraph @@ -1185,61 +1145,61 @@ def test_background_color_styles(self): self.parser.add_html_to_document(html_example9, self.document) def test_headers_with_css(self): - self.document.add_heading('Test: headers with css', level=1) + self.document.add_heading("Test: headers with css", level=1) self.parser.add_html_to_document(self.css_properties_header, self.document) document = self.parser.parse_html_string(self.css_properties_header) # Test H1 - Large and Centered h1 = document.paragraphs[0] - assert h1.style.name.startswith('Heading 1') - assert str(h1.runs[0].font.color.rgb) == '2C3E50' + assert h1.style.name.startswith("Heading 1") + assert str(h1.runs[0].font.color.rgb) == "2C3E50" assert h1.runs[0].font.bold is True assert h1.runs[0].font.size == 342900 assert h1.alignment == WD_ALIGN_PARAGRAPH.CENTER - assert h1.runs[0].text == 'MAIN HEADING H1 - LARGE AND CENTERED' # uppercase due to text-transform + assert h1.runs[0].text == "MAIN HEADING H1 - LARGE AND CENTERED" # uppercase due to text-transform # Test H2 - Underlined with Background (no span in this one) h2 = document.paragraphs[1] - assert h2.style.name.startswith('Heading 2') - assert str(h2.runs[0].font.color.rgb) == '34495E' + assert h2.style.name.startswith("Heading 2") + assert str(h2.runs[0].font.color.rgb) == "34495E" assert h2.runs[0].font.underline is True - assert h2.runs[0].font.name == 'Arial' + assert h2.runs[0].font.name == "Arial" assert h2.runs[0].font.size == 266700 # Test H3 - Italic and Right Aligned h3 = document.paragraphs[2] - assert h3.style.name.startswith('Heading 3') - assert str(h3.runs[0].font.color.rgb) == '7F8C8D' + assert h3.style.name.startswith("Heading 3") + assert str(h3.runs[0].font.color.rgb) == "7F8C8D" assert h3.runs[0].font.italic is True assert h3.runs[0].font.size == 209550 assert h3.alignment == WD_ALIGN_PARAGRAPH.RIGHT # Test H4 - Normal Weight and Capitalized h4 = document.paragraphs[3] - assert h4.style.name.startswith('Heading 4') - assert 
str(h4.runs[0].font.color.rgb) == '95A5A6' + assert h4.style.name.startswith("Heading 4") + assert str(h4.runs[0].font.color.rgb) == "95A5A6" assert h4.runs[0].font.bold is False # font-weight: normal - assert h4.runs[0].font.name == 'Georgia' + assert h4.runs[0].font.name == "Georgia" assert h4.runs[0].font.size == 171450 - assert h4.runs[0].text == 'Quaternary Heading H4 - Normal Weight And Capitalized' # capitalized + assert h4.runs[0].text == "Quaternary Heading H4 - Normal Weight And Capitalized" # capitalized # Test H1 with Complex Text Decoration and Span h1_complex = document.paragraphs[4] assert h1_complex.runs[0].font.strike is True # line-through - assert str(h1_complex.runs[0].font.color.rgb) == '8E44AD' + assert str(h1_complex.runs[0].font.color.rgb) == "8E44AD" assert h1_complex.runs[0].font.size == 381000 # Test span in complex H1 assert len(h1_complex.runs) >= 2 span_in_h1 = h1_complex.runs[1] assert span_in_h1.font.underline is True # underline in span - assert str(span_in_h1.font.color.rgb) == '2980B9' + assert str(span_in_h1.font.color.rgb) == "2980B9" # Test H3 with Light Weight and Span h3_light = document.paragraphs[5] assert h3_light.runs[0].font.bold is False # font-weight: 100 - assert str(h3_light.runs[0].font.color.rgb) == 'D35400' + assert str(h3_light.runs[0].font.color.rgb) == "D35400" assert h3_light.runs[0].font.size == 190500 # Test bold span in light H3 @@ -1249,21 +1209,21 @@ def test_headers_with_css(self): # Test H3 with Text Transform h3_transform = document.paragraphs[6] - assert h3_transform.runs[0].text == 'h3 forced to lowercase with text-transform ' + assert h3_transform.runs[0].text == "h3 forced to lowercase with text-transform " assert len(h3_transform.runs) >= 2 uppercase_span = h3_transform.runs[1] - assert uppercase_span.text == 'SPAN FORCED TO UPPERCASE' + assert uppercase_span.text == "SPAN FORCED TO UPPERCASE" # Test H4 with Serif Font h4_serif = document.paragraphs[7] - assert h4_serif.runs[0].font.name == 'Times 
New Roman' - assert str(h4_serif.runs[0].font.color.rgb) == '7D3C98' + assert h4_serif.runs[0].font.name == "Times New Roman" + assert str(h4_serif.runs[0].font.color.rgb) == "7D3C98" assert h4_serif.alignment == WD_ALIGN_PARAGRAPH.CENTER # Test H1 with Auto Margins and Background h1_centered = document.paragraphs[8] assert h1_centered.alignment == WD_ALIGN_PARAGRAPH.CENTER - assert str(h1_centered.runs[0].font.color.rgb) == 'FFFFFF' + assert str(h1_centered.runs[0].font.color.rgb) == "FFFFFF" # Test H2 with Lighter Weight and Span h2_lighter = document.paragraphs[9] @@ -1278,13 +1238,13 @@ def test_headers_with_css(self): # Test H3 with RGB Colors and Span h3_rgb = document.paragraphs[10] - assert str(h3_rgb.runs[0].font.color.rgb) == '3498DB' # rgb(52, 152, 219) + assert str(h3_rgb.runs[0].font.color.rgb) == "3498DB" # rgb(52, 152, 219) assert h3_rgb.runs[0].font.size == 177800 # Test RGB span assert len(h3_rgb.runs) >= 2 rgb_span = h3_rgb.runs[1] - assert str(rgb_span.font.color.rgb) == 'E74C3C' # rgb(231, 76, 60) + assert str(rgb_span.font.color.rgb) == "E74C3C" # rgb(231, 76, 60) # Test H4 with Strike-through and Span h4_strike = document.paragraphs[11] @@ -1295,17 +1255,17 @@ def test_headers_with_css(self): assert len(h4_strike.runs) >= 2 no_strike_span = h4_strike.runs[1] assert no_strike_span.font.strike is False - assert str(no_strike_span.font.color.rgb) == 'E74C3C' + assert str(no_strike_span.font.color.rgb) == "E74C3C" # Test H3 with Unsupported Transform and Span h3_unsupported = document.paragraphs[12] - assert str(h3_unsupported.runs[0].font.color.rgb) == 'F39C12' + assert str(h3_unsupported.runs[0].font.color.rgb) == "F39C12" assert h3_unsupported.runs[0].font.size == 196850 # Test supported transform in span assert len(h3_unsupported.runs) >= 2 supported_span = h3_unsupported.runs[1] - assert supported_span.text == 'Supported Transform In Span' # capitalize + assert supported_span.text == "Supported Transform In Span" # capitalize # Test H4 with 
Reset Styles and Span h4_reset = document.paragraphs[13] @@ -1324,7 +1284,7 @@ def test_headers_with_css(self): h1_transparent = document.paragraphs[14] assert h1_transparent.runs[0].font.size == 361950 visible_span = h1_transparent.runs[1] - assert str(visible_span.font.color.rgb) == 'ECF0F1' + assert str(visible_span.font.color.rgb) == "ECF0F1" # Test H3 with All Three Decorations and Span h3_all_decorations = document.paragraphs[15] @@ -1352,40 +1312,34 @@ def test_headers_with_css(self): def test_color_by_name(self): color_html_example = ( - "
paragraph red
" - "paragraph yellow
" - "paragraph blue
" - "paragraph green
" - "paragraph darkgray
" - "paragraph magenta
" - "paragraph has default black because of invalid color name
" + 'paragraph red
' + 'paragraph yellow
' + 'paragraph blue
' + 'paragraph green
' + 'paragraph darkgray
' + 'paragraph magenta
' + 'paragraph has default black because of invalid color name
' ) - self.document.add_heading( - 'Test: Color by name', - level=1 - ) + self.document.add_heading("Test: Color by name", level=1) # Add on document for human validation self.parser.add_html_to_document(color_html_example, self.document) document = self.parser.parse_html_string(color_html_example) colors = [str(p.runs[0].font.color.rgb) for p in document.paragraphs] - assert 'FF0000' in colors # Red - assert 'FFFF00' in colors # Yellow - assert '0000FF' in colors # Blue - assert '008000' in colors # Green - assert 'A9A9A9' in colors # Darkgray - assert '000000' in colors # Black - assert 'FF00FF' in colors # Magenta + assert "FF0000" in colors # Red + assert "FFFF00" in colors # Yellow + assert "0000FF" in colors # Blue + assert "008000" in colors # Green + assert "A9A9A9" in colors # Darkgray + assert "000000" in colors # Black + assert "FF00FF" in colors # Magenta def test_table_cell_border_properties(self): """Validates that all table cells have the expected border size, style, and color.""" - self.document.add_heading( - 'Test: Table Cell Border Properties', - level=1 - ) + self.document.add_heading("Test: Table Cell Border Properties", level=1) # Add on document for human validation self.parser.add_html_to_document(self.table3_html, self.document) document = self.parser.parse_html_string(self.table3_html) @@ -1396,38 +1350,38 @@ def test_table_cell_border_properties(self): "top": {"color": "D95B48", "style": "single", "size": "1.0"}, "bottom": {"color": "D95B48", "style": "single", "size": "1.0"}, "left": {"color": "FF0000", "style": "single", "size": "1.0"}, - "right": {"color": "8B0000", "style": "single", "size": "1.0"} + "right": {"color": "8B0000", "style": "single", "size": "1.0"}, }, { "top": {"color": "FAC32A", "style": "single", "size": "1.0"}, "bottom": {"color": "FAC32A", "style": "single", "size": "1.125"}, "left": {"color": "none", "style": "none", "size": "none"}, - "right": {"color": "FAC32A", "style": "single", "size": "12.0"} + "right": 
{"color": "FAC32A", "style": "single", "size": "12.0"}, }, { "top": {"color": "30E667", "style": "none", "size": "5.67"}, "bottom": {"color": "30E667", "style": "single", "size": "5.67"}, "left": {"color": "30E667", "style": "single", "size": "5.67"}, - "right": {"color": "30E667", "style": "single", "size": "5.67"} + "right": {"color": "30E667", "style": "single", "size": "5.67"}, }, { "top": {"color": "none", "style": "none", "size": "none"}, "bottom": {"color": "D948CF", "style": "single", "size": "1.5"}, "left": {"color": "none", "style": "none", "size": "none"}, - "right": {"color": "D948CF", "style": "single", "size": "5.67"} + "right": {"color": "D948CF", "style": "single", "size": "5.67"}, }, { "top": {"color": "EAAAA7", "style": "single", "size": "1.1"}, "bottom": {"color": "EAAAA7", "style": "single", "size": "1.1"}, "left": {"color": "EAAAA7", "style": "single", "size": "1.1"}, - "right": {"color": "EAAAA7", "style": "single", "size": "1.1"} + "right": {"color": "EAAAA7", "style": "single", "size": "1.1"}, }, { "top": {"color": "none", "style": "none", "size": "none"}, "bottom": {"color": "ACC4AA", "style": "dashed", "size": "7.2"}, "left": {"color": "none", "style": "none", "size": "none"}, - "right": {"color": "ACC4AA", "style": "dotted", "size": "4.8"} - } + "right": {"color": "ACC4AA", "style": "dotted", "size": "4.8"}, + }, ] # Validate border properties for each cell @@ -1437,21 +1391,21 @@ def test_table_cell_border_properties(self): # Get the table cell element and properties tc = cell._tc tcPr = tc.get_or_add_tcPr() - tcBorders = tcPr.find(qn('w:tcBorders')) + tcBorders = tcPr.find(qn("w:tcBorders")) # Extract border properties border_sides = { - 'top': tcBorders.find(qn('w:top')) if tcBorders is not None else None, - 'bottom': tcBorders.find(qn('w:bottom')) if tcBorders is not None else None, - 'left': tcBorders.find(qn('w:left')) if tcBorders is not None else None, - 'right': tcBorders.find(qn('w:right')) if tcBorders is not None else None, + 
"top": tcBorders.find(qn("w:top")) if tcBorders is not None else None, + "bottom": tcBorders.find(qn("w:bottom")) if tcBorders is not None else None, + "left": tcBorders.find(qn("w:left")) if tcBorders is not None else None, + "right": tcBorders.find(qn("w:right")) if tcBorders is not None else None, } for side, border in border_sides.items(): if border is not None: - color = border.get(qn('w:color'), "").upper() # Ensure uppercase and no # - size = border.get(qn('w:sz')) - style = border.get(qn('w:val')) + color = border.get(qn("w:color"), "").upper() # Ensure uppercase and no # + size = border.get(qn("w:sz")) + style = border.get(qn("w:val")) else: color, size, style = "none", "none", "none" @@ -1480,22 +1434,19 @@ def test_table_cell_border_properties(self): def test_table_cell_background_color(self): """Validates that all table cells have the expected background color.""" - self.document.add_heading( - 'Test: Table Cell Background Color', - level=1 - ) + self.document.add_heading("Test: Table Cell Background Color", level=1) # Add on document for human validation self.parser.add_html_to_document(self.table3_html, self.document) document = self.parser.parse_html_string(self.table3_html) # Define expected background colors for each cell expected_background_colors = [ - "3749EF", # Row 1 Column 1 - "33b32e", # Row 1 Column 2 - "BFBFBF", # Row 2 Column 1 - "2eaab3", # Row 2 Column 2 - "99fffa", # Row 3 Column 1 - "2eaab3" # Row 3 Column 2 + "3749EF", # Row 1 Column 1 + "33b32e", # Row 1 Column 2 + "BFBFBF", # Row 2 Column 1 + "2eaab3", # Row 2 Column 2 + "99fffa", # Row 3 Column 1 + "2eaab3", # Row 3 Column 2 ] # Validate background colors for each cell @@ -1507,9 +1458,9 @@ def test_table_cell_background_color(self): tcPr = tc.get_or_add_tcPr() # Get the background color (shading) if it exists - shading = tcPr.find(qn('w:shd')) + shading = tcPr.find(qn("w:shd")) if shading is not None: - background_color = shading.get(qn('w:fill'), "").upper() # Ensure uppercase 
and no # + background_color = shading.get(qn("w:fill"), "").upper() # Ensure uppercase and no # else: background_color = "None" @@ -1525,10 +1476,7 @@ def test_table_cell_background_color(self): def test_table_cell_dimensions(self): """Validates that all table cells have the expected width and height.""" - self.document.add_heading( - 'Test: Table Cell Dimensions', - level=1 - ) + self.document.add_heading("Test: Table Cell Dimensions", level=1) # Add on document for human validation self.parser.add_html_to_document(self.table3_html, self.document) document = self.parser.parse_html_string(self.table3_html) @@ -1539,35 +1487,35 @@ def test_table_cell_dimensions(self): [ { "width": "258.35px", # Width for the first cell - "height": "23.75pt" # Height for the first cell + "height": "23.75pt", # Height for the first cell }, { - "width": "222.2pt", # Width for the second cell - "height": "23.75pt" # Height for the second cell - } + "width": "222.2pt", # Width for the second cell + "height": "23.75pt", # Height for the second cell + }, ], # Second row [ { "width": "258.35in", # Width for the first cell - "height": "15.5pt" # Height for the first cell + "height": "15.5pt", # Height for the first cell }, { - "width": "6cm", # Width for the second cell - "height": "15.5pt" # Height for the second cell - } + "width": "6cm", # Width for the second cell + "height": "15.5pt", # Height for the second cell + }, ], # Third row [ { "width": "258.35pt", # Width for the first cell - "height": "2rem" # Height for the first cell + "height": "2rem", # Height for the first cell }, { - "width": "6cm", # Width for the second cell - "height": "2em" # Height for the second cell - } - ] + "width": "6cm", # Width for the second cell + "height": "2em", # Height for the second cell + }, + ], ] # Validate dimensions for each cell @@ -1589,8 +1537,7 @@ def test_table_cell_dimensions(self): expected_height_px = unit_converter(expected_height, "px") assert round(abs(cell_width_px - 
expected_width_px), 2) <= 0.03, ( - f"Width mismatch for cell ({row_idx}, {cell_idx}): " - f"expected {expected_width_px}px, got {cell_width_px}px" + f"Width mismatch for cell ({row_idx}, {cell_idx}): expected {expected_width_px}px, got {cell_width_px}px" ) assert round(abs(cell_height_px - expected_height_px), 2) <= 0.03, ( f"Height mismatch for cell ({row_idx}, {cell_idx}): " @@ -1624,18 +1571,18 @@ def test_border_with_keywords(self): # Get the table cell element and properties tc = cell._tc tcPr = tc.get_or_add_tcPr() - tcBorders = tcPr.find(qn('w:tcBorders')) + tcBorders = tcPr.find(qn("w:tcBorders")) # Extract border properties border_sides = { - 'top': tcBorders.find(qn('w:top')) if tcBorders is not None else None, - 'bottom': tcBorders.find(qn('w:bottom')) if tcBorders is not None else None, - 'left': tcBorders.find(qn('w:left')) if tcBorders is not None else None, - 'right': tcBorders.find(qn('w:right')) if tcBorders is not None else None, + "top": tcBorders.find(qn("w:top")) if tcBorders is not None else None, + "bottom": tcBorders.find(qn("w:bottom")) if tcBorders is not None else None, + "left": tcBorders.find(qn("w:left")) if tcBorders is not None else None, + "right": tcBorders.find(qn("w:right")) if tcBorders is not None else None, } for side, border in border_sides.items(): - size = border.get(qn('w:sz')) if border is not None else "none" + size = border.get(qn("w:sz")) if border is not None else "none" # Convert size from eighths of a point to points size_in_pt = str(float(size) / 8) if size and size != "none" else "none" @@ -1667,20 +1614,20 @@ def test_border_style_with_diff_formats(self): "top": {"color": "ADD8E6", "style": "single", "size": "1.0"}, "bottom": {"color": "none", "style": "none", "size": "none"}, "left": {"color": "000000", "style": "none", "size": "2.25"}, - "right": {"color": "000000", "style": "single", "size": "1.0"} + "right": {"color": "000000", "style": "single", "size": "1.0"}, }, { "top": {"color": "000000", "style": 
"single", "size": "3.75"}, "bottom": {"color": "none", "style": "none", "size": "none"}, "left": {"color": "000000", "style": "single", "size": "0.75"}, - "right": {"color": "773366", "style": "single", "size": "0.75"} + "right": {"color": "773366", "style": "single", "size": "0.75"}, }, { "top": {"color": "FFA500", "style": "single", "size": "1.0"}, "bottom": {"color": "FF00FF", "style": "single", "size": "3.75"}, "left": {"color": "000000", "style": "dashed", "size": "2.25"}, - "right": {"color": "none", "style": "none", "size": "none"} - } + "right": {"color": "none", "style": "none", "size": "none"}, + }, ] cell_idx = 0 @@ -1689,21 +1636,21 @@ def test_border_style_with_diff_formats(self): # Get the table cell element and properties tc = cell._tc tcPr = tc.get_or_add_tcPr() - tcBorders = tcPr.find(qn('w:tcBorders')) + tcBorders = tcPr.find(qn("w:tcBorders")) # Extract border properties border_sides = { - 'top': tcBorders.find(qn('w:top')) if tcBorders is not None else None, - 'bottom': tcBorders.find(qn('w:bottom')) if tcBorders is not None else None, - 'left': tcBorders.find(qn('w:left')) if tcBorders is not None else None, - 'right': tcBorders.find(qn('w:right')) if tcBorders is not None else None, + "top": tcBorders.find(qn("w:top")) if tcBorders is not None else None, + "bottom": tcBorders.find(qn("w:bottom")) if tcBorders is not None else None, + "left": tcBorders.find(qn("w:left")) if tcBorders is not None else None, + "right": tcBorders.find(qn("w:right")) if tcBorders is not None else None, } for side, border in border_sides.items(): if border is not None: - color = border.get(qn('w:color'), "").upper() # Ensure uppercase and no # - size = border.get(qn('w:sz')) - style = border.get(qn('w:val')) + color = border.get(qn("w:color"), "").upper() # Ensure uppercase and no # + size = border.get(qn("w:sz")) + style = border.get(qn("w:val")) else: color, size, style = "none", "none", "none" @@ -1731,7 +1678,7 @@ def test_border_style_with_diff_formats(self): 
def test_unbalanced_table(self): # A table with more td elements in latter rows than in the first - self.document.add_heading('Test: Handling unbalanced tables', level=1) + self.document.add_heading("Test: Handling unbalanced tables", level=1) html_unbalanced_table = """