diff --git a/HISTORY.rst b/HISTORY.rst index f595038..c824cdb 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -12,7 +12,8 @@ Release History **Fixes** -- None +- Fixes `#73 `_: Error parsing styles with spaces. | `dfop02 `_ +- Fixes `#71 `_: Error applying color to table cells. | `vvalchev `_ **New Features** diff --git a/html4docx/constants.py b/html4docx/constants.py index 3ef9de5..de06b3a 100644 --- a/html4docx/constants.py +++ b/html4docx/constants.py @@ -161,3 +161,5 @@ def default_borders(): re.compile(r'page-break-after\s*:\s*always\s*(?:!important)?\s*(?:;|$)'), re.compile(r'break-after\s*:\s*page\s*(?:!important)?\s*(?:;|$)'), ) + +RGB_SPACES_REGEX = re.compile(r'(rgba?\()([^)]+)(\))', re.IGNORECASE) diff --git a/html4docx/h4d.py b/html4docx/h4d.py index 4fcbb99..69ac622 100644 --- a/html4docx/h4d.py +++ b/html4docx/h4d.py @@ -352,10 +352,11 @@ def parse_border_value(value: str): Parses a border value like: '1px solid #000000', 'solid 1px red', or '#000000 medium dashed' in any order. """ - parts = value.split() + value = value.strip() + parts = utils.normalize_rgb_spaces(value).split() # Return all default if there is only 'none' or empty - if (len(parts) == 1 and parts[0] == "none") or (not value or value.strip() == ""): + if (len(parts) == 1 and parts[0].lower() == "none") or (not value or value.strip() == ""): return default_size, default_style, default_color size = None @@ -1048,8 +1049,10 @@ def add_text_align_or_margin_to(self, obj, style): def add_styles_to_table_cell(self, styles, doc_cell, cell_row): """Styles that must be applied specifically in a _Cell object""" # Set background color + if "background-color" in styles: - self.set_cell_background(doc_cell, styles["background-color"]) + color = utils.parse_color(styles["background-color"], return_hex=True) + self.set_cell_background(doc_cell, color) # Set width (approximate, since DOCX uses different units) if "width" in styles: @@ -1733,7 +1736,8 @@ def handle_comment(self, data): # Style: Green color to mimic HTML comment styling dark_ish_green = "#008000" - run.font.color.rgb = utils.parse_color(dark_ish_green) + dark_ish_green_color = utils.parse_color(dark_ish_green) + run.font.color.rgb = RGBColor(*dark_ish_green_color) run.italic = True # makes it feel more like a comment def ignore_nested_tables(self, tables_soup): diff --git a/html4docx/utils.py b/html4docx/utils.py index 1ffcf14..9633e27 100644 --- a/html4docx/utils.py +++ b/html4docx/utils.py @@ -238,6 +238,22 @@ def parse_color(original_color: str, return_hex: bool = False): return rgb_to_hex(colors) if return_hex else colors +def normalize_rgb_spaces(value: str) -> str: + """ + Removes spaces inside rgb()/rgba() so it can be safely split. + Example: + rgb(222, 222, 222) -> rgb(222,222,222) + """ + + def _replace(match): + prefix, content, suffix = match.groups() + # remove spaces only inside the function + content = content.replace(" ", "") + return f"{prefix}{content}{suffix}" + + return constants.RGB_SPACES_REGEX.sub(_replace, value) + + def remove_last_occurence(ls, x): ls.pop(len(ls) - ls[::-1].index(x) - 1) diff --git a/tests/assets/htmls/tables3.html b/tests/assets/htmls/tables3.html index b7ff21d..4ec19c0 100644 --- a/tests/assets/htmls/tables3.html +++ b/tests/assets/htmls/tables3.html @@ -4,26 +4,26 @@ width="641"> -

CATEGORY

+ rgba(250, 195, 42, 1);border-left-style:none;border-right:1em solid #fac32a;border-top:1.0pt solid + rgb(250, 195, 42);height:23.75pt;padding:0in;width:222.2pt;" width="296">

OBSERVATIONS/COMMENTS

- NETHERLANDS COURTS   - GERMANY COURTS