Source code for docx_parser_converter.docx_to_html.converters.run_converter

from typing import Optional
from html import escape

from docx_parser_converter.docx_parsers.models.paragraph_models import (
    Run,
    Paragraph,
    TextContent,
    TabContent,
    BreakContent,
)
from docx_parser_converter.docx_parsers.models.styles_models import RunStyleProperties
from docx_parser_converter.docx_to_html.converters.style_converter import StyleConverter


class RunConverter:
    """
    A converter class for converting DOCX runs to HTML.

    All methods are static; the class only serves as a namespace.
    """

    # Fallback tab width (in points) used when the paragraph defines no tab stops.
    DEFAULT_TAB_WIDTH_PT: float = 36.0

    @staticmethod
    def convert_run(run: Run, paragraph: Paragraph) -> str:
        """
        Converts a run to its HTML representation.

        The run is rendered as a single ``<span>`` carrying the run's style
        attribute. Each element of ``run.contents`` is rendered inside it based
        on the type of its ``.run`` payload: tabs become empty inline-block
        spans of a fixed width, text is HTML-escaped, and breaks become
        ``<br/>``. Payloads of any other type are skipped.

        Args:
            run (Run): The run to convert.
            paragraph (Paragraph): The paragraph containing the run (used to
                look up tab stop widths).

        Returns:
            str: The HTML representation of the run.

        Example:
            Given a run with bold text followed by a tab, the output HTML
            string might look like:

            .. code-block:: html

                <span style="font-weight:bold;">This is bold text<span style="display:inline-block; width:36.0pt;"></span></span>
        """
        style_attr = RunConverter.convert_run_properties(run.properties)
        if RunConverter._run_requires_whitespace_preservation(run):
            style_attr = RunConverter._append_style_attribute(
                style_attr, "white-space:pre-wrap;"
            )

        # Collect fragments and join once instead of repeated string concatenation.
        parts = [f"<span{style_attr}>"]
        for content in run.contents:
            item = content.run
            if isinstance(item, TabContent):
                tab_width = RunConverter.get_next_tab_width(paragraph)
                parts.append(
                    f'<span style="display:inline-block; width:{tab_width}pt;"></span>'
                )
            elif isinstance(item, TextContent):
                # quote=False: the text is element content, not an attribute
                # value, so only &, < and > need escaping.
                parts.append(escape(item.text, quote=False))
            elif isinstance(item, BreakContent):
                parts.append("<br/>")
        parts.append("</span>")
        return "".join(parts)

    @staticmethod
    def get_next_tab_width(paragraph: Paragraph) -> float:
        """
        Gets the width of the next tab stop for the paragraph.

        NOTE: no position tracking is performed. The position of the *first*
        tab stop defined on the paragraph is always returned; when the
        paragraph has no properties or no tab stops, ``DEFAULT_TAB_WIDTH_PT``
        is returned instead.

        Args:
            paragraph (Paragraph): The paragraph containing the tab stop.

        Returns:
            float: The width of the next tab stop in points.

        Example:
            The following gets the next tab width:

            .. code-block:: python

                tab_width = RunConverter.get_next_tab_width(paragraph)
                print(tab_width)  # Output: 36.0
        """
        properties = paragraph.properties
        tabs = properties.tabs if properties is not None else None
        if tabs:
            first_tab = next(iter(tabs), None)
            if first_tab is not None:
                return first_tab.pos
        return RunConverter.DEFAULT_TAB_WIDTH_PT

    @staticmethod
    def convert_run_properties(properties: Optional[RunStyleProperties]) -> str:
        """
        Converts run properties to an HTML style attribute.

        Each set property is translated through the matching ``StyleConverter``
        helper and the resulting declarations are concatenated in a fixed
        order.

        Args:
            properties (RunStyleProperties): The run style properties to
                convert. May be ``None``.

        Returns:
            str: The HTML style attribute representing the run properties,
            including its leading space (e.g. ``' style="..."'``), or an empty
            string when there are no properties or nothing to emit.

        Example:
            The output style attribute might look like:

            .. code-block:: html

                ' style="font-weight:bold;font-style:italic;color:#FF0000;font-family:Arial;font-size:12pt;"'
        """
        if not properties:
            return ""

        declarations = []
        if properties.bold:
            declarations.append(StyleConverter.convert_bold(properties.bold))
        if properties.italic:
            declarations.append(StyleConverter.convert_italic(properties.italic))

        # Underline and both strikethrough variants are handed to a single
        # converter call, so this runs even when underline itself is unset.
        decoration = StyleConverter.convert_underline(
            properties.underline,
            bool(properties.strikethrough),
            bool(properties.double_strikethrough),
        )
        if decoration:
            declarations.append(decoration)

        if properties.color:
            declarations.append(StyleConverter.convert_color(properties.color))
        if properties.underline_color:
            declarations.append(
                StyleConverter.convert_underline_color(properties.underline_color)
            )
        if properties.highlight:
            declarations.append(StyleConverter.convert_highlight(properties.highlight))
        if properties.font:
            declarations.append(StyleConverter.convert_font(properties.font))
        if properties.size_pt:
            declarations.append(StyleConverter.convert_size(properties.size_pt))
        # A text position of 0 is still an explicit value, hence the
        # ``is not None`` test rather than truthiness.
        if properties.vertical_align or properties.text_position_pt is not None:
            declarations.append(
                StyleConverter.convert_vertical_align(
                    properties.vertical_align,
                    properties.text_position_pt,
                )
            )
        # all_caps takes precedence; small_caps is only applied without it.
        if properties.all_caps:
            declarations.append(StyleConverter.convert_all_caps(properties.all_caps))
        elif properties.small_caps:
            declarations.append(
                StyleConverter.convert_small_caps(properties.small_caps)
            )

        style = "".join(declarations)
        return f' style="{style}"' if style else ""

    @staticmethod
    def _append_style_attribute(style_attr: str, addition: str) -> str:
        """
        Ensures the provided style string contains the additional declaration.

        Args:
            style_attr (str): Either an empty string or a complete attribute
                of the form ``' style="..."'`` (ending with the closing quote).
            addition (str): The CSS declaration(s) to append, e.g.
                ``"white-space:pre-wrap;"``.

        Returns:
            str: A style attribute containing the original declarations
            followed by ``addition``.
        """
        if not style_attr:
            return f' style="{addition}"'

        # Drop the closing quote so the new declaration lands inside the value.
        existing = style_attr[:-1]
        # Insert a separator when the last declaration is unterminated;
        # an empty value (ending in the opening quote) needs none.
        if not existing.endswith((";", '"')):
            existing += ";"
        return f'{existing}{addition}"'

    @staticmethod
    def _run_requires_whitespace_preservation(run: Run) -> bool:
        """
        Determines if any text content inside the run requires preserving
        whitespace.

        Args:
            run (Run): The run whose contents are inspected.

        Returns:
            bool: True if at least one ``TextContent`` payload in the run
            contains whitespace that must be preserved.
        """
        return any(
            isinstance(content.run, TextContent)
            and RunConverter._text_requires_whitespace_preservation(content.run.text)
            for content in run.contents
        )

    @staticmethod
    def _text_requires_whitespace_preservation(text: str) -> bool:
        """
        Returns True if the text contains whitespace that should be preserved.

        Whitespace is considered significant when the text contains a line
        break (``\\n`` or ``\\r``), a tab, a leading or trailing space, or a
        run of two or more consecutive spaces.

        Args:
            text (str): The text to inspect.

        Returns:
            bool: True if the whitespace in ``text`` must be preserved.
        """
        if not text:
            return False

        # Any newline, carriage return or tab needs preserving.
        if any(char in text for char in "\n\r\t"):
            return True

        # A single interior space renders unchanged in HTML; only leading or
        # trailing spaces and runs of consecutive spaces need preserving.
        return text.startswith(" ") or text.endswith(" ") or "  " in text