Source code for docx_parser_converter.docx_to_html.converters.table_converter

from typing import List, Optional, Sequence, cast
from lxml import etree, html  # type: ignore
from docx_parser_converter.docx_parsers.models.table_models import (
    MarginProperties,
    Table,
    TableCell,
    TableCellProperties,
    TableProperties,
    TableRow,
    TableRowProperties,
)
from docx_parser_converter.docx_to_html.converters.paragraph_converter import (
    ParagraphConverter,
)
from docx_parser_converter.docx_to_html.converters.style_converter import (
    StyleConverter,
)

[docs] class TableConverter: """ A converter class for converting DOCX tables to HTML. """
[docs] @staticmethod def convert_table(table: Table) -> str: """ Converts a table to its HTML representation. Args: table (Table): The table to convert. Returns: str: The HTML representation of the table. Example: Given a table with properties, the output HTML string might look like: .. code-block:: html <table style="border-collapse: collapse; width:500pt;"> <colgroup> <col style="width:250pt;"> <col style="width:250pt;"> </colgroup> <tbody> <tr style="height:20pt;"> <td style="width:250pt; padding:5pt 5pt 5pt 5pt;">Cell 1</td> <td style="width:250pt; padding:5pt 5pt 5pt 5pt;">Cell 2</td> </tr> </tbody> </table> """ table_html = etree.Element("table") properties = table.properties or TableProperties.model_validate({}) table_properties_html = TableConverter.convert_table_properties(properties) table_html.set("style", table_properties_html) if table.grid and table.grid.columns: table_grid_html = TableConverter.convert_grid(table.grid.columns) table_html.append(html.fragment_fromstring(table_grid_html)) rows_html = TableConverter.convert_rows(table.rows, properties.tblCellMar) table_html.append(rows_html) return cast( str, html.tostring(table_html, pretty_print=True, encoding="unicode") )
[docs] @staticmethod def convert_table_properties(properties: Optional[TableProperties]) -> str: """ Converts table properties to an HTML style attribute. Args: properties: The table properties to convert. Returns: str: The HTML style attribute representing the table properties. Example: The output style attribute might look like: .. code-block:: css 'border-collapse: collapse; width:500pt; text-align:center; margin-left:20pt; padding: 5pt 5pt 5pt 5pt;' """ props = properties or TableProperties.model_validate({}) styles = ["border-collapse: collapse;"] # Ensure borders collapse if props.tblW and props.tblW.width is not None: styles.append(f"width:{props.tblW.width}pt;") if props.justification: justification_style = StyleConverter.convert_justification( props.justification ) if justification_style: styles.append(justification_style) if props.tblInd and props.tblInd.width is not None: styles.append(f"margin-left:{props.tblInd.width}pt;") if props.tblCellMar: styles.append( f"padding: {props.tblCellMar.top}pt {props.tblCellMar.right}pt " f"{props.tblCellMar.bottom}pt {props.tblCellMar.left}pt;" ) if props.tblLayout: styles.append(f"table-layout:{props.tblLayout};") return " ".join(styles)
[docs] @staticmethod def convert_grid(columns: Sequence[float]) -> str: """ Converts table grid columns to HTML. Args: columns: The grid columns widths. Returns: str: The HTML colgroup element representing the grid columns. Example: The output HTML colgroup might look like: .. code-block:: html <colgroup> <col style="width:250pt;"> <col style="width:250pt;"> </colgroup> """ colgroup = etree.Element("colgroup") for width in columns: etree.SubElement(colgroup, "col", style=f"width:{width}pt;") return cast( str, html.tostring(colgroup, pretty_print=True, encoding="unicode") )
[docs] @staticmethod def convert_rows( rows: Sequence[TableRow], tblCellMar: Optional[MarginProperties] ) -> etree.Element: """ Converts table rows to HTML. Args: rows: The list of table rows to convert. tblCellMar: The table cell margins to apply. Returns: etree.Element: The HTML tbody element representing the rows. Example: The output HTML tbody might look like: .. code-block:: html <tbody> <tr style="height:20pt;"> <td style="width:250pt; padding:5pt 5pt 5pt 5pt;">Cell 1</td> <td style="width:250pt; padding:5pt 5pt 5pt 5pt;">Cell 2</td> </tr> </tbody> """ tbody = etree.Element("tbody") for row in rows: row_html = TableConverter.convert_row(row, tblCellMar) tbody.append(row_html) return tbody
[docs] @staticmethod def convert_row(row: TableRow, tblCellMar: Optional[MarginProperties]) -> etree.Element: """ Converts a table row to HTML. Args: row: The row to convert. tblCellMar: The table cell margins to apply. Returns: etree.Element: The HTML tr element representing the row. Example: The output HTML tr might look like: .. code-block:: html <tr style="height:20pt;"> <td style="width:250pt; padding:5pt 5pt 5pt 5pt;">Cell 1</td> <td style="width:250pt; padding:5pt 5pt 5pt 5pt;">Cell 2</td> </tr> """ tr = etree.Element("tr") row_properties_html = TableConverter.convert_row_properties(row.properties) if row_properties_html: tr.set("style", row_properties_html) cells_html = TableConverter.convert_cells(row.cells, tblCellMar) for cell_html in cells_html: tr.append(cell_html) return tr
[docs] @staticmethod def convert_row_properties( properties: Optional[TableRowProperties], ) -> str: """ Converts row properties to an HTML style attribute. Args: properties: The row properties to convert. Returns: str: The HTML style attribute representing the row properties. Example: The output style attribute might look like: .. code-block:: css 'height:20pt; font-weight:bold;' """ row_props = properties or TableRowProperties.model_validate({}) styles: List[str] = [] if row_props.trHeight: tr_height = ( float(row_props.trHeight) if isinstance(row_props.trHeight, str) else row_props.trHeight ) if tr_height is not None: styles.append(f"height:{tr_height}pt;") if row_props.tblHeader: styles.append("font-weight:bold;") return " ".join(styles)
[docs] @staticmethod def convert_cells( cells: Sequence[TableCell], tblCellMar: Optional[MarginProperties] ) -> list: """ Converts table cells to HTML. Args: cells: The list of cells to convert. tblCellMar: The table cell margins to apply. Returns: list: The list of HTML td elements representing the cells. Example: The output HTML td elements might look like: .. code-block:: html <td style="width:250pt; padding:5pt 5pt 5pt 5pt;">Cell 1</td> <td style="width:250pt; padding:5pt 5pt 5pt 5pt;">Cell 2</td> """ cell_elements = [] for cell in cells: td = etree.Element("td") cell_properties_html = TableConverter.convert_cell_properties(cell.properties, tblCellMar) td.set("style", cell_properties_html) if TableConverter.is_cell_empty(cell): # Check if the cell is empty td.text = "\u00A0" else: for paragraph in cell.paragraphs: paragraph_html = ParagraphConverter.convert_paragraph(paragraph, None) td.append(html.fragment_fromstring(paragraph_html)) cell_elements.append(td) return cell_elements
[docs] @staticmethod def is_cell_empty(cell) -> bool: """ Check if a cell is empty by verifying if all runs in all paragraphs have no contents. Args: cell: The cell to check. Returns: bool: True if the cell is empty, False otherwise. Example: .. code-block:: python if TableConverter.is_cell_empty(cell): print("Cell is empty") else: print("Cell is not empty") """ for paragraph in cell.paragraphs: for run in paragraph.runs: if run.contents: return False return True
[docs] @staticmethod def convert_cell_properties(properties, tblCellMar) -> str: """ Converts cell properties to an HTML style attribute. Args: properties: The cell properties to convert. tblCellMar: The table cell margins to apply. Returns: str: The HTML style attribute representing the cell properties. Defaults: - Word wrapping is enabled with "word-wrap: break-word;" and "word-break: break-all;". - Overflow handling is enabled with "overflow-wrap: break-word;" and "overflow: hidden;". - Vertical alignment defaults to "top". Example: The output style attribute might look like: .. code-block:: css 'width:250pt; border-top:1pt solid #000000; border-left:1pt solid #000000; border-bottom:1pt solid #000000; border-right:1pt solid #000000; background-color:#FFFFFF; padding:5pt 5pt 5pt 5pt; vertical-align:top;' """ cell_properties = properties or TableCellProperties.model_validate({}) styles = [ "word-wrap: break-word;", # Allow words to be broken at arbitrary points "word-break: break-all;", # Ensure long words break and wrap into next line "overflow-wrap: break-word;", # Handle long words in tables "overflow: hidden;" # Hide overflow content ] if cell_properties.tcW and cell_properties.tcW.width is not None: styles.append(f"width:{cell_properties.tcW.width}pt;") if cell_properties.tcBorders: borders = cell_properties.tcBorders if borders.top and borders.top.size is not None: styles.append( f"border-top:{borders.top.size / 8}pt " f"{TableConverter.map_border_style(borders.top.val)} #{borders.top.color};" ) if borders.left and borders.left.size is not None: styles.append( f"border-left:{borders.left.size / 8}pt " f"{TableConverter.map_border_style(borders.left.val)} #{borders.left.color};" ) if borders.bottom and borders.bottom.size is not None: styles.append( f"border-bottom:{borders.bottom.size / 8}pt " f"{TableConverter.map_border_style(borders.bottom.val)} #{borders.bottom.color};" ) if borders.right and borders.right.size is not None: styles.append( f"border-right:{borders.right.size / 8}pt " f"{TableConverter.map_border_style(borders.right.val)} #{borders.right.color};" ) if cell_properties.shd and cell_properties.shd.fill: styles.append(f"background-color:#{cell_properties.shd.fill};") if tblCellMar: styles.append( f"padding: {tblCellMar.top}pt {tblCellMar.right}pt " f"{tblCellMar.bottom}pt {tblCellMar.left}pt;" ) alignment_value = getattr(cell_properties, "verticalAlignment", None) if not alignment_value and cell_properties.vAlign: alignment_value = cell_properties.vAlign if alignment_value: styles.append( f"vertical-align:{TableConverter.map_vertical_alignment(alignment_value)};" ) else: styles.append("vertical-align:top;") text_alignment = getattr(cell_properties, "textAlignment", None) if text_alignment: styles.append(f"text-align:{text_alignment};") return " ".join(styles)
[docs] @staticmethod def map_border_style(val) -> str: """ Maps DOCX border style to CSS border style. Args: val: The DOCX border style. Returns: str: The CSS border style. Example: The output CSS border style might look like: .. code-block:: css 'solid' """ mapping = { "single": "solid", "double": "double", "dashed": "dashed", # Add more mappings as needed } return mapping.get(val, "solid") # Default to solid if style is unknown
[docs] @staticmethod def map_vertical_alignment(val) -> str: """ Maps DOCX vertical alignment to CSS vertical alignment. Args: val: The DOCX vertical alignment. Returns: str: The CSS vertical alignment. Example: The output CSS vertical alignment might look like: .. code-block:: css 'top' """ mapping = { "top": "top", "center": "middle", "bottom": "bottom", # Add more mappings as needed } return mapping.get(val, "top") # Default to top if alignment is unknown