Source code for docx_parser_converter.docx_to_html.converters.table_converter

from lxml import etree, html
from docx_parser_converter.docx_parsers.models.table_models import Table
from docx_parser_converter.docx_to_html.converters.paragraph_converter import ParagraphConverter

[docs] class TableConverter: """ A converter class for converting DOCX tables to HTML. """
[docs] @staticmethod def convert_table(table: Table) -> str: """ Converts a table to its HTML representation. Args: table (Table): The table to convert. Returns: str: The HTML representation of the table. Example: Given a table with properties, the output HTML string might look like: .. code-block:: html <table style="border-collapse: collapse; width:500pt;"> <colgroup> <col style="width:250pt;"> <col style="width:250pt;"> </colgroup> <tbody> <tr style="height:20pt;"> <td style="width:250pt; padding:5pt 5pt 5pt 5pt;">Cell 1</td> <td style="width:250pt; padding:5pt 5pt 5pt 5pt;">Cell 2</td> </tr> </tbody> </table> """ table_html = etree.Element("table") table_properties_html = TableConverter.convert_table_properties(table.properties) table_html.set("style", table_properties_html) table_grid_html = TableConverter.convert_grid(table.grid.columns) table_html.append(html.fragment_fromstring(table_grid_html)) rows_html = TableConverter.convert_rows(table.rows, table.properties.tblCellMar) table_html.append(rows_html) return html.tostring(table_html, pretty_print=True, encoding="unicode")
[docs] @staticmethod def convert_table_properties(properties) -> str: """ Converts table properties to an HTML style attribute. Args: properties: The table properties to convert. Returns: str: The HTML style attribute representing the table properties. Example: The output style attribute might look like: .. code-block:: css 'border-collapse: collapse; width:500pt; text-align:center; margin-left:20pt; padding: 5pt 5pt 5pt 5pt;' """ styles = ["border-collapse: collapse;"] # Ensure borders collapse if properties.tblW: styles.append(f"width:{properties.tblW.width}pt;") if properties.justification: styles.append(f"text-align:{properties.justification};") if properties.tblInd: styles.append(f"margin-left:{properties.tblInd.width}pt;") if properties.tblCellMar: styles.append(f"padding: {properties.tblCellMar.top}pt {properties.tblCellMar.right}pt {properties.tblCellMar.bottom}pt {properties.tblCellMar.left}pt;") if properties.tblLayout: styles.append(f"table-layout:{properties.tblLayout};") return " ".join(styles)
[docs] @staticmethod def convert_grid(columns) -> str: """ Converts table grid columns to HTML. Args: columns: The grid columns widths. Returns: str: The HTML colgroup element representing the grid columns. Example: The output HTML colgroup might look like: .. code-block:: html <colgroup> <col style="width:250pt;"> <col style="width:250pt;"> </colgroup> """ colgroup = etree.Element("colgroup") for width in columns: col = etree.SubElement(colgroup, "col", style=f"width:{width}pt;") return html.tostring(colgroup, pretty_print=True, encoding="unicode")
[docs] @staticmethod def convert_rows(rows, tblCellMar) -> etree.Element: """ Converts table rows to HTML. Args: rows: The list of table rows to convert. tblCellMar: The table cell margins to apply. Returns: etree.Element: The HTML tbody element representing the rows. Example: The output HTML tbody might look like: .. code-block:: html <tbody> <tr style="height:20pt;"> <td style="width:250pt; padding:5pt 5pt 5pt 5pt;">Cell 1</td> <td style="width:250pt; padding:5pt 5pt 5pt 5pt;">Cell 2</td> </tr> </tbody> """ tbody = etree.Element("tbody") for row in rows: row_html = TableConverter.convert_row(row, tblCellMar) tbody.append(row_html) return tbody
[docs] @staticmethod def convert_row(row, tblCellMar) -> etree.Element: """ Converts a table row to HTML. Args: row: The row to convert. tblCellMar: The table cell margins to apply. Returns: etree.Element: The HTML tr element representing the row. Example: The output HTML tr might look like: .. code-block:: html <tr style="height:20pt;"> <td style="width:250pt; padding:5pt 5pt 5pt 5pt;">Cell 1</td> <td style="width:250pt; padding:5pt 5pt 5pt 5pt;">Cell 2</td> </tr> """ tr = etree.Element("tr") row_properties_html = TableConverter.convert_row_properties(row.properties) tr.set("style", row_properties_html) cells_html = TableConverter.convert_cells(row.cells, tblCellMar) for cell_html in cells_html: tr.append(cell_html) return tr
[docs] @staticmethod def convert_row_properties(properties) -> str: """ Converts row properties to an HTML style attribute. Args: properties: The row properties to convert. Returns: str: The HTML style attribute representing the row properties. Example: The output style attribute might look like: .. code-block:: css 'height:20pt; font-weight:bold;' """ styles = [] if properties.trHeight: tr_height = float(properties.trHeight) if isinstance(properties.trHeight, str) else properties.trHeight styles.append(f"height:{tr_height}pt;") if properties.tblHeader: styles.append("font-weight:bold;") return " ".join(styles)
[docs] @staticmethod def convert_cells(cells, tblCellMar) -> list: """ Converts table cells to HTML. Args: cells: The list of cells to convert. tblCellMar: The table cell margins to apply. Returns: list: The list of HTML td elements representing the cells. Example: The output HTML td elements might look like: .. code-block:: html <td style="width:250pt; padding:5pt 5pt 5pt 5pt;">Cell 1</td> <td style="width:250pt; padding:5pt 5pt 5pt 5pt;">Cell 2</td> """ cell_elements = [] for cell in cells: td = etree.Element("td") cell_properties_html = TableConverter.convert_cell_properties(cell.properties, tblCellMar) td.set("style", cell_properties_html) if TableConverter.is_cell_empty(cell): # Check if the cell is empty td.text = "\u00A0" else: for paragraph in cell.paragraphs: paragraph_html = ParagraphConverter.convert_paragraph(paragraph, None) td.append(html.fragment_fromstring(paragraph_html)) cell_elements.append(td) return cell_elements
[docs] @staticmethod def is_cell_empty(cell) -> bool: """ Check if a cell is empty by verifying if all runs in all paragraphs have no contents. Args: cell: The cell to check. Returns: bool: True if the cell is empty, False otherwise. Example: .. code-block:: python if TableConverter.is_cell_empty(cell): print("Cell is empty") else: print("Cell is not empty") """ for paragraph in cell.paragraphs: for run in paragraph.runs: if run.contents: return False return True
[docs] @staticmethod def convert_cell_properties(properties, tblCellMar) -> str: """ Converts cell properties to an HTML style attribute. Args: properties: The cell properties to convert. tblCellMar: The table cell margins to apply. Returns: str: The HTML style attribute representing the cell properties. Defaults: - Word wrapping is enabled with "word-wrap: break-word;" and "word-break: break-all;". - Overflow handling is enabled with "overflow-wrap: break-word;" and "overflow: hidden;". - Vertical alignment defaults to "top". Example: The output style attribute might look like: .. code-block:: css 'width:250pt; border-top:1pt solid #000000; border-left:1pt solid #000000; border-bottom:1pt solid #000000; border-right:1pt solid #000000; background-color:#FFFFFF; padding:5pt 5pt 5pt 5pt; vertical-align:top;' """ styles = [ "word-wrap: break-word;", # Allow words to be broken at arbitrary points "word-break: break-all;", # Ensure long words break and wrap into next line "overflow-wrap: break-word;", # Handle long words in tables "overflow: hidden;" # Hide overflow content ] if properties.tcW: styles.append(f"width:{properties.tcW.width}pt;") if properties.tcBorders: if properties.tcBorders.top: styles.append(f"border-top:{properties.tcBorders.top.size / 8}pt {TableConverter.map_border_style(properties.tcBorders.top.val)} #{properties.tcBorders.top.color};") if properties.tcBorders.left: styles.append(f"border-left:{properties.tcBorders.left.size / 8}pt {TableConverter.map_border_style(properties.tcBorders.left.val)} #{properties.tcBorders.left.color};") if properties.tcBorders.bottom: styles.append(f"border-bottom:{properties.tcBorders.bottom.size / 8}pt {TableConverter.map_border_style(properties.tcBorders.bottom.val)} #{properties.tcBorders.bottom.color};") if properties.tcBorders.right: styles.append(f"border-right:{properties.tcBorders.right.size / 8}pt {TableConverter.map_border_style(properties.tcBorders.right.val)} #{properties.tcBorders.right.color};") if properties.shd: styles.append(f"background-color:#{properties.shd.fill};") if tblCellMar: styles.append(f"padding: {tblCellMar.top}pt {tblCellMar.right}pt {tblCellMar.bottom}pt {tblCellMar.left}pt;") if hasattr(properties, 'verticalAlignment'): styles.append(f"vertical-align:{TableConverter.map_vertical_alignment(properties.verticalAlignment)};") else: styles.append("vertical-align: top;") # Default to top if not specified if hasattr(properties, 'textAlignment'): styles.append(f"text-align:{properties.textAlignment};") return " ".join(styles)
[docs] @staticmethod def map_border_style(val) -> str: """ Maps DOCX border style to CSS border style. Args: val: The DOCX border style. Returns: str: The CSS border style. Example: The output CSS border style might look like: .. code-block:: css 'solid' """ mapping = { "single": "solid", "double": "double", "dashed": "dashed", # Add more mappings as needed } return mapping.get(val, "solid") # Default to solid if style is unknown
[docs] @staticmethod def map_vertical_alignment(val) -> str: """ Maps DOCX vertical alignment to CSS vertical alignment. Args: val: The DOCX vertical alignment. Returns: str: The CSS vertical alignment. Example: The output CSS vertical alignment might look like: .. code-block:: css 'top' """ mapping = { "top": "top", "center": "middle", "bottom": "bottom", # Add more mappings as needed } return mapping.get(val, "top") # Default to top if alignment is unknown