Source code for docx_parser_converter.docx_to_html.docx_to_html_converter

import os
from docx_parser_converter.docx_to_html.docx_processor import DocxProcessor
from docx_parser_converter.docx_to_html.html_generator import HtmlGenerator
from docx_parser_converter.docx_parsers.utils import read_binary_from_file_path


[docs] class DocxToHtmlConverter: """ A converter class for converting DOCX files to HTML. """ def __init__(self, docx_file: bytes, use_default_values: bool = True): """ Initializes the DocxToHtmlConverter with the given DOCX file. Args: docx_file (bytes): The binary content of the DOCX file. use_default_values (bool): Whether to use default values for missing styles and numbering. Defaults to True. Example: The following is an example of how to initialize the DocxToHtmlConverter: .. code-block:: python docx_file_content = read_binary_from_file_path("path/to/your/docx_file.docx") converter = DocxToHtmlConverter(docx_file_content, use_default_values=True) """ self.docx_file = docx_file self.document_schema, self.styles_schema, self.numbering_schema = DocxProcessor.process_docx(docx_file)
[docs] def convert_to_html(self) -> str: """ Converts the DOCX file to HTML. Returns: str: The generated HTML content. Example: The following is an example of how to convert a DOCX file to HTML: .. code-block:: python html_content = converter.convert_to_html() print(html_content) """ return HtmlGenerator.generate_html(self.document_schema, self.numbering_schema)
[docs] def save_html_to_file(self, html_content: str, output_path: str) -> None: """ Saves the generated HTML content to a file. Args: html_content (str): The HTML content to save. output_path (str): The path to save the HTML file. Example: The following is an example of how to save HTML content to a file: .. code-block:: python converter.save_html_to_file(html_content, "path/to/output.html") """ try: with open(output_path, 'w', encoding='utf-8') as file: file.write(html_content) except Exception as e: print(f"Error: Failed to save HTML file. Error: {e}")
if __name__ == "__main__": # docx_path = "C:/Users/omerh/Desktop/docx_test.docx" docx_path = "C:/Users/omerh/Desktop/SAFEs for analysis/SAFE 1 - Cap Only.docx" html_output_path = "C:/Users/omerh/Desktop/new_newnewdocx1.html" if not os.path.exists(docx_path): print(f"File not found: {docx_path}") else: try: docx_file_content = read_binary_from_file_path(docx_path) except Exception as e: print(f"Error: Failed to read DOCX file. Error: {e}") else: converter = DocxToHtmlConverter(docx_file_content, use_default_values=True) html_output = converter.convert_to_html() converter.save_html_to_file(html_output, html_output_path) print(f"HTML file saved to: {html_output_path}")