from lxml import etree
from typing import List
from docx_parser_converter.docx_parsers.helpers.common_helpers import extract_element, NAMESPACE_URI
from docx_parser_converter.docx_parsers.models.paragraph_models import Run, RunContent, TextContent, TabContent
from docx_parser_converter.docx_parsers.models.styles_models import RunStyleProperties
from docx_parser_converter.docx_parsers.styles.run_properties_parser import RunPropertiesParser
[docs]
class RunParser:
"""
A parser for extracting run elements from the DOCX document structure.
This class handles the extraction of run properties and contents within a
run element, converting them into a structured Run object for further
processing or conversion to other formats like HTML.
"""
[docs]
def parse(self, r: etree.Element) -> Run:
"""
Parses a run from the given XML element.
Args:
r (etree.Element): The run XML element.
Returns:
Run: The parsed run.
Example:
The following is an example of a run element in a document.xml file:
.. code-block:: xml
<w:r>
<w:rPr>
<w:b/>
<w:color w:val="FF0000"/>
</w:rPr>
<w:t>Example text</w:t>
</w:r>
"""
rPr = extract_element(r, ".//w:rPr")
run_properties = RunPropertiesParser().parse(rPr) if rPr else RunStyleProperties()
contents = self.extract_run_contents(r)
return Run(contents=contents, properties=run_properties)