from docx_parser_converter.docx_parsers.models.paragraph_models import Paragraph
from docx_parser_converter.docx_to_txt.converters.run_converter import RunConverter
from docx_parser_converter.docx_to_txt.converters.numbering_converter import NumberingConverter
[docs]
class ParagraphConverter:
"""
Class to convert paragraphs to plain text.
"""
[docs]
@staticmethod
def convert_paragraph(paragraph: Paragraph, numbering_schema, indent: bool) -> str:
"""
Convert a paragraph to plain text.
Args:
paragraph (Paragraph): The paragraph object.
numbering_schema: The numbering schema.
indent (bool): Whether to apply indentation.
Returns:
str: Plain text representation of the paragraph.
Example:
.. code-block:: python
paragraph_text = ParagraphConverter.convert_paragraph(paragraph, numbering_schema, indent=True)
print(paragraph_text)
"""
paragraph_text = ""
if paragraph.numbering:
paragraph_text += NumberingConverter.convert_numbering(paragraph, numbering_schema)
for run in paragraph.runs:
paragraph_text += RunConverter.convert_run(run)
if indent and paragraph.properties and paragraph.properties.indent:
indent_value = paragraph.properties.indent.left_pt or 0
paragraph_text = ParagraphConverter.add_indentation(paragraph_text, indent_value)
return paragraph_text
[docs]
@staticmethod
def add_indentation(text: str, indent_value: float) -> str:
"""
Add indentation to the text based on the indent value in points.
Args:
text (str): The text to indent.
indent_value (float): The indentation value in points.
Returns:
str: The indented text.
Example:
.. code-block:: python
indented_text = ParagraphConverter.add_indentation("This is a test.", 72)
print(indented_text) # Output: "\t\tThis is a test."
"""
tab_size_in_points = 36
num_tabs = int(indent_value // tab_size_in_points)
remaining_points = indent_value % tab_size_in_points
num_spaces = int(remaining_points / (tab_size_in_points / 4)) # Assume 4 spaces per tab size
return "\t" * num_tabs + " " * num_spaces + text
[docs]
@staticmethod
def add_spacing(prev_paragraph: Paragraph, curr_paragraph: Paragraph) -> str:
"""
Add spacing between paragraphs based on their spacing properties.
Args:
prev_paragraph (Paragraph): The previous paragraph.
curr_paragraph (Paragraph): The current paragraph.
Returns:
str: Newlines to add for spacing.
Example:
.. code-block:: python
spacing = ParagraphConverter.add_spacing(prev_paragraph, curr_paragraph)
print(spacing) # Output: "\n\n" (depending on spacing properties)
"""
spacing_after = prev_paragraph.properties.spacing.after_pt if prev_paragraph.properties and prev_paragraph.properties.spacing and prev_paragraph.properties.spacing.after_pt is not None else 0
spacing_before = curr_paragraph.properties.spacing.before_pt if curr_paragraph.properties and curr_paragraph.properties.spacing and curr_paragraph.properties.spacing.before_pt is not None else 0
# Total spacing in points
total_spacing_points = spacing_after + spacing_before
# Convert points to newlines (1 newline = 12 points, assuming standard line height)
# Use a threshold of 6 points for adding a newline
threshold = 6
num_newlines = int((total_spacing_points + threshold) // 12)
return "\n" * num_newlines
[docs]
@staticmethod
def convert_paragraph_properties(properties, indent: bool) -> str:
"""
Convert paragraph properties to text format.
Args:
properties: The paragraph properties.
indent (bool): Whether to apply indentation.
Returns:
str: Text representation of paragraph properties.
Example:
.. code-block:: python
paragraph_properties_text = ParagraphConverter.convert_paragraph_properties(properties, indent=True)
print(paragraph_properties_text)
"""
# Convert properties if needed
return ""