from docx_parser_converter.docx_parsers.models.paragraph_models import Paragraph
from docx_parser_converter.docx_parsers.models.numbering_models import NumberingLevel, NumberingSchema
"""
TODO:
Apply run-level styling to the numbering text itself.
This requires updating the DocumentParser and the ParagraphStyleProperties so
that a paragraph carries an additional RunStyleProperties for its numbering,
separate from the RunStyleProperties that each Run model already has.
"""
[docs]
class NumberingConverter:
"""
A converter class for handling numbered paragraphs in DOCX documents.

All methods are static. The running list counters are kept in the class
attribute ``_numbering_counters``, so they are shared by every caller and
persist between calls to ``convert_numbering`` until ``reset_counters`` is
called.
"""
# Maps a numbering ID (numId) to a list of per-level counters (index = ilvl).
# Entries are created lazily by convert_numbering with 9 slots each.
_numbering_counters = {}
[docs]
@staticmethod
def reset_counters() -> None:
    """
    Discards all accumulated numbering counters.

    The class-level counter mapping is rebound to a new, empty dictionary, so
    the next numbered paragraph of every list starts counting from scratch.
    """
    fresh_counters = dict()
    NumberingConverter._numbering_counters = fresh_counters
[docs]
@staticmethod
def convert_numbering(paragraph: Paragraph, numbering_schema: NumberingSchema) -> str:
    """
    Converts the numbering for a given paragraph to its HTML representation.

    Side effects: increments the class-level counter for the paragraph's
    (numId, ilvl) and zeroes the counters of all deeper levels of that list.

    Args:
        paragraph (Paragraph): The paragraph containing the numbering to convert.
        numbering_schema: The schema containing numbering definitions.

    Returns:
        str: The HTML representation of the numbering. If the numbering level
        cannot be resolved, a warning is printed and a plain bullet ("•") is
        returned instead.

    Example:
        Given a paragraph with numbering, the output HTML string might look like:

        .. code-block:: html

            <span style="font-family:Times New Roman;">1.</span><span style="padding-left:7.2pt;"></span>
            <span style="font-family:Times New Roman;">I.</span><span style="padding-left:7.2pt;"></span>
    """
    # Local import keeps this method self-contained; the label text and font
    # name come from the document and must be escaped before going into HTML.
    from html import escape

    numbering = paragraph.numbering
    try:
        numbering_level = NumberingConverter.get_numbering_level(
            numbering_schema, numbering.numId, numbering.ilvl
        )
    except Exception as e:
        # Best effort: an unresolved (or missing) numbering degrades to a bullet.
        print(f"Warning: {e}")
        return "•"

    num_id = numbering.numId
    ilvl = numbering.ilvl
    max_levels = 9  # Supports up to 9 levels

    counters = NumberingConverter._numbering_counters.setdefault(num_id, [0] * max_levels)
    counters[ilvl] += 1
    # Reset counters for deeper levels if a higher level is incremented
    for deeper in range(ilvl + 1, max_levels):
        counters[deeper] = 0

    # Replace all placeholders in lvlText. "%N" refers to the counter of level
    # N-1 and must be rendered with THAT level's number format (e.g. "%1.%2"
    # under an upper-Roman parent is "I.1", not "1.1").
    lvl_text = numbering_level.lvlText
    for level_index in range(ilvl + 1):
        placeholder = f"%{level_index + 1}"
        if placeholder not in lvl_text:
            continue
        num_fmt = numbering_level.numFmt
        if level_index != ilvl:
            try:
                num_fmt = NumberingConverter.get_numbering_level(
                    numbering_schema, num_id, level_index
                ).numFmt
            except ValueError:
                # Ancestor level is not defined in the schema: keep the
                # current level's format rather than failing the paragraph.
                pass
        lvl_text = lvl_text.replace(
            placeholder,
            NumberingConverter.format_number(counters[level_index], num_fmt),
        )

    indent = numbering_level.indent
    indent_left_pt = indent.left_pt if indent and indent.left_pt else 0
    firstline_indent_pt = indent.firstline_pt if indent and indent.firstline_pt else 0

    # Rough width estimate of the rendered label: narrow punctuation counts as
    # 3.6pt, every other character as 7.2pt. Measured on the unescaped text.
    narrow_chars = ('.', '(', ')')
    numbering_text_length_pt = sum(3.6 if c in narrow_chars else 7.2 for c in lvl_text)

    # Gap between the label and the paragraph text; never less than 7.2pt.
    padding_pt = 7.2
    if numbering_level.tab_pt:
        net_padding = (
            numbering_level.tab_pt
            - (indent_left_pt + firstline_indent_pt)
            - numbering_text_length_pt
        )
        padding_pt = max(net_padding, 7.2)

    safe_text = escape(lvl_text)
    fonts = numbering_level.fonts
    if fonts and fonts.ascii:
        label = f'<span style="font-family:{escape(fonts.ascii)};">{safe_text}</span>'
    elif numbering_level.tab_pt:
        label = f'<span>{safe_text}</span>'
    else:
        # Historical output shape: no wrapper span without a font or tab stop.
        label = safe_text
    return f'{label}<span style="padding-left:{padding_pt}pt;"></span>'
[docs]
@staticmethod
def get_numbering_level(numbering_schema, numId: int, ilvl: int) -> NumberingLevel:
    """
    Looks up a single numbering level definition in the numbering schema.

    Only the first instance whose ``numId`` matches is consulted, and within
    it only the first level whose ``ilvl`` matches.

    Args:
        numbering_schema: The schema containing numbering definitions.
        numId (int): The numbering ID.
        ilvl (int): The numbering level.

    Returns:
        NumberingLevel: The retrieved numbering level.

    Raises:
        ValueError: If the numbering level is not found.

    Example:
        The numbering level might be represented in the schema as:

        .. code-block:: xml

            <w:num w:numId="1">
                <w:abstractNumId w:val="0"/>
            </w:num>
            <w:abstractNum w:abstractNumId="0">
                <w:lvl w:ilvl="0">
                    <w:start w:val="1"/>
                    <w:numFmt w:val="decimal"/>
                    <w:lvlText w:val="%1."/>
                    <w:lvlJc w:val="left"/>
                    <w:pPr>
                        <w:ind w:left="720" w:hanging="360"/>
                    </w:pPr>
                    <w:rPr>
                        <w:rFonts w:ascii="Times New Roman"/>
                    </w:rPr>
                </w:lvl>
            </w:abstractNum>
    """
    # Step 1: first numbering instance carrying the requested numId.
    matched_instance = None
    for candidate in numbering_schema.instances:
        if candidate.numId == numId:
            matched_instance = candidate
            break

    # Step 2: first level of that instance carrying the requested ilvl.
    matched_level = None
    if matched_instance:
        for candidate_level in matched_instance.levels:
            if candidate_level.ilvl == ilvl:
                matched_level = candidate_level
                break

    if not matched_level:
        raise ValueError(f"Numbering level not found for numId: {numId}, ilvl: {ilvl}")
    return matched_level
[docs]
@staticmethod
def to_roman(num: int) -> str:
    """
    Builds the uppercase Roman numeral for a number.

    Values of zero or below produce an empty string.

    Args:
        num (int): The number to convert.

    Returns:
        str: The Roman numeral representation.

    Example:
        The following converts a number to Roman numeral:

        .. code-block:: python

            NumberingConverter.to_roman(1)  # "I"
    """
    # Ordered from largest to smallest, including the subtractive pairs.
    numerals = (
        (1000, "M"), (900, "CM"), (500, "D"), (400, "CD"),
        (100, "C"), (90, "XC"), (50, "L"), (40, "XL"),
        (10, "X"), (9, "IX"), (5, "V"), (4, "IV"), (1, "I"),
    )
    pieces = []
    remaining = num
    for value, symbol in numerals:
        if remaining <= 0:
            break
        repeat, remaining = divmod(remaining, value)
        pieces.append(symbol * repeat)
    return "".join(pieces)
[docs]
@staticmethod
def to_upper_letter(num: int) -> str:
    """
    Converts a number to its uppercase letter representation.

    1-26 map to "A"-"Z". Past "Z" the sequence continues by repeating the
    letter, as word processors do for letter-numbered lists: 27 -> "AA",
    28 -> "BB", ..., 52 -> "ZZ", 53 -> "AAA". Values below 1 have no letter
    and yield an empty string.

    Args:
        num (int): The number to convert.

    Returns:
        str: The uppercase letter representation.

    Example:
        The following converts a number to an uppercase letter:

        .. code-block:: python

            NumberingConverter.to_upper_letter(1)   # "A"
            NumberingConverter.to_upper_letter(27)  # "AA"
    """
    if num < 1:
        return ""
    # extra_repeats: how many full alphabets were passed; offset: letter index.
    extra_repeats, offset = divmod(num - 1, 26)
    return chr(ord("A") + offset) * (extra_repeats + 1)
[docs]
@staticmethod
def to_lower_letter(num: int) -> str:
    """
    Converts a number to its lowercase letter representation.

    1-26 map to "a"-"z". Past "z" the sequence continues by repeating the
    letter, as word processors do for letter-numbered lists: 27 -> "aa",
    28 -> "bb", ..., 52 -> "zz", 53 -> "aaa". Values below 1 have no letter
    and yield an empty string.

    Args:
        num (int): The number to convert.

    Returns:
        str: The lowercase letter representation.

    Example:
        The following converts a number to a lowercase letter:

        .. code-block:: python

            NumberingConverter.to_lower_letter(1)   # "a"
            NumberingConverter.to_lower_letter(27)  # "aa"
    """
    if num < 1:
        return ""
    # extra_repeats: how many full alphabets were passed; offset: letter index.
    extra_repeats, offset = divmod(num - 1, 26)
    return chr(ord("a") + offset) * (extra_repeats + 1)