scripts/prepare_html_rules.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56

#!/usr/bin/env python3
"""Add ids to all rules based on their headline"""
# TODO: support nested lists

import sys
import re

# Matches the start of an opening heading tag: "<h" followed by one digit.
HEADER_RE = re.compile(r'<h\d')
# Captures the double-quoted value of a data-number attribute (group 1).
DATA_NUMBER_RE = re.compile(r'data\-number="([^"]*)"')


def add_ids(ol):
    """Add an id and the "rule" class to every li found in *ol*.

    The id prefix is the ``data-number`` attribute of the nearest preceding
    sibling that is a heading element (``h1`` .. ``h6``).  When no heading
    precedes the list, the prefix is the empty string.

    Args:
        ol: The list element to process.  It must provide
            ``previous_siblings`` and ``find_all`` and its items must
            support item assignment -- presumably a BeautifulSoup ``Tag``;
            verify against the caller.

    Raises:
        KeyError: If the nearest preceding heading has no ``data-number``
            attribute.
    """
    # Only real headings count.  A plain ``startswith("h")`` would also
    # accept tags such as <hr> or <header>, which carry no data-number.
    heading_tags = {"h1", "h2", "h3", "h4", "h5", "h6"}

    _id = ""
    for s in ol.previous_siblings:
        # Siblings without a tag name (name is None/empty) never match.
        if s.name in heading_tags:
            _id = s.attrs["data-number"]
            break

    # Items are numbered starting at 1 in the order find_all returns them.
    for i, li in enumerate(ol.find_all("li"), start=1):
        li["id"] = f"{_id}-{i}"
        li["class"] = "rule"


def extract_headline_nr(line: str) -> str:
    """Return the headline number stored in the line's data-number attribute.

    The first ``data-number="..."`` occurrence in *line* wins; an empty
    string is returned when the line has no such attribute.
    """
    found = DATA_NUMBER_RE.search(line)
    return found.group(1) if found else ""


def main():
    """Add ids to all ordered lists in the input

    Reads HTML from stdin line by line and writes the result to stdout.

    * A line that starts with a heading tag gets ``class="rule"`` inserted
      right after the tag name, and its ``data-number`` value becomes the
      current headline number.
    * Inside an ``<ol>`` that follows a numbered headline, every ``<li>``
      tag receives ``id="<headline>-<n>"`` and ``class="rule"``, with *n*
      counting from 1 and restarting at each ``<ol``.
    * ``</ol`` ends the numbering.  Nested lists are not handled.
    """
    # ``\b`` keeps tags that merely start with "<li" (e.g. <link>) untouched.
    li_open_re = re.compile(r'<li\b')
    headline = ""
    i = 0  # next item number; 0 means "not inside a numbered list"

    def number_item(_match):
        """Return the replacement for one <li and advance the counter."""
        nonlocal i
        tag = f'<li id="{headline}-{i}" class="rule"'
        i += 1
        return tag

    # Iterate stdin lazily instead of loading the whole input with readlines().
    for line in sys.stdin:
        if HEADER_RE.match(line):
            headline = extract_headline_nr(line)
            # HEADER_RE.match anchors at the line start, so "<hN" spans the
            # first three characters; insert the class right after it.
            h_idx = line.find('<h') + 3
            line = line[:h_idx] + ' class="rule"' + line[h_idx:]
        if '<ol' in line and headline:
            i = 1
        if i:
            # Number each <li on the line individually so that several items
            # on one line do not end up sharing the same id.
            line = li_open_re.sub(number_item, line)
        if '</ol' in line:
            i = 0
        print(line, end="")


# Run the stdin-to-stdout filter only when executed as a script.
if __name__ == "__main__":
    main()