Skip to content

Commit

Permalink
feat: prise en charge de liens multiples
Browse files: browse the repository at this point in the history
  • Loading branch information
dhdaines committed Feb 17, 2024
1 parent 4599ff1 commit cb6f86a
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 17 deletions.
28 changes: 14 additions & 14 deletions alexi/analyse.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -76,10 +76,10 @@ def img(self) -> str:
# should be done with the sequence CRF
SECTION = r"\b(?:article|chapitre|section|sous-section|annexe)s?"
NUMERO = r"[\d\.XIV]+"
NUMEROS = rf"{NUMERO}(?:(?:,|\s+(?:et|ou))\s+{NUMERO})*"
NUMEROS = rf"{NUMERO}(?P<numeros>(?:,|\s+(?:et|ou))\s+{NUMERO})*"
MILIEU = r"\btypes?\s+des?\s+milieux?"
MTYPE = r"[\dA-Z]+\.\d"
MTYPES = rf"{MTYPE}(?:(?:,|\s+(?:et|ou))\s+{MTYPE})*"
MTYPES = rf"{MTYPE}(?P<mtypes>(?:,|\s+(?:et|ou))\s+{MTYPE})*"
RLRQ = r"(?:c\.|(?:R\.?\s*)?[LR]\.?\s*R\.?\s*Q\.?)\s*,?[^\)]+"
REGNUM = rf"(?:(?:SQ-)?\d[\d\.A-Z-]+|\({RLRQ}\))"
REGLEMENT = rf"""
Expand All @@ -104,9 +104,9 @@ def img(self) -> str:
MATCHER = re.compile(
rf"""
(?:
(?:{SECTION}\s+(?P<numeros>{NUMEROS})
(?:{SECTION}\s+(?P<numero>{NUMEROS})
(?:\s+{DU}\s+{SECTION}\s+{NUMERO})*
|{MILIEU}\s+(?P<mtypes>{MTYPES}))
|{MILIEU}\s+(?P<mtype>{MTYPES}))
(?:\s+{DU}\s+(?:{REGLEMENT}|{LOI}))?
|{REGLEMENT}|{LOI})
""",
Expand All @@ -122,22 +122,22 @@ def match_links(text: str):
"""
for m in MATCHER.finditer(text):
if m.group("numeros") is not None:
before = re.sub(r"s$", "", text[: m.start("numeros")].strip())
after = text[m.end("numeros") :]
for num in NUMMATCH.finditer(m.group("numeros")):
before = re.sub(r"s$", "", text[m.start() : m.start("numero")].strip())
after = text[m.end("numero") : m.end()]
for num in NUMMATCH.finditer(m.group("numero")):
yield Hyperlien(
m.start("numeros") + num.start(),
m.start("numeros") + num.end(),
m.start("numero") + num.start(),
m.start("numero") + num.end(),
f"{before} {num.group()}{after}",
None,
)
elif m.group("mtypes") is not None:
before = text[: m.start("mtypes")]
after = text[m.end("mtypes") :]
for mt in MTMATCH.finditer(m.group("mtypes")):
before = text[m.start() : m.start("mtype")]
after = text[m.end("mtype") : m.end()]
for mt in MTMATCH.finditer(m.group("mtype")):
yield Hyperlien(
m.start("mtypes") + mt.start(),
m.start("mtypes") + mt.end(),
m.start("mtype") + mt.start(),
m.start("mtype") + mt.end(),
f"{before}{mt.group()}{after}",
None,
)
Expand Down
5 changes: 3 additions & 2 deletions alexi/format.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -94,8 +94,9 @@ def bloc_html(self, bloc: Bloc) -> str:
link_text = text[link.start : link.end]
href = link.href
if href is None and self.resolver:
href = self.resolver(link_text, str(self.path), self.doc)
LOGGER.info("%s:%s -> %s", link_text, self.path, href)
href_text = link_text if link.alt is None else link.alt
href = self.resolver(href_text, str(self.path), self.doc)
LOGGER.info("%s:%s -> %s", href_text, self.path, href)
if href is None:
chunks.append(link_text)
else:
Expand Down
2 changes: 1 addition & 1 deletion test/test_link.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -288,7 +288,7 @@ def test_locate_article(test_input, expected):
"types des milieux T5.1, T5.2, T5.3, ZC.1 et ZC.2 du Règlement de zonage 1314-2021-Z",
"types des milieux",
["T5.1", "T5.2", "T5.3", "ZC.1", "ZC.2"],
"du Règlement de zonage 1314-2021-Z",
"du Règlement de zonage",
),
]

Expand Down

0 comments on commit cb6f86a

Please sign in to comment.