Skip to content

Commit

Permalink
fix(autofixes): use NO-BREAK SPACE with colons in French and Breton (#12549)
Browse files Browse the repository at this point in the history

* fix(autofixes): use NO-BREAK SPACE with colons in French and Breton

The French style guide recommends a regular non-breaking space (U+00A0)
before a colon, not a narrow non-breaking space (U+202F).

Fixes #11237

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
ib and pre-commit-ci[bot] committed Sep 21, 2024
1 parent 4e8f273 commit 402c806
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 10 deletions.
1 change: 1 addition & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Not yet released.
* :ref:`mt-deepl` now supports specifying translation context.
* :ref:`mt-aws` now supports :ref:`glossary-mt`.
* :ref:`autofix` for Devanagari danda now better handles Latin script.
* :ref:`autofix` for French and Breton now uses a non-breaking space before colons instead of a narrow non-breaking space.

**Bug fixes**

Expand Down
32 changes: 26 additions & 6 deletions weblate/checks/chars.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,22 @@

from weblate.trans.models import Unit

FRENCH_PUNCTUATION = {";", ":", "?", "!"}
FRENCH_PUNCTUATION_NBSP = {":"}
FRENCH_PUNCTUATION_NNBSP = {";", "?", "!"}
FRENCH_PUNCTUATION = FRENCH_PUNCTUATION_NBSP.union(FRENCH_PUNCTUATION_NNBSP)
FRENCH_PUNCTUATION_SPACING = {"Zs", "Ps", "Pe"}
FRENCH_PUNCTUATION_FIXUP_RE = "([ \u00a0\u2009])([{}])".format(
"".join(FRENCH_PUNCTUATION)
FRENCH_PUNCTUATION_FIXUP_RE_NBSP = "([ \u2009\u202f])([{}])".format(
"".join(FRENCH_PUNCTUATION_NBSP)
)
FRENCH_PUNCTUATION_FIXUP_RE_NNBSP = "([ \u00a0\u2009])([{}])".format(
"".join(FRENCH_PUNCTUATION_NNBSP)
)
FRENCH_PUNCTUATION_MISSING_RE_NBSP = "([^\u00a0])([{}])".format(
"".join(FRENCH_PUNCTUATION_NBSP)
)
FRENCH_PUNCTUATION_MISSING_RE_NNBSP = "([^\u202f])([{}])".format(
"".join(FRENCH_PUNCTUATION_NNBSP)
)
FRENCH_PUNCTUATION_MISSING_RE = "([^\u202f])([{}])".format("".join(FRENCH_PUNCTUATION))
MY_QUESTION_MARK = "\u1038\u104b"
INTERROBANGS = ("?!", "!?", "?!", "!?", "⁈", "⁉")

Expand Down Expand Up @@ -487,13 +497,23 @@ def get_fixup(self, unit: Unit) -> Iterable[tuple[str, str, str]] | None:
return [
# First fix possibly wrong whitespace
(
FRENCH_PUNCTUATION_FIXUP_RE,
FRENCH_PUNCTUATION_FIXUP_RE_NBSP,
"\u00a0$2",
"gu",
),
(
FRENCH_PUNCTUATION_FIXUP_RE_NNBSP,
"\u202f$2",
"gu",
),
# Then add missing ones
(
FRENCH_PUNCTUATION_MISSING_RE,
FRENCH_PUNCTUATION_MISSING_RE_NBSP,
"$1\u00a0$2",
"gu",
),
(
FRENCH_PUNCTUATION_MISSING_RE_NNBSP,
"$1\u202f$2",
"gu",
),
Expand Down
8 changes: 6 additions & 2 deletions weblate/trans/autofixes/chars.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
from django.utils.translation import gettext_lazy

from weblate.checks.chars import (
FRENCH_PUNCTUATION_FIXUP_RE,
FRENCH_PUNCTUATION_FIXUP_RE_NBSP,
FRENCH_PUNCTUATION_FIXUP_RE_NNBSP,
EndEllipsisCheck,
PunctuationSpacingCheck,
ZeroWidthSpaceCheck,
Expand Down Expand Up @@ -111,7 +112,10 @@ def fix_single_target(
and "ignore-punctuation-spacing" not in unit.all_flags
):
# Fix existing
new_target = re.sub(FRENCH_PUNCTUATION_FIXUP_RE, "\u202f\\2", target)
new_target = re.sub(FRENCH_PUNCTUATION_FIXUP_RE_NBSP, "\u00a0\\2", target)
new_target = re.sub(
FRENCH_PUNCTUATION_FIXUP_RE_NNBSP, "\u202f\\2", new_target
)
# Do not add missing as that is likely to trigger issues with other content
# such as URLs or Markdown syntax.
return new_target, new_target != target
Expand Down
4 changes: 2 additions & 2 deletions weblate/trans/tests/test_autofix.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,9 @@ def test_punctuation_spacing(self) -> None:
cs_unit = MockUnit(source="Foo:", code="cs")
self.assertEqual(fix.fix_target(["Bar:"], non_unit), (["Bar:"], False))
self.assertEqual(
fix.fix_target(["Bar\u202f:"], fr_unit), (["Bar\u202f:"], False)
fix.fix_target(["Bar\u00a0:"], fr_unit), (["Bar\u00a0:"], False)
)
self.assertEqual(fix.fix_target(["Bar :"], fr_unit), (["Bar\u202f:"], True))
self.assertEqual(fix.fix_target(["Bar :"], fr_unit), (["Bar\u00a0:"], True))
self.assertEqual(fix.fix_target(["Bar:"], fr_unit), (["Bar:"], False))
self.assertEqual(fix.fix_target(["Bar:"], fr_ca_unit), (["Bar:"], False))
self.assertEqual(fix.fix_target(["Bar:"], cs_unit), (["Bar:"], False))

0 comments on commit 402c806

Please sign in to comment.