Skip to content

Commit

Permalink
Merge pull request #602 from carmenbianca/special-parse-endings
Browse files Browse the repository at this point in the history
Strip special non-comment-style endings from regex matches
  • Loading branch information
carmenbianca committed Oct 25, 2022
2 parents a095aaa + 5769d18 commit d9c0d27
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 7 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ The versions follow [semantic versioning](https://semver.org).

- Apache Velocity Template (Extensions: `.vm`, `.vtl`)

- Some special endings are always stripped from copyright and licensing
statements (#602):

- `">` (and variations such as `'>`, `" >`, and `"/>`)
- `] ::`

### Changed

- Updated PyPI development status to 'production/stable' (#381)
Expand Down
34 changes: 27 additions & 7 deletions src/reuse/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from difflib import SequenceMatcher
from gettext import gettext as _
from hashlib import sha1
from itertools import chain
from os import PathLike
from pathlib import Path
from typing import BinaryIO, Iterator, List, Optional, Set
Expand All @@ -42,14 +43,32 @@
_LOGGER = logging.getLogger(__name__)
_LICENSING = Licensing()

# REUSE-IgnoreStart

# Regex fragment describing the trailing text that is stripped from the end of
# a copyright or licensing statement. It is a concatenation of optional,
# repeatable groups -- one per distinct ending -- anchored to the end of the
# string with `$`.
#
# NOTE: the groups are collected in a set so that endings shared by several
# comment styles appear only once; as a consequence, the relative order of the
# groups inside the resulting pattern is unspecified.
_END_PATTERN = r"{}$".format(  # pylint: disable=consider-using-f-string
    "".join(
        {
            r"(?:{})*".format(item)  # pylint: disable=consider-using-f-string
            for item in chain(
                # Closing delimiters of every comment style that defines a
                # multi-line end marker, escaped so they match literally.
                (
                    re.escape(style.MULTI_LINE.end)
                    for style in _all_style_classes()
                    if style.MULTI_LINE.end
                ),
                # These are special endings which do not belong to specific
                # comment styles, but which we want to nonetheless strip away
                # while parsing. They are already regular expressions, so they
                # are deliberately not escaped.
                [
                    # ex: <tag value="Copyright Jane Doe">
                    r'"\s*/*>',
                    r"'\s*/*>",
                    # ex: [SPDX-License-Identifier: GPL-3.0-or-later] ::
                    r"\]\s*::",
                ],
            )
        }
    )
)
Expand All @@ -75,14 +94,12 @@
]

_COPYRIGHT_STYLES = {
# REUSE-IgnoreStart
"spdx": "SPDX-FileCopyrightText:",
"spdx-symbol": "SPDX-FileCopyrightText: ©",
"string": "Copyright",
"string-c": "Copyright (C)",
"string-symbol": "Copyright ©",
"symbol": "©",
# REUSE-IgnoreEnd
}

# Amount of bytes that we assume will be big enough to contain the entire
Expand Down Expand Up @@ -492,3 +509,6 @@ def detect_line_endings(text: str) -> str:
if line_ending in text:
return line_ending
return os.linesep


# REUSE-IgnoreEnd
21 changes: 21 additions & 0 deletions tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,27 @@ def test_extract_sameline_multiline():
assert result.copyright_lines == {"SPDX-FileCopyrightText: Jane Doe"}


def test_extract_special_endings():
    """Strip some non-comment-style endings from the end of copyright and
    licensing information.

    The input exercises the quote-and-angle-bracket endings (double and single
    quotes, with and without whitespace or a slash before the ``>``) as well
    as the ``] ::`` ending.
    """
    text = cleandoc(
        """
        <tag value="Copyright 2019 Jane Doe">
        <tag value="Copyright 2019 John Doe" >
        <tag value="Copyright 2019 Joe Somebody" />
        <tag value='Copyright 2019 Alice'>
        <tag value='Copyright 2019 Bob' >
        <tag value='Copyright 2019 Eve' />
        [Copyright 2019 Ajnulo] ::
        """
    )
    result = _util.extract_spdx_info(text)
    # Without this guard the loop below would pass vacuously if nothing at all
    # was extracted from the text.
    assert result.copyright_lines, "no copyright lines were extracted"
    for item in result.copyright_lines:
        assert ">" not in item
        assert "] ::" not in item


def test_filter_ignore_block_with_comment_style():
"""Test that the ignore block is properly removed if start and end markers
are in comment style.
Expand Down

0 comments on commit d9c0d27

Please sign in to comment.