diff options
-rw-r--r-- | .pre-commit-config.yaml | 9 | ||||
-rw-r--r-- | README.md | 7 | ||||
-rwxr-xr-x | check_copyright.py | 182 |
3 files changed, 198 insertions, 0 deletions
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index db9a019..49f0ccd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,3 +8,12 @@ repos: rev: v10.0.1 hooks: - id: clang-format + + - repo: local + hooks: + - id: copyright-check + name: copyright-check + description: Checks for a valid copyright header + entry: check_copyright.py + language: script + types_or: [c, c++] @@ -138,6 +138,13 @@ We are open for contributions. * Code should be formatted with clang-format using the project's .clang-format configuration. +We use [pre-commit](https://pre-commit.com/) for local git hooks to help ensure +code quality and standardization. To install the hooks run the following +commands in the root of the repository: + + $ pip install pre-commit + $ pre-commit install + Contributors are expected to abide by the [freedesktop.org code of conduct](https://www.freedesktop.org/wiki/CodeOfConduct/). diff --git a/check_copyright.py b/check_copyright.py new file mode 100755 index 0000000..855fe82 --- /dev/null +++ b/check_copyright.py @@ -0,0 +1,182 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2022 Arm Limited. +# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
# IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""
This script will check the files passed as arguments for a valid copyright
header. It will run against all changed files as part of the pre-commit hook
after running `pre-commit install` in the root of the repository. It can also
be run manually e.g. `./check_copyright.py file1 dir1/file2`

A valid copyright header includes the word 'Copyright' followed by the
relevant copyright years. It also requires an SPDX licence id string identifying
the license as MIT, as follows: 'SPDX-License-Identifier: MIT'

Only the first MAX_SEARCH_LINES lines of each file are considered, both when
checking the header and when attempting to fix the copyright years.
"""

import datetime
import itertools
import re
import sys
from typing import List, Tuple

# Set the limit on number of lines at top of file to search
MAX_SEARCH_LINES = 20
CURRENT_YEAR = datetime.datetime.now().year

# Years at or before 1900 are treated as noise (not copyright years).
_EARLIEST_YEAR = 1901

# Regex for positive copyright string
COPYRIGHT_YEAR_REGEX = re.compile(
    r".*\bCOPYRIGHT.*%s.*" % str(CURRENT_YEAR), re.IGNORECASE
)

# Regex for positive SPDX id string
SPDX_REGEX = re.compile(r".*SPDX-License-Identifier: MIT.*", re.IGNORECASE)

# To match eg: "Copyright (C) 2014-2021"
# NOTE(review): the greedy ".*" extends the match to the LAST digit, comma or
# closing parenthesis on the line, so text such as "2019, Foo Inc. (UK)" is
# swallowed by the match and dropped when the notice is rewritten.
PATTERN_STRING = r"\bCopyright\b.*[0-9,)]"
PATTERN_COPYRIGHT = re.compile(PATTERN_STRING, re.IGNORECASE)


def _format_span(first: int, last: int) -> str:
    """Render one run of consecutive years as "YYYY" or "YYYY-YYYY"."""
    if first == last:
        return "%d" % first
    return "%d-%d" % (first, last)


def generate_years_string(years: List[int]) -> str:
    """
    Create a compacted string representation of a list of years.

    E.g. [1991, 2001, 2002, 2003, 2006, 2007] becomes "1991, 2001-2003,
    2006-2007"

    The input is sorted and de-duplicated first, so callers do not have to
    pre-normalise it. An empty list yields an empty string.
    """
    ordered = sorted(set(years))
    if not ordered:
        return ""

    spans = []
    span_start = previous = ordered[0]
    for year in ordered[1:]:
        if year != previous + 1:
            # The consecutive run ended at `previous`; flush it and start anew.
            spans.append(_format_span(span_start, previous))
            span_start = year
        previous = year
    spans.append(_format_span(span_start, previous))

    return ", ".join(spans)


def parse_years_string(s: str) -> List[int]:
    """
    Given the string "1999, 2001-2005" this function returns the list:
    [1999, 2001, 2002, 2003, 2004, 2005]

    Only years later than 1900 and no later than CURRENT_YEAR are kept; the
    result is sorted and contains no duplicates.
    """
    # Stand-alone numbers, i.e. numbers that are not part of an "A-B" range.
    years = {int(x) for x in re.findall(r"(?<![-\d])\d+(?![-\d])", s)}

    for first, last in re.findall(r"(\d+)-(\d+)", s):
        # Clamp before expanding so a bogus range such as "1-999999999999"
        # cannot materialise billions of integers that are discarded anyway.
        low = max(int(first), _EARLIEST_YEAR)
        high = min(int(last), CURRENT_YEAR)
        years.update(range(low, high + 1))

    return sorted(y for y in years if _EARLIEST_YEAR <= y <= CURRENT_YEAR)


def update_header(filename: str) -> None:
    """
    Updates the Copyright header in 'filename' to hold the correct years.

    The first text matching PATTERN_COPYRIGHT is replaced by
    "Copyright (c) <years>", where <years> is the existing years plus
    CURRENT_YEAR. The file is left untouched when there is no such text, when
    the match starts beyond the first MAX_SEARCH_LINES lines (it is then not
    the header that the checker inspects), or when nothing would change.
    """
    # newline="" disables newline translation so that the file's original
    # line endings (e.g. CRLF) survive the rewrite.
    with open(filename, "r+", encoding="utf-8", newline="") as file_handle:
        file_data = file_handle.read()

        copyright_match = PATTERN_COPYRIGHT.search(file_data)
        if copyright_match is None:
            return

        # Zero-based line index of the match == number of newlines before it.
        match_line = file_data.count("\n", 0, copyright_match.start())
        if match_line >= MAX_SEARCH_LINES:
            return

        notice_years = parse_years_string(copyright_match.group(0))
        if not notice_years or notice_years[-1] != CURRENT_YEAR:
            notice_years.append(CURRENT_YEAR)

        notice = "Copyright (c) %s" % generate_years_string(notice_years)
        updated = (
            file_data[: copyright_match.start()]
            + notice
            + file_data[copyright_match.end():]
        )
        if updated == file_data:
            return

        file_handle.seek(0)
        file_handle.write(updated)
        file_handle.truncate()


def _scan_header(path: str) -> Tuple[bool, bool]:
    """Return (copyright_found, spdx_found) for the top lines of 'path'."""
    copyright_found = False
    spdx_found = False

    with open(path, encoding="utf-8") as f:
        # islice stops after exactly MAX_SEARCH_LINES lines.
        for line in itertools.islice(f, MAX_SEARCH_LINES):
            if COPYRIGHT_YEAR_REGEX.match(line):
                copyright_found = True
            if SPDX_REGEX.match(line):
                spdx_found = True

    return copyright_found, spdx_found


def main(argv: List[str]) -> int:
    """
    Check every file named in 'argv' and report the offending ones on stderr.

    Files lacking a copyright line with the current year get an attempted
    in-place fix via update_header(). Returns 1 if any file failed either the
    copyright or the SPDX check, 0 otherwise.
    """
    bad_copyright_files = []
    bad_spdx_files = []

    for changed_file in argv:
        copyright_found, spdx_found = _scan_header(changed_file)

        if not copyright_found:
            bad_copyright_files.append(changed_file)
            update_header(changed_file)
        if not spdx_found:
            bad_spdx_files.append(changed_file)

    if bad_copyright_files:
        print(
            "The following files did not have a valid copyright header: "
            + str(bad_copyright_files)
            + "\nAn attempted fix may have been made please check the files and re-commit",
            file=sys.stderr,
        )

    if bad_spdx_files:
        print(
            "The following files do not have a valid SPDX licence identifier: "
            + str(bad_spdx_files)
            + "\nPlease add the identifier as follows 'SPDX-License-Identifier: MIT'",
            file=sys.stderr,
        )

    return 1 if (bad_copyright_files or bad_spdx_files) else 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))