From 0352e09dfd79dc39e43074f57aaf83be3b22c3ad Mon Sep 17 00:00:00 2001 From: Alberto Soragna Date: Tue, 18 Oct 2022 07:35:54 +0900 Subject: [PATCH 1/2] various fixes to ament-copyright and add --allowed-licenses argument Signed-off-by: Alberto Soragna --- ament_copyright/ament_copyright/main.py | 44 ++++++++++---- ament_copyright/ament_copyright/parser.py | 72 ++++++++++++++--------- ament_copyright/test/test_parser.py | 10 ++-- 3 files changed, 83 insertions(+), 43 deletions(-) diff --git a/ament_copyright/ament_copyright/main.py b/ament_copyright/ament_copyright/main.py index 3077ca1c3..833ba11e7 100644 --- a/ament_copyright/ament_copyright/main.py +++ b/ament_copyright/ament_copyright/main.py @@ -29,11 +29,11 @@ from ament_copyright import UNKNOWN_IDENTIFIER from ament_copyright.crawler import get_files from ament_copyright.parser import get_comment_block +from ament_copyright.parser import get_copyright_information_regex from ament_copyright.parser import get_index_of_next_line from ament_copyright.parser import parse_file from ament_copyright.parser import scan_past_coding_and_shebang_lines from ament_copyright.parser import scan_past_empty_lines -from ament_copyright.parser import search_copyright_information def main(argv=sys.argv[1:]): @@ -61,6 +61,13 @@ def main(argv=sys.argv[1:]): default=[], dest='excludes', help='The filenames to exclude.') + parser.add_argument( + '--allowed-licenses', + metavar='license name', + nargs='+', + default=[], + dest='allowed_licenses', + help='List of valid licenses.') group = parser.add_mutually_exclusive_group() group.add_argument( '--add-missing', @@ -112,9 +119,20 @@ def main(argv=sys.argv[1:]): if not filenames: print('No repository roots and files found') + # if user has specified a list of allowed licenses, use only those + if args.allowed_licenses: + allowed_licenses = {} + for license_name in args.allowed_licenses: + if license_name in licenses: + allowed_licenses[license_name] = licenses[license_name] + else: + assert False, 'Requested unknown license: ' + license_name + else: + allowed_licenses = licenses + file_descriptors = {} for filename in sorted(filenames): - file_descriptors[filename] = parse_file(filename) + file_descriptors[filename] = parse_file(filename, allowed_licenses) if args.add_missing: name = names.get(args.add_missing[0], args.add_missing[0]) @@ -274,7 +292,7 @@ def add_copyright_year(file_descriptors, new_years, verbose): file_descriptor = file_descriptors[path] # ignore files which do not have a header - if not getattr(file_descriptor, 'copyright_identifier', None): + if not getattr(file_descriptor, 'copyright_identifiers', None): continue index = scan_past_coding_and_shebang_lines(file_descriptor.content) @@ -287,10 +305,13 @@ def add_copyright_year(file_descriptors, new_years, verbose): else: block = file_descriptor.content[index:] block_offset = 0 - copyright_span, years_span, name_span = search_copyright_information(block) - if copyright_span is None: + + regex = get_copyright_information_regex() + match = regex.search(block) + if not match: assert False, "Could not find copyright information in file '%s'" % \ file_descriptor.path + years_span, _ = match.span(1), match.span(2) # skip if all new years are already included years = get_years_from_string(block[years_span[0]:years_span[1]]) @@ -311,12 +332,13 @@ def add_copyright_year(file_descriptors, new_years, verbose): file_descriptor.content[global_years_span[1]:] # output beginning of file for debugging - # index = global_years_span[0] - # for _ in range(3): - # index = get_index_of_next_line(content, index) - # print('<<<') - # print(content[:index - 1]) - # print('>>>') + if verbose: + index = global_years_span[0] + for _ in range(3): + index = get_index_of_next_line(content, index) + print('<<<') + print(content[:index - 1]) + print('>>>') with open(file_descriptor.path, 'w', encoding='utf-8') as h: h.write(content) diff --git a/ament_copyright/ament_copyright/parser.py b/ament_copyright/ament_copyright/parser.py index ef19d99a7..6d2740e83 100644 --- a/ament_copyright/ament_copyright/parser.py +++ b/ament_copyright/ament_copyright/parser.py @@ -52,7 +52,7 @@ def read(self): with open(self.path, 'r', encoding='utf-8') as h: self.content = h.read() - def parse(self): + def parse(self, allowed_licenses): raise NotImplementedError() def identify_license(self, content, license_part, licenses=None): @@ -104,7 +104,7 @@ def identify_copyright(self): else: self.copyright_identifiers.append(UNKNOWN_IDENTIFIER) - def parse(self): + def parse(self, allowed_licenses): self.read() if not self.content: return @@ -113,36 +113,48 @@ def parse(self): index = scan_past_coding_and_shebang_lines(self.content) index = scan_past_empty_lines(self.content, index) - # get first comment block without leading comment tokens - block, _ = get_comment_block(self.content, index) - copyrights, remaining_block = search_copyright_information(block) - - if len(copyrights) == 0: - block = get_multiline_comment_block(self.content, index) + def parse_comment_block(block): copyrights, remaining_block = search_copyright_information(block) - - if len(copyrights) == 0: - return - - self.copyrights = copyrights + self.copyrights += copyrights + # if we haven't found a license yet, try to identify it in this block + # in case of files with multiple licenses, we only consider the first one found + # an example is if you copy a file with an existing license and then you prepend yours + if self.license_identifier == UNKNOWN_IDENTIFIER: + license_text = '{copyright}' + remaining_block + self.identify_license(license_text, 'file_headers', allowed_licenses) + + # parse all single-line comment blocks for copyright information + tmp_index = index + while True: + block, tmp_index = get_comment_block(self.content, tmp_index) + if block: + parse_comment_block(block) + else: + break + + # parse all multi-line comment blocks for copyright information + tmp_index = index + while True: + block, tmp_index = get_multiline_comment_block(self.content, tmp_index) + if block: + parse_comment_block(block) + else: + break self.identify_copyright() - content = '{copyright}' + remaining_block - self.identify_license(content, 'file_headers') - class ContributingDescriptor(FileDescriptor): def __init__(self, path): super(ContributingDescriptor, self).__init__(CONTRIBUTING_FILETYPE, path) - def parse(self): + def parse(self, allowed_licenses): self.read() if not self.content: return - self.identify_license(self.content, 'contributing_files') + self.identify_license(self.content, 'contributing_files', allowed_licenses) class LicenseDescriptor(FileDescriptor): @@ -150,15 +162,15 @@ class LicenseDescriptor(FileDescriptor): def __init__(self, path): super(LicenseDescriptor, self).__init__(LICENSE_FILETYPE, path) - def parse(self): + def parse(self, allowed_licenses): self.read() if not self.content: return - self.identify_license(self.content, 'license_files') + self.identify_license(self.content, 'license_files', allowed_licenses) -def parse_file(path): +def parse_file(path, allowed_licenses): filetype = determine_filetype(path) if filetype == SOURCE_FILETYPE: d = SourceDescriptor(path) @@ -168,7 +180,7 @@ def parse_file(path): d = LicenseDescriptor(path) else: return None - d.parse() + d.parse(allowed_licenses) return d @@ -180,9 +192,7 @@ def determine_filetype(path): return SOURCE_FILETYPE -def search_copyright_information(content): - if content is None: - return [], content +def get_copyright_information_regex(): # regex for matching years or year ranges (yyyy-yyyy) separated by colons year = r'\d{4}' year_range = '%s-%s' % (year, year) @@ -191,6 +201,13 @@ def search_copyright_information(content): r'copyright(?:\s+\(c\))?\s+(%s(?:,\s*%s)*),?\s+([^\n\r]+)$' % \ (year_or_year_range, year_or_year_range) regex = re.compile(pattern, re.DOTALL | re.MULTILINE | re.IGNORECASE) + return regex + + +def search_copyright_information(content): + if content is None: + return [], content + regex = get_copyright_information_regex() copyrights = [] while True: @@ -297,6 +314,7 @@ def get_multiline_comment_block(content, index): start_match = start_regex.search(content, index) if not start_match: continue + comment_token = start_match.group(1) start_index = start_match.start(1) # find the first match of the comment end token @@ -323,8 +341,8 @@ def get_multiline_comment_block(content, index): # Single-line header does not have a common prefix to strip out lines = prefixed_lines - return '\n'.join(lines) - return None + return '\n'.join(lines), start_index + len(comment_token) + 1 + return None, index def scan_past_empty_lines(content, index): diff --git a/ament_copyright/test/test_parser.py b/ament_copyright/test/test_parser.py index f36453e92..d42230da3 100644 --- a/ament_copyright/test/test_parser.py +++ b/ament_copyright/test/test_parser.py @@ -271,7 +271,7 @@ def test_get_comment_block_slashes2(): """ index = 0 index = scan_past_empty_lines(commented_content, index) - block = get_multiline_comment_block(commented_content, index) + block, _ = get_multiline_comment_block(commented_content, index) assert block is not None assert block == 'ddd' @@ -320,7 +320,7 @@ def test_get_multiline_comment_block_cstyle(): """ index = 0 index = scan_past_empty_lines(commented_content, index) - block = get_multiline_comment_block(commented_content, index) + block, _ = get_multiline_comment_block(commented_content, index) assert block is not None assert block == '\n'.join(['aaa', 'bbb', 'ccc']) @@ -339,7 +339,7 @@ def test_get_multiline_comment_block_cstyle2(): """ index = 0 index = scan_past_empty_lines(commented_content, index) - block = get_multiline_comment_block(commented_content, index) + block, _ = get_multiline_comment_block(commented_content, index) assert block is not None assert block == '\n'.join(['aaa', 'bbb', 'ccc']) @@ -355,7 +355,7 @@ def test_get_multiline_comment_block_xmlstyle(): """ index = 0 index = scan_past_empty_lines(commented_content, index) - block = get_multiline_comment_block(commented_content, index) + block, _ = get_multiline_comment_block(commented_content, index) assert block is not None assert block == '\n'.join(['aaa', 'bbb', 'ccc']) @@ -371,6 +371,6 @@ def test_get_multiline_comment_block_xmlstyle_prefixed(): """ index = 0 index = scan_past_empty_lines(commented_content, index) - block = get_multiline_comment_block(commented_content, index) + block, _ = get_multiline_comment_block(commented_content, index) assert block is not None assert block == '\n'.join(['aaa', 'bbb', 'ccc']) From b85188ebe68461391a12ffdcb058fbb77de3c920 Mon Sep 17 00:00:00 2001 From: Alberto Soragna Date: Tue, 18 Oct 2022 07:40:22 +0900 Subject: [PATCH 2/2] remove un-needed operation Signed-off-by: Alberto Soragna --- ament_copyright/ament_copyright/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ament_copyright/ament_copyright/main.py b/ament_copyright/ament_copyright/main.py index 833ba11e7..fa8dc9846 100644 --- a/ament_copyright/ament_copyright/main.py +++ b/ament_copyright/ament_copyright/main.py @@ -311,7 +311,7 @@ def add_copyright_year(file_descriptors, new_years, verbose): if not match: assert False, "Could not find copyright information in file '%s'" % \ file_descriptor.path - years_span, _ = match.span(1), match.span(2) + years_span = match.span(1) # skip if all new years are already included years = get_years_from_string(block[years_span[0]:years_span[1]])