390
391
# use search() to allow leading junk words
391
392
# Pattern that extracts the copyright statement following a copyright mark.
#
# Accepted marks (case-insensitive):
#   * "Copyright ©" / "Copyr. ©"
#   * "© Copyright" / "© Copyr." followed by white space
#   * "Copyright", "Copyright:" or "Copyr." followed by white space
#   * a bare "©"
# The named group "copyright" captures the rest of the line and must start
# with a non-space character, so a mark with nothing after it does not match.
# The pattern is not anchored at the start, so text before the mark is
# tolerated when it is applied with search().
#
# Only ONE set of alternatives may be present: defining the named group
# "copyright" twice in the same pattern makes re.compile() raise an error.
re_copyright_line = re.compile(r'''
        (?:(?:Copyright|Copyr\.)\s*©\s*|
        ©\s*(?:Copyright|Copyr\.)\s+|
        (?:Copyright:?|Copyr\.)\s+|
        ©\s*)(?P<copyright>[^\s].*)$
        ''', re.IGNORECASE | re.VERBOSE)
397
399
def clean_copyright(line):
437
439
(?:Copyright|Copyr\.|\(C\)|©|\\\(co) # fake )
438
440
''', re.IGNORECASE | re.VERBOSE)
442
# a matching line is excluded from being identified as a copyright line.
440
443
re_copyright_mark_exclude = re.compile(r'''(?:
442
Copyright\s+holder| # copyright holders ...
443
copyright\s+and\s+license| # copyright and license
444
Copyright[!-')-~]| # no coding variables
445
Copyright:?$| # ... copyright
445
[^h][-+*/_a-su-z0-9]\(C\)| # C MACRO (but Copyright(C) is not included)
446
446
if\s+\(C\)| # C code
447
^modified\s+version| # ... of Copyright
448
[^h][-+*/_a-su-z0-9]\(C\)| # C MACRO (but Copyright(C) is OK)
447
switch\s+\(C\)| # C code
449
448
(?:def|if|return)\s.*\(C\)| # Python/C
450
switch\s+\(C\)| # C code
451
^This\s # This ... Copyright
449
/Copyright| # file name
450
Copyright[^\s(:]| # text or variable name
452
Copyright\s+notice| # text
453
Copyright\s+holder| # text
454
Copyright\s+section| # text
455
Copyright\s+stanza| # text
456
copyright\s+file| # text
457
copyright\s+and\s+license| # text
458
of\s+copyright| # text
459
their\s+copyright| # text
460
the\s+copyright| # text
461
^This\s.*copyright # text
452
462
)''', re.IGNORECASE | re.VERBOSE)
454
464
re_copyright_nomark_year = re.compile(r'''
529
540
^This\sfile\sis\smaintained| # Automake files
530
541
^Do\sall\sthe\swork\sfor\sAutomake| # aclocal.m4 Automake
531
542
^Originally\s+written\s+by\+.{10,20}?\s+Please\s+send\spatches| # config.guess
543
^Please\s+note\s+that\s+the| # Makefile.in.in (gettext)
532
544
^Please\s+send\s+patches\s+with| # config.sub
533
545
^Please\s+send\s+patches\s+to| # config.sub, config.guess
534
546
^if\s+not\s+1,\s+datestamp\s+to\s+the\s+version\s+number| # configure.ac
699
713
debmake.debug.debug('Da: {}'.format(line), type='a')
700
714
if copyright_data == {} and license_lines == []:
701
715
# no copyright and no license
702
copyright_data = {'__NO_COPYRIGHT_NOR_LICENSE__':(0, 0)}
716
copyright_data = {'__NO_COPYRIGHT_NOR_LICENSE__':(9999, 0)}
703
717
elif copyright_data == {}:
704
718
# no copyright but with license (maybe __UNKNOWN__ license)
705
copyright_data = {'__NO_COPYRIGHT__':(0, 0)}
719
copyright_data = {'__NO_COPYRIGHT__':(9999, 0)}
706
720
return (copyright_data, license_lines)
708
722
###################################################################
726
740
return (copyright_data, license_lines)
728
742
###################################################################
743
# Check autogenerated files
744
###################################################################
745
# File paths matching this pattern are treated as auto-generated build-system
# files (Autotools / gettext).
#
# Every alternative is anchored at both ends, so the whole path must match;
# presumably the path is given relative to the top of the source tree
# (NOTE(review): confirm against the caller).
# Literal dots are escaped so that e.g. "aclocal.m4" does not also match
# "aclocalxm4".  Matching is case-insensitive.
re_autofiles = re.compile(r'''(
        ^Makefile\.in$|             # Autotools
        ^.*/Makefile\.in$|          # Autotools
        ^aclocal\.m4$|              # Autotools
        ^build-aux/.*$|             # Autotools
        ^compile$|                  # Autotools
        ^config\.guess$|            # Autotools
        ^config\.status$|           # Autotools
        ^config\.sub$|              # Autotools
        ^config\.rpath$|            # Autotools
        ^configure$|                # Autotools
        ^depcomp$|                  # Autotools
        ^install-sh$|               # Autotools
        ^libltdl/.*$|               # Autotools
        ^libtool$|                  # Autotools
        ^ltmain\.sh$|               # Autotools
        ^missing$|                  # Autotools
        ^py-compile$|               # Autotools
        ^test-driver$|              # Autotools
        ^po/Makefile$|              # Autotools (gettext)
        ^po/Makefile\.in$|          # Autotools (gettext)
        ^po/Makefile\.in\.in$|      # Autotools (gettext)
        ^po/Makevars$|              # Autotools (gettext)
        ^m4/.*$                     # Autotools (no | at the end)
        )''', re.IGNORECASE | re.VERBOSE)
771
###################################################################
729
772
# Check all appearing copyright and license texts
730
773
###################################################################
731
774
# data[*][0]: license name ID: licenseid
733
776
# data[*][2]: copyright holder info (data=dictionary): copyright_lines
734
777
# data[*][3]: license text (original: list of lines): license_lines
735
778
###################################################################
736
def check_all_licenses(files, encoding='utf-8', mode=0):
779
def check_all_licenses(files, encoding='utf-8', mode=0, pedantic=False):
738
781
license_cache = {} # hashtable for quicker license scan
782
md5hash = hashlib.md5()
783
licensetext0 = '\n Auto-generated file under the permissive license.'
784
md5hash.update(licensetext0.encode())
785
md5hashkey0 = md5hash.hexdigest()
786
license_cache[md5hashkey0] = ('__AUTO_PERMISSIVE__', licensetext0, True)
739
787
if len(files) == 0:
740
788
print('W: check_all_licenses(files) should have files', file=sys.stderr)
741
789
if sys.hexversion >= 0x03030000: # Python 3.3 ...
746
794
if sys.hexversion >= 0x03030000: # Python 3.3 ...
747
795
print('.', file=sys.stderr, end='', flush=True)
748
796
(copyright_data, license_lines) = check_license(file, encoding=encoding)
797
debmake.debug.debug('Dc: copyright_data = {}'.format(copyright_data), type='c')
749
798
norm_text = debmake.lc.normalize(license_lines)
750
799
md5hash = hashlib.md5()
751
800
md5hash.update(norm_text.encode())
752
801
md5hashkey = md5hash.hexdigest()
753
802
if md5hashkey in license_cache.keys():
754
(licenseid, licensetext) = license_cache[md5hashkey]
756
(licenseid, licensetext) = debmake.lc.lc(norm_text, license_lines, mode)
757
license_cache[md5hashkey] = (licenseid, licensetext)
803
(licenseid, licensetext, permissive) = license_cache[md5hashkey]
805
(licenseid, licensetext, permissive) = debmake.lc.lc(norm_text, license_lines, mode)
806
license_cache[md5hashkey] = (licenseid, licensetext, permissive)
807
if not pedantic and permissive and re_autofiles.search(file):
808
debmake.debug.debug('Dl: LICENSE ID = __AUTO_PERMISSIVE__ from {}'.format(licenseid), type='l')
809
licenseid = '__AUTO_PERMISSIVE__'
810
licensetext = licensetext0
811
md5hashkey = md5hashkey0
813
debmake.debug.debug('Dl: LICENSE ID = {}'.format(licenseid), type='l')
758
814
adata.append((md5hashkey, copyright_data, licenseid, licensetext, file))
760
816
print('W: check_all_licenses on non-existing file: {}'.format(file), file=sys.stderr)
761
817
for c in copyright_data.keys():
762
debmake.debug.debug('Dc: {}-{}: {}'.format(copyright_data[c][0], copyright_data[c][0], c), type='c')
818
debmake.debug.debug('Dc: {}-{}: {}'.format(copyright_data[c][0], copyright_data[c][1], c), type='c')
763
819
for l in license_lines:
764
820
debmake.debug.debug('Dl: {}'.format(l), type='l')
765
821
print('\nI: check_all_licenses completed for {} files.'.format(len(files)), file=sys.stderr)
787
843
sortkey = '{0:03} {1:02} {2} {3}'.format(max(0, 1000 - len(bunched_files)), min(99, len(licenseid)), licenseid, md5hashkey)
788
844
bunched_files = sorted(bunched_files)
789
845
copyright_list = []
790
for name, (year_min, year_max) in copyright_data.items():
846
for name, (year_min, year_max) in bunched_copyright_data.items():
791
847
copyright_list.append((year_min, year_max, name))
792
848
copyright_list = sorted(copyright_list)
793
849
bdata.append((sortkey, bunched_files, copyright_list, licenseid, licensetext))
815
871
cdata.append((licenseid, licensetext, bunched_files, copyright_lines))
818
def check_copyright(files, mode=0, encoding='utf-8'):
874
def check_copyright(files, mode=0, encoding='utf-8', pedantic=False):
819
875
print('I: check_all_licenses', file=sys.stderr)
820
adata = check_all_licenses(files, encoding=encoding, mode=mode)
876
adata = check_all_licenses(files, encoding=encoding, mode=mode, pedantic=pedantic)
821
877
print('I: bunch_all_licenses', file=sys.stderr)
822
878
bdata = bunch_all_licenses(adata)
823
879
print('I: format_all_licenses', file=sys.stderr)
860
916
#######################################################################
862
918
#######################################################################
863
def copyright(package_name, license_file_masks, data, xml_html_files, binary_files, huge_files, mode=0):
919
def copyright(package_name, license_file_masks, data, xml_html_files, binary_files, huge_files, mode=0, tutorial=False):
864
920
# mode: 0: not -c, 1: -c simple, 2: -cc normal, 3: -ccc extensive
865
921
# -1: -cccc debug simple, -2 -ccccc debug normal -3 -cccccc debug extensive
866
922
# make text to print
868
924
Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
869
925
Upstream-Name: {}
870
926
Source: <url://example.com>
927
'''.format(package_name)
872
930
### Uncomment the following 2 lines to enable uscan to exclude non-DFSG components
873
931
### Files-Excluded: command/non-dfsg.exe
874
932
### docs/source/javascripts/jquery-1.7.1.min.js
884
942
### Please avoid to pick license terms that are more restrictive than the
885
943
### packaged work, as it may make Debian's contributions unacceptable upstream.
887
'''.format(package_name)
888
948
for (licenseid, licensetext, files, copyright_lines) in data:
890
950
text += 'Files: {}\n'.format('\n '.join(files))