fix-copyright.py 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
  1. #!/usr/bin/env python
  2. from subprocess import check_output as run
  3. from datetime import datetime
  4. from itertools import groupby
  5. from operator import itemgetter
  6. import re
  7. import magic
  8. def authors(filename):
  9. log = run(['git', 'log', '--follow',
  10. '--date=short','--format=%aN%x09%ad', filename],
  11. universal_newlines=True)
  12. for line in log.splitlines():
  13. author, date = line.split('\t')
  14. if author != 'fix-copyright.py':
  15. yield author, datetime.strptime(date, '%Y-%m-%d')
  16. def new_copyright(filename, previous):
  17. def f():
  18. au = list(authors(filename))
  19. alldates = map(itemgetter(1), au)
  20. aup = sorted(au + map(lambda a: (a, None), previous), key=itemgetter(0))
  21. for author, records in groupby(aup, itemgetter(0)):
  22. dates = filter(None, map(itemgetter(1), records))
  23. if not dates: dates = alldates
  24. start = min(dates)
  25. end = max(dates)
  26. fmt = '{0}' if start.year == end.year else '{0}-{1}'
  27. line = 'Copyright ' + fmt.format(start.year, end.year) + ' ' + author
  28. key = (start, author)
  29. yield key, line
  30. return map(itemgetter(1), sorted(f()))
  31. def fix_copyright(filename):
  32. # Find copyright block in original file
  33. prefix = set()
  34. names = []
  35. lines = []
  36. with open(filename, 'r') as f:
  37. content = list(f)
  38. for i, line in enumerate(content[:15]):
  39. m = re.match(r'^(?P<prefix>\W*)(\(c\))?\s*?copyright\s*(\(c\))?\s+\d{4}(\s*-\s*\d{4})?\s+(?P<name>.+?)\s*$', line, re.IGNORECASE)
  40. if m:
  41. d = m.groupdict()
  42. prefix.add(d['prefix'])
  43. lines.append(i)
  44. names.append(d['name'].strip())
  45. if len(prefix) != 1:
  46. print 'Not found:', filename
  47. return
  48. prefix = list(prefix)[0]
  49. print filename
  50. new = iter(new_copyright(filename, names))
  51. with open(filename, 'w') as f:
  52. for i, line in enumerate(content):
  53. if i in lines:
  54. for repl in new:
  55. print >>f, prefix + repl
  56. else:
  57. print >>f, line,
  58. pass
  59. def all_files():
  60. ls = run(['git', 'ls-files'], universal_newlines=True)
  61. for filename in ls.splitlines():
  62. if magic.from_file(filename, mime=True).split('/')[0] == 'text':
  63. yield filename
  64. for f in all_files():
  65. fix_copyright(f)