blob: 5ad060cd2f0b74f5008dd22bff8a58ef20436d2a (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
|
import os
import re
import sys
def check_encoding(encoding, scan_dir, regex_pattern):
fname = None
try:
assert encoding in ['ascii', 'utf-8'], "unexpected encoding"
cmp = re.compile(regex_pattern)
for root, dirs, files in os.walk(scan_dir):
fname = root
cmp_list = [f for f in files if cmp.search(f) is not None]
for f in cmp_list:
fname = os.path.join(root, f)
with open(fname, mode='rb') as test_file:
btext = test_file.read()
# check encoding
btext.decode(encoding=encoding, errors="strict")
if encoding == "utf-8" and btext.startswith(b'\xEF\xBB\xBF'):
raise ValueError("unexpected BOM in file")
# check strict CRLF line-ending
LF = btext.count(b'\r')
CRLF = btext.count(b'\r\n')
assert LF >= CRLF, "CRLF logic error"
if CRLF != LF:
raise ValueError("CRLF violation: found {} LF characters".format(LF - CRLF))
except Exception as err:
print("ERROR with [{}]: {}".format(fname, err))
return -1
else:
return 0
if __name__ == "__main__":
# python check-sources.sh.py 'ascii' '.' '.*\.(cpp|h)$'
res = check_encoding(sys.argv[1], sys.argv[2], sys.argv[3])
sys.exit(0 if res == 0 else -1)
|