summaryrefslogtreecommitdiff
path: root/tools/build_cleaner.py
blob: 04e2089e1103a89da1e118ff8ca5786e52ce1685 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
#!/usr/bin/env python3
# Copyright (c) the JPEG XL Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.


"""build_cleaner.py: Update build files.

This tool keeps certain parts of the build files up to date.
"""

import argparse
import collections
import locale
import os
import re
import subprocess
import sys
import tempfile


def RepoFiles(src_dir):
  """Lists the files tracked by git in src_dir, sorted by path.

  The git executable is taken from the GIT_BIN environment variable and
  defaults to 'git'. The output of "git ls-files" is decoded with the locale's
  preferred encoding and split into one entry per line.
  """
  git_cmd = [os.environ.get('GIT_BIN', 'git'), '-C', src_dir, 'ls-files']
  listing = subprocess.check_output(git_cmd)
  return sorted(listing.decode(locale.getpreferredencoding()).splitlines())

def GetPrefixLibFiles(repo_files, prefix, suffixes=('.h', '.cc', '.ui')):
  """Selects the entries of repo_files under prefix that are source files.

  A path is kept, in its original order, when it starts with `prefix` and ends
  with at least one of the `suffixes`.
  """
  selected = []
  for path in repo_files:
    if not path.startswith(prefix):
      continue
    for ext in suffixes:
      if path.endswith(ext):
        selected.append(path)
        break
  return selected

# Record type grouping the different kinds of sources in libjxl. Fields:
#   dec: decoder and common sources,
#   enc: encoder-only sources,
#   test: tests-only sources,
#   gbench: google benchmark sources,
#   threads: threads library sources,
#   extras: extras library sources,
#   jxl_public_hdrs: libjxl (encoder+decoder) public include/ headers,
#   threads_public_hdrs: threads public include/ headers.
JxlSources = collections.namedtuple('JxlSources', [
    'dec',
    'enc',
    'test',
    'gbench',
    'threads',
    'extras',
    'jxl_public_hdrs',
    'threads_public_hdrs',
])

def SplitLibFiles(repo_files):
  """Partitions the repository files into the JxlSources groups.

  Args:
    repo_files: list of repository-relative file paths.

  Returns:
    A JxlSources tuple with the decoder, encoder, test, benchmark, threads and
    extras sources plus the libjxl and threads public headers.

  Raises:
    ValueError: if lib/jxl/enc_bit_writer.cc or lib/jxl/enc_bit_writer.h did
      not end up in the encoder list and therefore cannot be removed from it.
  """
  # Substrings that mark a path as test-only. _testonly.* files are library
  # code used in tests only.
  test_markers = (
      'testdata.h', 'test_utils.h', '_test.h', '_test.cc',
      '_testonly.h', '_testonly.cc',
  )

  def is_test_only(path):
    return any(marker in path for marker in test_markers)

  jxl_files = GetPrefixLibFiles(repo_files, 'lib/jxl/')
  extras_files = GetPrefixLibFiles(repo_files, 'lib/extras/')

  # Separate the lib/jxl/ files into test-only sources and library sources.
  test_srcs = []
  lib_files = []
  for path in jxl_files:
    if is_test_only(path):
      test_srcs.append(path)
    else:
      lib_files.append(path)

  # Google benchmark sources come from both lib/jxl/ and lib/extras/ and are
  # listed in neither of those groups.
  gbench_srcs = sorted(
      path for path in lib_files + extras_files
      if path.endswith('_gbench.cc'))
  lib_files = [path for path in lib_files if path not in gbench_srcs]

  # Optional codecs are excluded from extras, as are benchmarks and tests.
  optional_codecs = ('/codec_gif', '/codec_apng', '/codec_exr')
  extras_srcs = [
      path for path in extras_files
      if not (path in gbench_srcs or
              is_test_only(path) or
              any(codec in path for codec in optional_codecs))]

  # Encoder-only sources: selected by file name, plus an explicit list.
  enc_srcs = [
      path for path in lib_files
      if os.path.basename(path).startswith(('enc_', 'butteraugli'))]
  enc_srcs += [
      "lib/jxl/encode.cc",
      "lib/jxl/encode_internal.h",
      "lib/jxl/gaborish.cc",
      "lib/jxl/gaborish.h",
      "lib/jxl/huffman_tree.cc",
      "lib/jxl/huffman_tree.h",
      # Only the inlines in linalg.h header are used in the decoder.
      # TODO(deymo): split out encoder only linalg.h functions.
      "lib/jxl/linalg.cc",
      "lib/jxl/optimize.cc",
      "lib/jxl/optimize.h",
      "lib/jxl/progressive_split.cc",
      "lib/jxl/progressive_split.h",
      # TODO(deymo): Add luminance.cc and luminance.h here too. Currently used
      # by aux_out.h.
      # dec_file is not intended to be part of the decoder library, so it is
      # moved to the encoder source set.
      "lib/jxl/dec_file.cc",
      "lib/jxl/dec_file.h",
  ]
  # enc_bit_writer is temporarily kept out of the encoder sources: a lot of
  # decoder source code still needs to be split up into encoder and decoder,
  # and having enc_bit_writer in the decoder allows building a working
  # libjxl_dec library.
  # TODO(lode): remove the dependencies of the decoder on enc_bit_writer and
  # remove enc_bit_writer from the dec_srcs again.
  for borrowed in ("lib/jxl/enc_bit_writer.cc", "lib/jxl/enc_bit_writer.h"):
    enc_srcs.remove(borrowed)
  enc_srcs.sort()

  # Every library file that is not encoder-only belongs to the decoder.
  encoder_only = frozenset(enc_srcs)
  dec_srcs = [path for path in lib_files if path not in encoder_only]

  thread_srcs = [
      path for path in GetPrefixLibFiles(repo_files, 'lib/threads/')
      if not is_test_only(path)]

  # Public headers: those naming a parallel runner go with the threads
  # library, all the others with libjxl.
  public_hdrs = GetPrefixLibFiles(repo_files, 'lib/include/jxl/')
  threads_public_hdrs = [
      hdr for hdr in public_hdrs if '_parallel_runner' in hdr]
  jxl_public_hdrs = sorted(set(public_hdrs).difference(threads_public_hdrs))

  return JxlSources(
      dec=dec_srcs,
      enc=enc_srcs,
      test=test_srcs,
      gbench=gbench_srcs,
      threads=thread_srcs,
      extras=extras_srcs,
      jxl_public_hdrs=jxl_public_hdrs,
      threads_public_hdrs=threads_public_hdrs)


def CleanFile(args, filename, pattern_data_list):
  """Rewrites the regions of a file selected by regular expressions.

  Each entry of pattern_data_list is a (regular expression, new data) pair.
  The expression has a single () group; for every match in the file the text
  captured by that group is replaced with the new data. The pairs are applied
  in order, each one on the result of the previous one, and every expression
  must match at least once.

  Args:
    args: object with the attributes src_dir (directory that filename is
      relative to) and update (whether to write the changes back).
    filename: path of the file to process, relative to args.src_dir.
    pattern_data_list: list of (pattern, data) pairs, possibly empty.

  Returns:
    True if the file needed no changes or if it was updated because
    args.update is set. False if changes are needed and args.update is not
    set; in that case "diff -u" is run to show the pending changes.

  Raises:
    Exception: if one of the patterns does not match anywhere in the file.
  """
  filepath = os.path.join(args.src_dir, filename)
  with open(filepath, 'r') as f:
    src_text = f.read()

  if not pattern_data_list:
    return True

  new_text = src_text
  for pattern, data in pattern_data_list:
    matches = list(re.finditer(pattern, new_text))
    if not matches:
      raise Exception('Pattern not found for %s: %r' % (filename, pattern))
    # Alternate the untouched text between groups with the replacement data.
    pieces = []
    cursor = 0
    for match in matches:
      group_start, group_end = match.span(1)
      pieces += [new_text[cursor:group_start], data]
      cursor = group_end
    pieces.append(new_text[cursor:])
    new_text = ''.join(pieces)

  if new_text == src_text:
    return True

  if not args.update:
    # Check-only mode: show the pending changes as a unified diff against a
    # temporary copy holding the expected contents.
    with tempfile.NamedTemporaryFile(
        mode='w', prefix=os.path.basename(filename)) as new_file:
      new_file.write(new_text)
      new_file.flush()
      diff_cmd = ['diff', '-u', filepath, '--label', 'a/' + filename,
                  new_file.name, '--label', 'b/' + filename]
      subprocess.call(diff_cmd)
    return False

  print('Updating %s' % filename)
  with open(filepath, 'w') as f:
    f.write(new_text)
  return True


def _StripLibPrefix(files):
  """Returns the paths in files with their first len('lib/') characters cut.

  The characters are removed unconditionally, so callers must only pass paths
  that start with 'lib/'.
  """
  return [fn[len('lib/'):] for fn in files]


def _CMakeList(files):
  """Formats files as the body of a CMake set(): one indented path per line."""
  return ''.join('  %s\n' % fn for fn in files)


def _GniList(files):
  """Formats files as the body of a GN list: one quoted path per line."""
  return ''.join('    "%s",\n' % fn for fn in files)


def BuildCleaner(args):
  """Checks, and optionally updates, the file lists in the build files.

  The expected lists are computed from the files tracked by git under
  args.src_dir and compared with the lib/*.cmake files and lib/lib.gni. The
  version numbers in lib/lib.gni are taken from lib/CMakeLists.txt.

  Args:
    args: object with the attributes src_dir (root of the source tree) and
      update (whether to rewrite the build files instead of only checking).

  Returns:
    True if every build file was already up to date or was updated, False if
    at least one file needs changes that were not written.

  Raises:
    ValueError: if one of the version variables is not defined in
      lib/CMakeLists.txt.
    Exception: propagated from CleanFile when an expected list is missing from
      a build file.
  """
  repo_files = RepoFiles(args.src_dir)

  # jxl version
  with open(os.path.join(args.src_dir, 'lib/CMakeLists.txt'), 'r') as f:
    cmake_text = f.read()

  gni_patterns = []
  for varname in ('JPEGXL_MAJOR_VERSION', 'JPEGXL_MINOR_VERSION',
                  'JPEGXL_PATCH_VERSION'):
    # Defined in CMakeLists.txt as "set(varname 1234)"
    match = re.search(r'set\(' + varname + r' ([0-9]+)\)', cmake_text)
    if match is None:
      # Fail with the name of the missing variable instead of an
      # AttributeError on None.
      raise ValueError(
          'Version variable %s not found in lib/CMakeLists.txt' % varname)
    gni_patterns.append((r'"' + varname + r'=([0-9]+)"', match.group(1)))

  jxl_src = SplitLibFiles(repo_files)

  # All the lists are written relative to the lib/ directory.
  dec = _StripLibPrefix(jxl_src.dec)
  enc = _StripLibPrefix(jxl_src.enc)
  gbench = _StripLibPrefix(jxl_src.gbench)
  threads = _StripLibPrefix(jxl_src.threads)
  extras = _StripLibPrefix(jxl_src.extras)
  jxl_public_hdrs = _StripLibPrefix(jxl_src.jxl_public_hdrs)
  threads_public_hdrs = _StripLibPrefix(jxl_src.threads_public_hdrs)
  # Among the libjxl test-only sources, the *_test.cc files are the tests and
  # everything else is the test library.
  gni_tests = _StripLibPrefix(
      fn for fn in jxl_src.test if fn.endswith('_test.cc'))
  testlib = _StripLibPrefix(
      fn for fn in jxl_src.test if not fn.endswith('_test.cc'))
  # The profiler list holds every tracked file whose path starts with
  # lib/profiler, without filtering by suffix.
  profiler = _StripLibPrefix(
      fn for fn in repo_files if fn.startswith('lib/profiler'))
  # The CMake list of tests includes test files in other libs, not just in
  # libjxl.
  cmake_tests = _StripLibPrefix(
      fn for fn in repo_files
      if fn.endswith('_test.cc') and fn.startswith('lib/'))

  # CMake files in the order they are checked, each with all of its patterns
  # so that a file is read, diffed and written only once.
  cmake_updates = [
      ('lib/jxl.cmake', [
          (r'set\(JPEGXL_INTERNAL_SOURCES_DEC\n([^\)]+)\)', _CMakeList(dec)),
          (r'set\(JPEGXL_INTERNAL_SOURCES_ENC\n([^\)]+)\)', _CMakeList(enc)),
      ]),
      ('lib/jxl_benchmark.cmake', [
          (r'set\(JPEGXL_INTERNAL_SOURCES_GBENCH\n([^\)]+)\)',
           _CMakeList(gbench)),
      ]),
      ('lib/jxl_threads.cmake', [
          (r'set\(JPEGXL_THREADS_SOURCES\n([^\)]+)\)', _CMakeList(threads)),
      ]),
      ('lib/jxl_extras.cmake', [
          (r'set\(JPEGXL_EXTRAS_SOURCES\n([^\)]+)\)', _CMakeList(extras)),
      ]),
      ('lib/jxl_profiler.cmake', [
          (r'set\(JPEGXL_PROFILER_SOURCES\n([^\)]+)\)', _CMakeList(profiler)),
      ]),
      ('lib/jxl_tests.cmake', [
          (r'set\(TEST_FILES\n([^\)]+)  ### Files before this line',
           _CMakeList(cmake_tests)),
          (r'set\(TESTLIB_FILES\n([^\)]+)\)', _CMakeList(testlib)),
      ]),
  ]

  # GN variables in lib/lib.gni and the list each one must contain.
  gni_lists = [
      ('libjxl_dec_sources', dec),
      ('libjxl_enc_sources', enc),
      ('libjxl_gbench_sources', gbench),
      ('libjxl_tests_sources', gni_tests),
      ('libjxl_testlib_sources', testlib),
      ('libjxl_threads_sources', threads),
      ('libjxl_extras_sources', extras),
      ('libjxl_profiler_sources', profiler),
      ('libjxl_public_headers', jxl_public_hdrs),
      ('libjxl_threads_public_headers', threads_public_hdrs),
  ]
  for gni_var, files in gni_lists:
    gni_patterns.append((gni_var + r' = \[\n([^\]]+)\]', _GniList(files)))

  ok = True
  for filename, patterns in cmake_updates:
    # CleanFile goes first so that every file is processed even after an
    # earlier one was found to be out of date.
    ok = CleanFile(args, filename, patterns) and ok

  # Update lib.gni
  ok = CleanFile(args, 'lib/lib.gni', gni_patterns) and ok

  return ok


def main():
  """Parses the command line and checks or updates the build files.

  Prints a message and exits with status 2 when BuildCleaner() reports that
  the build files are not up to date.
  """
  parser = argparse.ArgumentParser(description=__doc__)
  # The default source directory is the parent of the directory that contains
  # this script.
  parser.add_argument('--src-dir',
                      default=os.path.realpath(os.path.join(
                          os.path.dirname(__file__), '..')),
                      help='path to the root of the source repository')
  parser.add_argument('--update', default=False, action='store_true',
                      help='update the build files instead of only checking')
  args = parser.parse_args()
  if not BuildCleaner(args):
    print('Build files need update.')
    sys.exit(2)


# Run the tool only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()