# Source code for check_python_h_first.wrapper

"""Wrapper to check multiple files at once.

The functions in this script try to sort the files for greater
effectiveness, then run
:func:`.single_file.check_python_h_included_first` on each file,
collating the results.

Originally implemented `in SciPy
<https://github.com/scipy/scipy/blob/888ca356/tools/check_python_h_first.py>`_.
"""

import fnmatch
import os.path
import subprocess
import sys

from .get_submodule_paths import get_submodule_paths
from .single_file import check_python_h_included_first

# File extensions (lower-case, including the leading dot) that
# find_c_cpp_files treats as C or C++ sources and headers.
C_CPP_EXTENSIONS = (".c", ".h", ".cpp", ".hpp", ".cc", ".hh", ".cxx", ".hxx")
# Keep in sync with the pathspec list passed to ``git diff`` in diff_files


def sort_order(path: str) -> tuple[int, str]:
    """Sort key function to get files in reasonable order.

    Tries to get headers before files that included them: lower
    priorities sort first, and ties are broken by the path itself.

    Parameters
    ----------
    path : str
        Path of the file to rank.  Either ``/`` or the native path
        separator may be used between directories.

    Returns
    -------
    priority : int
        Smaller values are processed earlier.
    path : str
        The input path, unchanged.
    """
    # The directory check below is spelled with "/"; normalise the native
    # separator so it also matches paths built with os.path.join/os.walk
    # on platforms where the separator is not "/".
    normalized = path.replace(os.sep, "/")
    extension = os.path.splitext(path)[1].lower()

    # I should generalize this for different projects
    if "include/numpy" in normalized:
        # Want to process numpy/*.h first, to work out which of those
        # include Python.h directly
        priority = 0x00
    elif "h" in extension:
        # Then other headers, which tend to include numpy/*.h
        priority = 0x10
    else:
        # Source files after headers, to give the best chance of
        # properly checking whether they include Python.h
        priority = 0x20

    # Small adjustments within each band: the name-based tweaks below
    # total at most 8 + 4 + 1 = 13, less than the 0x10 band width.
    if "common" in path:
        priority -= 8

    path_basename = os.path.basename(path)
    if path_basename.startswith("npy_"):
        priority -= 4
    elif path_basename.startswith("npy"):
        priority -= 3
    elif path_basename.startswith("np"):
        priority -= 2
    if "config" in path_basename:
        priority -= 1

    return priority, path
def process_files(file_list: list[str]) -> int:
    """Process each of the files in the list.

    Files are visited in the order given by :func:`sort_order`.  Each
    name is joined onto the directory two levels above this module
    before being checked.  Files whose full path contains any entry
    returned by ``get_submodule_paths()``, or contains ``.dispatch.``,
    are skipped.

    Parameters
    ----------
    file_list : list of str
        File names to check.

    Returns
    -------
    n_out_of_order : int
        The number of headers before Python.h
    """
    n_out_of_order = 0
    submodule_paths = get_submodule_paths()
    root_directory = os.path.dirname(os.path.dirname(__file__))

    for name_to_check in sorted(file_list, key=sort_order):
        name_to_check = os.path.join(root_directory, name_to_check)
        # Substring match: anything located under a submodule path is skipped.
        if any(submodule_path in name_to_check for submodule_path in submodule_paths):
            continue
        if ".dispatch." in name_to_check:
            continue
        try:
            n_out_of_order += check_python_h_included_first(name_to_check)
        except UnicodeDecodeError:
            # Report and carry on with the remaining files.  The stream must
            # be passed by keyword; positionally it would be printed as text.
            print(f"File {name_to_check:s} not utf-8", file=sys.stdout)

    return n_out_of_order
def find_c_cpp_files(root: str) -> list[str]:
    """Find C and C++ files under root.

    Directories named ``build``, ``.git`` or ``boost``, and directories
    matching ``*.p``, are not descended into.  A file is selected when
    its lower-cased extension is in ``C_CPP_EXTENSIONS``.

    Parameters
    ----------
    root : str
        Directory to search recursively.

    Returns
    -------
    list of str
        Paths joined onto ``root``, headers first; within the header and
        non-header groups the order of discovery is preserved.
    """
    # I'm assuming other people have checked boost
    skipped_directories = ("build", ".git", "boost")

    result = []
    for dirpath, dirnames, filenames in os.walk(root):
        # Prune in place: os.walk only honours changes made to the very
        # list object it yielded.
        dirnames[:] = [
            name
            for name in dirnames
            if name not in skipped_directories and not fnmatch.fnmatch(name, "*.p")
        ]
        result.extend(
            os.path.join(dirpath, name)
            for name in filenames
            if os.path.splitext(name)[1].lower() in C_CPP_EXTENSIONS
        )

    # Check the headers before the source files.  Lower-case the extension
    # here as well, so that e.g. "FOO.H" (accepted by the filter above) is
    # also recognised as a header.  The sort is stable, even when reversed.
    result.sort(
        key=lambda path: "h" in os.path.splitext(path)[1].lower(),
        reverse=True,
    )
    return result
def diff_files(sha: str) -> list[str]:
    """Find the diff since the given SHA.

    Runs ``git diff --name-only --diff-filter=ACMR -z`` against ``sha``,
    restricted to C and C++ file name patterns.

    Adapted from scipy/tools/lint.py

    Parameters
    ----------
    sha : str
        Commit to diff against.

    Returns
    -------
    list of str
        The non-empty, NUL-separated names printed by git.

    Raises
    ------
    subprocess.CalledProcessError
        If git exits with a non-zero status.
    """
    # Check against C_CPP_EXTENSIONS
    pathspecs = ["*.[chCH]", "*.[ch]pp", "*.[ch]xx", "*.cc", "*.hh"]
    command = [
        "git",
        "diff",
        "--name-only",
        "--diff-filter=ACMR",
        "-z",
        sha,
        "--",
        *pathspecs,
    ]
    completed = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        encoding="utf-8",
        check=True,
    )
    # With -z the names are NUL-terminated, so splitting leaves a trailing
    # empty string that has to be dropped.
    return list(filter(None, completed.stdout.split("\0")))