diff --git a/.github/workflows/commoncode-release.yml b/.github/workflows/commoncode-release.yml index 467e6ac234..ca127d2965 100644 --- a/.github/workflows/commoncode-release.yml +++ b/.github/workflows/commoncode-release.yml @@ -1,4 +1,4 @@ -name: Create library release archives, create a GH release and publish PyPI wheel and sdist on tag in main branch +name: Create and release commoncode wheels on GitHub and Pypi # This is executed automatically on a tag in the main branch diff --git a/.github/workflows/licensedcode-data-index-release.yml b/.github/workflows/licensedcode-data-index-release.yml index ba267f89f5..353829d190 100644 --- a/.github/workflows/licensedcode-data-index-release.yml +++ b/.github/workflows/licensedcode-data-index-release.yml @@ -1,4 +1,4 @@ -name: Create library release archives, create a GH release and publish PyPI wheel and sdist on tag in main branch +name: Create and release licensedcode index & data wheels on GitHub and Pypi # This is executed automatically on a tag in the main branch diff --git a/commoncode-CHANGELOG.rst b/commoncode-CHANGELOG.rst index dc63866360..2d56e74814 100644 --- a/commoncode-CHANGELOG.rst +++ b/commoncode-CHANGELOG.rst @@ -1,6 +1,29 @@ Release notes ============= +Version 32.5.2 - (2026-06-11) +----------------------------- + +- Bump version properly. + +Version 32.5.1 - (2026-06-11) +----------------------------- + +- Minor fix in pyproject.toml to release wheels + to PyPI properly. + +Version 32.5.0 - (2026-06-11) +----------------------------- + +- Merge commoncode back into scancode-toolkit + https://github.com/aboutcode-org/scancode-toolkit/pull/5116 + +- Add support for creating a codebase from multiple input paths by + starting the codebase walk from these inputs and then ignoring paths + based on path patterns. 
Improves codebase and resource + collection and creation performance for multi-path scan inputs + https://github.com/aboutcode-org/scancode-toolkit/pull/5055 + Version 32.4.2 - (2025-01-08) ----------------------------- diff --git a/pyproject-commoncode.toml b/pyproject-commoncode.toml index 6c69ab439b..3ea4920d29 100644 --- a/pyproject-commoncode.toml +++ b/pyproject-commoncode.toml @@ -4,7 +4,7 @@ build-backend = "flot.buildapi" [project] name = "commoncode" -version = "32.4.2" +version = "32.5.2" authors = [ { name = "nexB. Inc. and others", email = "info@aboutcode.org" }, ] @@ -42,9 +42,6 @@ metadata_files = [ requires-python = ">=3.10" -[project.urls] -Homepage = "https://github.com/nexB/scancode-toolkit" - dependencies = [ "attrs >= 18.1,!=20.1.0;python_version<'3.11'", "attrs >= 22.1.0;python_version>='3.11'", @@ -55,6 +52,8 @@ dependencies = [ "text_unidecode >= 1.0" ] +[project.urls] +Homepage = "https://github.com/nexB/scancode-toolkit" [project.optional-dependencies] dev = [ diff --git a/pyproject-scancode-toolkit-mini.toml b/pyproject-scancode-toolkit-mini.toml index a816bb2de7..b40f4d07fe 100644 --- a/pyproject-scancode-toolkit-mini.toml +++ b/pyproject-scancode-toolkit-mini.toml @@ -256,7 +256,6 @@ scancode-train-gibberish-model = "textcode.train_gibberish_model:train_gibberish # scancode_pre_scan is the entry point for pre_scan plugins executed before the # scans. See also plugincode.pre_scan module for details and doc. [project.entry-points.scancode_pre_scan] -ignore = "scancode.plugin_ignore:ProcessIgnore" facet = "summarycode.facet:AddFacet" diff --git a/pyproject-scancode-toolkit.toml b/pyproject-scancode-toolkit.toml index 407d65b9c4..29f8157b8d 100644 --- a/pyproject-scancode-toolkit.toml +++ b/pyproject-scancode-toolkit.toml @@ -257,7 +257,6 @@ scancode-train-gibberish-model = "textcode.train_gibberish_model:train_gibberish # scancode_pre_scan is the entry point for pre_scan plugins executed before the # scans. 
See also plugincode.pre_scan module for details and doc. [project.entry-points.scancode_pre_scan] -ignore = "scancode.plugin_ignore:ProcessIgnore" facet = "summarycode.facet:AddFacet" diff --git a/pyproject.toml b/pyproject.toml index cbd405f2bd..f2371c6bb2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -262,7 +262,6 @@ scancode-train-gibberish-model = "textcode.train_gibberish_model:train_gibberish # scancode_pre_scan is the entry point for pre_scan plugins executed before the # scans. See also plugincode.pre_scan module for details and doc. [project.entry-points.scancode_pre_scan] -ignore = "scancode.plugin_ignore:ProcessIgnore" facet = "summarycode.facet:AddFacet" diff --git a/src/commoncode/resource.py b/src/commoncode/resource.py index a635a0a860..5302f2f9d7 100644 --- a/src/commoncode/resource.py +++ b/src/commoncode/resource.py @@ -40,6 +40,7 @@ from commoncode.datautils import List from commoncode.datautils import Mapping from commoncode.datautils import String +from commoncode.fileset import is_included from commoncode.filetype import is_file as filetype_is_file from commoncode.filetype import is_special from commoncode.fileutils import as_posixpath @@ -99,7 +100,7 @@ def skip_ignored(location): if TRACE_DEEP: logger_debug() logger_debug( - "Codebase.populate: walk: ignored loc:", + "Codebase.populate: walk: skip_ignored:", location, "ignored:", ignored(location), @@ -110,6 +111,42 @@ def skip_ignored(location): return is_special(location) or ignored(location) +def is_ignored(location, includes=tuple(), excludes=tuple()): + + excludes = { + pattern: 'User ignore: Supplied by --ignore' for pattern in excludes + } + + includes = { + pattern: 'User include: Supplied by --include' for pattern in includes + } + + included_from_options = is_included( + path=location, + includes=includes, + excludes=excludes, + ) + + if TRACE_DEEP: + logger_debug( + "Codebase.populate: walk: is_ignored:", + "is_ignored: location:", + location, + "included_from_options:", + 
included_from_options, + "skip_ignored", + skip_ignored(location) + ) + + if skip_ignored(location) or not included_from_options: + if TRACE_DEEP: + logger_debug("is_ignored: location:", location, "is_skipped",) + + return True + + return False + + def depth_walk( root_location, max_depth, @@ -203,6 +240,8 @@ class Codebase: __slots__ = ( "max_depth", "location", + "includes", + "ignores", "has_single_resource", "resource_attributes", "resource_class", @@ -237,6 +276,8 @@ def __init__( max_in_memory=10000, max_depth=0, paths=tuple(), + ignores=tuple(), + includes=tuple(), *args, **kwargs, ): @@ -299,6 +340,8 @@ def __init__( # finally populate self.paths = self._prepare_clean_paths(paths) + self.includes = self._prepare_clean_paths(includes) + self.ignores = ignores self._populate() def _prepare_clean_paths(self, paths=tuple()): @@ -462,11 +505,17 @@ def _populate(self): return if self.paths: - return self._create_resources_from_paths(root=root, paths=self.paths) + # In case of a list of full paths, we create resources without walking + return self._create_resources_from_full_paths(root=root, paths=self.paths) + # Otherwise we walk the codebase from the root or from each included location else: - return self._create_resources_from_root(root=root) + return self._create_resources_from_root( + root=root, + includes=self.includes, + ignores=self.ignores, + ) - def _create_resources_from_paths(self, root, paths): + def _create_resources_from_full_paths(self, root, paths): # without paths we iterate the provided paths. We report an error # if a path is missing on disk. @@ -484,22 +533,21 @@ def _create_resources_from_paths(self, root, paths): msg = f"ERROR: cannot populate codebase: path: {path!r} not found in {res_loc!r}" self.errors.append(msg) raise Exception(path, join(base_location, path)) - continue # create all parents. 
The last parent is the one we want to use parent = root if TRACE: - logger_debug("Codebase._create_resources_from_paths: parent", parent) + logger_debug("Codebase._create_resources_from_full_paths: parent", parent) for parent_path in get_ancestor_paths(path, include_self=False): if TRACE: logger_debug( - f" Codebase._create_resources_from_paths: parent_path: {parent_path!r}" + f" Codebase._create_resources_from_full_paths: parent_path: {parent_path!r}" ) if not parent_path: continue newpar = parents_by_path.get(parent_path) if TRACE: - logger_debug(" Codebase._create_resources_from_paths: newpar", repr(newpar)) + logger_debug(" Codebase._create_resources_from_full_paths: newpar", repr(newpar)) if not newpar: newpar = self._get_or_create_resource( @@ -510,7 +558,7 @@ def _create_resources_from_paths(self, root, paths): ) if not newpar: raise Exception( - "ERROR: Codebase._create_resources_from_paths:" + "ERROR: Codebase._create_resources_from_full_paths:" f" cannot create parent for: {parent_path!r}" ) parent = newpar @@ -519,7 +567,7 @@ def _create_resources_from_paths(self, root, paths): if TRACE: logger_debug( - f" Codebase._create_resources_from_paths:", + f" Codebase._create_resources_from_full_paths:", f"created newpar: {newpar!r}", ) @@ -530,10 +578,10 @@ def _create_resources_from_paths(self, root, paths): is_file=isfile(res_loc), ) if TRACE: - logger_debug("Codebase._create_resources_from_paths: resource", res) + logger_debug("Codebase._create_resources_from_full_paths: resource", res) - def _create_resources_from_root(self, root): - # without paths we walks the root location top-down + def _create_resources_from_root(self, root, includes, ignores): + # without paths we walk the root location top-down # track resources parents by location during construction. 
# NOTE: this cannot exhaust memory on a large codebase, because we do @@ -546,23 +594,48 @@ def err(_error): f"ERROR: cannot populate codebase: {_error}\n{traceback.format_exc()}" ) - # Walk over the directory and build the resource tree - for top, dirs, files in depth_walk( - root_location=root.location, - max_depth=self.max_depth, - error_handler=err, - ): - parent = parents_by_loc.pop(top) - for created in self._create_resources( - parent=parent, - top=top, - dirs=dirs, - files=files, + # ignore creating resources based on path patterns + skip_ignored = partial(is_ignored, excludes=ignores) + + if TRACE_DEEP: + logger_debug(f"parents_by_loc: {parents_by_loc}, ignores: {ignores}, includes: {includes}") + + # in the case of a single input location, walking starts from + # the root and only the root location + if not includes: + includes = [root.location] + else: + # create the directory resources between the common + # prefix and the included locations so that they are + # connected to the root + for created in self._create_resources_common_prefix_to_inputs( + root=root, + includes=includes, ): - # on the plain, bare FS, files cannot be parents if not created.is_file: parents_by_loc[created.location] = created + # we start walking through all the input locations + for included_location in includes: + # Walk over the directory and build the resource tree + for top, dirs, files in depth_walk( + root_location=included_location, + skip_ignored=skip_ignored, + max_depth=self.max_depth, + error_handler=err, + ): + parent = parents_by_loc.pop(top) + for created in self._create_resources( + parent=parent, + top=top, + dirs=dirs, + files=files, + skip_ignored=skip_ignored, + ): + # on the plain, bare FS, files cannot be parents + if not created.is_file: + parents_by_loc[created.location] = created + def _create_resources(self, parent, top, dirs, files, skip_ignored=skip_ignored): """ Create and yield ``files`` and ``dirs`` children Resources of a @@ -575,6 +648,8 @@ def 
_create_resources(self, parent, top, dirs, files, skip_ignored=skip_ignored) for name in names: location = join(top, name) if skip_ignored(location): + if TRACE_DEEP: + logger_debug(f"_create_resources, depth_walk loop: ignored location: {location}") continue res = self._get_or_create_resource( name=name, @@ -585,6 +660,28 @@ def _create_resources(self, parent, top, dirs, files, skip_ignored=skip_ignored) logger_debug("Codebase.create_resources:", res) yield res + def _create_resources_common_prefix_to_inputs(self, root, includes): + + if TRACE_DEEP: + logger_debug(f"_create_resources_common_prefix_to_inputs: root:{root.location}, includes: {includes}") + + for included_path in includes: + _, _, extra_dir_path = included_path.rpartition(root.location) + extra_dirs = extra_dir_path.strip("/").split("/") + if TRACE_DEEP: + logger_debug(f"_create_resources_common_prefix_to_inputs: root:{root.location}, includes: {includes}") + + dir_resource = root + for dir_segment in extra_dirs: + dir_resource = self._get_or_create_resource( + name=dir_segment, + parent=dir_resource, + is_file=False, + ) + if TRACE: + logger_debug("Codebase.create_resources:", dir_resource) + yield dir_resource + def _create_root_resource(self): """ Create and return the root Resource of this codebase. @@ -1550,8 +1647,8 @@ def clean_path(path): Return a cleaned and normalized POSIX ``path``. 
""" path = path or "" - # convert to posix and ensure we have no slash at both ends - path = posixpath_normpath(path.replace("\\", "/").strip("/")) + # convert to posix and ensure we have no slash at the end + path = posixpath_normpath(path.replace("\\", "/").rstrip("/")) if path == ".": path = "" return path @@ -1570,8 +1667,8 @@ def strip_first_path_segment(path): '' >>> strip_first_path_segment('foo/bar/baz') 'bar/baz' - >>> strip_first_path_segment('/foo/bar/baz/') - 'bar/baz' + >>> strip_first_path_segment('/foo/bar/baz') + 'foo/bar/baz' >>> strip_first_path_segment('foo/') '' """ diff --git a/src/commoncode/testcase.py b/src/commoncode/testcase.py index af81fc7f5e..ee680e24da 100644 --- a/src/commoncode/testcase.py +++ b/src/commoncode/testcase.py @@ -93,7 +93,7 @@ class FileDrivenTesting(object): test_data_dir = None - def get_test_loc(self, test_path, copy=False, debug=False, must_exist=True): + def get_test_loc(self, test_path, copy=False, debug=False, must_exist=True, relative=False): """ Given a `test_path` relative to the self.test_data_dir directory, return the location to a test file or directory for this path. 
Copy to a temp @@ -129,6 +129,11 @@ def get_test_loc(self, test_path, copy=False, debug=False, must_exist=True): # cleanup of VCS that could be left over from checkouts self.remove_vcs(target_dir) test_loc = target_dir + + if relative: + _, _, rel_test_loc = test_loc.rpartition(os.getcwd()) + return rel_test_loc.strip("/").strip("\\") + return test_loc def get_temp_file(self, extension=None, dir_name="td", file_name="tf"): diff --git a/src/scancode/cli.py b/src/scancode/cli.py index 1376c6cfee..1e418e32f0 100644 --- a/src/scancode/cli.py +++ b/src/scancode/cli.py @@ -221,6 +221,16 @@ def default_processes(): callback=validate_input_path, type=click.Path(exists=True, readable=True, path_type=str)) +@click.option('--ignore', + multiple=True, + default=None, + metavar='', + help='Ignore files matching .', + sort_order=10, + help_group=cliutils.CORE_GROUP, + cls=PluggableCommandLineOption, +) + @click.option('--strip-root', is_flag=True, default=False, @@ -395,6 +405,7 @@ def default_processes(): def scancode( ctx, input, # NOQA + ignore, strip_root, full_root, processes, @@ -505,6 +516,7 @@ def scancode( # run proper success, _results = run_scan( input=input, + ignore=ignore, from_json=from_json, strip_root=strip_root, full_root=full_root, @@ -545,7 +557,8 @@ def scancode( def run_scan( - input, # NOQA + input, # + ignore=(), from_json=False, strip_root=False, full_root=False, @@ -597,6 +610,9 @@ def echo_func(*_args, **_kwargs): msg = 'At least one input path is required.' raise ScancodeError(msg) + # To support multiple path inputs + include = [] + if not isinstance(input, (list, tuple)): if not isinstance(input, str): msg = 'Unknown format: "{}".'.format(repr(input)) @@ -611,8 +627,6 @@ def echo_func(*_args, **_kwargs): # VirtualCodebase; otherwise we have to process `input` to make it a single # root with excludes. elif not from_json: - # FIXME: support the multiple root better. 
This is quirky at best - # This is the case where we have a list of input path and the # `from_json` option is not selected: we can handle this IFF they share # a common root directory and none is an absolute path @@ -624,32 +638,33 @@ def echo_func(*_args, **_kwargs): ) raise ScancodeError(msg) + abs_input = [os.path.abspath(i) for i in input] + # find the common prefix directory (note that this is a pre string # operation hence it may return non-existing paths - common_prefix = os.path.commonprefix(input) + common_prefix = os.path.commonprefix(abs_input) if not common_prefix: # we have no common prefix, but all relative. therefore the - # parent/root is the current ddirectory + # parent/root is the current directory common_prefix = str('.') + elif not common_prefix.endswith("/"): + # common prefix has trailing incomplete dirname + # for example the common prefix of "/temp/scancode" + # and "/temp/scans" is "/temp/scan" + common_prefix, _, _ = common_prefix.rpartition("/") elif not os.path.isdir(common_prefix): msg = ( 'Invalid inputs: all input paths must share a ' - 'common single parent directory.' + f'common single parent directory. common part: {common_prefix}' ) raise ScancodeError(msg) - # and we craft a list of synthetic --include path pattern options from - # the input list of paths - included_paths = [as_posixpath(path).rstrip('/') for path in input] - # FIXME: this is a hack as this "include" is from an external plugin!!! - include = list(requested_options.get('include', []) or []) - include.extend(included_paths) - requested_options['include'] = include - - # ... 
and use the common prefix as our new input + # and we craft a list of include paths where the codebase walks + # will start from, even though the root is the common prefix + include = [as_posixpath(path).rstrip('/') for path in abs_input] input = common_prefix # NOQA # build mappings of all options to pass down to plugins @@ -894,6 +909,8 @@ def echo_func(*_args, **_kwargs): try: codebase = codebase_class( location=input, + includes=include, + ignores=ignore, resource_attributes=resource_attributes, codebase_attributes=codebase_attributes, full_root=full_root, diff --git a/src/scancode/outdated.py b/src/scancode/outdated.py index 4be850d847..2c68dc39e2 100644 --- a/src/scancode/outdated.py +++ b/src/scancode/outdated.py @@ -83,7 +83,11 @@ def total_seconds(td): class VersionCheckState: - def __init__(self): + def __init__(self, is_test=False): + if is_test: + self.state={} + return + self.statefile_path = os.path.join( scancode_cache_dir, 'scancode-version-check.json') self.lockfile_path = self.statefile_path + '.lockfile' @@ -135,6 +139,7 @@ def check_scancode_version( release_date=scancode_release_date, new_version_url='https://pypi.org/pypi/scancode-toolkit/json', force=False, + is_test=False, ): """ Check for an updated version of scancode-toolkit. 
Return a message to @@ -146,6 +151,7 @@ def check_scancode_version( installed_version=installed_version, new_version_url=new_version_url, force=force, + is_test=is_test, ) if newer_version: return build_outdated_message( @@ -159,6 +165,7 @@ def fetch_newer_version( installed_version=scancode_version, new_version_url='https://pypi.org/pypi/scancode-toolkit/json', force=False, + is_test=False, ): """ Return a version string if there is an updated version of scancode-toolkit @@ -175,9 +182,10 @@ def fetch_newer_version( try: installed_version = packaging_version.parse(installed_version) - state = VersionCheckState() + state = VersionCheckState(is_test=is_test) current_time = datetime.datetime.utcnow() + latest_version = None # Determine if we need to refresh the state if ('last_check' in state.state and 'latest_version' in state.state): last_check = datetime.datetime.strptime( diff --git a/src/scancode/plugin_ignore.py b/src/scancode/plugin_ignore.py index 70b0e30b10..3b1b3a06ed 100644 --- a/src/scancode/plugin_ignore.py +++ b/src/scancode/plugin_ignore.py @@ -37,87 +37,63 @@ def logger_debug(*args): return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args)) -@pre_scan_impl -class ProcessIgnore(PreScanPlugin): +def process_codebase(codebase, ignore=(), include=(), **kwargs): """ - Include or ignore files matching patterns. + WARNING: DEPRECATED, ignore/include moved to codebase import + step in core plugins. + Keep only included and non-ignored Resources in the codebase. 
""" - options = [ - PluggableCommandLineOption(('--ignore',), - multiple=True, - default=None, - metavar='', - help='Ignore files matching .', - sort_order=10, - help_group=PRE_SCAN_GROUP), - PluggableCommandLineOption(('--include',), - multiple=True, - default=None, - metavar='', - help='Include files matching .', - sort_order=11, - help_group=PRE_SCAN_GROUP) - ] - - def is_enabled(self, ignore, include, **kwargs): - return ignore or include - - def process_codebase(self, codebase, ignore=(), include=(), **kwargs): - """ - Keep only included and non-ignored Resources in the codebase. - """ - - if not (ignore or include): - return - - excludes = { - pattern: 'User ignore: Supplied by --ignore' for pattern in ignore - } - - includes = { - pattern: 'User include: Supplied by --include' for pattern in include - } - - included = partial(is_included, includes=includes, excludes=excludes) - - paths_to_remove = set() - paths_to_remove_add = paths_to_remove.add - paths_to_remove_discard = paths_to_remove.discard - - # Walk codebase top-down to collect the paths of Resources to remove. - for resource in codebase.walk(topdown=True): - if resource.is_root: - continue - - resource_path = resource.path - - if not included(resource_path): - for child in resource.children(codebase): - paths_to_remove_add(child.path) - paths_to_remove_add(resource_path) - else: - # we may have been selected for removal based on a parent dir - # but may be explicitly included. Honor that - paths_to_remove_discard(resource_path) - - if TRACE: - logger_debug('process_codebase: paths_to_remove') - logger_debug(paths_to_remove) - for path in sorted(paths_to_remove): - logger_debug(codebase.get_resource(path)) - - remove_resource = codebase.remove_resource - - # Then, walk bottom-up and remove the non-included Resources from the - # Codebase if the Resource path is in our list of paths to remove. 
- for resource in codebase.walk(topdown=False): - resource_path = resource.path - if resource.is_root: - continue - # removing dirs will also remove its files - if resource.is_dir: - continue - if resource_path in paths_to_remove: - paths_to_remove_discard(resource_path) - remove_resource(resource) + if not (ignore or include): + return + + excludes = { + pattern: 'User ignore: Supplied by --ignore' for pattern in ignore + } + + includes = { + pattern: 'User include: Supplied by --include' for pattern in include + } + + included = partial(is_included, includes=includes, excludes=excludes) + + paths_to_remove = set() + paths_to_remove_add = paths_to_remove.add + paths_to_remove_discard = paths_to_remove.discard + + # Walk codebase top-down to collect the paths of Resources to remove. + for resource in codebase.walk(topdown=True): + if resource.is_root: + continue + + resource_path = resource.path + + if not included(resource_path): + for child in resource.children(codebase): + paths_to_remove_add(child.path) + paths_to_remove_add(resource_path) + else: + # we may have been selected for removal based on a parent dir + # but may be explicitly included. Honor that + paths_to_remove_discard(resource_path) + + if TRACE: + logger_debug('process_codebase: paths_to_remove') + logger_debug(paths_to_remove) + for path in sorted(paths_to_remove): + logger_debug(codebase.get_resource(path)) + + remove_resource = codebase.remove_resource + + # Then, walk bottom-up and remove the non-included Resources from the + # Codebase if the Resource path is in our list of paths to remove. 
+ for resource in codebase.walk(topdown=False): + resource_path = resource.path + if resource.is_root: + continue + # removing dirs will also remove its files + if resource.is_dir: + continue + if resource_path in paths_to_remove: + paths_to_remove_discard(resource_path) + remove_resource(resource) diff --git a/src/scancode_config.py b/src/scancode_config.py index 20c57a19be..6e9f634b08 100644 --- a/src/scancode_config.py +++ b/src/scancode_config.py @@ -95,7 +95,7 @@ def _create_dir(location): from subprocess import CalledProcessError # this may fail with exceptions - cmd = 'git', 'describe', '--tags', + cmd = 'git', 'describe', '--tags', '--match=v*' try: output = check_output(cmd, stderr=STDOUT) __version__ = output.decode('utf-8').strip() diff --git a/tests/commoncode/test_fileset.py b/tests/commoncode/test_fileset.py index ccbfe9df99..25632ae1a5 100644 --- a/tests/commoncode/test_fileset.py +++ b/tests/commoncode/test_fileset.py @@ -56,6 +56,11 @@ def test_is_included_is_included_exclusions_2(self): assert fileset.is_included("/some/src/this/that", incs, excs) assert not fileset.is_included("/src/dist/build/mylib.so", incs, excs) + def test_is_included_is_included_inside_exclusions(self): + incs = {"/src/*.so": ".scanignore"} + excs = {"/src/*": ".scanignore"} + assert not fileset.is_included("/src/dist/build/mylib.so", incs, excs) + def test_is_included_empty_exclusions(self): incs = {"/src/*": ".scanignore"} excs = {"": ".scanignore"} diff --git a/tests/commoncode/test_resource.py b/tests/commoncode/test_resource.py index b85470eb9d..07f87dd55e 100644 --- a/tests/commoncode/test_resource.py +++ b/tests/commoncode/test_resource.py @@ -354,7 +354,7 @@ def test_get_resource_for_multiple_resource_codebase(self): codebase = Codebase(test_codebase) assert codebase.get_resource("resource/a").path == "resource/a" - assert codebase.get_resource("/resource/c").path == "resource/c" + assert codebase.get_resource("resource/c").path == "resource/c" assert 
codebase.get_resource("resource/dsasda/../b/").path == "resource/b" def test_Resource_build_path(self): diff --git a/tests/scancode/data/help/help.txt b/tests/scancode/data/help/help.txt index e725888ead..d65f1f00f4 100644 --- a/tests/scancode/data/help/help.txt +++ b/tests/scancode/data/help/help.txt @@ -92,8 +92,6 @@ Options: such that all paths have a common root directory. pre-scan: - --ignore Ignore files matching . - --include Include files matching . --facet = Add the to files with a path matching . @@ -138,6 +136,7 @@ Options: which are todo items and needs manual review. core: + --ignore Ignore files matching . --timeout Stop an unfinished file scan after a timeout in seconds. [default: 120 seconds] -n, --processes INT Set the number of parallel processes to use. Disable diff --git a/tests/scancode/data/help/help_linux.txt b/tests/scancode/data/help/help_linux.txt index 6794b19d60..2b917909f3 100644 --- a/tests/scancode/data/help/help_linux.txt +++ b/tests/scancode/data/help/help_linux.txt @@ -94,8 +94,6 @@ Options: such that all paths have a common root directory. pre-scan: - --ignore Ignore files matching . - --include Include files matching . --facet = Add the to files with a path matching . @@ -140,6 +138,7 @@ Options: which are todo items and needs manual review. core: + --ignore Ignore files matching . --timeout Stop an unfinished file scan after a timeout in seconds. [default: 120 seconds] -n, --processes INT Set the number of parallel processes to use. 
Disable diff --git a/tests/scancode/test_cli.py b/tests/scancode/test_cli.py index 9d038f71e6..0dca907efc 100644 --- a/tests/scancode/test_cli.py +++ b/tests/scancode/test_cli.py @@ -168,7 +168,7 @@ def test_scan_info_returns_full_root(): file_paths = [f['path'] for f in result_data['files']] assert len(file_paths) == 12 # note that we strip paths from leading and trailing slashes - root = fileutils.as_posixpath(test_dir).strip('/') + root = fileutils.as_posixpath(test_dir) assert all(p.startswith(root) for p in file_paths) @@ -184,7 +184,7 @@ def test_scan_info_returns_correct_full_root_with_single_file(): scanned_file = files[0] # and we check that the path is the full path without repeating the file name # note that the path never contain leading and trailing slashes - assert scanned_file['path'] == fileutils.as_posixpath(test_file).strip('/') + assert scanned_file['path'] == fileutils.as_posixpath(test_file) def test_scan_info_returns_does_not_strip_root_with_single_file(): @@ -837,6 +837,15 @@ def test_scan_should_not_fail_with_low_max_in_memory_setting_when_ignoring_files run_scan_click(args, expected_rc=0) +def test_scan_supports_multiple_input_paths(): + test_file_1 = test_env.get_test_loc('summaries/client', relative=True) + test_file_2 = test_env.get_test_loc('summaries/counts', relative=True) + result_file = test_env.get_temp_file('json') + args = ['--info', '-n', '1', test_file_1, test_file_2, '--json', result_file] + run_scan_click(args, expected_rc=0) + + + def test_get_displayable_summary(): from scancode.cli import get_displayable_summary from commoncode.resource import Codebase diff --git a/tests/scancode/test_outdated.py b/tests/scancode/test_outdated.py index cdac7853b2..0509c6ea36 100644 --- a/tests/scancode/test_outdated.py +++ b/tests/scancode/test_outdated.py @@ -152,8 +152,8 @@ def jget(*args, **kwargs): json=jget, status_code=200 ) - assert not outdated.fetch_newer_version(force=True) - assert not outdated.check_scancode_version(force=True) 
+ assert not outdated.fetch_newer_version(force=True, is_test=True) + assert not outdated.check_scancode_version(force=True, is_test=True) def test_fetch_newer_version_local_git_version(): diff --git a/tests/scancode/test_plugin_ignore.py b/tests/scancode/test_plugin_ignore.py index 78f2954d76..db739db88a 100644 --- a/tests/scancode/test_plugin_ignore.py +++ b/tests/scancode/test_plugin_ignore.py @@ -14,7 +14,6 @@ from commoncode.fileset import is_included from scancode.cli_test_utils import run_scan_click from scancode.cli_test_utils import load_json_result -from scancode.plugin_ignore import ProcessIgnore from commoncode.resource import Codebase @@ -48,15 +47,13 @@ def test_is_included_glob_file(self): assert not is_included(location, excludes=excludes) def check_ProcessIgnore(self, test_dir, expected, ignore, include=()): - codebase = Codebase(test_dir) - test_plugin = ProcessIgnore() - test_plugin.process_codebase(codebase, ignore=ignore, include=include) + codebase = Codebase(location=test_dir, ignores=ignore, includes=include) resources = [res.strip_root_path for res in codebase.walk(skip_root=True)] assert sorted(resources) == expected def test_ProcessIgnore_with_single_file(self): test_dir = self.extract_test_tar('plugin_ignore/user.tgz') - ignore = ('sample.doc',) + ignore = ('*sample.doc',) expected = [ 'user', 'user/ignore.doc', @@ -69,7 +66,7 @@ def test_ProcessIgnore_with_single_file(self): def test_ProcessIgnore_with_multiple_files(self): test_dir = self.extract_test_tar('plugin_ignore/user.tgz') - ignore = ('ignore.doc', 'sample.doc',) + ignore = ('*ignore.doc', '*sample.doc',) expected = [ 'user', 'user/src', @@ -111,25 +108,10 @@ def test_ProcessIgnore_with_multiple_ignores(self): ] self.check_ProcessIgnore(test_dir, expected, ignore) - def test_ProcessIgnore_include_with_glob_for_extension(self): - test_dir = self.extract_test_tar('plugin_ignore/user.tgz') - include = ('*.doc',) - expected = [ - 'user', - 'user/ignore.doc', - 'user/src', - 
'user/src/ignore.doc', - 'user/src/test', - 'user/src/test/sample.doc', - ] - self.check_ProcessIgnore(test_dir, expected, ignore=(), include=include) - def test_ProcessIgnore_process_codebase_does_not_fail_to_access_an_ignored_resourced_cached_to_disk(self): test_dir = self.extract_test_tar('plugin_ignore/user.tgz') - codebase = Codebase(test_dir, max_in_memory=1) - test_plugin = ProcessIgnore() ignore = ['test'] - test_plugin.process_codebase(codebase, ignore=ignore) + Codebase(location=test_dir, max_in_memory=1, ignores=ignore) class TestScanPluginIgnoreFiles(FileDrivenTesting): @@ -241,7 +223,7 @@ def test_scancode_multiple_ignores(self): def test_scancode_codebase_attempt_to_access_an_ignored_resourced_cached_to_disk(self): test_dir = self.extract_test_tar('plugin_ignore/user.tgz') result_file = self.get_temp_file('json') - args = ['--copyright', '--strip-root', '--ignore', 'test', test_dir, '--max-in-memory', '1', '--json', result_file] + args = ['--copyright', '--strip-root', '--ignore', '*test', test_dir, '--max-in-memory', '1', '--json', result_file] run_scan_click(args) scan_result = load_json_result(result_file) assert scan_result['headers'][0]['extra_data']['files_count'] == 2 @@ -251,6 +233,5 @@ def test_scancode_codebase_attempt_to_access_an_ignored_resourced_cached_to_disk u'user/ignore.doc', u'user/src', u'user/src/ignore.doc', - u'user/src/test', ] assert scan_locs == expected