Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/commoncode-release.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Create library release archives, create a GH release and publish PyPI wheel and sdist on tag in main branch
name: Create and release commoncode wheels on GitHub and Pypi


# This is executed automatically on a tag in the main branch
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/licensedcode-data-index-release.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Create library release archives, create a GH release and publish PyPI wheel and sdist on tag in main branch
name: Create and release licensedcode index & data wheels on GitHub and Pypi


# This is executed automatically on a tag in the main branch
Expand Down
23 changes: 23 additions & 0 deletions commoncode-CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,29 @@
Release notes
=============

Version 32.5.2 - (2026-06-11)
-----------------------------

- Bump version properly.

Version 32.5.1 - (2026-06-11)
-----------------------------

- Minor fix in pyproject.toml to release wheels
to PyPI properly.

Version 32.5.0 - (2026-06-11)
-----------------------------

- Merge commoncode back into scancode-toolkit
https://github.com/aboutcode-org/scancode-toolkit/pull/5116

- Add support for creating a codebase from multiple input paths by
starting the codebase walk from these inputs and then ignoring
resources based on path patterns. This improves codebase and resource
collection and creation performance for multi-path scan inputs.
https://github.com/aboutcode-org/scancode-toolkit/pull/5055

Version 32.4.2 - (2025-01-08)
-----------------------------

Expand Down
7 changes: 3 additions & 4 deletions pyproject-commoncode.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "flot.buildapi"

[project]
name = "commoncode"
version = "32.4.2"
version = "32.5.2"
authors = [
{ name = "nexB. Inc. and others", email = "info@aboutcode.org" },
]
Expand Down Expand Up @@ -42,9 +42,6 @@ metadata_files = [

requires-python = ">=3.10"

[project.urls]
Homepage = "https://github.com/nexB/scancode-toolkit"

dependencies = [
"attrs >= 18.1,!=20.1.0;python_version<'3.11'",
"attrs >= 22.1.0;python_version>='3.11'",
Expand All @@ -55,6 +52,8 @@ dependencies = [
"text_unidecode >= 1.0"
]

[project.urls]
Homepage = "https://github.com/nexB/scancode-toolkit"

[project.optional-dependencies]
dev = [
Expand Down
1 change: 0 additions & 1 deletion pyproject-scancode-toolkit-mini.toml
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,6 @@ scancode-train-gibberish-model = "textcode.train_gibberish_model:train_gibberish
# scancode_pre_scan is the entry point for pre_scan plugins executed before the
# scans. See also plugincode.pre_scan module for details and doc.
[project.entry-points.scancode_pre_scan]
ignore = "scancode.plugin_ignore:ProcessIgnore"
facet = "summarycode.facet:AddFacet"


Expand Down
1 change: 0 additions & 1 deletion pyproject-scancode-toolkit.toml
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,6 @@ scancode-train-gibberish-model = "textcode.train_gibberish_model:train_gibberish
# scancode_pre_scan is the entry point for pre_scan plugins executed before the
# scans. See also plugincode.pre_scan module for details and doc.
[project.entry-points.scancode_pre_scan]
ignore = "scancode.plugin_ignore:ProcessIgnore"
facet = "summarycode.facet:AddFacet"


Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,6 @@ scancode-train-gibberish-model = "textcode.train_gibberish_model:train_gibberish
# scancode_pre_scan is the entry point for pre_scan plugins executed before the
# scans. See also plugincode.pre_scan module for details and doc.
[project.entry-points.scancode_pre_scan]
ignore = "scancode.plugin_ignore:ProcessIgnore"
facet = "summarycode.facet:AddFacet"


Expand Down
157 changes: 127 additions & 30 deletions src/commoncode/resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from commoncode.datautils import List
from commoncode.datautils import Mapping
from commoncode.datautils import String
from commoncode.fileset import is_included
from commoncode.filetype import is_file as filetype_is_file
from commoncode.filetype import is_special
from commoncode.fileutils import as_posixpath
Expand Down Expand Up @@ -99,7 +100,7 @@ def skip_ignored(location):
if TRACE_DEEP:
logger_debug()
logger_debug(
"Codebase.populate: walk: ignored loc:",
"Codebase.populate: walk: skip_ignored:",
location,
"ignored:",
ignored(location),
Expand All @@ -110,6 +111,42 @@ def skip_ignored(location):
return is_special(location) or ignored(location)


def is_ignored(location, includes=tuple(), excludes=tuple()):
    """
    Return True if ``location`` should be ignored, False otherwise.

    A location is ignored when ``skip_ignored`` reports it as skipped, or
    when ``is_included`` does not report it as included for the provided
    ``includes`` and ``excludes`` path patterns.
    """
    # is_included() is given mappings of pattern -> message, so each
    # sequence of patterns is turned into a mapping with a fixed message.
    exclude_patterns = dict.fromkeys(excludes, 'User ignore: Supplied by --ignore')
    include_patterns = dict.fromkeys(includes, 'User include: Supplied by --include')

    included_from_options = is_included(
        path=location,
        includes=include_patterns,
        excludes=exclude_patterns,
    )

    if TRACE_DEEP:
        logger_debug(
            "Codebase.populate: walk: is_ignored:",
            "is_ignored: location:",
            location,
            "included_from_options:",
            included_from_options,
            "skip_ignored",
            skip_ignored(location),
        )

    # Guard clause: a location that is neither skipped nor left out by the
    # include/exclude patterns is kept.
    if not skip_ignored(location) and included_from_options:
        return False

    if TRACE_DEEP:
        logger_debug("is_ignored: location:", location, "is_skipped")

    return True


def depth_walk(
root_location,
max_depth,
Expand Down Expand Up @@ -203,6 +240,8 @@ class Codebase:
__slots__ = (
"max_depth",
"location",
"includes",
"ignores",
"has_single_resource",
"resource_attributes",
"resource_class",
Expand Down Expand Up @@ -237,6 +276,8 @@ def __init__(
max_in_memory=10000,
max_depth=0,
paths=tuple(),
ignores=tuple(),
includes=tuple(),
*args,
**kwargs,
):
Expand Down Expand Up @@ -299,6 +340,8 @@ def __init__(

# finally populate
self.paths = self._prepare_clean_paths(paths)
self.includes = self._prepare_clean_paths(includes)
self.ignores = ignores
self._populate()

def _prepare_clean_paths(self, paths=tuple()):
Expand Down Expand Up @@ -462,11 +505,17 @@ def _populate(self):
return

if self.paths:
return self._create_resources_from_paths(root=root, paths=self.paths)
# In case of a list of full paths, we create resources without walking
return self._create_resources_from_full_paths(root=root, paths=self.paths)
# Otherwise we walk the filesystem, starting from the root or from each of the included input paths
else:
return self._create_resources_from_root(root=root)
return self._create_resources_from_root(
root=root,
includes=self.includes,
ignores=self.ignores,
)

def _create_resources_from_paths(self, root, paths):
def _create_resources_from_full_paths(self, root, paths):
# with paths we iterate the provided paths. We report an error
# if a path is missing on disk.

Expand All @@ -484,22 +533,21 @@ def _create_resources_from_paths(self, root, paths):
msg = f"ERROR: cannot populate codebase: path: {path!r} not found in {res_loc!r}"
self.errors.append(msg)
raise Exception(path, join(base_location, path))
continue

# create all parents. The last parent is the one we want to use
parent = root
if TRACE:
logger_debug("Codebase._create_resources_from_paths: parent", parent)
logger_debug("Codebase._create_resources_from_full_paths: parent", parent)
for parent_path in get_ancestor_paths(path, include_self=False):
if TRACE:
logger_debug(
f" Codebase._create_resources_from_paths: parent_path: {parent_path!r}"
f" Codebase._create_resources_from_full_paths: parent_path: {parent_path!r}"
)
if not parent_path:
continue
newpar = parents_by_path.get(parent_path)
if TRACE:
logger_debug(" Codebase._create_resources_from_paths: newpar", repr(newpar))
logger_debug(" Codebase._create_resources_from_full_paths: newpar", repr(newpar))

if not newpar:
newpar = self._get_or_create_resource(
Expand All @@ -510,7 +558,7 @@ def _create_resources_from_paths(self, root, paths):
)
if not newpar:
raise Exception(
"ERROR: Codebase._create_resources_from_paths:"
"ERROR: Codebase._create_resources_from_full_paths:"
f" cannot create parent for: {parent_path!r}"
)
parent = newpar
Expand All @@ -519,7 +567,7 @@ def _create_resources_from_paths(self, root, paths):

if TRACE:
logger_debug(
f" Codebase._create_resources_from_paths:",
f" Codebase._create_resources_from_full_paths:",
f"created newpar: {newpar!r}",
)

Expand All @@ -530,10 +578,10 @@ def _create_resources_from_paths(self, root, paths):
is_file=isfile(res_loc),
)
if TRACE:
logger_debug("Codebase._create_resources_from_paths: resource", res)
logger_debug("Codebase._create_resources_from_full_paths: resource", res)

def _create_resources_from_root(self, root):
# without paths we walks the root location top-down
def _create_resources_from_root(self, root, includes, ignores):
# without paths we walk the root location top-down

# track resources parents by location during construction.
# NOTE: this cannot exhaust memory on a large codebase, because we do
Expand All @@ -546,23 +594,48 @@ def err(_error):
f"ERROR: cannot populate codebase: {_error}\n{traceback.format_exc()}"
)

# Walk over the directory and build the resource tree
for top, dirs, files in depth_walk(
root_location=root.location,
max_depth=self.max_depth,
error_handler=err,
):
parent = parents_by_loc.pop(top)
for created in self._create_resources(
parent=parent,
top=top,
dirs=dirs,
files=files,
# ignore creating resources based on path patterns
skip_ignored = partial(is_ignored, excludes=ignores)

if TRACE_DEEP:
logger_debug(f"parents_by_loc: {parents_by_loc}, ignores: {ignores}, includes: {includes}")

# in the case of a single input location, walking starts from
# the root and only the root location
if not includes:
includes = [root.location]
else:
# create the directory resources between the common
# prefix and the included locations so that they are
# connected to the root
for created in self._create_resources_common_prefix_to_inputs(
root=root,
includes=includes,
):
# on the plain, bare FS, files cannot be parents
if not created.is_file:
parents_by_loc[created.location] = created

# we start walking through all the input locations
for included_location in includes:
# Walk over the directory and build the resource tree
for top, dirs, files in depth_walk(
root_location=included_location,
skip_ignored=skip_ignored,
max_depth=self.max_depth,
error_handler=err,
):
parent = parents_by_loc.pop(top)
for created in self._create_resources(
parent=parent,
top=top,
dirs=dirs,
files=files,
skip_ignored=skip_ignored,
):
# on the plain, bare FS, files cannot be parents
if not created.is_file:
parents_by_loc[created.location] = created

def _create_resources(self, parent, top, dirs, files, skip_ignored=skip_ignored):
"""
Create and yield ``files`` and ``dirs`` children Resources of a
Expand All @@ -575,6 +648,8 @@ def _create_resources(self, parent, top, dirs, files, skip_ignored=skip_ignored)
for name in names:
location = join(top, name)
if skip_ignored(location):
if TRACE_DEEP:
logger_debug(f"_create_resources, depth_walk loop: ignored location: {location}")
continue
res = self._get_or_create_resource(
name=name,
Expand All @@ -585,6 +660,28 @@ def _create_resources(self, parent, top, dirs, files, skip_ignored=skip_ignored)
logger_debug("Codebase.create_resources:", res)
yield res

def _create_resources_common_prefix_to_inputs(self, root, includes):
# Create and yield directory Resources for the path segments of each of
# the ``includes`` paths that come after the ``root`` location. Each
# Resource is created with the previously created one as its parent,
# starting from ``root``.
# NOTE(review): indentation is not visible in this view, so the exact loop
# nesting of the statements below (in particular of the ``yield``) should
# be confirmed against the real file.

if TRACE_DEEP:
logger_debug(f"_create_resources_common_prefix_to_inputs: root:{root.location}, includes: {includes}")

for included_path in includes:
# keep only the text found after the last occurrence of the root
# location in this included path
_, _, extra_dir_path = included_path.rpartition(root.location)
# drop leading and trailing slashes and split in "/"-separated segments
extra_dirs = extra_dir_path.strip("/").split("/")
if TRACE_DEEP:
# NOTE(review): this trace message is the same as the one emitted above
logger_debug(f"_create_resources_common_prefix_to_inputs: root:{root.location}, includes: {includes}")

# walk down from the root, one directory segment at a time
dir_resource = root
for dir_segment in extra_dirs:
# every segment is created as a directory (is_file=False)
dir_resource = self._get_or_create_resource(
name=dir_segment,
parent=dir_resource,
is_file=False,
)
if TRACE:
logger_debug("Codebase.create_resources:", dir_resource)
yield dir_resource

def _create_root_resource(self):
"""
Create and return the root Resource of this codebase.
Expand Down Expand Up @@ -1550,8 +1647,8 @@ def clean_path(path):
Return a cleaned and normalized POSIX ``path``.
"""
path = path or ""
# convert to posix and ensure we have no slash at both ends
path = posixpath_normpath(path.replace("\\", "/").strip("/"))
# convert to posix and ensure we have no slash at the end
path = posixpath_normpath(path.replace("\\", "/").rstrip("/"))
if path == ".":
path = ""
return path
Expand All @@ -1570,8 +1667,8 @@ def strip_first_path_segment(path):
''
>>> strip_first_path_segment('foo/bar/baz')
'bar/baz'
>>> strip_first_path_segment('/foo/bar/baz/')
'bar/baz'
>>> strip_first_path_segment('/foo/bar/baz')
'foo/bar/baz'
>>> strip_first_path_segment('foo/')
''
"""
Expand Down
Loading
Loading