diff --git a/gigl/common/utils/gcs.py b/gigl/common/utils/gcs.py index dfb166afa..7f0952ddb 100644 --- a/gigl/common/utils/gcs.py +++ b/gigl/common/utils/gcs.py @@ -30,7 +30,7 @@ def _upload_file_to_gcs( source_file_path: LocalUri, dest_gcs_path: GcsUri, - project: str, + project: Optional[str], gcs_utils_client: Optional[storage.Client] = None, ): ( @@ -45,7 +45,9 @@ def _upload_file_to_gcs( blob.upload_from_filename(source_file_path.uri) -def _pickling_safe_upload_file_to_gcs(obj: Tuple[Tuple[LocalUri, GcsUri], str]): +def _pickling_safe_upload_file_to_gcs( + obj: Tuple[Tuple[LocalUri, GcsUri], Optional[str]], +): file_paths, project = obj source_file_path, dest_gcs_path = file_paths storage_client = storage.Client(project=project) @@ -58,7 +60,7 @@ def _pickling_safe_upload_file_to_gcs(obj: Tuple[Tuple[LocalUri, GcsUri], str]): def _upload_files_to_gcs_parallel( - project: str, local_file_path_to_gcs_path_map: dict[LocalUri, GcsUri] + project: Optional[str], local_file_path_to_gcs_path_map: dict[LocalUri, GcsUri] ): with ProcessPoolExecutor(max_workers=None) as executor: results = executor.map( @@ -84,6 +86,9 @@ def __init__(self, project: Optional[str] = None) -> None: project (Optional[str]): The GCP project ID. Defaults to None. """ self.__storage_client = storage.Client(project=project) + # Passing project=None explicitly puts storage.Client in "no project" + # mode where client.project is None — a valid, common configuration. + self.__project: Optional[str] = self.__storage_client.project def upload_from_string(self, gcs_path: GcsUri, content: str) -> None: bucket_name, blob_name = self.get_bucket_and_blob_path_from_gcs_path(gcs_path) @@ -133,7 +138,7 @@ def upload_files_to_gcs( """ if parallel: _upload_files_to_gcs_parallel( - project=self.__storage_client.project, # ty: ignore[invalid-argument-type] + project=self.__project, local_file_path_to_gcs_path_map=local_file_path_to_gcs_path_map, ) else: @@ -144,7 +149,7 @@ def upload_files_to_gcs( _upload_file_to_gcs( source_file_path=source_file_path, dest_gcs_path=dest_gcs_path, - project=self.__storage_client.project, + project=self.__project, gcs_utils_client=self.__storage_client, ) diff --git a/tests/unit/utils/gcs_test.py b/tests/unit/utils/gcs_test.py index 1a55978d3..e67735887 100644 --- a/tests/unit/utils/gcs_test.py +++ b/tests/unit/utils/gcs_test.py @@ -16,6 +16,7 @@ class TestGcsUtils(TestCase): def test_upload_from_filelike(self, mock_storage_client): # Mock the GCS client, bucket, and blob mock_client = MagicMock(spec=Client) + mock_client.project = "test-project" mock_bucket = MagicMock(spec=Bucket) mock_blob = MagicMock(spec=Blob) @@ -40,9 +41,23 @@ def test_upload_from_filelike(self, mock_storage_client): filelike, content_type="application/octet-stream" ) + def test_init_succeeds_when_client_has_no_project(self): + # storage.Client(project=None) with an explicit None is a documented + # "no project" mode where client.project is None. GcsUtils must still + # be constructible in that mode (it is the default code path, e.g. + # FileLoader() with no project). + mock_client = MagicMock(spec=Client) + mock_client.project = None + + with patch("gigl.common.utils.gcs.storage.Client", return_value=mock_client): + gcs_utils = GcsUtils() + + self.assertIsNotNone(gcs_utils) + def test_delete_files_in_bucket_dir(self): # Mock the GCS client, bucket, and blob mock_client = MagicMock(spec=Client) + mock_client.project = "test-project" mock_bucket = MagicMock(spec=Bucket) non_existent_bucket = "test-bucket"