@@ -507,3 +507,201 @@ def test_prepare_parameter_ranges_for_tuning(self):
507507 assert len (processed_ranges ["ContinuousParameterRanges" ]) == 1
508508 assert len (processed_ranges ["IntegerParameterRanges" ]) == 1
509509 assert len (processed_ranges ["CategoricalParameterRanges" ]) == 1
510+
511+
512+ class TestUploadSourceCodeIgnorePatterns :
513+ """Test _upload_source_code_and_configure_hyperparameters respects ignore_patterns."""
514+
515+ @pytest .fixture
516+ def mock_model_trainer_with_source (self , tmp_path ):
517+ """Create a mock ModelTrainer with source_code configured."""
518+ # Create test source directory with files that should be included/ignored
519+ source_dir = tmp_path / "source"
520+ source_dir .mkdir ()
521+
522+ # Create files that should be included
523+ (source_dir / "train.py" ).write_text ("# training script" )
524+ (source_dir / "utils.py" ).write_text ("# utils" )
525+
526+ # Create files that should be ignored
527+ (source_dir / ".env" ).write_text ("SECRET=123" )
528+ (source_dir / "model.pt" ).write_text ("fake model weights" )
529+
530+ # Create directories that should be ignored
531+ pycache_dir = source_dir / "__pycache__"
532+ pycache_dir .mkdir ()
533+ (pycache_dir / "train.cpython-310.pyc" ).write_text ("bytecode" )
534+
535+ git_dir = source_dir / ".git"
536+ git_dir .mkdir ()
537+ (git_dir / "config" ).write_text ("git config" )
538+
539+ # Create subdirectory with mixed files
540+ sub_dir = source_dir / "subdir"
541+ sub_dir .mkdir ()
542+ (sub_dir / "helper.py" ).write_text ("# helper" )
543+ (sub_dir / "cache.pyc" ).write_text ("bytecode" )
544+
545+ # Create mock model trainer
546+ trainer = MagicMock ()
547+ trainer .sagemaker_session = MagicMock ()
548+ trainer .sagemaker_session .boto_session = MagicMock ()
549+ trainer .sagemaker_session .boto_session .client .return_value = MagicMock ()
550+ trainer .sagemaker_session .boto_region_name = "us-west-2"
551+ trainer .sagemaker_session .default_bucket .return_value = "test-bucket"
552+ trainer .base_job_name = "test-job"
553+ trainer .hyperparameters = {}
554+
555+ # Create source_code mock
556+ source_code = MagicMock ()
557+ source_code .source_dir = str (source_dir )
558+ source_code .entry_script = "train.py"
559+ source_code .ignore_patterns = [".env" , ".git" , "__pycache__" , "*.pt" , "*.pyc" ]
560+ trainer .source_code = source_code
561+
562+ return trainer , source_dir
563+
564+ def test_upload_source_code_respects_ignore_patterns (self , mock_model_trainer_with_source ):
565+ """Test that files matching ignore_patterns are excluded from tarball."""
566+ import tarfile
567+ import tempfile
568+
569+ trainer , source_dir = mock_model_trainer_with_source
570+
571+ # Call the method
572+ HyperparameterTuner ._upload_source_code_and_configure_hyperparameters (trainer )
573+
574+ # Get the uploaded tarball path from the mock call
575+ s3_client = trainer .sagemaker_session .boto_session .client .return_value
576+ upload_call = s3_client .upload_file .call_args
577+ assert upload_call is not None
578+
579+ # The tarball was already deleted, so we need to test by recreating the scenario
580+ # Let's verify the logic by checking what would have been uploaded
581+ # We do this by examining the method behavior directly
582+
583+ # For a more thorough test, let's capture what files were added
584+ # by patching tarfile.open
585+ with patch ("tarfile.open" ) as mock_tarfile :
586+ mock_tar = MagicMock ()
587+ mock_tarfile .return_value .__enter__ .return_value = mock_tar
588+
589+ # Reset the mock to test again
590+ trainer .hyperparameters = {}
591+
592+ HyperparameterTuner ._upload_source_code_and_configure_hyperparameters (trainer )
593+
594+ # Check which files were added to the tarball
595+ added_files = [call [0 ][0 ] for call in mock_tar .add .call_args_list ]
596+ added_arcnames = [call [1 ]["arcname" ] for call in mock_tar .add .call_args_list ]
597+
598+ # Should include these files
599+ assert any ("train.py" in f for f in added_files )
600+ assert any ("utils.py" in f for f in added_files )
601+ assert any ("helper.py" in f for f in added_files )
602+
603+ # Should NOT include these files (ignored patterns)
604+ assert not any (".env" in f for f in added_files )
605+ assert not any ("model.pt" in f for f in added_files )
606+ assert not any ("__pycache__" in f for f in added_files )
607+ assert not any (".git" in f for f in added_files )
608+ assert not any (".pyc" in f for f in added_files )
609+
610+ def test_upload_source_code_ignores_directories (self , mock_model_trainer_with_source ):
611+ """Test that directories matching ignore_patterns are completely skipped."""
612+ trainer , source_dir = mock_model_trainer_with_source
613+
614+ with patch ("tarfile.open" ) as mock_tarfile :
615+ mock_tar = MagicMock ()
616+ mock_tarfile .return_value .__enter__ .return_value = mock_tar
617+
618+ HyperparameterTuner ._upload_source_code_and_configure_hyperparameters (trainer )
619+
620+ added_files = [call [0 ][0 ] for call in mock_tar .add .call_args_list ]
621+
622+ # No files from __pycache__ directory should be added
623+ assert not any ("__pycache__" in f for f in added_files )
624+ # No files from .git directory should be added
625+ assert not any (".git" in f for f in added_files )
626+
627+ def test_upload_source_code_with_empty_ignore_patterns (self , tmp_path ):
628+ """Test backward compatibility with None/empty ignore_patterns."""
629+ # Create simple source directory
630+ source_dir = tmp_path / "source"
631+ source_dir .mkdir ()
632+ (source_dir / "train.py" ).write_text ("# training script" )
633+ (source_dir / ".env" ).write_text ("SECRET=123" )
634+
635+ # Create mock trainer with no ignore_patterns
636+ trainer = MagicMock ()
637+ trainer .sagemaker_session = MagicMock ()
638+ trainer .sagemaker_session .boto_session = MagicMock ()
639+ trainer .sagemaker_session .boto_session .client .return_value = MagicMock ()
640+ trainer .sagemaker_session .boto_region_name = "us-west-2"
641+ trainer .sagemaker_session .default_bucket .return_value = "test-bucket"
642+ trainer .base_job_name = "test-job"
643+ trainer .hyperparameters = {}
644+
645+ source_code = MagicMock ()
646+ source_code .source_dir = str (source_dir )
647+ source_code .entry_script = "train.py"
648+ source_code .ignore_patterns = None # No ignore patterns
649+ trainer .source_code = source_code
650+
651+ with patch ("tarfile.open" ) as mock_tarfile :
652+ mock_tar = MagicMock ()
653+ mock_tarfile .return_value .__enter__ .return_value = mock_tar
654+
655+ HyperparameterTuner ._upload_source_code_and_configure_hyperparameters (trainer )
656+
657+ added_files = [call [0 ][0 ] for call in mock_tar .add .call_args_list ]
658+
659+ # Both files should be added when no ignore_patterns
660+ assert any ("train.py" in f for f in added_files )
661+ assert any (".env" in f for f in added_files )
662+
663+ def test_upload_source_code_with_default_ignore_patterns (self , tmp_path ):
664+ """Test that default ignore patterns from SourceCode class work."""
665+ # Create source directory with default-ignored files
666+ source_dir = tmp_path / "source"
667+ source_dir .mkdir ()
668+ (source_dir / "train.py" ).write_text ("# training script" )
669+ (source_dir / ".DS_Store" ).write_text ("mac file" )
670+
671+ cache_dir = source_dir / ".cache"
672+ cache_dir .mkdir ()
673+ (cache_dir / "data.json" ).write_text ("{}" )
674+
675+ # Create mock trainer with default ignore_patterns
676+ trainer = MagicMock ()
677+ trainer .sagemaker_session = MagicMock ()
678+ trainer .sagemaker_session .boto_session = MagicMock ()
679+ trainer .sagemaker_session .boto_session .client .return_value = MagicMock ()
680+ trainer .sagemaker_session .boto_region_name = "us-west-2"
681+ trainer .sagemaker_session .default_bucket .return_value = "test-bucket"
682+ trainer .base_job_name = "test-job"
683+ trainer .hyperparameters = {}
684+
685+ source_code = MagicMock ()
686+ source_code .source_dir = str (source_dir )
687+ source_code .entry_script = "train.py"
688+ # Default patterns as defined in SourceCode class
689+ source_code .ignore_patterns = [
690+ ".env" , ".git" , "__pycache__" , ".DS_Store" , ".cache" , ".ipynb_checkpoints"
691+ ]
692+ trainer .source_code = source_code
693+
694+ with patch ("tarfile.open" ) as mock_tarfile :
695+ mock_tar = MagicMock ()
696+ mock_tarfile .return_value .__enter__ .return_value = mock_tar
697+
698+ HyperparameterTuner ._upload_source_code_and_configure_hyperparameters (trainer )
699+
700+ added_files = [call [0 ][0 ] for call in mock_tar .add .call_args_list ]
701+
702+ # train.py should be included
703+ assert any ("train.py" in f for f in added_files )
704+ # .DS_Store should be ignored
705+ assert not any (".DS_Store" in f for f in added_files )
706+ # .cache directory should be ignored
707+ assert not any (".cache" in f for f in added_files )
0 commit comments