Skip to content

Commit 8ad1a7c

Browse files
authored
Merge pull request #158 from casparvl/link_nvidia_drivers
Update NVIDIA driver symlink script
2 parents b041f50 + 2abbd00 commit 8ad1a7c

1 file changed

Lines changed: 134 additions & 3 deletions

File tree

scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh

Lines changed: 134 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -447,9 +447,10 @@ find_cuda_libraries_on_host() {
447447
fi
448448
}
449449

450+
# Symlink structure changed from 2025.06 onwards. This function reflects the symlinking as it was done for EESSI 2023.06
450451
# Actually symlinks the Matched libraries to correct folders.
451452
# Then also creates "host" and "latest" folder symlinks
452-
symlink_mode () {
453+
symlink_mode_202306 () {
453454
# First let's make sure the driver libraries are not already in place
454455
# Have to link drivers = True
455456
link_drivers=1
@@ -617,6 +618,132 @@ symlink_mode () {
617618

618619
}
619620

621+
622+
# Symlink structure changed from 2025.06 onwards. This function reflects the new symlinking.
#######################################
# Symlinks the matched host NVIDIA libraries into the directory that
# ${EESSI_EPREFIX}/lib/nvidia finally resolves to, and records the driver and
# CUDA versions that were linked in driver_version.txt / cuda_version.txt there.
#
# Globals:
#   EESSI_EPREFIX, EESSI_VERSION, EESSI_CVMFS_REPO        (read)
#   HOST_GPU_DRIVER_VERSION, HOST_GPU_CUDA_VERSION        (read)
#   MATCHED_LIBRARIES (array of library paths)            (read)
#   nvidia_trusted_dir, install_target,
#   host_injection_driver_version_file                    (written)
#   link_drivers  - 1 if linking was needed, 0 if already up to date (written)
#   drivers_linked - 1 if links were (re)created in this run, else 0 (written)
# Arguments:
#   None
# Helpers used (defined elsewhere in this script):
#   log_verbose, echo_yellow, echo_green, fatal_error,
#   check_global_read, create_directory_structure
# Side effects:
#   Changes the current working directory to the install target when linking.
#######################################
symlink_mode () {
    # Scratch variables stay local so they cannot leak into the rest of the script
    local msg target1 target2 lib_name target library

    # First let's make sure the driver libraries are not already in place
    # Have to link drivers = True
    link_drivers=1

    # Do some checks on existence of links and that we don't end up at /dev/null (the default),
    # so we can print some informative information.
    # One downside is that we can't explicitly check if something is a variant symlink, so we'll
    # just assume that if it's a link AND it lives in our CVMFS repository, it must be a variant symlink
    nvidia_trusted_dir="${EESSI_EPREFIX}/lib/nvidia"
    if [[ -L "$nvidia_trusted_dir" ]]; then
        target1=$(readlink "$nvidia_trusted_dir")
        log_verbose "$nvidia_trusted_dir is a CVMFS variant symlink (EESSI_${EESSI_VERSION//./}_NVIDIA_OVERRIDE) currently pointing to $target1"
        # If this is a link, and if it lives in the EESSI_CVMFS_REPO, we assume this is a variant symlink
        if [[ -L "$target1" && "$target1" == "$EESSI_CVMFS_REPO"/* ]]; then
            target2=$(readlink "$target1")
            msg="${target1} appears to be a CVMFS variant symlink (EESSI_NVIDIA_OVERRIDE_DEFAULT) currently pointing to ${target2}."
            msg="${msg} Proceeding to install host symlinks in ${target2}."
            log_verbose "${msg}"

            # Check if target2 isn't /dev/null (the default target of the EESSI_NVIDIA_OVERRIDE_DEFAULT variant symlink)
            # If it is, suggest setting EESSI_NVIDIA_OVERRIDE_DEFAULT or EESSI_${EESSI_VERSION//./}_NVIDIA_OVERRIDE
            if [[ "$target2" == /dev/null ]]; then
                msg="${nvidia_trusted_dir} is a symlink pointing to ${target1}, which is a symlink pointing to ${target2}\n"
                msg="${msg}If you want to symlink the drivers in a single location for all EESSI versions, please define"
                msg="${msg} the EESSI_NVIDIA_OVERRIDE_DEFAULT variant symlink in your local CVMFS configuration to point to"
                msg="${msg} writeable location. This will change the target of symlink ${target1}.\n"
                msg="${msg}If you want to symlink the drivers only for this version of EESSI (${EESSI_VERSION}), please define"
                msg="${msg} the EESSI_${EESSI_VERSION//./}_NVIDIA_OVERRIDE variant symlink in your local CVMFS configuration to point to"
                msg="${msg} writeable location. This will change the target of symlink ${nvidia_trusted_dir}.\n"
                fatal_error "${msg}"
            fi
        else
            msg="$target1 does not seem to be a CVMFS variant symlink, suggesting that EESSI_${EESSI_VERSION//./}_NVIDIA_OVERRIDE"
            msg="${msg} was set in the CVMFS config. Proceeding to install host symlinks in $target1."
            log_verbose "${msg}"
        fi
    else
        msg="$nvidia_trusted_dir is expected to be a symlink, but it's not. This will likely fail"
        msg="${msg} as CVMFS repositories are read-only. Proceeding anyway, but expect this to fail."
        echo_yellow "${msg}"
    fi

    # Make sure that target of nvidia_trusted_dir variant symlink is an existing directory
    install_target=$(readlink -f "$nvidia_trusted_dir")
    # readlink -f prints nothing when the path cannot be resolved; an empty
    # target would make the cleanup and 'cd' below act on the wrong location.
    if [[ -z "$install_target" ]]; then
        fatal_error "Unable to resolve the final target of ${nvidia_trusted_dir}"
    fi
    echo "Ensure the final target of ${nvidia_trusted_dir} (${install_target}) exists"
    log_verbose "Target directory in which driver symlinks will be installed: ${install_target}"
    if [ ! -d "$install_target" ]; then
        check_global_read
        if ! create_directory_structure "$install_target"; then
            fatal_error "No write permissions to directory ${install_target}"
        fi
    fi

    # Define file to store driver version that was symlinked
    host_injection_driver_version_file="${install_target}/driver_version.txt"
    log_verbose "host_injection_driver_version_file: ${host_injection_driver_version_file}"

    # Check if drivers are already linked with correct version
    # This is done by comparing host_injection_driver_version_file (driver_version.txt)
    # This is needed when updating GPU drivers.
    if [ -e "$host_injection_driver_version_file" ]; then
        # -F/-x: compare the version as a literal, whole line. A plain regex match
        # would treat '.' as a wildcard and accept e.g. 550.54 for a recorded 550.54.14.
        if grep -Fxq -- "$HOST_GPU_DRIVER_VERSION" "$host_injection_driver_version_file"; then
            echo_green "The host GPU driver libraries (v${HOST_GPU_DRIVER_VERSION}) have already been linked! (based on ${host_injection_driver_version_file})"
            # The GPU libraries were already linked for this version of CUDA driver
            # Have to link drivers = False
            link_drivers=0
        else
            # There's something there but it is out of date
            echo_yellow "The host GPU driver libraries version have changed. Now its: (v${HOST_GPU_DRIVER_VERSION})"
            echo_yellow "Cleaning out outdated symlinks."
            # ${var:?} aborts instead of expanding to '/*' should the variable ever be empty
            rm -- "${install_target:?}"/* || fatal_error "Unable to remove files under '${install_target}'."
        fi
    fi

    # Link all matched_libraries from Nvidia to correct host_injection folder
    # This step is only run, when linking of drivers is needed (eg. link_drivers==1)
    # Setup variable to track if some drivers were actually linked this run.
    drivers_linked=0

    # Have to link drivers
    if [ "$link_drivers" -eq 1 ]; then
        # Link the matched libraries

        cd "${install_target}" || fatal_error "Failed to cd to ${install_target}"
        log_verbose "Changed directory to: $PWD"

        # Make symlinks to all the interesting libraries
        # Loop over each matched library
        for library in "${MATCHED_LIBRARIES[@]}"; do
            log_verbose "Linking library: ${library}"

            # Get just the library filename
            lib_name=$(basename "$library")

            # Check if the symlink already exists
            if [ -L "$lib_name" ]; then
                # Check if it's pointing to the same target
                target=$(readlink "$lib_name")
                if [ "$target" = "$library" ]; then
                    log_verbose "Symlink for $lib_name already exists and points to correct target"
                    continue
                else
                    log_verbose "Symlink for $lib_name exists but points to wrong target: $target, updating..."
                    rm -- "$lib_name" || fatal_error "Unable to remove outdated symlink $lib_name in $PWD"
                fi
            fi

            # Create a symlink in the current directory
            # and check if the symlink was created successfully
            if ! ln -s -- "$library" .; then
                fatal_error "Error: Failed to create symlink for library $library in $PWD"
            fi
        done

        # Inject driver and CUDA versions into the directory.
        # A failed write must be fatal: without driver_version.txt the next run
        # cannot tell which driver version the links belong to.
        printf '%s\n' "$HOST_GPU_DRIVER_VERSION" > driver_version.txt \
            || fatal_error "Failed to write driver_version.txt in $PWD"
        printf '%s\n' "$HOST_GPU_CUDA_VERSION" > cuda_version.txt \
            || fatal_error "Failed to write cuda_version.txt in $PWD"

        drivers_linked=1
    fi
}
746+
620747
# Logging function for verbose mode
621748
# TODO: move to utils?
622749
log_verbose() {
@@ -635,7 +762,7 @@ check_eessi_initialised
635762

636763
# Verify nvidia-smi availability
637764
log_verbose "Checking for nvidia-smi command..."
638-
command -v nvidia-smi >/dev/null 2>&1 || { echo_yellow "nvidia-smi not found, this script won't do anything useful"; return 1; }
765+
command -v nvidia-smi >/dev/null 2>&1 || { echo_yellow "nvidia-smi not found, this script won't do anything useful"; exit 1; }
639766

640767
# Parse command line arguments
641768
while [[ "$#" -gt 0 ]]; do
@@ -685,7 +812,11 @@ fi
685812

686813
# === 5b. Symlink Mode ===
687814
# If we haven't already exited, we may need to create the symlinks
688-
symlink_mode
815+
# Select the symlink layout by EESSI version: 2023.06 uses symlink_mode_202306,
# every other version uses symlink_mode.
case "$EESSI_VERSION" in
    '2023.06')
        symlink_mode_202306
        ;;
    *)
        symlink_mode
        ;;
esac
689820

690821
# If everything went OK, show success message
691822
echo_green "Host NVIDIA GPU drivers linked successfully for EESSI"

0 commit comments

Comments
 (0)