Skip to content

Commit e1c3ecb

Browse files
committed
adjust udevadm calls, increase logging amount
udevadm when called concurrently may potentially redo symlinks. Adding a lock, so if multiple pods attach at the same time, we don't call udevadm concurrently. Adjusts the order so we first check if the device is available, before doing udevadm calls. Adjust order of udevadm calls itself: first trigger, then settle. Reason: - settle waits for new events to finish - trigger triggers new events but doesn't wait for them Improve logging and increase the amount of logs (in debug level) to potentially debug issues with more data.
1 parent 1daca2b commit e1c3ecb

File tree

8 files changed

+157
-63
lines changed

8 files changed

+157
-63
lines changed

charts/csi-cloudscale/templates/daemonset.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ spec:
5353
args :
5454
- "--endpoint=$(CSI_ENDPOINT)"
5555
- "--url=$(CLOUDSCALE_API_URL)"
56+
- "--log-level={{ .Values.node.logLevel }}"
5657
{{- with .Values.node.resources }}
5758
resources:
5859
{{ toYaml . | indent 12 }}

charts/csi-cloudscale/templates/statefulset.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ spec:
7676
args :
7777
- "--endpoint=$(CSI_ENDPOINT)"
7878
- "--url=$(CLOUDSCALE_API_URL)"
79+
- "--log-level={{ .Values.controller.logLevel }}"
7980
{{- with .Values.controller.resources }}
8081
resources:
8182
{{ toYaml . | indent 12 }}

charts/csi-cloudscale/values.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ controller:
8787
tag: v3.5.6
8888
pullPolicy: IfNotPresent
8989
serviceAccountName:
90+
logLevel: info
9091
resources: {}
9192
# limits:
9293
# cpu: 100m
@@ -104,6 +105,7 @@ node:
104105
nodeSelector: {}
105106
tolerations: []
106107
serviceAccountName:
108+
logLevel: info
107109
resources: {}
108110
# limits:
109111
# cpu: 100m

cmd/cloudscale-csi-plugin/main.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"os"
2525

2626
"github.com/cloudscale-ch/csi-cloudscale/driver"
27+
"github.com/sirupsen/logrus"
2728
)
2829

2930
func main() {
@@ -32,6 +33,7 @@ func main() {
3233
token = flag.String("token", "", "cloudscale.ch access token")
3334
url = flag.String("url", "https://api.cloudscale.ch/", "cloudscale.ch API URL")
3435
version = flag.Bool("version", false, "Print the version and exit.")
36+
logLevel = flag.String("log-level", "info", "Log level (trace, debug, info, warn, error, fatal, panic)")
3537
)
3638
flag.Parse()
3739

@@ -44,7 +46,12 @@ func main() {
4446
os.Exit(0)
4547
}
4648

47-
drv, err := driver.NewDriver(*endpoint, *token, *url)
49+
level, err := logrus.ParseLevel(*logLevel)
50+
if err != nil {
51+
log.Fatalf("invalid log level %q: %v", *logLevel, err)
52+
}
53+
54+
drv, err := driver.NewDriver(*endpoint, *token, *url, level)
4855
if err != nil {
4956
log.Fatalln(err)
5057
}

driver/driver.go

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,9 @@ var (
4848

4949
// Driver implements the following CSI interfaces:
5050
//
51-
// csi.IdentityServer
52-
// csi.ControllerServer
53-
// csi.NodeServer
54-
//
51+
// csi.IdentityServer
52+
// csi.ControllerServer
53+
// csi.NodeServer
5554
type Driver struct {
5655
endpoint string
5756
serverId string
@@ -71,7 +70,7 @@ type Driver struct {
7170
// NewDriver returns a CSI plugin that contains the necessary gRPC
7271
// interfaces to interact with Kubernetes over unix domain sockets for
7372
// managaing cloudscale.ch Volumes
74-
func NewDriver(ep, token, urlstr string) (*Driver, error) {
73+
func NewDriver(ep, token, urlstr string, logLevel logrus.Level) (*Driver, error) {
7574
tokenSource := oauth2.StaticTokenSource(&oauth2.Token{
7675
AccessToken: token,
7776
})
@@ -95,7 +94,9 @@ func NewDriver(ep, token, urlstr string) (*Driver, error) {
9594
}
9695
cloudscaleClient.BaseURL = baseURL
9796

98-
log := logrus.New().WithFields(logrus.Fields{
97+
logger := logrus.New()
98+
logger.SetLevel(logLevel)
99+
log := logger.WithFields(logrus.Fields{
99100
"zone": zone,
100101
"node_id": serverId,
101102
"version": version,
@@ -172,7 +173,9 @@ func (d *Driver) Stop() {
172173

173174
// When building any packages that import version, pass the build/install cmd
174175
// ldflags like so:
175-
// go build -ldflags "-X github.com/cloudscale-ch/csi-cloudscale/driver.version=0.0.1"
176+
//
177+
// go build -ldflags "-X github.com/cloudscale-ch/csi-cloudscale/driver.version=0.0.1"
178+
//
176179
// GetVersion returns the current release version, as inserted at build time.
177180
func GetVersion() string {
178181
return version

driver/driver_test.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@ package driver
2020
import (
2121
"context"
2222
"errors"
23-
"github.com/google/uuid"
24-
"k8s.io/mount-utils"
2523
"math/rand"
2624
"net/http"
2725
"net/url"
@@ -30,9 +28,12 @@ import (
3028
"testing"
3129
"time"
3230

33-
"github.com/cloudscale-ch/cloudscale-go-sdk/v4"
31+
"github.com/google/uuid"
3432
"github.com/kubernetes-csi/csi-test/v5/pkg/sanity"
3533
"github.com/sirupsen/logrus"
34+
"k8s.io/mount-utils"
35+
36+
"github.com/cloudscale-ch/cloudscale-go-sdk/v4"
3637
)
3738

3839
func init() {
@@ -172,9 +173,9 @@ func (f *fakeMounter) HasRequiredSize(log *logrus.Entry, path string, requiredSi
172173
return true, nil
173174
}
174175

175-
func (f *fakeMounter) FinalizeVolumeAttachmentAndFindPath(logger *logrus.Entry, target string) (*string, error) {
176+
func (f *fakeMounter) FinalizeVolumeAttachmentAndFindPath(logger *logrus.Entry, target string) (string, error) {
176177
path := "SomePath"
177-
return &path, nil
178+
return path, nil
178179
}
179180

180181
type FakeVolumeServiceOperations struct {

driver/mounter.go

Lines changed: 108 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,23 @@ limitations under the License.
1818
package driver
1919

2020
import (
21+
"context"
2122
"encoding/json"
2223
"errors"
2324
"fmt"
24-
"io/ioutil"
25-
"k8s.io/mount-utils"
26-
kexec "k8s.io/utils/exec"
2725
"os"
2826
"os/exec"
2927
"path/filepath"
3028
"strconv"
3129
"strings"
30+
"sync"
3231
"syscall"
3332
"time"
3433

3534
"github.com/sirupsen/logrus"
3635
"golang.org/x/sys/unix"
36+
"k8s.io/mount-utils"
37+
kexec "k8s.io/utils/exec"
3738
)
3839

3940
const (
@@ -86,7 +87,7 @@ type Mounter interface {
8687

8788
// Used to find a path in /dev/disk/by-id with a serial that we have from
8889
// the cloudscale API.
89-
FinalizeVolumeAttachmentAndFindPath(logger *logrus.Entry, VolumeId string) (*string, error)
90+
FinalizeVolumeAttachmentAndFindPath(logger *logrus.Entry, VolumeId string) (string, error)
9091

9192
// GetStatistics returns capacity-related volume statistics for the given
9293
// volume path.
@@ -221,7 +222,29 @@ func (m *mounter) Mount(source, target, fsType string, luksContext LuksContext,
221222
source = luksSource
222223
}
223224

225+
// Resolve source symlink for debug logging
226+
resolvedSource, resolveErr := filepath.EvalSymlinks(source)
227+
if resolveErr != nil {
228+
m.log.WithFields(logrus.Fields{
229+
"source": source,
230+
"target": target,
231+
"fs_type": fsType,
232+
"options": options,
233+
"resolve_error": resolveErr,
234+
}).Debug("Mount: failed to resolve source symlink")
235+
} else {
236+
m.log.WithFields(logrus.Fields{
237+
"source": source,
238+
"resolved_source": resolvedSource,
239+
"target": target,
240+
"fs_type": fsType,
241+
"options": options,
242+
}).Debug("Mount: resolved source device")
243+
}
244+
224245
m.log.WithFields(logrus.Fields{
246+
"source": source,
247+
"target": target,
225248
"options": options,
226249
}).Info("executing mount command")
227250
err := m.kMounter.Mount(source, target, fsType, options)
@@ -418,72 +441,111 @@ See the License for the specific language governing permissions and
418441
limitations under the License.
419442
*/
420443

421-
func guessDiskIDPathByVolumeID(volumeID string) *string {
444+
func guessDiskIDPathByVolumeID(volumeID string, logger *logrus.Entry) string {
422445
// Get the first part of the UUID.
423446
// The linux kernel limits volume serials to 20 bytes:
424447
// include/uapi/linux/virtio_blk.h:#define VIRTIO_BLK_ID_BYTES 20 /* ID string length */
425448
linuxSerial := volumeID[:20]
426449

427450
globExpr := diskIDPath + "/*" + linuxSerial + "*"
428451
matches, _ := filepath.Glob(globExpr)
452+
453+
logger.WithFields(logrus.Fields{
454+
"volumeID": volumeID,
455+
"linuxSerial": linuxSerial,
456+
"matches": matches,
457+
}).Debug("guessDiskIDPathByVolumeID")
458+
429459
if len(matches) > 0 {
430-
return &matches[0]
460+
return matches[0]
431461
}
432-
return nil
462+
return ""
433463
}
434464

435-
func (m *mounter) FinalizeVolumeAttachmentAndFindPath(logger *logrus.Entry, volumeID string) (*string, error) {
465+
func (m *mounter) FinalizeVolumeAttachmentAndFindPath(logger *logrus.Entry, volumeID string) (string, error) {
436466
numTries := 0
437467
for {
438-
probeAttachedVolume(logger)
439-
440-
diskIDPath := guessDiskIDPathByVolumeID(volumeID)
441-
if diskIDPath != nil {
468+
diskIDPath := guessDiskIDPathByVolumeID(volumeID, logger)
469+
if diskIDPath != "" {
470+
// Resolve and log the actual device for debugging
471+
resolved, err := filepath.EvalSymlinks(diskIDPath)
472+
if err != nil {
473+
logger.WithFields(logrus.Fields{
474+
"disk_id_path": diskIDPath,
475+
"error": err,
476+
}).Debug("FinalizeVolumeAttachmentAndFindPath: found path but failed to resolve symlink")
477+
} else {
478+
logger.WithFields(logrus.Fields{
479+
"disk_id_path": diskIDPath,
480+
"resolved_device": resolved,
481+
"num_tries": numTries,
482+
}).Debug("FinalizeVolumeAttachmentAndFindPath: found device path")
483+
}
442484
return diskIDPath, nil
443485
}
444486

487+
logger.WithFields(logrus.Fields{
488+
"num_tries": numTries,
489+
}).Debug("FinalizeVolumeAttachmentAndFindPath: device not found, probing")
490+
491+
probeAttachedVolume(logger)
492+
445493
numTries++
446-
if numTries == 10 {
494+
if numTries == 30 {
447495
break
448496
}
449497
time.Sleep(time.Second)
450498
}
451-
return nil, errors.New("Could not attach disk: Timeout after 10s")
499+
return "", errors.New("FinalizeVolumeAttachmentAndFindPath: Timeout after 10s")
452500
}
453501

454-
func probeAttachedVolume(logger *logrus.Entry) error {
455-
// rescan scsi bus
456-
scsiHostRescan()
502+
func runCmdWithTimeout(name string, args []string, logger *logrus.Entry, timeout time.Duration) {
503+
ctx, cancel := context.WithTimeout(context.Background(), timeout)
504+
defer cancel()
457505

458-
// udevadm settle waits for udevd to process the device creation
459-
// events for all hardware devices, thus ensuring that any device
460-
// nodes have been created successfully before proceeding.
461-
argsSettle := []string{"settle"}
462-
cmdSettle := exec.Command("udevadm", argsSettle...)
463-
_, errSettle := cmdSettle.CombinedOutput()
464-
if errSettle != nil {
465-
logger.Errorf("error running udevadm settle %v\n", errSettle)
506+
out, err := exec.CommandContext(ctx, name, args...).CombinedOutput()
507+
if err != nil {
508+
logger.WithError(err).
509+
WithFields(logrus.Fields{"out": out, "name": name, "args": args}).
510+
Warn("unable to run cmd " + name)
466511
}
512+
}
513+
514+
var probeLock sync.Mutex
515+
516+
func probeAttachedVolume(logger *logrus.Entry) {
517+
const triggerTimeout = 15 * time.Second
518+
519+
// host rescan and udevadm are global actions and if run concurrently, may run into issues with
520+
// symlinking and partial updates.
521+
probeLock.Lock()
522+
defer probeLock.Unlock()
523+
524+
// rescan scsi bus
525+
logger.Debug("probeAttachedVolume: rescanning SCSI hosts")
526+
scsiHostRescan(logger)
467527

468-
args := []string{"trigger"}
469-
cmd := exec.Command("udevadm", args...)
470-
_, err := cmd.CombinedOutput()
528+
logger.Debug("probeAttachedVolume: running udevadm trigger")
529+
runCmdWithTimeout("udevadm", []string{"trigger"}, logger, triggerTimeout)
530+
531+
logger.Debug("probeAttachedVolume: running udevadm settle")
532+
runCmdWithTimeout("udevadm", []string{"settle"}, logger, triggerTimeout)
533+
534+
logger.Debugf("probeAttachedVolume: done")
535+
}
536+
537+
func scsiHostRescan(logger *logrus.Entry) {
538+
const scsiPath = "/sys/class/scsi_host/"
539+
dirs, err := os.ReadDir(scsiPath)
471540
if err != nil {
472-
logger.Errorf("error running udevadm trigger %v\n", err)
473-
return err
541+
logger.WithError(err).Warn("scsiHostRescan: cannot read scsi_host directory")
542+
return
474543
}
475-
logger.Debugf("Successfully probed all attachments")
476-
return nil
477-
}
478544

479-
func scsiHostRescan() {
480-
scsiPath := "/sys/class/scsi_host/"
481-
if dirs, err := ioutil.ReadDir(scsiPath); err == nil {
482-
for _, f := range dirs {
483-
name := scsiPath + f.Name() + "/scan"
484-
data := []byte("- - -")
485-
ioutil.WriteFile(name, data, 0666)
486-
}
545+
for _, f := range dirs {
546+
name := scsiPath + f.Name() + "/scan"
547+
data := []byte("- - -")
548+
_ = os.WriteFile(name, data, 0666)
487549
}
488550
}
489551

@@ -494,20 +556,20 @@ func (m *mounter) GetDeviceName(mounter mount.Interface, mountPath string) (stri
494556

495557
// FindAbsoluteDeviceByIDPath follows the /dev/disk/by-id symlink to find the absolute path of a device
496558
func (m *mounter) FindAbsoluteDeviceByIDPath(volumeName string) (string, error) {
497-
path := guessDiskIDPathByVolumeID(volumeName)
498-
if path == nil {
559+
path := guessDiskIDPathByVolumeID(volumeName, m.log)
560+
if path == "" {
499561
return "", fmt.Errorf("could not find device-path for volume: %s", volumeName)
500562
}
501563

502564
// EvalSymlinks returns relative link if the file is not a symlink
503565
// so we do not have to check if it is symlink prior to evaluation
504-
resolved, err := filepath.EvalSymlinks(*path)
566+
resolved, err := filepath.EvalSymlinks(path)
505567
if err != nil {
506-
return "", fmt.Errorf("could not resolve symlink %q: %v", *path, err)
568+
return "", fmt.Errorf("could not resolve symlink %q: %v", path, err)
507569
}
508570

509571
if !strings.HasPrefix(resolved, "/dev") {
510-
return "", fmt.Errorf("resolved symlink %q for %q was unexpected", resolved, *path)
572+
return "", fmt.Errorf("resolved symlink %q for %q was unexpected", resolved, path)
511573
}
512574

513575
return resolved, nil

0 commit comments

Comments
 (0)