Skip to content

Commit

Permalink
Added improved logging (#101)
Browse files Browse the repository at this point in the history
* added image size unit
* fixed structured logging
* fixed label in image-size log
* added image size note in readme
* update tag in Makefile and Chart.yaml

Co-authored-by: Edgar Sarkisian <edgar.sarkisian@acqiua.com>
Co-authored-by: Mugdha Adhav <mugdha.adhav@gmail.com>
Co-authored-by: Mriyam Tamuli <mbtamuli@gmail.com>
  • Loading branch information
4 people authored Jan 17, 2024
1 parent 63ba8b7 commit c1787b4
Show file tree
Hide file tree
Showing 8 changed files with 66 additions and 14 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
VERSION ?= v1.0.1
VERSION ?= v1.1.0

IMAGE_BUILDER ?= docker
IMAGE_BUILD_CMD ?= buildx
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -174,3 +174,6 @@ See [test/sanity](https://github.com/warm-metal/csi-driver-image/tree/master/tes
### E2E test

See [test/e2e](https://github.com/warm-metal/csi-driver-image/tree/master/test/e2e).

## Note on logging image size
Image sizes are logged after they finish pulling. We've noticed that for smaller images, usually under 1KiB, containerd may report an incorrect image size. An issue has been raised in the containerd GitHub repository: https://github.com/containerd/containerd/issues/9641.
4 changes: 2 additions & 2 deletions charts/warm-metal-csi-driver/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 1.0.1
version: 1.1.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
appVersion: v1.0.1
appVersion: v1.1.0
6 changes: 5 additions & 1 deletion cmd/plugin/node_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (

"github.com/container-storage-interface/spec/lib/go/csi"
"github.com/containerd/containerd/reference/docker"
"github.com/google/uuid"
"github.com/warm-metal/csi-driver-image/pkg/backend"
"github.com/warm-metal/csi-driver-image/pkg/metrics"
"github.com/warm-metal/csi-driver-image/pkg/mountexecutor"
Expand Down Expand Up @@ -59,7 +60,8 @@ type NodeServer struct {
}

func (n NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (resp *csi.NodePublishVolumeResponse, err error) {
klog.Infof("mount request: %s", req.String())
valuesLogger := klog.LoggerWithValues(klog.NewKlogr(), "pod-name", req.VolumeContext["pod-name"], "namespace", req.VolumeContext["namespace"], "uid", req.VolumeContext["uid"], "request-id", uuid.NewString())
valuesLogger.Info("Incoming NodePublishVolume request", "request string", req.String())
if len(req.VolumeId) == 0 {
err = status.Error(codes.InvalidArgument, "VolumeId is missing")
return
Expand Down Expand Up @@ -134,6 +136,7 @@ func (n NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishV
PullAlways: pullAlways,
Image: image,
PullSecrets: req.Secrets,
Logger: valuesLogger,
}

if e := n.pullExecutor.StartPulling(po); e != nil {
Expand All @@ -157,6 +160,7 @@ func (n NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishV
TargetPath: req.TargetPath,
VolumeCapability: req.VolumeCapability,
ReadOnly: req.Readonly,
Logger: valuesLogger,
}

if e := n.mountExecutor.StartMounting(o); e != nil {
Expand Down
16 changes: 16 additions & 0 deletions cmd/plugin/node_server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ func TestNodePublishVolumeAsync(t *testing.T) {
VolumeContext: map[string]string{
// so that the test would always attempt to pull an image
ctxKeyPullAlways: "true",
// to see improved logs
"pod-name": "test-pod",
"namespace": "test-namespace",
"uid": "test-uid",
},
VolumeCapability: &csi.VolumeCapability{
AccessMode: &csi.VolumeCapability_AccessMode{
Expand Down Expand Up @@ -179,6 +183,10 @@ func TestNodePublishVolumeSync(t *testing.T) {
VolumeContext: map[string]string{
// so that the test would always attempt to pull an image
ctxKeyPullAlways: "true",
// to see improved logs
"pod-name": "test-pod",
"namespace": "test-namespace",
"uid": "test-uid",
},
VolumeCapability: &csi.VolumeCapability{
AccessMode: &csi.VolumeCapability_AccessMode{
Expand Down Expand Up @@ -361,6 +369,10 @@ func TestMetrics(t *testing.T) {
VolumeContext: map[string]string{
// so that the test would always attempt to pull an image
ctxKeyPullAlways: "true",
// to see improved logs
"pod-name": "test-pod",
"namespace": "test-namespace",
"uid": "test-uid",
},
VolumeCapability: &csi.VolumeCapability{
AccessMode: &csi.VolumeCapability_AccessMode{
Expand All @@ -381,6 +393,10 @@ func TestMetrics(t *testing.T) {
VolumeContext: map[string]string{
// so that the test would always attempt to pull an image
ctxKeyPullAlways: "true",
// to see improved logs
"pod-name": "test-pod",
"namespace": "test-namespace",
"uid": "test-uid",
},
VolumeCapability: &csi.VolumeCapability{
AccessMode: &csi.VolumeCapability_AccessMode{
Expand Down
20 changes: 14 additions & 6 deletions pkg/mountexecutor/mountexecutor.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ type MountOptions struct {
TargetPath string
VolumeCapability *csi.VolumeCapability
ReadOnly bool
Logger klog.Logger
}

// MountExecutor executes mount
Expand All @@ -59,10 +60,12 @@ func NewMountExecutor(o *MountExecutorOptions) *MountExecutor {
// StartMounting starts the mounting
func (m *MountExecutor) StartMounting(o *MountOptions) error {

o.Logger.Info("Mounting image", "image", o.NamedRef.Name())
if pullstatus.Get(o.NamedRef) != pullstatus.Pulled || mountstatus.Get(o.TargetPath) == mountstatus.StillMounting {
klog.Infof("image '%s' hasn't been pulled yet (status: %s) or volume is still mounting (status: %s)",
o.NamedRef.Name(),
pullstatus.Get(o.NamedRef), mountstatus.Get(o.TargetPath))
o.Logger.Info("Could not mount image because image hasn't finshed pulling or volume is still mounting",
"image", o.NamedRef.Name(),
"pull-status", pullstatus.Get(o.NamedRef),
"mount-status", mountstatus.Get(o.TargetPath))
return nil
}

Expand All @@ -74,11 +77,14 @@ func (m *MountExecutor) StartMounting(o *MountOptions) error {
mountstatus.Update(o.TargetPath, mountstatus.StillMounting)
startTime := time.Now()
if err := m.mounter.Mount(o.Context, o.VolumeId, backend.MountTarget(o.TargetPath), o.NamedRef, ro); err != nil {
o.Logger.Error(err, "mount error")
metrics.OperationErrorsCount.WithLabelValues("StartMounting").Inc()
mountstatus.Update(o.TargetPath, mountstatus.Errored)
return err
}
metrics.ImageMountTime.WithLabelValues(metrics.Sync).Observe(time.Since(startTime).Seconds())
elapsed := time.Since(startTime)
metrics.ImageMountTime.WithLabelValues(metrics.Sync).Observe(elapsed.Seconds())
o.Logger.Info("Finished mounting", "image", o.NamedRef.Name(), "mount-duration", elapsed)
mountstatus.Update(o.TargetPath, mountstatus.Mounted)
return nil
}
Expand All @@ -92,13 +98,15 @@ func (m *MountExecutor) StartMounting(o *MountOptions) error {
mountstatus.Update(o.TargetPath, mountstatus.StillMounting)
startTime := time.Now()
if err := m.mounter.Mount(ctx, o.VolumeId, backend.MountTarget(o.TargetPath), o.NamedRef, ro); err != nil {
klog.Errorf("mount err: %v", err.Error())
o.Logger.Error(err, "mount error")
metrics.OperationErrorsCount.WithLabelValues("StartMounting").Inc()
mountstatus.Update(o.TargetPath, mountstatus.Errored)
m.asyncErrs[o.NamedRef] = fmt.Errorf("err: %v: %v", err, m.asyncErrs[o.NamedRef])
return
}
metrics.ImageMountTime.WithLabelValues(metrics.Async).Observe(time.Since(startTime).Seconds())
elapsed := time.Since(startTime)
metrics.ImageMountTime.WithLabelValues(metrics.Async).Observe(elapsed.Seconds())
o.Logger.Info("Finished mounting", "image", o.NamedRef.Name(), "mount-duration", elapsed)
mountstatus.Update(o.TargetPath, mountstatus.Mounted)
}()

Expand Down
18 changes: 14 additions & 4 deletions pkg/pullexecutor/pullexecutor.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ type PullOptions struct {
PullAlways bool
PullSecrets map[string]string
Image string
Logger klog.Logger
}

// PullExecutor executes the pulls
Expand Down Expand Up @@ -76,15 +77,19 @@ func (m *PullExecutor) StartPulling(o *PullOptions) error {
puller := remoteimage.NewPuller(m.imageSvcClient, o.NamedRef, keyring)
shouldPull := o.PullAlways || !m.mounter.ImageExists(o.Context, o.NamedRef)
if shouldPull {
klog.Infof("pull image %q ", o.Image)
o.Logger.Info("Pulling image", "image", o.Image)
pullstatus.Update(o.NamedRef, pullstatus.StillPulling)
startTime := time.Now()
if err = puller.Pull(o.Context); err != nil {
pullstatus.Update(o.NamedRef, pullstatus.Errored)
metrics.OperationErrorsCount.WithLabelValues("StartPulling").Inc()
o.Logger.Error(err, "Unable to pull image", "image", o.NamedRef)
return errors.Errorf("unable to pull image %q: %s", o.NamedRef, err)
}
metrics.ImagePullTime.WithLabelValues(metrics.Sync).Observe(time.Since(startTime).Seconds())
elapsed := time.Since(startTime)
metrics.ImagePullTime.WithLabelValues(metrics.Sync).Observe(elapsed.Seconds())
size := puller.ImageSize(o.Context)
o.Logger.Info("Finished pulling image", "image", o.Image, "pull-duration", elapsed, "image-size", fmt.Sprintf("%.2f MiB", float64(size)/(1024.0*1024.0)))
}
pullstatus.Update(o.NamedRef, pullstatus.Pulled)
return nil
Expand All @@ -110,17 +115,22 @@ func (m *PullExecutor) StartPulling(o *PullOptions) error {
puller := remoteimage.NewPuller(m.imageSvcClient, o.NamedRef, keyring)
shouldPull := o.PullAlways || !m.mounter.ImageExists(o.Context, o.NamedRef)
if shouldPull {
klog.Infof("pull image %q ", o.Image)
o.Logger.Info("Pulling image asynchronously", "image", o.Image)
pullstatus.Update(o.NamedRef, pullstatus.StillPulling)
startTime := time.Now()

if err = puller.Pull(c); err != nil {
pullstatus.Update(o.NamedRef, pullstatus.Errored)
metrics.OperationErrorsCount.WithLabelValues("StartPulling").Inc()
o.Logger.Error(err, "Unable to pull image", "image", o.Image)
m.asyncErrs[o.NamedRef] = fmt.Errorf("unable to pull image %q: %s", o.Image, err)
return
}
metrics.ImagePullTime.WithLabelValues(metrics.Async).Observe(time.Since(startTime).Seconds())
elapsed := time.Since(startTime)
metrics.ImagePullTime.WithLabelValues(metrics.Async).Observe(elapsed.Seconds())
size := puller.ImageSize(o.Context)
o.Logger.Info("Finished pulling image", "image", o.Image, "pull-duration", elapsed, "image-size", fmt.Sprintf("%.2f MiB", float64(size)/(1024.0*1024.0)))

}
pullstatus.Update(o.NamedRef, pullstatus.Pulled)
}
Expand Down
11 changes: 11 additions & 0 deletions pkg/remoteimage/pull.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

// Puller pulls a container image via a CRI image service and reports its size.
type Puller interface {
	// Pull fetches the image, blocking until the pull completes or ctx is
	// cancelled.
	Pull(context.Context) error
	// ImageSize returns the size of the pulled image in bytes, as reported by
	// the CRI image service. NOTE(review): presumably 0 when the image is
	// unknown — confirm against the implementation's error handling.
	ImageSize(context.Context) int
}

func NewPuller(imageSvc cri.ImageServiceClient, image docker.Named,
Expand All @@ -28,6 +29,16 @@ type puller struct {
keyring credentialprovider.DockerKeyring
}

// ImageSize returns the compressed size, in bytes, of the pulled image, as
// reported by the CRI image service. It returns 0 when the status lookup
// fails or the runtime does not know the image.
//
// NOTE: for small images containerd may report an incorrect size; see
// https://github.com/containerd/containerd/issues/9641.
func (p puller) ImageSize(ctx context.Context) int {
	imageSpec := &cri.ImageSpec{Image: p.image.String()}
	imageStatusResponse, err := p.imageSvc.ImageStatus(ctx, &cri.ImageStatusRequest{
		Image: imageSpec,
	})
	// The error was previously discarded, so a failed RPC or an unknown image
	// dereferenced a nil response. Treat both as "size unknown".
	if err != nil || imageStatusResponse == nil || imageStatusResponse.Image == nil {
		return 0
	}
	return int(imageStatusResponse.Image.Size_)
}

func (p puller) Pull(ctx context.Context) (err error) {
repo := p.image.Name()
imageSpec := &cri.ImageSpec{Image: p.image.String()}
Expand Down

0 comments on commit c1787b4

Please sign in to comment.