devmapper: activate dm device if snap device marked as activated

- reproducer
 1. stop a container;
 2. reboot, or run `dmsetup remove` on its corresponding dm device;
 3. start the container; it fails with an error like:

 """
 Error: failed to start containers: {"message":"failed to create container(4f33d2760760c41518a84821153ccdf7f80980b797b783cdd75178fc6ca0bf4b) on containerd: failed to create task for container(4f33d2760760c41518a84821153ccdf7f80980b797b783cdd75178fc6ca0bf4b): failed to mount rootfs component &{ext4 /dev/mapper/vg0-mythinpool-snap-2 []}: no such file or directory: unknown"}
 """
- how the fix works
 activate the dm device if it is marked as 'Activated' but is not actually active, and log a warning message:

 """
 time="2019-08-21T22:44:08.422695797+08:00" level=warning msg="devmapper device \"vg0-mythinpool-snap-2\" marked as \"Activated\" but not active, activating it"
 """

Signed-off-by: Eric Ren <renzhen@linux.alibaba.com>
This commit is contained in:
renzhen.rz 2019-08-21 22:54:01 +08:00
parent 4924bcb5fe
commit 4d11bb36ad
2 changed files with 32 additions and 12 deletions

View File

@ -73,23 +73,41 @@ func NewPoolDevice(ctx context.Context, config *Config) (*PoolDevice, error) {
return poolDevice, nil
}
// ensureDeviceStates marks devices with incomplete states (after crash) as 'Faulty'
// ensureDeviceStates updates devices to their real state:
// - marks devices with incomplete states (after crash) as 'Faulty'
// - activates devices if they are marked as 'Activated' but the dm
// device is not active, which can happen to a stopped container
// after a reboot
func (p *PoolDevice) ensureDeviceStates(ctx context.Context) error {
var devices []*DeviceInfo
var faultyDevices []*DeviceInfo
var activatedDevices []*DeviceInfo
if err := p.metadata.WalkDevices(ctx, func(info *DeviceInfo) error {
switch info.State {
case Activated, Suspended, Resumed, Deactivated, Removed, Faulty:
return nil
case Suspended, Resumed, Deactivated, Removed, Faulty:
case Activated:
activatedDevices = append(activatedDevices, info)
default:
faultyDevices = append(faultyDevices, info)
}
devices = append(devices, info)
return nil
}); err != nil {
return errors.Wrap(err, "failed to query devices from metastore")
}
var result *multierror.Error
for _, dev := range devices {
for _, dev := range activatedDevices {
if p.IsActivated(dev.Name) {
continue
}
log.G(ctx).Warnf("devmapper device %q marked as %q but not active, activating it", dev.Name, dev.State)
if err := p.activateDevice(ctx, dev); err != nil {
result = multierror.Append(result, err)
}
}
for _, dev := range faultyDevices {
log.G(ctx).
WithField("dev_id", dev.DeviceID).
WithField("parent", dev.ParentName).
@ -350,7 +368,7 @@ func (p *PoolDevice) DeactivateDevice(ctx context.Context, deviceName string, de
return nil
}
// IsActivated returns true if thin-device is activated and not suspended
// IsActivated returns true if thin-device is activated
func (p *PoolDevice) IsActivated(deviceName string) bool {
infos, err := dmsetup.Info(deviceName)
if err != nil || len(infos) != 1 {
@ -358,11 +376,11 @@ func (p *PoolDevice) IsActivated(deviceName string) bool {
return false
}
if devInfo := infos[0]; devInfo.Suspended {
return false
if devInfo := infos[0]; devInfo.TableLive {
return true
}
return true
return false
}
// IsLoaded returns true if thin-device is visible for dmsetup

View File

@ -161,7 +161,9 @@ func TestPoolDeviceMarkFaulty(t *testing.T) {
err := store.AddDevice(testCtx, &DeviceInfo{Name: "1", State: Unknown})
assert.NilError(t, err)
err = store.AddDevice(testCtx, &DeviceInfo{Name: "2", State: Activated})
// Note: do not use 'Activated' here because pool.ensureDeviceStates() will
// try to activate the real dm device, which will fail on a faked device.
err = store.AddDevice(testCtx, &DeviceInfo{Name: "2", State: Deactivated})
assert.NilError(t, err)
pool := &PoolDevice{metadata: store}
@ -177,7 +179,7 @@ func TestPoolDeviceMarkFaulty(t *testing.T) {
assert.Equal(t, Faulty, info.State)
assert.Equal(t, "1", info.Name)
case 2:
assert.Equal(t, Activated, info.State)
assert.Equal(t, Deactivated, info.State)
assert.Equal(t, "2", info.Name)
default:
t.Error("unexpected walk call")