package testcase

import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"os"
	"path/filepath"
	"sync"
	"syscall"
	"time"

	"plp-test/internal/config"
	"plp-test/internal/model"
	"plp-test/internal/utils"

	"github.com/sirupsen/logrus"
)

// PowerLossTest is the power-loss test case. It writes a series of block
// files to a mounted Open-CAS cache device and can later verify which of
// those files are present and unmodified.
type PowerLossTest struct {
	*BaseTestCase

	testDir         string  // directory under the mount point holding the block files
	blockSize       int     // size of one block, in bytes
	totalBlocks     int     // number of blocks Run attempts to write
	writtenBlocks   int     // number of blocks written successfully by Run
	verifiedBlocks  int     // reported in the test result; not updated in this file
	corruptedBlocks int     // reported in the test result; not updated in this file
	blocks          []*model.TestBlock
	recoveryTimeMs  float64 // reported in the test result; not updated in this file
	powerCutInfo    *model.PowerCutInfo
	integrityInfo   *model.IntegrityInfo

	blocksMu  sync.RWMutex              // guards blocks and blocksMap
	blocksMap map[int]model.BlockStatus // per-block status keyed by block index
}

// NewPowerLossTest creates a power-loss test from the given configuration.
//
// The block size is taken from cfg.Test.BlockSize (KB) and converted to
// bytes; the block count is cfg.Test.DataSizeMB divided by the block size.
// A non-positive block size yields zero blocks instead of a division-by-zero
// panic; Setup rejects such a configuration.
func NewPowerLossTest(cfg *config.Config, logger *logrus.Logger) *PowerLossTest {
	baseTest := NewBaseTestCase(
		"power_loss",
		"测试在断电情况下Open-CAS的数据完整性保护能力",
		cfg,
		logger,
	)

	blockSize := utils.KBToBytes(float64(cfg.Test.BlockSize))
	totalBlocks := 0
	if blockSize > 0 {
		totalBlocks = utils.MBToBytes(float64(cfg.Test.DataSizeMB)) / blockSize
	}

	return &PowerLossTest{
		BaseTestCase: baseTest,
		blockSize:    blockSize,
		totalBlocks:  totalBlocks,
		blocksMap:    make(map[int]model.BlockStatus),
	}
}

// blockFileName returns the file name used for the block with the given index.
func (t *PowerLossTest) blockFileName(index int) string {
	return fmt.Sprintf("block_%d.dat", index)
}

// blockFilePath returns the full path of the block file inside the test directory.
func (t *PowerLossTest) blockFilePath(index int) string {
	return filepath.Join(t.testDir, t.blockFileName(index))
}

// writeBlockFile creates (or opens) filePath with O_DIRECT, writes data to it
// and closes the file before returning, so that file descriptors do not
// accumulate across blocks. A Close error is reported when the write itself
// succeeded.
//
// NOTE(review): O_DIRECT generally requires the buffer address and length to
// be aligned to the device's logical block size; the buffer returned by
// utils.GenerateRandomData is not explicitly aligned here — confirm this
// works on the target filesystem.
func (t *PowerLossTest) writeBlockFile(filePath string, data []byte) (err error) {
	file, err := os.OpenFile(filePath, os.O_CREATE|os.O_WRONLY|syscall.O_DIRECT, 0644)
	if err != nil {
		return err
	}
	defer func() {
		if cerr := file.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	_, err = file.Write(data)
	return err
}

// Setup prepares the test environment: it runs the base setup, creates the
// cache instance in write-back mode, formats the cache device as ext4, mounts
// it, creates the test directory and initializes the bookkeeping structures.
// On failure the test status is set to StatusFailed and the wrapped error is
// returned.
func (t *PowerLossTest) Setup(ctx context.Context) error {
	if err := t.BaseTestCase.Setup(ctx); err != nil {
		return err
	}

	// A non-positive block size cannot produce a meaningful test; fail before
	// touching any device.
	if t.blockSize <= 0 {
		t.setStatus(StatusFailed)
		return fmt.Errorf("无效的块大小配置: %d", t.blockSize)
	}

	t.setMessage("创建Open-CAS缓存实例")
	id := t.config.Server.CacheInstanceID
	nvme := t.config.Server.DevicesNVMe
	hdd := t.config.Server.DevicesHDD

	// Create the cache instance in write-back mode to exercise power-loss recovery.
	if err := t.casManager.CreateCacheInstance(id, nvme, hdd, "wb"); err != nil {
		t.setStatus(StatusFailed)
		return fmt.Errorf("创建缓存实例失败: %w", err)
	}

	// Derive the cache device path from the instance ID.
	cacheDevice := fmt.Sprintf("/dev/cas%s-1", id)
	t.setMessage(fmt.Sprintf("格式化缓存设备 %s", cacheDevice))

	// Format the cache device.
	if err := t.casManager.FormatDevice(cacheDevice, "ext4"); err != nil {
		t.setStatus(StatusFailed)
		return fmt.Errorf("格式化缓存设备失败: %w", err)
	}

	// Mount the cache device.
	mountPoint := t.config.Server.MountPoint
	t.setMessage(fmt.Sprintf("挂载缓存设备到 %s", mountPoint))
	if err := t.casManager.MountDevice(cacheDevice, mountPoint); err != nil {
		t.setStatus(StatusFailed)
		return fmt.Errorf("挂载缓存设备失败: %w", err)
	}

	// Create the test directory.
	t.testDir = filepath.Join(mountPoint, "power_loss_test")
	t.setMessage(fmt.Sprintf("创建测试目录 %s", t.testDir))
	if err := utils.CreateDirIfNotExist(t.testDir); err != nil {
		t.setStatus(StatusFailed)
		return fmt.Errorf("创建测试目录失败: %w", err)
	}

	// Initialize the test data bookkeeping.
	t.blocks = make([]*model.TestBlock, 0, t.totalBlocks)
	t.powerCutInfo = &model.PowerCutInfo{}

	// Initialize the integrity report.
	t.integrityInfo = &model.IntegrityInfo{
		TestID:          t.testID,
		TestType:        t.name,
		CheckTime:       time.Time{},
		TotalBlocks:     t.totalBlocks,
		ExpectedBlocks:  t.totalBlocks,
		AvailableBlocks: 0,
		CorruptedBlocks: 0,
		MissingBlocks:   0,
		DataLossMB:      0,
		RecoverySuccess: false,
		BlocksMap:       make(map[int]model.BlockStatus),
	}

	t.setProgress(10)
	return nil
}

// Run executes the write phase of the test. It writes totalBlocks files of
// blockSize random bytes each, runs "sync" every 10 blocks, pauses for one
// second every 100 blocks (to give the operator a chance to cut power), and
// finally returns a result describing how much data was written.
//
// It returns ctx.Err() with status StatusAborted if ctx is cancelled between
// blocks, and a wrapped error with status StatusFailed if data generation or
// a file write fails.
func (t *PowerLossTest) Run(ctx context.Context) (*model.TestResult, error) {
	t.setMessage("开始断电测试")
	startTime := time.Now()

	var totalBytesWritten int

	// Phase one: keep writing data until power is cut manually.
	t.setMessage("写入数据 (请在适当时手动断电)")

	for i := 0; i < t.totalBlocks; i++ {
		select {
		case <-ctx.Done():
			t.setStatus(StatusAborted)
			return nil, ctx.Err()
		default:
		}

		// Generate random data. blockSize is already expressed in bytes, so it
		// must not be converted from KB again.
		data, err := utils.GenerateRandomData(t.blockSize)
		if err != nil {
			t.setStatus(StatusFailed)
			return nil, fmt.Errorf("生成随机数据失败: %w", err)
		}

		// Build the test block and record it together with its status.
		block := model.NewTestBlock(data, i)

		t.blocksMu.Lock()
		t.blocks = append(t.blocks, block)
		t.blocksMap[i] = model.BlockStatus{
			Available: true,
			Corrupted: false,
			Checksum:  block.Checksum,
			FilePath:  t.blockFileName(i),
		}
		t.blocksMu.Unlock()

		// Write the block with direct IO; the helper closes the file before
		// the next iteration starts.
		filePath := t.blockFilePath(i)
		if err := t.writeBlockFile(filePath, data); err != nil {
			t.setStatus(StatusFailed)
			return nil, fmt.Errorf("写入文件 %s 失败: %w", filePath, err)
		}

		t.writtenBlocks++
		totalBytesWritten += len(data)

		// Flush to disk after every 10 blocks. A failing sync is logged only.
		if i > 0 && i%10 == 0 {
			t.setMessage(fmt.Sprintf("同步数据到磁盘 (已写入 %d/%d 块)", i, t.totalBlocks))
			if _, err := utils.ExecuteCommand("sync"); err != nil {
				t.logger.Warnf("执行sync命令失败: %v", err)
			}
		}

		// Update progress.
		progress := float64(i+1) / float64(t.totalBlocks) * 100
		t.setProgress(progress)

		// Pause briefly every 100 blocks to give the operator a chance to cut power.
		if i > 0 && i%100 == 0 {
			t.setMessage(fmt.Sprintf("已写入 %d/%d 块数据, 共 %.2f MB", i, t.totalBlocks, float64(i*t.blockSize)/(1024*1024)))
			time.Sleep(1 * time.Second)
		}
	}

	// Record how many blocks were written.
	t.powerCutInfo.BlocksWritten = t.writtenBlocks

	// After all data is written, flush everything to disk.
	t.setMessage("同步所有数据到磁盘")
	if _, err := utils.ExecuteCommand("sync"); err != nil {
		t.logger.Warnf("执行sync命令失败: %v", err)
	}

	t.setProgress(100)
	t.setStatus(StatusCompleted)
	t.setMessage("数据写入完成")

	// Build the test result.
	result := t.getTestResult()
	result.BlocksWritten = t.writtenBlocks
	result.BlocksVerified = t.verifiedBlocks
	result.DataWrittenMB = utils.BytesToMB(totalBytesWritten)
	result.WriteSpeedMBs = utils.BytesToMB(totalBytesWritten) / time.Since(startTime).Seconds()
	result.Metrics = model.TestMetrics{
		DataIntegrityLoss: t.corruptedBlocks,
		RecoveryTimeMs:    t.recoveryTimeMs,
	}

	return result, nil
}

// CheckIntegrity inspects every expected block file and classifies it as
// missing (file absent), corrupted (unreadable, or its SHA-256 differs from
// the checksum recorded in memory) or available. It updates and returns the
// test's IntegrityInfo, including the estimated data loss in MB and whether
// recovery is considered successful (no missing and no corrupted blocks).
//
// NOTE(review): a block with no in-memory checksum (index beyond the blocks
// recorded by Run, or an empty checksum) is counted as available without
// comparison. The comparison also presumes model.NewTestBlock stores a
// hex-encoded SHA-256 — confirm against the model package.
func (t *PowerLossTest) CheckIntegrity() *model.IntegrityInfo {
	t.setMessage("开始检查数据完整性")
	info := t.integrityInfo
	info.CheckTime = time.Now()

	// Reset counters.
	info.AvailableBlocks = 0
	info.CorruptedBlocks = 0
	info.MissingBlocks = 0

	// Produce a status record for every expected block.
	for i := 0; i < t.totalBlocks; i++ {
		fileName := t.blockFileName(i)
		filePath := filepath.Join(t.testDir, fileName)

		if !utils.FileExists(filePath) {
			// The file does not exist.
			info.MissingBlocks++
			info.BlocksMap[i] = model.BlockStatus{
				Available: false,
				Corrupted: false,
				FilePath:  fileName,
			}
			continue
		}

		// Read the file contents.
		data, err := os.ReadFile(filePath)
		if err != nil {
			// The file exists but cannot be read.
			info.CorruptedBlocks++
			info.BlocksMap[i] = model.BlockStatus{
				Available: true,
				Corrupted: true,
				FilePath:  fileName,
			}
			continue
		}

		// Compute the checksum of what is on disk.
		hash := sha256.Sum256(data)
		checksum := hex.EncodeToString(hash[:])

		// Look up the checksum recorded at write time, if any.
		var blockChecksum string
		t.blocksMu.RLock()
		if i < len(t.blocks) && t.blocks[i] != nil {
			blockChecksum = t.blocks[i].Checksum
		}
		t.blocksMu.RUnlock()

		corrupted := blockChecksum != "" && checksum != blockChecksum
		if corrupted {
			info.CorruptedBlocks++
		} else {
			info.AvailableBlocks++
		}
		info.BlocksMap[i] = model.BlockStatus{
			Available: true,
			Corrupted: corrupted,
			Checksum:  checksum,
			FilePath:  fileName,
		}
	}

	// Estimate the amount of lost data.
	info.DataLossMB = utils.BytesToMB((info.MissingBlocks + info.CorruptedBlocks) * t.blockSize)
	info.RecoverySuccess = info.CorruptedBlocks == 0 && info.MissingBlocks == 0

	t.setMessage(fmt.Sprintf("数据完整性检查完成: %d 个块正常, %d 个块丢失, %d 个块损坏",
		info.AvailableBlocks,
		info.MissingBlocks,
		info.CorruptedBlocks))

	return info
}

// Cleanup tears down the test environment: it runs the base cleanup, then
// unmounts the cache device and stops the cache instance. Failures of the
// unmount and stop steps are logged as warnings and do not fail the cleanup.
func (t *PowerLossTest) Cleanup(ctx context.Context) error {
	if err := t.BaseTestCase.Cleanup(ctx); err != nil {
		return err
	}

	t.setMessage("卸载缓存设备")
	if err := t.casManager.UnmountDevice(t.config.Server.MountPoint); err != nil {
		t.logger.Warnf("卸载缓存设备失败: %v", err)
	}

	t.setMessage("停止缓存实例")
	if err := t.casManager.StopCacheInstance(t.config.Server.CacheInstanceID); err != nil {
		t.logger.Warnf("停止缓存实例失败: %v", err)
	}

	return nil
}