package testcase

import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"sync"
	"time"

	"plp-test/internal/config"
	"plp-test/internal/model"
	"plp-test/internal/utils"

	"github.com/sirupsen/logrus"
)

// PowerLossTest is the power-loss test case. It writes checksummed data
// blocks through an Open-CAS cache device so that, after a manual power cut,
// CheckIntegrity can report which blocks are intact, missing or corrupted.
type PowerLossTest struct {
	*BaseTestCase

	testDir         string  // directory under the mount point holding the block files
	blockSize       int     // size of one block, as returned by utils.KBToBytes
	totalBlocks     int     // number of blocks the test intends to write
	writtenBlocks   int     // number of blocks written so far by Run
	verifiedBlocks  int     // reported in the test result; not updated in this file
	corruptedBlocks int     // reported in the test result; not updated in this file
	blocks          []*model.TestBlock
	recoveryTimeMs  float64 // reported in the test result; not updated in this file
	powerCutInfo    *model.PowerCutInfo
	integrityInfo   *model.IntegrityInfo

	blocksMu  sync.RWMutex              // guards access to blocks and blocksMap
	blocksMap map[int]model.BlockStatus // per-block status, keyed by block index
}

// NewPowerLossTest creates a power-loss test from the given configuration.
//
// The block size and total block count are derived from cfg.Test.BlockSize
// and cfg.Test.DataSizeMB.
// NOTE(review): a configured block size of zero makes the block-count
// division panic; confirm the configuration is validated upstream.
func NewPowerLossTest(cfg *config.Config, logger *logrus.Logger) *PowerLossTest {
	baseTest := NewBaseTestCase(
		"power_loss",
		"测试在断电情况下Open-CAS的数据完整性保护能力",
		cfg,
		logger,
	)

	return &PowerLossTest{
		BaseTestCase: baseTest,
		blockSize:    utils.KBToBytes(float64(cfg.Test.BlockSize)),
		totalBlocks:  utils.MBToBytes(float64(cfg.Test.DataSizeMB)) / utils.KBToBytes(float64(cfg.Test.BlockSize)),
		blocksMap:    make(map[int]model.BlockStatus),
	}
}

// Setup prepares the test environment: it creates the cache instance in
// write-back ("wb") mode, makes sure the mount point is free, formats the
// cache device (skipped when recovery is true so existing data survives),
// mounts it, creates the test directory and resets the in-memory bookkeeping.
//
// On any failure the test status is set to StatusFailed and the wrapped
// error is returned.
func (t *PowerLossTest) Setup(ctx context.Context, recovery bool) error {
	if err := t.BaseTestCase.Setup(ctx); err != nil {
		return err
	}

	t.setMessage("创建Open-CAS缓存实例")
	id := t.config.Server.CacheInstanceID
	nvme := t.config.Server.DevicesNVMe
	hdd := t.config.Server.DevicesHDD

	// Create the cache instance in write-back mode to exercise power-loss recovery.
	if err := t.casManager.CreateCacheInstance(id, nvme, hdd, "wb"); err != nil {
		t.setStatus(StatusFailed)
		return fmt.Errorf("创建缓存实例失败: %w", err)
	}

	// Path of the exported cache device.
	cacheDevice := fmt.Sprintf("/dev/cas%s-1", id)
	t.setMessage(fmt.Sprintf("格式化缓存设备 %s", cacheDevice))

	// Make sure nothing is mounted on the mount point; unmount if needed.
	if utils.IsMounted(t.config.Server.MountPoint) {
		if err := t.casManager.UnmountDevice(t.config.Server.MountPoint); err != nil {
			t.setStatus(StatusFailed)
			return fmt.Errorf("卸载挂载点失败: %w", err)
		}
	}

	// Only format on a fresh run; a recovery run must keep the on-disk data.
	if !recovery {
		if err := t.casManager.FormatDevice(cacheDevice, "ext4"); err != nil {
			t.setStatus(StatusFailed)
			return fmt.Errorf("格式化缓存设备失败: %w", err)
		}
	}

	// Mount the cache device.
	mountPoint := t.config.Server.MountPoint
	t.setMessage(fmt.Sprintf("挂载缓存设备到 %s", mountPoint))
	if err := t.casManager.MountDevice(cacheDevice, mountPoint); err != nil {
		t.setStatus(StatusFailed)
		return fmt.Errorf("挂载缓存设备失败: %w", err)
	}

	// Create the test directory.
	t.testDir = filepath.Join(mountPoint, "power_loss_test")
	t.setMessage(fmt.Sprintf("创建测试目录 %s", t.testDir))
	if err := utils.CreateDirIfNotExist(t.testDir); err != nil {
		t.setStatus(StatusFailed)
		return fmt.Errorf("创建测试目录失败: %w", err)
	}

	// Initialise the test data bookkeeping.
	t.blocks = make([]*model.TestBlock, 0, t.totalBlocks)
	t.powerCutInfo = &model.PowerCutInfo{}

	// Initialise the integrity report.
	t.integrityInfo = &model.IntegrityInfo{
		TestID:          t.testID,
		TestType:        t.name,
		CheckTime:       time.Time{},
		TotalBlocks:     t.totalBlocks,
		ExpectedBlocks:  t.totalBlocks,
		AvailableBlocks: 0,
		CorruptedBlocks: 0,
		MissingBlocks:   0,
		DataLossMB:      0,
		RecoverySuccess: false,
		BlocksMap:       make(map[int]model.BlockStatus),
	}

	t.setProgress(10)
	return nil
}

// Run executes the write phase of the power-loss test.
//
// Phase one generates every block in memory and persists the index->checksum
// map to checksums.json in the test directory, so a later CheckIntegrity
// (possibly in a new process after the power cut) has reference checksums.
// Phase two writes each block to its own file and runs "sync" after every
// block; the operator is expected to cut power manually during this phase.
//
// It returns the populated test result, or an error when the context is
// cancelled (status StatusAborted) or a step fails (status StatusFailed).
func (t *PowerLossTest) Run(ctx context.Context) (*model.TestResult, error) {
	t.setMessage("开始断电测试")
	startTime := time.Now()

	var totalBytesWritten int

	// Phase one: pre-generate all blocks in memory.
	t.setMessage("在内存中预生成数据块")
	blocksInMemory := make([]*model.TestBlock, t.totalBlocks)
	checksumMap := make(map[int]string)

	for i := 0; i < t.totalBlocks; i++ {
		select {
		case <-ctx.Done():
			t.setStatus(StatusAborted)
			return nil, ctx.Err()
		default:
		}

		// t.blockSize is already the result of utils.KBToBytes, so it is
		// passed as-is; converting it a second time would inflate each block.
		data, err := utils.GenerateRandomData(t.blockSize)
		if err != nil {
			t.setStatus(StatusFailed)
			return nil, fmt.Errorf("生成随机数据失败: %w", err)
		}

		// Build the test block and remember its checksum.
		block := model.NewTestBlock(data, i)
		blocksInMemory[i] = block
		checksumMap[i] = block.Checksum

		// The first 30% of the progress bar covers generation.
		if i > 0 && i%100 == 0 {
			progress := float64(i+1) / float64(t.totalBlocks) * 30
			t.setProgress(progress)
			t.setMessage(fmt.Sprintf("已在内存中生成 %d/%d 个数据块", i, t.totalBlocks))
		}
	}

	// Persist the checksum map to a file.
	t.setMessage("持久化校验和映射到文件")
	checksumFilePath := filepath.Join(t.testDir, "checksums.json")
	checksumData, err := json.Marshal(checksumMap)
	if err != nil {
		t.setStatus(StatusFailed)
		return nil, fmt.Errorf("序列化校验和映射失败: %w", err)
	}
	if err = os.WriteFile(checksumFilePath, checksumData, 0644); err != nil {
		t.setStatus(StatusFailed)
		return nil, fmt.Errorf("保存校验和映射文件失败: %w", err)
	}

	// Make sure the checksum map has reached the disk; a failed sync is only logged.
	t.setMessage("同步校验和映射到磁盘")
	if _, err = utils.ExecuteCommand("sync"); err != nil {
		t.logger.Warnf("执行sync命令失败: %v", err)
	}

	t.setProgress(35)
	t.setMessage("校验和映射已保存,准备断电测试,开始写入数据块...")
	time.Sleep(3 * time.Second) // give the operator some time to get ready

	// Phase two: write the blocks to disk.
	t.setMessage("写入数据块到磁盘 (请在适当时手动断电)")
	for i, block := range blocksInMemory {
		select {
		case <-ctx.Done():
			t.setStatus(StatusAborted)
			return nil, ctx.Err()
		default:
		}

		fileName := fmt.Sprintf("block_%d.dat", i)

		// Record the block and its status before it is written.
		t.blocksMu.Lock()
		t.blocks = append(t.blocks, block)
		t.blocksMap[i] = model.BlockStatus{
			Available: true,
			Corrupted: false,
			Checksum:  block.Checksum,
			FilePath:  fileName,
		}
		t.blocksMu.Unlock()

		// Write the block; the helper closes the file before returning so
		// descriptors do not pile up across iterations.
		filePath := filepath.Join(t.testDir, fileName)
		if err := writeBlockFile(filePath, block.Data); err != nil {
			t.setStatus(StatusFailed)
			return nil, fmt.Errorf("写入文件 %s 失败: %w", filePath, err)
		}

		t.writtenBlocks++
		totalBytesWritten += len(block.Data)

		t.setMessage(fmt.Sprintf("同步数据到磁盘 (已写入 %d/%d 块)", i, t.totalBlocks))
		if _, err := utils.ExecuteCommand("sync"); err != nil {
			t.logger.Warnf("执行sync命令失败: %v", err)
		}

		// The remaining 65% of the progress bar covers writing.
		progress := 35 + float64(i+1)/float64(t.totalBlocks)*65
		t.setProgress(progress)

		// Report the running total every 100 blocks.
		if i > 0 && i%100 == 0 {
			t.setMessage(fmt.Sprintf("已写入 %d/%d 块数据, 共 %.2f MB", i, t.totalBlocks, float64(i*t.blockSize)/(1024*1024)))
		}
	}

	// Record how much was written.
	t.powerCutInfo.BlocksWritten = t.writtenBlocks

	// Final sync once every block has been written.
	t.setMessage("同步所有数据到磁盘")
	if _, err = utils.ExecuteCommand("sync"); err != nil {
		t.logger.Warnf("执行sync命令失败: %v", err)
	}

	t.setProgress(100)
	t.setStatus(StatusCompleted)
	t.setMessage("数据写入完成")

	// Assemble the test result.
	result := t.getTestResult()
	result.BlocksWritten = t.writtenBlocks
	result.BlocksVerified = t.verifiedBlocks
	result.DataWrittenMB = utils.BytesToMB(totalBytesWritten)
	result.WriteSpeedMBs = utils.BytesToMB(totalBytesWritten) / time.Since(startTime).Seconds()
	result.Metrics = model.TestMetrics{
		DataIntegrityLoss: t.corruptedBlocks,
		RecoveryTimeMs:    t.recoveryTimeMs,
	}

	return result, nil
}

// writeBlockFile creates (or truncates) path, opened with DirectIOFlag
// (declared elsewhere in this package), writes data to it and closes it.
// It returns the first error from open, write or close.
func writeBlockFile(path string, data []byte) (err error) {
	file, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC|DirectIOFlag, 0644)
	if err != nil {
		return err
	}
	defer func() {
		// A close failure on a freshly written file is reported unless an
		// earlier write error is already being returned.
		if cerr := file.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	_, err = file.Write(data)
	return err
}

// CheckIntegrity verifies every expected block file against its reference
// checksum and returns the updated integrity report.
//
// Reference checksums come from checksums.json in the test directory when it
// can be read and parsed; for indexes not found there, the checksum of the
// in-memory block (if any) is used. A block whose file is absent is counted
// as missing; a block that cannot be read or whose SHA-256 differs from the
// reference is counted as corrupted. A readable block with no reference
// checksum at all is counted as available.
func (t *PowerLossTest) CheckIntegrity() *model.IntegrityInfo {
	t.setMessage("开始检查数据完整性")
	info := t.integrityInfo
	info.CheckTime = time.Now()

	// Reset the counters.
	info.AvailableBlocks = 0
	info.CorruptedBlocks = 0
	info.MissingBlocks = 0

	// Try to load the checksum map from file.
	checksumMap := make(map[int]string)
	checksumFilePath := filepath.Join(t.testDir, "checksums.json")
	if utils.FileExists(checksumFilePath) {
		t.setMessage("从文件加载校验和映射")
		data, err := os.ReadFile(checksumFilePath)
		if err != nil {
			t.logger.Warnf("读取校验和映射文件失败: %v,将使用内存中的校验和", err)
		} else if err = json.Unmarshal(data, &checksumMap); err != nil {
			t.logger.Warnf("解析校验和映射文件失败: %v,将使用内存中的校验和", err)
		} else {
			t.logger.Infof("从文件成功加载了 %d 个校验和", len(checksumMap))
		}
	} else {
		t.logger.Warnf("校验和映射文件不存在,将使用内存中的校验和")
	}

	// Create a status record for every expected block.
	for i := 0; i < t.totalBlocks; i++ {
		fileName := fmt.Sprintf("block_%d.dat", i)
		filePath := filepath.Join(t.testDir, fileName)

		// The file does not exist.
		if !utils.FileExists(filePath) {
			info.MissingBlocks++
			info.BlocksMap[i] = model.BlockStatus{
				Available: false,
				Corrupted: false,
				FilePath:  fileName,
			}
			continue
		}

		// The file exists but cannot be read.
		data, err := os.ReadFile(filePath)
		if err != nil {
			info.CorruptedBlocks++
			info.BlocksMap[i] = model.BlockStatus{
				Available: true,
				Corrupted: true,
				FilePath:  fileName,
			}
			continue
		}

		// Compute the actual checksum.
		hash := sha256.Sum256(data)
		checksum := hex.EncodeToString(hash[:])

		// Expected checksum: prefer the persisted map, fall back to memory.
		expectedChecksum, ok := checksumMap[i]
		if !ok {
			t.blocksMu.RLock()
			if i < len(t.blocks) && t.blocks[i] != nil {
				expectedChecksum = t.blocks[i].Checksum
			}
			t.blocksMu.RUnlock()
		}

		corrupted := expectedChecksum != "" && checksum != expectedChecksum
		if corrupted {
			info.CorruptedBlocks++
		} else {
			info.AvailableBlocks++
		}
		info.BlocksMap[i] = model.BlockStatus{
			Available: true,
			Corrupted: corrupted,
			Checksum:  checksum,
			FilePath:  fileName,
		}
	}

	// Compute the amount of lost data.
	info.DataLossMB = utils.BytesToMB((info.MissingBlocks + info.CorruptedBlocks) * t.blockSize)
	info.RecoverySuccess = info.CorruptedBlocks == 0 && info.MissingBlocks == 0

	t.setMessage(fmt.Sprintf("数据完整性检查完成: %d 个块正常, %d 个块丢失, %d 个块损坏",
		info.AvailableBlocks,
		info.MissingBlocks,
		info.CorruptedBlocks))

	return t.integrityInfo
}

// Cleanup tears down the test environment: it runs the base cleanup, then
// unmounts the cache device and stops the cache instance. Failures of the
// unmount and stop steps are logged as warnings and do not fail the cleanup;
// only an error from the base cleanup is returned.
func (t *PowerLossTest) Cleanup(ctx context.Context) error {
	if err := t.BaseTestCase.Cleanup(ctx); err != nil {
		return err
	}

	t.setMessage("卸载缓存设备")
	if err := t.casManager.UnmountDevice(t.config.Server.MountPoint); err != nil {
		t.logger.Warnf("卸载缓存设备失败: %v", err)
	}

	t.setMessage("停止缓存实例")
	if err := t.casManager.StopCacheInstance(t.config.Server.CacheInstanceID); err != nil {
		t.logger.Warnf("停止缓存实例失败: %v", err)
	}

	return nil
}