plp-test/internal/testcase/power_loss-test.go
2025-04-23 10:33:06 +08:00

332 lines
9.0 KiB
Go

package testcase
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"os"
"path/filepath"
"sync"
"syscall"
"time"
"plp-test/internal/config"
"plp-test/internal/model"
"plp-test/internal/utils"
"github.com/sirupsen/logrus"
)
// PowerLossTest is the power-loss test case. It writes checksummed data
// blocks to a mounted cache device and can later re-read them to report
// which blocks are intact, corrupted or missing.
type PowerLossTest struct {
	*BaseTestCase

	// testDir is the directory, created under the configured mount point,
	// that holds the block_<n>.dat files.
	testDir string

	// blockSize is the size of one data block (converted from the configured
	// KB value by the constructor); totalBlocks is the configured data size
	// divided by the block size.
	blockSize, totalBlocks int

	// writtenBlocks counts blocks written by Run. verifiedBlocks and
	// corruptedBlocks are only reported in the test result here; nothing in
	// this file updates them.
	writtenBlocks, verifiedBlocks, corruptedBlocks int

	// blocks holds the generated blocks in write order (index == block number).
	blocks []*model.TestBlock

	// recoveryTimeMs is copied into the result metrics by Run.
	recoveryTimeMs float64

	// powerCutInfo records how many blocks had been written.
	powerCutInfo *model.PowerCutInfo

	// integrityInfo is the report filled in by CheckIntegrity.
	integrityInfo *model.IntegrityInfo

	// blocksMu guards access to blocks and blocksMap.
	blocksMu sync.RWMutex

	// blocksMap maps a block number to the status recorded when it was written.
	blocksMap map[int]model.BlockStatus
}
// NewPowerLossTest creates a power-loss test case named "power_loss".
//
// The block size is taken from cfg.Test.BlockSize (KB) and the number of
// blocks is cfg.Test.DataSizeMB divided by that block size, using integer
// division.
//
// NOTE(review): the division is an integer divide, so a configured block size
// that converts to 0 would panic here — confirm the config is validated
// upstream.
func NewPowerLossTest(cfg *config.Config, logger *logrus.Logger) *PowerLossTest {
	base := NewBaseTestCase(
		"power_loss",
		"测试在断电情况下Open-CAS的数据完整性保护能力",
		cfg,
		logger,
	)

	blockBytes := utils.KBToBytes(float64(cfg.Test.BlockSize))
	dataBytes := utils.MBToBytes(float64(cfg.Test.DataSizeMB))

	return &PowerLossTest{
		BaseTestCase: base,
		blockSize:    blockBytes,
		totalBlocks:  dataBytes / blockBytes,
		blocksMap:    make(map[int]model.BlockStatus),
	}
}
// Setup prepares the environment for the power-loss test.
//
// After the base setup it creates the cache instance in write-back ("wb")
// mode, formats the resulting cache device as ext4, mounts it at the
// configured mount point, creates the test directory and resets the
// in-memory bookkeeping (block list, power-cut info, integrity report).
// Progress is set to 10 on success.
//
// On any failure after the base setup the status is set to StatusFailed and
// the underlying error is returned wrapped with %w, so callers can inspect it
// with errors.Is / errors.As.
func (t *PowerLossTest) Setup(ctx context.Context) error {
	if err := t.BaseTestCase.Setup(ctx); err != nil {
		return err
	}

	t.setMessage("创建Open-CAS缓存实例")
	id := t.config.Server.CacheInstanceID
	nvme := t.config.Server.DevicesNVMe
	hdd := t.config.Server.DevicesHDD

	// Write-back mode is used so that dirty data lives in the cache when
	// power is cut, which is what the recovery test exercises.
	if err := t.casManager.CreateCacheInstance(id, nvme, hdd, "wb"); err != nil {
		t.setStatus(StatusFailed)
		return fmt.Errorf("创建缓存实例失败: %w", err)
	}

	// Path of the exported cache device for this instance.
	cacheDevice := fmt.Sprintf("/dev/cas%s-1", id)

	t.setMessage(fmt.Sprintf("格式化缓存设备 %s", cacheDevice))
	if err := t.casManager.FormatDevice(cacheDevice, "ext4"); err != nil {
		t.setStatus(StatusFailed)
		return fmt.Errorf("格式化缓存设备失败: %w", err)
	}

	mountPoint := t.config.Server.MountPoint
	t.setMessage(fmt.Sprintf("挂载缓存设备到 %s", mountPoint))
	if err := t.casManager.MountDevice(cacheDevice, mountPoint); err != nil {
		t.setStatus(StatusFailed)
		return fmt.Errorf("挂载缓存设备失败: %w", err)
	}

	t.testDir = filepath.Join(mountPoint, "power_loss_test")
	t.setMessage(fmt.Sprintf("创建测试目录 %s", t.testDir))
	if err := utils.CreateDirIfNotExist(t.testDir); err != nil {
		t.setStatus(StatusFailed)
		return fmt.Errorf("创建测试目录失败: %w", err)
	}

	// Reset the in-memory test data.
	t.blocks = make([]*model.TestBlock, 0, t.totalBlocks)
	t.powerCutInfo = &model.PowerCutInfo{}

	// Start from an empty integrity report; CheckIntegrity fills it in.
	t.integrityInfo = &model.IntegrityInfo{
		TestID:          t.testID,
		TestType:        t.name,
		CheckTime:       time.Time{},
		TotalBlocks:     t.totalBlocks,
		ExpectedBlocks:  t.totalBlocks,
		AvailableBlocks: 0,
		CorruptedBlocks: 0,
		MissingBlocks:   0,
		DataLossMB:      0,
		RecoverySuccess: false,
		BlocksMap:       make(map[int]model.BlockStatus),
	}

	t.setProgress(10)
	return nil
}
// Run executes the write phase of the power-loss test.
//
// It writes t.totalBlocks files named block_<i>.dat into t.testDir, each
// holding t.blockSize bytes of random data, and records every block and its
// checksum in t.blocks / t.blocksMap. The `sync` command is run after every
// 10th block and once more at the end; after every 100th block the loop
// pauses for up to one second to give the operator a window to cut power.
//
// If ctx is cancelled the status becomes StatusAborted and (nil, ctx.Err())
// is returned. If generating data or writing a file fails the status becomes
// StatusFailed and the wrapped error is returned. On success the status is
// StatusCompleted and the returned result carries the written-block count,
// the volume written and the average write speed.
func (t *PowerLossTest) Run(ctx context.Context) (*model.TestResult, error) {
	t.setMessage("开始断电测试")
	startTime := time.Now()
	var totalBytesWritten int

	// Phase 1: keep writing until all blocks are done or power is cut manually.
	t.setMessage("写入数据 (请在适当时手动断电)")
	for i := 0; i < t.totalBlocks; i++ {
		if err := ctx.Err(); err != nil {
			t.setStatus(StatusAborted)
			return nil, err
		}

		// t.blockSize was already converted from KB by the constructor and is
		// used as a byte count elsewhere in this file, so it must not be
		// converted again here (doing so made every block 1024x too large).
		data, err := utils.GenerateRandomData(t.blockSize)
		if err != nil {
			t.setStatus(StatusFailed)
			return nil, fmt.Errorf("生成随机数据失败: %w", err)
		}

		block := model.NewTestBlock(data, i)
		fileName := fmt.Sprintf("block_%d.dat", i)

		// Record the block and its expected checksum before writing it out.
		t.blocksMu.Lock()
		t.blocks = append(t.blocks, block)
		t.blocksMap[i] = model.BlockStatus{
			Available: true,
			Corrupted: false,
			Checksum:  block.Checksum,
			FilePath:  fileName,
		}
		t.blocksMu.Unlock()

		filePath := filepath.Join(t.testDir, fileName)
		if err := t.writeBlockFile(filePath, data); err != nil {
			t.setStatus(StatusFailed)
			return nil, fmt.Errorf("写入文件 %s 失败: %w", filePath, err)
		}
		t.writtenBlocks++
		totalBytesWritten += len(data)

		// Periodically flush everything to disk.
		if i > 0 && i%10 == 0 {
			t.setMessage(fmt.Sprintf("同步数据到磁盘 (已写入 %d/%d 块)", i, t.totalBlocks))
			if _, err := utils.ExecuteCommand("sync"); err != nil {
				t.logger.Warnf("执行sync命令失败: %v", err)
			}
		}

		t.setProgress(float64(i+1) / float64(t.totalBlocks) * 100)

		// Periodically pause to give the operator a chance to cut power.
		if i > 0 && i%100 == 0 {
			t.setMessage(fmt.Sprintf("已写入 %d/%d 块数据, 共 %.2f MB", i, t.totalBlocks, float64(i*t.blockSize)/(1024*1024)))
			select {
			case <-ctx.Done():
				// Stop waiting; the check at the top of the loop reports the abort.
			case <-time.After(1 * time.Second):
			}
		}
	}

	t.powerCutInfo.BlocksWritten = t.writtenBlocks

	// Everything has been written; flush once more.
	t.setMessage("同步所有数据到磁盘")
	if _, err := utils.ExecuteCommand("sync"); err != nil {
		t.logger.Warnf("执行sync命令失败: %v", err)
	}

	t.setProgress(100)
	t.setStatus(StatusCompleted)
	t.setMessage("数据写入完成")

	result := t.getTestResult()
	result.BlocksWritten = t.writtenBlocks
	result.BlocksVerified = t.verifiedBlocks
	writtenMB := utils.BytesToMB(totalBytesWritten)
	result.DataWrittenMB = writtenMB
	// Skip the division when no time has elapsed so the speed never becomes
	// Inf or NaN.
	if elapsed := time.Since(startTime).Seconds(); elapsed > 0 {
		result.WriteSpeedMBs = writtenMB / elapsed
	}
	result.Metrics = model.TestMetrics{
		DataIntegrityLoss: t.corruptedBlocks,
		RecoveryTimeMs:    t.recoveryTimeMs,
	}
	return result, nil
}

// writeBlockFile writes data to path with O_DIRECT and closes the file before
// returning, so descriptors do not pile up across the write loop in Run.
//
// NOTE(review): O_DIRECT commonly requires aligned buffers and lengths;
// confirm the data produced by utils.GenerateRandomData meets the device's
// requirements.
func (t *PowerLossTest) writeBlockFile(path string, data []byte) error {
	file, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|syscall.O_DIRECT, 0644)
	if err != nil {
		return err
	}
	if _, err := file.Write(data); err != nil {
		_ = file.Close() // the write error is the one worth reporting
		return err
	}
	return file.Close()
}
// CheckIntegrity re-reads every expected block file and fills in the
// integrity report.
//
// For each block number below t.totalBlocks the file block_<n>.dat in
// t.testDir is classified as:
//   - missing:   the file does not exist;
//   - corrupted: the file cannot be read, or its SHA-256 differs from the
//     checksum recorded in t.blocks;
//   - available: the file is readable and matches, or no checksum was
//     recorded for that block (it is then counted as intact without being
//     verified).
//
// The counters, per-block map, data-loss figure and recovery flag of
// t.integrityInfo are updated in place and the same pointer is returned.
func (t *PowerLossTest) CheckIntegrity() *model.IntegrityInfo {
	t.setMessage("开始检查数据完整性")

	report := t.integrityInfo
	report.CheckTime = time.Now()

	// Start counting from scratch on every check.
	report.AvailableBlocks, report.CorruptedBlocks, report.MissingBlocks = 0, 0, 0

	for idx := 0; idx < t.totalBlocks; idx++ {
		name := fmt.Sprintf("block_%d.dat", idx)
		fullPath := filepath.Join(t.testDir, name)

		// The file never reached the disk.
		if !utils.FileExists(fullPath) {
			report.MissingBlocks++
			report.BlocksMap[idx] = model.BlockStatus{FilePath: name}
			continue
		}

		// The file is present but unreadable: count it as corrupted.
		content, readErr := os.ReadFile(fullPath)
		if readErr != nil {
			report.CorruptedBlocks++
			report.BlocksMap[idx] = model.BlockStatus{
				Available: true,
				Corrupted: true,
				FilePath:  name,
			}
			continue
		}

		digest := sha256.Sum256(content)
		actual := hex.EncodeToString(digest[:])

		// Look up the checksum recorded at write time, if there is one.
		expected := ""
		t.blocksMu.RLock()
		if idx < len(t.blocks) && t.blocks[idx] != nil {
			expected = t.blocks[idx].Checksum
		}
		t.blocksMu.RUnlock()

		status := model.BlockStatus{
			Available: true,
			Checksum:  actual,
			FilePath:  name,
		}
		if expected != "" && actual != expected {
			status.Corrupted = true
			report.CorruptedBlocks++
		} else {
			report.AvailableBlocks++
		}
		report.BlocksMap[idx] = status
	}

	// Data lost = every missing or corrupted block, at one block size each.
	lostBlocks := report.MissingBlocks + report.CorruptedBlocks
	report.DataLossMB = utils.BytesToMB(lostBlocks * t.blockSize)
	report.RecoverySuccess = report.CorruptedBlocks == 0 && report.MissingBlocks == 0

	t.setMessage(fmt.Sprintf("数据完整性检查完成: %d 个块正常, %d 个块丢失, %d 个块损坏",
		report.AvailableBlocks, report.MissingBlocks, report.CorruptedBlocks))
	return report
}
// Cleanup tears down the test environment.
//
// The base cleanup runs first and its error, if any, is returned immediately.
// After that the cache device is unmounted and the cache instance stopped;
// failures of those two steps are only logged as warnings, and nil is
// returned.
func (t *PowerLossTest) Cleanup(ctx context.Context) error {
	if baseErr := t.BaseTestCase.Cleanup(ctx); baseErr != nil {
		return baseErr
	}

	t.setMessage("卸载缓存设备")
	if umountErr := t.casManager.UnmountDevice(t.config.Server.MountPoint); umountErr != nil {
		t.logger.Warnf("卸载缓存设备失败: %v", umountErr)
	}

	t.setMessage("停止缓存实例")
	if stopErr := t.casManager.StopCacheInstance(t.config.Server.CacheInstanceID); stopErr != nil {
		t.logger.Warnf("停止缓存实例失败: %v", stopErr)
	}

	return nil
}