plp-test/internal/testcase/power_loss-test.go

341 lines
9.3 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package testcase
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"os"
"path/filepath"
"sync"
"syscall"
"time"
"plp-test/internal/config"
"plp-test/internal/model"
"plp-test/internal/utils"
"github.com/sirupsen/logrus"
)
// PowerLossTest is the power-loss test case. It embeds BaseTestCase and keeps
// the bookkeeping for the blocks written by Run and the results produced by
// CheckIntegrity.
type PowerLossTest struct {
*BaseTestCase
// testDir is the directory that holds the block files; Setup places it
// under the configured mount point.
testDir string
// blockSize is the per-block size derived from the configured block size
// via utils.KBToBytes in NewPowerLossTest (presumably bytes — confirm
// against utils).
blockSize int
// totalBlocks is the number of blocks Run attempts to write: configured
// data size divided by the block size.
totalBlocks int
// writtenBlocks counts the blocks Run has written so far.
writtenBlocks int
// verifiedBlocks and corruptedBlocks are only reported in the test result
// within this file; nothing visible here updates them.
verifiedBlocks int
corruptedBlocks int
// blocks holds the in-memory record of every block created by Run, in
// write order; CheckIntegrity reads the expected checksums from it.
blocks []*model.TestBlock
// recoveryTimeMs is reported in the result metrics; it is not assigned in
// the code visible here.
recoveryTimeMs float64
// powerCutInfo is created in Setup; Run stores the written-block count in it.
powerCutInfo *model.PowerCutInfo
// integrityInfo is created in Setup and refreshed and returned by
// CheckIntegrity.
integrityInfo *model.IntegrityInfo
blocksMu sync.RWMutex // guards access to blocks and blocksMap
blocksMap map[int]model.BlockStatus // per-block status keyed by block index
}
// NewPowerLossTest creates the power-loss test case.
//
// The block size is taken from cfg.Test.BlockSize (KB) and the number of
// blocks from cfg.Test.DataSizeMB divided by that block size. If the
// configured block size converts to a non-positive value, the division is
// skipped (it would otherwise panic with an integer divide by zero), the test
// is created with zero blocks, and a warning is logged when logger is non-nil.
func NewPowerLossTest(cfg *config.Config, logger *logrus.Logger) *PowerLossTest {
	baseTest := NewBaseTestCase(
		"power_loss",
		"测试在断电情况下Open-CAS的数据完整性保护能力",
		cfg,
		logger,
	)

	// Convert once and reuse: the same value is both the stored block size
	// and the divisor for the block count.
	blockSize := utils.KBToBytes(float64(cfg.Test.BlockSize))

	totalBlocks := 0
	if blockSize > 0 {
		totalBlocks = utils.MBToBytes(float64(cfg.Test.DataSizeMB)) / blockSize
	} else if logger != nil {
		logger.Warnf("块大小配置无效 (%v KB), 断电测试将不会写入任何数据块", cfg.Test.BlockSize)
	}

	return &PowerLossTest{
		BaseTestCase: baseTest,
		blockSize:    blockSize,
		totalBlocks:  totalBlocks,
		blocksMap:    make(map[int]model.BlockStatus),
	}
}
// Setup prepares the environment for the power-loss test.
//
// After the base setup it creates the Open-CAS cache instance in write-back
// ("wb") mode, unmounts the configured mount point if something is mounted
// there, formats the cache device as ext4 unless recovery is true, mounts the
// device, creates the test directory and resets the in-memory block list,
// power-cut info and integrity info. Progress is set to 10 on success.
//
// Any failure after the base setup marks the test as failed and returns an
// error describing the step that failed.
func (t *PowerLossTest) Setup(ctx context.Context, recovery bool) error {
	if err := t.BaseTestCase.Setup(ctx); err != nil {
		return err
	}

	// fail flags the test as failed and builds the error for the given step.
	fail := func(step string, cause error) error {
		t.setStatus(StatusFailed)
		return fmt.Errorf("%s: %v", step, cause)
	}

	t.setMessage("创建Open-CAS缓存实例")
	id := t.config.Server.CacheInstanceID
	nvme := t.config.Server.DevicesNVMe
	hdd := t.config.Server.DevicesHDD

	// Write-back mode is what makes power-loss recovery worth testing.
	if err := t.casManager.CreateCacheInstance(id, nvme, hdd, "wb"); err != nil {
		return fail("创建缓存实例失败", err)
	}

	// Path of the exported cache device.
	cacheDevice := fmt.Sprintf("/dev/cas%s-1", id)
	t.setMessage(fmt.Sprintf("格式化缓存设备 %s", cacheDevice))

	// Make sure nothing is mounted on the mount point before touching the device.
	if utils.IsMounted(t.config.Server.MountPoint) {
		if err := t.casManager.UnmountDevice(t.config.Server.MountPoint); err != nil {
			return fail("卸载挂载点失败", err)
		}
	}

	// In recovery mode the existing filesystem is kept so its contents can be checked.
	if !recovery {
		if err := t.casManager.FormatDevice(cacheDevice, "ext4"); err != nil {
			return fail("格式化缓存设备失败", err)
		}
	}

	mountPoint := t.config.Server.MountPoint
	t.setMessage(fmt.Sprintf("挂载缓存设备到 %s", mountPoint))
	if err := t.casManager.MountDevice(cacheDevice, mountPoint); err != nil {
		return fail("挂载缓存设备失败", err)
	}

	t.testDir = filepath.Join(mountPoint, "power_loss_test")
	t.setMessage(fmt.Sprintf("创建测试目录 %s", t.testDir))
	if err := utils.CreateDirIfNotExist(t.testDir); err != nil {
		return fail("创建测试目录失败", err)
	}

	// Fresh bookkeeping for this run.
	t.blocks = make([]*model.TestBlock, 0, t.totalBlocks)
	t.powerCutInfo = &model.PowerCutInfo{}

	// Check time, counters, data-loss figure and the success flag all start
	// at their zero values.
	t.integrityInfo = &model.IntegrityInfo{
		TestID:         t.testID,
		TestType:       t.name,
		TotalBlocks:    t.totalBlocks,
		ExpectedBlocks: t.totalBlocks,
		BlocksMap:      make(map[int]model.BlockStatus),
	}

	t.setProgress(10)
	return nil
}
// Run executes the write phase of the power-loss test.
//
// It writes t.totalBlocks files named block_<i>.dat into t.testDir, each
// holding t.blockSize of random data, and runs `sync` after every block. The
// operator is expected to cut power manually while this loop is running. For
// every block the in-memory record and its status are stored under blocksMu
// before the file is written.
//
// Returns the populated test result when all blocks were written. If ctx is
// cancelled the status becomes aborted and (nil, ctx.Err()) is returned. If
// generating or writing a block fails the status becomes failed and a wrapped
// error is returned.
func (t *PowerLossTest) Run(ctx context.Context) (*model.TestResult, error) {
	t.setMessage("开始断电测试")
	startTime := time.Now()
	var totalBytesWritten int

	// Phase one: keep writing until every block is on disk or power is cut.
	t.setMessage("写入数据 (请在适当时手动断电)")
	for i := 0; i < t.totalBlocks; i++ {
		select {
		case <-ctx.Done():
			t.setStatus(StatusAborted)
			return nil, ctx.Err()
		default:
		}

		// t.blockSize was already converted from KB in NewPowerLossTest;
		// converting it again would make every file 1024x too large and
		// disagree with totalBlocks and the data-loss accounting.
		data, err := utils.GenerateRandomData(t.blockSize)
		if err != nil {
			t.setStatus(StatusFailed)
			return nil, fmt.Errorf("生成随机数据失败: %w", err)
		}

		block := model.NewTestBlock(data, i)
		fileName := fmt.Sprintf("block_%d.dat", i)

		// Record the block and its expected checksum before touching the disk.
		t.blocksMu.Lock()
		t.blocks = append(t.blocks, block)
		t.blocksMap[i] = model.BlockStatus{
			Available: true,
			Corrupted: false,
			Checksum:  block.Checksum,
			FilePath:  fileName,
		}
		t.blocksMu.Unlock()

		filePath := filepath.Join(t.testDir, fileName)
		if err := t.writeBlockDirect(filePath, data); err != nil {
			t.setStatus(StatusFailed)
			return nil, fmt.Errorf("写入文件 %s 失败: %w", filePath, err)
		}

		t.writtenBlocks++
		totalBytesWritten += len(data)
		written := i + 1 // blocks completed in this call, including the current one

		t.setMessage(fmt.Sprintf("同步数据到磁盘 (已写入 %d/%d 块)", written, t.totalBlocks))
		if _, err := utils.ExecuteCommand("sync"); err != nil {
			// A failed sync is logged but does not stop the test.
			t.logger.Warnf("执行sync命令失败: %v", err)
		}

		t.setProgress(float64(written) / float64(t.totalBlocks) * 100)

		// Publish a summary message every 100 blocks.
		if written%100 == 0 {
			t.setMessage(fmt.Sprintf("已写入 %d/%d 块数据, 共 %.2f MB", written, t.totalBlocks, float64(totalBytesWritten)/(1024*1024)))
		}
	}

	t.powerCutInfo.BlocksWritten = t.writtenBlocks

	// Final flush once everything has been written.
	t.setMessage("同步所有数据到磁盘")
	if _, err := utils.ExecuteCommand("sync"); err != nil {
		t.logger.Warnf("执行sync命令失败: %v", err)
	}

	t.setProgress(100)
	t.setStatus(StatusCompleted)
	t.setMessage("数据写入完成")

	result := t.getTestResult()
	result.BlocksWritten = t.writtenBlocks
	result.BlocksVerified = t.verifiedBlocks
	result.DataWrittenMB = utils.BytesToMB(totalBytesWritten)
	result.WriteSpeedMBs = utils.BytesToMB(totalBytesWritten) / time.Since(startTime).Seconds()
	result.Metrics = model.TestMetrics{
		DataIntegrityLoss: t.corruptedBlocks,
		RecoveryTimeMs:    t.recoveryTimeMs,
	}
	return result, nil
}

// writeBlockDirect creates (or truncates) path and writes data to it with
// O_DIRECT, closing the file before returning so that descriptors do not pile
// up across the write loop. A Close error is reported when the write itself
// succeeded.
//
// NOTE(review): O_DIRECT typically requires the buffer address and length to
// be aligned to the device's logical block size; confirm that the buffers
// produced by utils.GenerateRandomData and the configured block size satisfy
// this on the target filesystem.
func (t *PowerLossTest) writeBlockDirect(path string, data []byte) (err error) {
	file, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC|syscall.O_DIRECT, 0644)
	if err != nil {
		return err
	}
	defer func() {
		if closeErr := file.Close(); closeErr != nil && err == nil {
			err = closeErr
		}
	}()

	_, err = file.Write(data)
	return err
}
// CheckIntegrity verifies every expected block file and returns the updated
// integrity report.
//
// For each index in [0, totalBlocks) the file block_<i>.dat in t.testDir is
// classified as:
//   - missing: the file does not exist;
//   - corrupted: the file cannot be read, or its SHA-256 differs from the
//     checksum recorded in t.blocks for that index;
//   - available: the file is readable and either matches the recorded
//     checksum or no checksum is recorded for that index.
//
// The counters, per-block map, data-loss figure and success flag on
// t.integrityInfo are overwritten, and the same pointer is returned.
func (t *PowerLossTest) CheckIntegrity() *model.IntegrityInfo {
	t.setMessage("开始检查数据完整性")

	info := t.integrityInfo
	info.CheckTime = time.Now()

	// Start counting from scratch on every check.
	info.AvailableBlocks, info.CorruptedBlocks, info.MissingBlocks = 0, 0, 0

	for idx := 0; idx < t.totalBlocks; idx++ {
		name := fmt.Sprintf("block_%d.dat", idx)
		fullPath := filepath.Join(t.testDir, name)

		// Missing file.
		if !utils.FileExists(fullPath) {
			info.MissingBlocks++
			info.BlocksMap[idx] = model.BlockStatus{FilePath: name}
			continue
		}

		// Present but unreadable counts as corrupted.
		content, readErr := os.ReadFile(fullPath)
		if readErr != nil {
			info.CorruptedBlocks++
			info.BlocksMap[idx] = model.BlockStatus{
				Available: true,
				Corrupted: true,
				FilePath:  name,
			}
			continue
		}

		digest := sha256.Sum256(content)
		actual := hex.EncodeToString(digest[:])

		// Look up the checksum recorded at write time, if there is one.
		expected := ""
		t.blocksMu.RLock()
		if idx < len(t.blocks) && t.blocks[idx] != nil {
			expected = t.blocks[idx].Checksum
		}
		t.blocksMu.RUnlock()

		// Without a recorded checksum the block is treated as intact.
		damaged := expected != "" && actual != expected
		if damaged {
			info.CorruptedBlocks++
		} else {
			info.AvailableBlocks++
		}
		info.BlocksMap[idx] = model.BlockStatus{
			Available: true,
			Corrupted: damaged,
			Checksum:  actual,
			FilePath:  name,
		}
	}

	// Lost data is every missing or corrupted block at the nominal block size.
	info.DataLossMB = utils.BytesToMB((info.MissingBlocks + info.CorruptedBlocks) * t.blockSize)
	info.RecoverySuccess = info.CorruptedBlocks == 0 && info.MissingBlocks == 0

	t.setMessage(fmt.Sprintf("数据完整性检查完成: %d 个块正常, %d 个块丢失, %d 个块损坏",
		info.AvailableBlocks, info.MissingBlocks, info.CorruptedBlocks))
	return t.integrityInfo
}
// Cleanup tears down the test environment.
//
// It runs the base cleanup first and returns its error unchanged if that
// fails. Afterwards it unmounts the configured mount point and stops the
// cache instance; failures of those two steps are only logged as warnings,
// and nil is returned.
func (t *PowerLossTest) Cleanup(ctx context.Context) error {
	if baseErr := t.BaseTestCase.Cleanup(ctx); baseErr != nil {
		return baseErr
	}

	// Unmount the cache device (best effort).
	t.setMessage("卸载缓存设备")
	if unmountErr := t.casManager.UnmountDevice(t.config.Server.MountPoint); unmountErr != nil {
		t.logger.Warnf("卸载缓存设备失败: %v", unmountErr)
	}

	// Stop the cache instance (best effort).
	t.setMessage("停止缓存实例")
	if stopErr := t.casManager.StopCacheInstance(t.config.Server.CacheInstanceID); stopErr != nil {
		t.logger.Warnf("停止缓存实例失败: %v", stopErr)
	}

	return nil
}