plp-test/internal/testcase/power_loss-test.go

418 lines
12 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package testcase
import (
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"os"
"path/filepath"
"sync"
"time"
"plp-test/internal/config"
"plp-test/internal/model"
"plp-test/internal/utils"
"github.com/sirupsen/logrus"
)
// PowerLossTest is the power-loss test case. It embeds BaseTestCase and
// carries the bookkeeping used while writing data blocks and later
// checking them for integrity.
type PowerLossTest struct {
	*BaseTestCase

	// testDir is the directory that holds the block files and the
	// persisted checksum map.
	testDir string
	// blockSize is the size of a single data block, in bytes.
	blockSize int
	// totalBlocks is the number of blocks the test intends to write.
	totalBlocks int

	// Counters maintained by the test.
	writtenBlocks   int
	verifiedBlocks  int
	corruptedBlocks int

	// blocks collects the blocks handed to the write phase.
	blocks         []*model.TestBlock
	recoveryTimeMs float64
	powerCutInfo   *model.PowerCutInfo
	integrityInfo  *model.IntegrityInfo

	// blocksMu guards access to the data blocks.
	blocksMu sync.RWMutex
	// blocksMap maps a block index to its recorded status.
	blocksMap map[int]model.BlockStatus
}
// NewPowerLossTest builds a power-loss test case from the given
// configuration and logger. The block size comes from cfg.Test.BlockSize
// (KB) converted to bytes, and the block count is the configured data size
// (cfg.Test.DataSizeMB, converted to bytes) divided by that block size.
func NewPowerLossTest(cfg *config.Config, logger *logrus.Logger) *PowerLossTest {
	base := NewBaseTestCase(
		"power_loss",
		"测试在断电情况下Open-CAS的数据完整性保护能力",
		cfg,
		logger,
	)

	// Convert once and reuse for both the stored size and the block count.
	blockBytes := utils.KBToBytes(float64(cfg.Test.BlockSize))
	dataBytes := utils.MBToBytes(float64(cfg.Test.DataSizeMB))

	test := &PowerLossTest{
		BaseTestCase: base,
		blockSize:    blockBytes,
		totalBlocks:  dataBytes / blockBytes,
		blocksMap:    make(map[int]model.BlockStatus),
	}
	return test
}
// Setup prepares the environment for the power-loss test.
//
// It runs the base setup, creates the Open-CAS cache instance in write-back
// mode, unmounts the configured mount point if something is mounted there,
// optionally formats the cache device as ext4, mounts it, creates the test
// directory and resets the in-memory bookkeeping.
//
// When recovery is true the cache device is NOT formatted, so files written
// by an earlier run remain on it for a later integrity check.
//
// On any failure other than the base setup, the test status is set to
// StatusFailed and a wrapped error is returned.
func (t *PowerLossTest) Setup(ctx context.Context, recovery bool) error {
	if err := t.BaseTestCase.Setup(ctx); err != nil {
		return err
	}

	t.setMessage("创建Open-CAS缓存实例")
	id := t.config.Server.CacheInstanceID
	nvme := t.config.Server.DevicesNVMe
	hdd := t.config.Server.DevicesHDD
	mountPoint := t.config.Server.MountPoint

	// Write-back mode is used so that power-loss recovery is exercised.
	if err := t.casManager.CreateCacheInstance(id, nvme, hdd, "wb"); err != nil {
		t.setStatus(StatusFailed)
		return fmt.Errorf("创建缓存实例失败: %w", err)
	}

	// Path of the cache device exposed for this instance.
	cacheDevice := fmt.Sprintf("/dev/cas%s-1", id)

	// Make sure nothing is mounted on the mount point before touching the device.
	if utils.IsMounted(mountPoint) {
		if err := t.casManager.UnmountDevice(mountPoint); err != nil {
			t.setStatus(StatusFailed)
			return fmt.Errorf("卸载挂载点失败: %w", err)
		}
	}

	if !recovery {
		// Only announce formatting when it actually happens; in recovery
		// mode the existing filesystem is kept.
		t.setMessage(fmt.Sprintf("格式化缓存设备 %s", cacheDevice))
		if err := t.casManager.FormatDevice(cacheDevice, "ext4"); err != nil {
			t.setStatus(StatusFailed)
			return fmt.Errorf("格式化缓存设备失败: %w", err)
		}
	}

	// Mount the cache device.
	t.setMessage(fmt.Sprintf("挂载缓存设备到 %s", mountPoint))
	if err := t.casManager.MountDevice(cacheDevice, mountPoint); err != nil {
		t.setStatus(StatusFailed)
		return fmt.Errorf("挂载缓存设备失败: %w", err)
	}

	// Create the directory that will hold the block files.
	t.testDir = filepath.Join(mountPoint, "power_loss_test")
	t.setMessage(fmt.Sprintf("创建测试目录 %s", t.testDir))
	if err := utils.CreateDirIfNotExist(t.testDir); err != nil {
		t.setStatus(StatusFailed)
		return fmt.Errorf("创建测试目录失败: %w", err)
	}

	// Reset the in-memory test data.
	t.blocks = make([]*model.TestBlock, 0, t.totalBlocks)
	t.powerCutInfo = &model.PowerCutInfo{}

	// Reset the integrity report; every block is expected to be present.
	t.integrityInfo = &model.IntegrityInfo{
		TestID:          t.testID,
		TestType:        t.name,
		CheckTime:       time.Time{},
		TotalBlocks:     t.totalBlocks,
		ExpectedBlocks:  t.totalBlocks,
		AvailableBlocks: 0,
		CorruptedBlocks: 0,
		MissingBlocks:   0,
		DataLossMB:      0,
		RecoverySuccess: false,
		BlocksMap:       make(map[int]model.BlockStatus),
	}

	t.setProgress(10)
	return nil
}
// Run executes the write phase of the power-loss test.
//
// Phase 1 generates totalBlocks random blocks of blockSize bytes in memory
// and persists a block-index -> checksum map to checksums.json in the test
// directory, followed by a sync. Phase 2 writes each block to its own
// block_<i>.dat file (opened with DirectIOFlag) and runs sync after every
// block; the operator is expected to cut power during this phase.
//
// It returns the test result on completion. If ctx is cancelled the status
// is set to StatusAborted and ctx.Err() is returned; on any I/O or
// generation failure the status is set to StatusFailed and a wrapped error
// is returned.
func (t *PowerLossTest) Run(ctx context.Context) (*model.TestResult, error) {
	t.setMessage("开始断电测试")
	var totalBytesWritten int

	// Phase 1 - pre-generate every data block in memory.
	t.setMessage("在内存中预生成数据块")
	blocksInMemory := make([]*model.TestBlock, t.totalBlocks)
	checksumMap := make(map[int]string, t.totalBlocks)
	for i := 0; i < t.totalBlocks; i++ {
		if err := ctx.Err(); err != nil {
			t.setStatus(StatusAborted)
			return nil, err
		}

		// t.blockSize is already expressed in bytes (converted in the
		// constructor), so it must not be converted from KB again.
		data, err := utils.GenerateRandomData(t.blockSize)
		if err != nil {
			t.setStatus(StatusFailed)
			return nil, fmt.Errorf("生成随机数据失败: %w", err)
		}

		block := model.NewTestBlock(data, i)
		blocksInMemory[i] = block
		checksumMap[i] = block.Checksum

		// The first 30% of the progress bar covers generation.
		if i > 0 && i%100 == 0 {
			progress := float64(i+1) / float64(t.totalBlocks) * 30
			t.setProgress(progress)
			t.setMessage(fmt.Sprintf("已在内存中生成 %d/%d 个数据块", i+1, t.totalBlocks))
		}
	}

	// Persist the checksum map so it is available to a later integrity check.
	t.setMessage("持久化校验和映射到文件")
	checksumFilePath := filepath.Join(t.testDir, "checksums.json")
	checksumData, err := json.Marshal(checksumMap)
	if err != nil {
		t.setStatus(StatusFailed)
		return nil, fmt.Errorf("序列化校验和映射失败: %w", err)
	}
	if err := os.WriteFile(checksumFilePath, checksumData, 0644); err != nil {
		t.setStatus(StatusFailed)
		return nil, fmt.Errorf("保存校验和映射文件失败: %w", err)
	}

	// Flush the checksum map to disk; a failing sync is only logged.
	t.setMessage("同步校验和映射到磁盘")
	if _, err := utils.ExecuteCommand("sync"); err != nil {
		t.logger.Warnf("执行sync命令失败: %v", err)
	}
	t.setProgress(35)
	t.setMessage("校验和映射已保存,准备断电测试,开始写入数据块...")

	// Give the operator a moment to get ready, but stay cancellable.
	select {
	case <-ctx.Done():
		t.setStatus(StatusAborted)
		return nil, ctx.Err()
	case <-time.After(3 * time.Second):
	}

	// Phase 2 - write the blocks to disk.
	t.setMessage("写入数据块到磁盘 (请在适当时手动断电)")
	// Measure write speed from here so that generation time and the pause
	// above do not dilute it.
	writeStart := time.Now()
	for i, block := range blocksInMemory {
		if err := ctx.Err(); err != nil {
			t.setStatus(StatusAborted)
			return nil, err
		}

		fileName := fmt.Sprintf("block_%d.dat", i)

		// Record the block and its status.
		t.blocksMu.Lock()
		t.blocks = append(t.blocks, block)
		t.blocksMap[i] = model.BlockStatus{
			Available: true,
			Corrupted: false,
			Checksum:  block.Checksum,
			FilePath:  fileName,
		}
		t.blocksMu.Unlock()

		// The helper closes the file before returning, so descriptors do
		// not pile up across iterations.
		filePath := filepath.Join(t.testDir, fileName)
		if err := t.writeBlockFile(filePath, block.Data); err != nil {
			t.setStatus(StatusFailed)
			return nil, fmt.Errorf("写入文件 %s 失败: %w", filePath, err)
		}
		t.writtenBlocks++
		totalBytesWritten += len(block.Data)

		t.setMessage(fmt.Sprintf("同步数据到磁盘 (已写入 %d/%d 块)", i+1, t.totalBlocks))
		if _, err := utils.ExecuteCommand("sync"); err != nil {
			t.logger.Warnf("执行sync命令失败: %v", err)
		}

		// The remaining 65% of the progress bar covers writing.
		progress := 35 + float64(i+1)/float64(t.totalBlocks)*65
		t.setProgress(progress)

		// Periodically report how much has been written so far.
		if i > 0 && i%100 == 0 {
			t.setMessage(fmt.Sprintf("已写入 %d/%d 块数据, 共 %.2f MB", i+1, t.totalBlocks, float64(totalBytesWritten)/(1024*1024)))
		}
	}

	// Record how many blocks were written.
	t.powerCutInfo.BlocksWritten = t.writtenBlocks

	// Final sync once everything has been written.
	t.setMessage("同步所有数据到磁盘")
	if _, err := utils.ExecuteCommand("sync"); err != nil {
		t.logger.Warnf("执行sync命令失败: %v", err)
	}
	t.setProgress(100)
	t.setStatus(StatusCompleted)
	t.setMessage("数据写入完成")

	// Assemble the test result.
	result := t.getTestResult()
	result.BlocksWritten = t.writtenBlocks
	result.BlocksVerified = t.verifiedBlocks
	result.DataWrittenMB = utils.BytesToMB(totalBytesWritten)
	result.WriteSpeedMBs = utils.BytesToMB(totalBytesWritten) / time.Since(writeStart).Seconds()
	result.Metrics = model.TestMetrics{
		DataIntegrityLoss: t.corruptedBlocks,
		RecoveryTimeMs:    t.recoveryTimeMs,
	}
	return result, nil
}

// writeBlockFile creates (or truncates) filePath with DirectIOFlag set,
// writes data to it and closes the file. A Close error is returned when the
// write itself succeeded.
func (t *PowerLossTest) writeBlockFile(filePath string, data []byte) (err error) {
	file, err := os.OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC|DirectIOFlag, 0644)
	if err != nil {
		return err
	}
	defer func() {
		if cerr := file.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()
	_, err = file.Write(data)
	return err
}
// CheckIntegrity verifies every expected block file in the test directory
// and returns the updated integrity report.
//
// For each block index in [0, totalBlocks) the file block_<i>.dat is
// classified as:
//   - missing:   the file does not exist;
//   - corrupted: the file cannot be read, or its SHA-256 differs from the
//     expected checksum;
//   - available: otherwise.
//
// The expected checksum is taken from checksums.json when it has an entry
// for the block, falling back to the in-memory block list. A block with no
// reference checksum from either source cannot be verified; it is still
// counted as available, and the number of such blocks is logged as a
// warning so a "clean" report is not mistaken for a verified one.
func (t *PowerLossTest) CheckIntegrity() *model.IntegrityInfo {
	t.setMessage("开始检查数据完整性")
	t.integrityInfo.CheckTime = time.Now()

	// Reset the counters.
	t.integrityInfo.AvailableBlocks = 0
	t.integrityInfo.CorruptedBlocks = 0
	t.integrityInfo.MissingBlocks = 0

	checksumMap := t.loadChecksumMap()

	// Number of readable blocks for which no reference checksum exists.
	unverified := 0

	for i := 0; i < t.totalBlocks; i++ {
		fileName := fmt.Sprintf("block_%d.dat", i)
		filePath := filepath.Join(t.testDir, fileName)

		if !utils.FileExists(filePath) {
			// The file does not exist.
			t.integrityInfo.MissingBlocks++
			t.integrityInfo.BlocksMap[i] = model.BlockStatus{
				Available: false,
				Corrupted: false,
				FilePath:  fileName,
			}
			continue
		}

		data, err := os.ReadFile(filePath)
		if err != nil {
			// The file exists but cannot be read.
			t.integrityInfo.CorruptedBlocks++
			t.integrityInfo.BlocksMap[i] = model.BlockStatus{
				Available: true,
				Corrupted: true,
				FilePath:  fileName,
			}
			continue
		}

		// Compute the checksum of what is on disk.
		hash := sha256.Sum256(data)
		checksum := hex.EncodeToString(hash[:])

		// Prefer the checksum persisted to file; fall back to memory.
		expectedChecksum, ok := checksumMap[i]
		if !ok {
			t.blocksMu.RLock()
			if i < len(t.blocks) && t.blocks[i] != nil {
				expectedChecksum = t.blocks[i].Checksum
			}
			t.blocksMu.RUnlock()
		}
		if expectedChecksum == "" {
			unverified++
		}

		corrupted := expectedChecksum != "" && checksum != expectedChecksum
		if corrupted {
			t.integrityInfo.CorruptedBlocks++
		} else {
			t.integrityInfo.AvailableBlocks++
		}
		t.integrityInfo.BlocksMap[i] = model.BlockStatus{
			Available: true,
			Corrupted: corrupted,
			Checksum:  checksum,
			FilePath:  fileName,
		}
	}

	if unverified > 0 {
		t.logger.Warnf("有 %d 个数据块缺少参考校验和, 无法验证其完整性, 已按正常计入", unverified)
	}

	// Data loss is the size of all missing and corrupted blocks.
	t.integrityInfo.DataLossMB = utils.BytesToMB((t.integrityInfo.MissingBlocks + t.integrityInfo.CorruptedBlocks) * t.blockSize)
	t.integrityInfo.RecoverySuccess = t.integrityInfo.CorruptedBlocks == 0 && t.integrityInfo.MissingBlocks == 0
	t.setMessage(fmt.Sprintf("数据完整性检查完成: %d 个块正常, %d 个块丢失, %d 个块损坏",
		t.integrityInfo.AvailableBlocks, t.integrityInfo.MissingBlocks, t.integrityInfo.CorruptedBlocks))
	return t.integrityInfo
}

// loadChecksumMap reads checksums.json from the test directory and returns
// the block-index -> checksum map. Any problem (file absent, unreadable or
// not valid JSON) is logged as a warning and whatever was decoded so far,
// possibly nothing, is returned.
func (t *PowerLossTest) loadChecksumMap() map[int]string {
	checksumMap := make(map[int]string)
	checksumFilePath := filepath.Join(t.testDir, "checksums.json")

	if !utils.FileExists(checksumFilePath) {
		t.logger.Warnf("校验和映射文件不存在,将使用内存中的校验和")
		return checksumMap
	}

	t.setMessage("从文件加载校验和映射")
	data, err := os.ReadFile(checksumFilePath)
	if err != nil {
		t.logger.Warnf("读取校验和映射文件失败: %v, 将使用内存中的校验和", err)
		return checksumMap
	}
	if err := json.Unmarshal(data, &checksumMap); err != nil {
		t.logger.Warnf("解析校验和映射文件失败: %v, 将使用内存中的校验和", err)
		return checksumMap
	}
	t.logger.Infof("从文件成功加载了 %d 个校验和", len(checksumMap))
	return checksumMap
}
// Cleanup tears down the test environment: it runs the base cleanup, then
// unmounts the cache device and stops the cache instance. A failing base
// cleanup is returned immediately; unmount and stop failures are only
// logged as warnings.
func (t *PowerLossTest) Cleanup(ctx context.Context) error {
	if baseErr := t.BaseTestCase.Cleanup(ctx); baseErr != nil {
		return baseErr
	}

	// Unmounting is best-effort.
	t.setMessage("卸载缓存设备")
	if unmountErr := t.casManager.UnmountDevice(t.config.Server.MountPoint); unmountErr != nil {
		t.logger.Warnf("卸载缓存设备失败: %v", unmountErr)
	}

	// Stopping the cache instance is best-effort as well.
	t.setMessage("停止缓存实例")
	if stopErr := t.casManager.StopCacheInstance(t.config.Server.CacheInstanceID); stopErr != nil {
		t.logger.Warnf("停止缓存实例失败: %v", stopErr)
	}

	return nil
}