418 lines
12 KiB
Go
418 lines
12 KiB
Go
package testcase
|
||
|
||
import (
|
||
"context"
|
||
"crypto/sha256"
|
||
"encoding/hex"
|
||
"encoding/json"
|
||
"fmt"
|
||
"os"
|
||
"path/filepath"
|
||
"sync"
|
||
"time"
|
||
|
||
"plp-test/internal/config"
|
||
"plp-test/internal/model"
|
||
"plp-test/internal/utils"
|
||
|
||
"github.com/sirupsen/logrus"
|
||
)
|
||
|
||
// PowerLossTest 断电测试
|
||
type PowerLossTest struct {
|
||
*BaseTestCase
|
||
testDir string
|
||
blockSize int
|
||
totalBlocks int
|
||
writtenBlocks int
|
||
verifiedBlocks int
|
||
corruptedBlocks int
|
||
blocks []*model.TestBlock
|
||
recoveryTimeMs float64
|
||
powerCutInfo *model.PowerCutInfo
|
||
integrityInfo *model.IntegrityInfo
|
||
blocksMu sync.RWMutex // 保护数据块访问
|
||
blocksMap map[int]model.BlockStatus // 数据块状态映射
|
||
}
|
||
|
||
// NewPowerLossTest 创建断电测试
|
||
func NewPowerLossTest(cfg *config.Config, logger *logrus.Logger) *PowerLossTest {
|
||
baseTest := NewBaseTestCase(
|
||
"power_loss",
|
||
"测试在断电情况下Open-CAS的数据完整性保护能力",
|
||
cfg,
|
||
logger,
|
||
)
|
||
|
||
return &PowerLossTest{
|
||
BaseTestCase: baseTest,
|
||
blockSize: utils.KBToBytes(float64(cfg.Test.BlockSize)),
|
||
totalBlocks: utils.MBToBytes(float64(cfg.Test.DataSizeMB)) / utils.KBToBytes(float64(cfg.Test.BlockSize)),
|
||
blocksMap: make(map[int]model.BlockStatus),
|
||
}
|
||
}
|
||
|
||
// Setup 设置测试环境
|
||
func (t *PowerLossTest) Setup(ctx context.Context, recovery bool) error {
|
||
if err := t.BaseTestCase.Setup(ctx); err != nil {
|
||
return err
|
||
}
|
||
|
||
t.setMessage("创建Open-CAS缓存实例")
|
||
id := t.config.Server.CacheInstanceID
|
||
nvme := t.config.Server.DevicesNVMe
|
||
hdd := t.config.Server.DevicesHDD
|
||
|
||
// 创建缓存实例 - 使用Write-Back模式以测试断电恢复
|
||
err := t.casManager.CreateCacheInstance(id, nvme, hdd, "wb")
|
||
if err != nil {
|
||
t.setStatus(StatusFailed)
|
||
return fmt.Errorf("创建缓存实例失败: %v", err)
|
||
}
|
||
|
||
// 获取缓存设备路径
|
||
cacheDevice := fmt.Sprintf("/dev/cas%s-1", id)
|
||
t.setMessage(fmt.Sprintf("格式化缓存设备 %s", cacheDevice))
|
||
|
||
// 确认挂载点没人挂载
|
||
if utils.IsMounted(t.config.Server.MountPoint) {
|
||
// 卸载挂载点
|
||
err = t.casManager.UnmountDevice(t.config.Server.MountPoint)
|
||
if err != nil {
|
||
t.setStatus(StatusFailed)
|
||
return fmt.Errorf("卸载挂载点失败: %v", err)
|
||
}
|
||
}
|
||
|
||
if !recovery {
|
||
// 格式化缓存设备
|
||
err = t.casManager.FormatDevice(cacheDevice, "ext4")
|
||
if err != nil {
|
||
t.setStatus(StatusFailed)
|
||
return fmt.Errorf("格式化缓存设备失败: %v", err)
|
||
}
|
||
}
|
||
|
||
// 挂载缓存设备
|
||
mountPoint := t.config.Server.MountPoint
|
||
t.setMessage(fmt.Sprintf("挂载缓存设备到 %s", mountPoint))
|
||
err = t.casManager.MountDevice(cacheDevice, mountPoint)
|
||
if err != nil {
|
||
t.setStatus(StatusFailed)
|
||
return fmt.Errorf("挂载缓存设备失败: %v", err)
|
||
}
|
||
|
||
// 创建测试目录
|
||
t.testDir = filepath.Join(mountPoint, "power_loss_test")
|
||
t.setMessage(fmt.Sprintf("创建测试目录 %s", t.testDir))
|
||
err = utils.CreateDirIfNotExist(t.testDir)
|
||
if err != nil {
|
||
t.setStatus(StatusFailed)
|
||
return fmt.Errorf("创建测试目录失败: %v", err)
|
||
}
|
||
|
||
// 初始化测试数据
|
||
t.blocks = make([]*model.TestBlock, 0, t.totalBlocks)
|
||
t.powerCutInfo = &model.PowerCutInfo{}
|
||
|
||
// 初始化完整性信息
|
||
t.integrityInfo = &model.IntegrityInfo{
|
||
TestID: t.testID,
|
||
TestType: t.name,
|
||
CheckTime: time.Time{},
|
||
TotalBlocks: t.totalBlocks,
|
||
ExpectedBlocks: t.totalBlocks,
|
||
AvailableBlocks: 0,
|
||
CorruptedBlocks: 0,
|
||
MissingBlocks: 0,
|
||
DataLossMB: 0,
|
||
RecoverySuccess: false,
|
||
BlocksMap: make(map[int]model.BlockStatus),
|
||
}
|
||
|
||
t.setProgress(10)
|
||
return nil
|
||
}
|
||
|
||
// Run 运行测试
|
||
func (t *PowerLossTest) Run(ctx context.Context) (*model.TestResult, error) {
|
||
t.setMessage("开始断电测试")
|
||
startTime := time.Now()
|
||
var totalBytesWritten int
|
||
|
||
// 第一阶段 - 在内存中预先生成所有数据块
|
||
t.setMessage("在内存中预生成数据块")
|
||
|
||
// 预先在内存中生成所有数据块
|
||
blocksInMemory := make([]*model.TestBlock, t.totalBlocks)
|
||
checksumMap := make(map[int]string)
|
||
|
||
for i := 0; i < t.totalBlocks; i++ {
|
||
select {
|
||
case <-ctx.Done():
|
||
t.setStatus(StatusAborted)
|
||
return nil, ctx.Err()
|
||
default:
|
||
// 生成随机数据
|
||
data, err := utils.GenerateRandomData(utils.KBToBytes(float64(t.blockSize)))
|
||
if err != nil {
|
||
t.setStatus(StatusFailed)
|
||
return nil, fmt.Errorf("生成随机数据失败: %v", err)
|
||
}
|
||
|
||
// 创建测试数据块
|
||
block := model.NewTestBlock(data, i)
|
||
|
||
// 存储到内存中
|
||
blocksInMemory[i] = block
|
||
checksumMap[i] = block.Checksum
|
||
|
||
// 更新进度
|
||
if i > 0 && i%100 == 0 {
|
||
progress := float64(i+1) / float64(t.totalBlocks) * 30 // 前30%进度用于生成
|
||
t.setProgress(progress)
|
||
t.setMessage(fmt.Sprintf("已在内存中生成 %d/%d 个数据块", i, t.totalBlocks))
|
||
}
|
||
}
|
||
}
|
||
|
||
// 将校验和映射持久化到文件
|
||
t.setMessage("持久化校验和映射到文件")
|
||
checksumFilePath := filepath.Join(t.testDir, "checksums.json")
|
||
checksumData, err := json.Marshal(checksumMap)
|
||
if err != nil {
|
||
t.setStatus(StatusFailed)
|
||
return nil, fmt.Errorf("序列化校验和映射失败: %v", err)
|
||
}
|
||
|
||
err = os.WriteFile(checksumFilePath, checksumData, 0644)
|
||
if err != nil {
|
||
t.setStatus(StatusFailed)
|
||
return nil, fmt.Errorf("保存校验和映射文件失败: %v", err)
|
||
}
|
||
|
||
// 确保校验和映射已落盘
|
||
t.setMessage("同步校验和映射到磁盘")
|
||
_, err = utils.ExecuteCommand("sync")
|
||
if err != nil {
|
||
t.logger.Warnf("执行sync命令失败: %v", err)
|
||
}
|
||
|
||
t.setProgress(35)
|
||
t.setMessage("校验和映射已保存,准备断电测试,开始写入数据块...")
|
||
time.Sleep(3 * time.Second) // 给用户一些时间准备
|
||
|
||
// 第二阶段 - 写入数据块到磁盘
|
||
t.setMessage("写入数据块到磁盘 (请在适当时手动断电)")
|
||
|
||
for i, block := range blocksInMemory {
|
||
select {
|
||
case <-ctx.Done():
|
||
t.setStatus(StatusAborted)
|
||
return nil, ctx.Err()
|
||
default:
|
||
// 添加到数据块列表
|
||
t.blocksMu.Lock()
|
||
t.blocks = append(t.blocks, block)
|
||
|
||
// 记录数据块状态
|
||
blockStatus := model.BlockStatus{
|
||
Available: true,
|
||
Corrupted: false,
|
||
Checksum: block.Checksum,
|
||
FilePath: fmt.Sprintf("block_%d.dat", i),
|
||
}
|
||
t.blocksMap[i] = blockStatus
|
||
t.blocksMu.Unlock()
|
||
|
||
// 写入文件
|
||
filePath := filepath.Join(t.testDir, fmt.Sprintf("block_%d.dat", i))
|
||
// direct IO 直接写入磁盘,使用跨平台的DirectIOFlag
|
||
file, err := os.OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC|DirectIOFlag, 0644)
|
||
if err != nil {
|
||
t.setStatus(StatusFailed)
|
||
return nil, fmt.Errorf("写入文件 %s 失败: %v", filePath, err)
|
||
}
|
||
defer file.Close()
|
||
|
||
_, err = file.Write(block.Data)
|
||
if err != nil {
|
||
t.setStatus(StatusFailed)
|
||
return nil, fmt.Errorf("写入文件 %s 失败: %v", filePath, err)
|
||
}
|
||
|
||
t.writtenBlocks++
|
||
totalBytesWritten += len(block.Data)
|
||
|
||
t.setMessage(fmt.Sprintf("同步数据到磁盘 (已写入 %d/%d 块)", i, t.totalBlocks))
|
||
_, err = utils.ExecuteCommand("sync")
|
||
if err != nil {
|
||
t.logger.Warnf("执行sync命令失败: %v", err)
|
||
}
|
||
|
||
// 更新进度
|
||
progress := 35 + float64(i+1)/float64(t.totalBlocks)*65 // 余下65%进度用于写入
|
||
t.setProgress(progress)
|
||
|
||
// 每写入一定数量的打印输出下当前进度到日志
|
||
if i > 0 && i%100 == 0 {
|
||
t.setMessage(fmt.Sprintf("已写入 %d/%d 块数据, 共 %.2f MB", i, t.totalBlocks, float64(i*t.blockSize)/(1024*1024)))
|
||
}
|
||
}
|
||
}
|
||
|
||
// 记录写入数据的信息
|
||
t.powerCutInfo.BlocksWritten = t.writtenBlocks
|
||
|
||
// 完成所有数据写入后,同步到磁盘
|
||
t.setMessage("同步所有数据到磁盘")
|
||
_, err = utils.ExecuteCommand("sync")
|
||
if err != nil {
|
||
t.logger.Warnf("执行sync命令失败: %v", err)
|
||
}
|
||
|
||
t.setProgress(100)
|
||
t.setStatus(StatusCompleted)
|
||
t.setMessage("数据写入完成")
|
||
|
||
// 构造测试结果
|
||
result := t.getTestResult()
|
||
result.BlocksWritten = t.writtenBlocks
|
||
result.BlocksVerified = t.verifiedBlocks
|
||
result.DataWrittenMB = utils.BytesToMB(totalBytesWritten)
|
||
result.WriteSpeedMBs = utils.BytesToMB(totalBytesWritten) / time.Since(startTime).Seconds()
|
||
result.Metrics = model.TestMetrics{
|
||
DataIntegrityLoss: t.corruptedBlocks,
|
||
RecoveryTimeMs: t.recoveryTimeMs,
|
||
}
|
||
|
||
return result, nil
|
||
}
|
||
|
||
// CheckIntegrity 检查数据完整性
|
||
func (t *PowerLossTest) CheckIntegrity() *model.IntegrityInfo {
|
||
t.setMessage("开始检查数据完整性")
|
||
t.integrityInfo.CheckTime = time.Now()
|
||
|
||
// 重置计数器
|
||
t.integrityInfo.AvailableBlocks = 0
|
||
t.integrityInfo.CorruptedBlocks = 0
|
||
t.integrityInfo.MissingBlocks = 0
|
||
|
||
// 尝试从文件加载校验和映射
|
||
checksumMap := make(map[int]string)
|
||
checksumFilePath := filepath.Join(t.testDir, "checksums.json")
|
||
if utils.FileExists(checksumFilePath) {
|
||
t.setMessage("从文件加载校验和映射")
|
||
data, err := os.ReadFile(checksumFilePath)
|
||
if err == nil {
|
||
err = json.Unmarshal(data, &checksumMap)
|
||
if err != nil {
|
||
t.logger.Warnf("解析校验和映射文件失败: %v,将使用内存中的校验和", err)
|
||
} else {
|
||
t.logger.Infof("从文件成功加载了 %d 个校验和", len(checksumMap))
|
||
}
|
||
} else {
|
||
t.logger.Warnf("读取校验和映射文件失败: %v,将使用内存中的校验和", err)
|
||
}
|
||
} else {
|
||
t.logger.Warnf("校验和映射文件不存在,将使用内存中的校验和")
|
||
}
|
||
|
||
// 为所有块创建状态记录
|
||
for i := 0; i < t.totalBlocks; i++ {
|
||
filePath := filepath.Join(t.testDir, fmt.Sprintf("block_%d.dat", i))
|
||
|
||
if !utils.FileExists(filePath) {
|
||
// 文件不存在
|
||
t.integrityInfo.MissingBlocks++
|
||
t.integrityInfo.BlocksMap[i] = model.BlockStatus{
|
||
Available: false,
|
||
Corrupted: false,
|
||
FilePath: fmt.Sprintf("block_%d.dat", i),
|
||
}
|
||
continue
|
||
}
|
||
|
||
// 读取文件数据
|
||
data, err := os.ReadFile(filePath)
|
||
if err != nil {
|
||
// 无法读取文件
|
||
t.integrityInfo.CorruptedBlocks++
|
||
t.integrityInfo.BlocksMap[i] = model.BlockStatus{
|
||
Available: true,
|
||
Corrupted: true,
|
||
FilePath: fmt.Sprintf("block_%d.dat", i),
|
||
}
|
||
continue
|
||
}
|
||
|
||
// 计算和验证校验和
|
||
hash := sha256.Sum256(data)
|
||
checksum := hex.EncodeToString(hash[:])
|
||
|
||
// 获取期望的校验和 - 优先使用文件中的校验和映射
|
||
var expectedChecksum string
|
||
if storedChecksum, ok := checksumMap[i]; ok {
|
||
expectedChecksum = storedChecksum
|
||
} else {
|
||
// 回退到内存中的校验和
|
||
t.blocksMu.RLock()
|
||
if i < len(t.blocks) && t.blocks[i] != nil {
|
||
expectedChecksum = t.blocks[i].Checksum
|
||
}
|
||
t.blocksMu.RUnlock()
|
||
}
|
||
|
||
if expectedChecksum != "" && checksum != expectedChecksum {
|
||
// 数据损坏
|
||
t.integrityInfo.CorruptedBlocks++
|
||
t.integrityInfo.BlocksMap[i] = model.BlockStatus{
|
||
Available: true,
|
||
Corrupted: true,
|
||
Checksum: checksum,
|
||
FilePath: fmt.Sprintf("block_%d.dat", i),
|
||
}
|
||
} else {
|
||
// 数据完好
|
||
t.integrityInfo.AvailableBlocks++
|
||
t.integrityInfo.BlocksMap[i] = model.BlockStatus{
|
||
Available: true,
|
||
Corrupted: false,
|
||
Checksum: checksum,
|
||
FilePath: fmt.Sprintf("block_%d.dat", i),
|
||
}
|
||
}
|
||
}
|
||
|
||
// 计算数据丢失量
|
||
t.integrityInfo.DataLossMB = utils.BytesToMB((t.integrityInfo.MissingBlocks + t.integrityInfo.CorruptedBlocks) * t.blockSize)
|
||
t.integrityInfo.RecoverySuccess = t.integrityInfo.CorruptedBlocks == 0 && t.integrityInfo.MissingBlocks == 0
|
||
|
||
t.setMessage(fmt.Sprintf("数据完整性检查完成: %d 个块正常, %d 个块丢失, %d 个块损坏",
|
||
t.integrityInfo.AvailableBlocks, t.integrityInfo.MissingBlocks, t.integrityInfo.CorruptedBlocks))
|
||
|
||
return t.integrityInfo
|
||
}
|
||
|
||
// Cleanup 清理测试环境
|
||
func (t *PowerLossTest) Cleanup(ctx context.Context) error {
|
||
if err := t.BaseTestCase.Cleanup(ctx); err != nil {
|
||
return err
|
||
}
|
||
|
||
t.setMessage("卸载缓存设备")
|
||
err := t.casManager.UnmountDevice(t.config.Server.MountPoint)
|
||
if err != nil {
|
||
t.logger.Warnf("卸载缓存设备失败: %v", err)
|
||
}
|
||
|
||
t.setMessage("停止缓存实例")
|
||
err = t.casManager.StopCacheInstance(t.config.Server.CacheInstanceID)
|
||
if err != nil {
|
||
t.logger.Warnf("停止缓存实例失败: %v", err)
|
||
}
|
||
|
||
return nil
|
||
}
|