open-cas-linux/modules/cas_cache/volume/vol_block_dev_top.c

/*
* Copyright(c) 2012-2022 Intel Corporation
* Copyright(c) 2024 Huawei Technologies Co., Ltd.
* SPDX-License-Identifier: BSD-3-Clause
*/
#include "cas_cache.h"
#include "utils/cas_err.h"
static void blkdev_set_bio_data(struct blk_data *data, struct bio *bio)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
struct bio_vec *bvec;
uint32_t iter = 0, i = 0;
bio_for_each_segment(bvec, bio, iter) {
BUG_ON(i >= data->size);
data->vec[i] = *bvec;
i++;
}
#else
struct bio_vec bvec;
struct bvec_iter iter;
uint32_t i = 0;
bio_for_each_segment(bvec, bio, iter) {
BUG_ON(i >= data->size);
data->vec[i] = bvec;
i++;
}
#endif
}
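
/*
 * Enable flush/FUA on the exported object's queue if either the core
 * device queue or the cache device advertises the capability.
 */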
static void blkdev_set_exported_object_flush_fua(ocf_core_t core)
{
ocf_cache_t cache = ocf_core_get_cache(core);
ocf_volume_t core_vol = ocf_core_get_volume(core);
struct bd_object *bd_core_vol;
struct request_queue *core_q, *exp_q;
bool flush, fua;
struct cache_priv *cache_priv = ocf_cache_get_priv(cache);
bd_core_vol = bd_object(core_vol);
core_q = cas_disk_get_queue(bd_core_vol->dsk);
exp_q = cas_exp_obj_get_queue(bd_core_vol->dsk);
flush = (CAS_CHECK_QUEUE_FLUSH(core_q) ||
cache_priv->device_properties.flush);
fua = (CAS_CHECK_QUEUE_FUA(core_q) || cache_priv->device_properties.fua);
cas_set_queue_flush_fua(exp_q, flush, fua);
}
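
/*
 * Always advertise discard on the exported object. Inherit the core
 * device's discard limits when it supports discard; otherwise expose
 * discard with cache-line-size granularity, capped at the core size.
 */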
static void blkdev_set_discard_properties(ocf_cache_t cache,
struct request_queue *exp_q, struct block_device *core_bd,
sector_t core_sectors)
{
struct request_queue *core_q;
core_q = bdev_get_queue(core_bd);
cas_set_discard_flag(exp_q);
CAS_SET_DISCARD_ZEROES_DATA(exp_q->limits, 0);
if (core_q && cas_has_discard_support(core_bd)) {
blk_queue_max_discard_sectors(exp_q, core_q->limits.max_discard_sectors);
exp_q->limits.discard_alignment =
bdev_discard_alignment(core_bd);
exp_q->limits.discard_granularity =
core_q->limits.discard_granularity;
} else {
blk_queue_max_discard_sectors(exp_q,
min((uint64_t)core_sectors, (uint64_t)UINT_MAX));
exp_q->limits.discard_granularity = ocf_cache_get_line_size(cache);
exp_q->limits.discard_alignment = 0;
}
}

/*
 * Map the geometry (sectors, queue limits etc.) of the underlying (core)
 * object to the geometry of the exported object.
 */
static int blkdev_core_set_geometry(struct cas_disk *dsk, void *private)
{
ocf_core_t core;
ocf_cache_t cache;
ocf_volume_t core_vol;
struct request_queue *core_q, *exp_q;
struct block_device *core_bd;
sector_t sectors;
const char *path;
struct cache_priv *cache_priv;
BUG_ON(!private);
core = private;
cache = ocf_core_get_cache(core);
core_vol = ocf_core_get_volume(core);
cache_priv = ocf_cache_get_priv(cache);
path = ocf_volume_get_uuid(core_vol)->data;
core_bd = cas_disk_get_blkdev(dsk);
BUG_ON(!core_bd);
core_q = cas_bdev_whole(core_bd)->bd_disk->queue;
exp_q = cas_exp_obj_get_queue(dsk);
sectors = ocf_volume_get_length(core_vol) >> SECTOR_SHIFT;
set_capacity(cas_exp_obj_get_gendisk(dsk), sectors);
cas_copy_queue_limits(exp_q, &cache_priv->device_properties.queue_limits,
core_q);
if (exp_q->limits.logical_block_size >
core_q->limits.logical_block_size) {
printk(KERN_ERR "Cache device logical sector size is "
"greater than core device %s logical sector size.\n",
path);
return -KCAS_ERR_UNALIGNED;
}
blk_stack_limits(&exp_q->limits, &core_q->limits, 0);
/* We don't want to receive split requests */
CAS_SET_QUEUE_CHUNK_SECTORS(exp_q, 0);
blkdev_set_exported_object_flush_fua(core);
blkdev_set_discard_properties(cache, exp_q, core_bd, sectors);
exp_q->queue_flags |= (1 << QUEUE_FLAG_NONROT);
return 0;
}
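
/*
 * Context for deferring bio handling from atomic context to the
 * per-volume workqueue.
 */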
struct defer_bio_context {
struct work_struct io_work;
void (*cb)(struct bd_object *bvol, struct bio *bio);
struct bd_object *bvol;
struct bio *bio;
};
static void blkdev_defer_bio_work(struct work_struct *work)
{
struct defer_bio_context *context;
context = container_of(work, struct defer_bio_context, io_work);
context->cb(context->bvol, context->bio);
kfree(context);
}
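
/*
 * Queue a bio for handling in process context. If the context allocation
 * fails, the bio is completed immediately with -ENOMEM.
 */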
static void blkdev_defer_bio(struct bd_object *bvol, struct bio *bio,
void (*cb)(struct bd_object *bvol, struct bio *bio))
{
struct defer_bio_context *context;
BUG_ON(!bvol->expobj_wq);
context = kmalloc(sizeof(*context), GFP_ATOMIC);
if (!context) {
CAS_BIO_ENDIO(bio, CAS_BIO_BISIZE(bio),
CAS_ERRNO_TO_BLK_STS(-ENOMEM));
return;
}
context->cb = cb;
context->bio = bio;
context->bvol = bvol;
INIT_WORK(&context->io_work, blkdev_defer_bio_work);
queue_work(bvol->expobj_wq, &context->io_work);
}
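
/*
 * Drop one reference on the master request. When the last sub-IO
 * completes, finish the IO accounting and end the original bio with the
 * first error recorded (if any).
 */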
static void blkdev_complete_data_master(struct blk_data *master, int error)
{
int result;
master->error = master->error ?: error;
if (atomic_dec_return(&master->master_remaining))
return;
cas_generic_end_io_acct(master->bio, master->start_time);
result = map_cas_err_to_generic(master->error);
CAS_BIO_ENDIO(master->bio, master->master_size,
CAS_ERRNO_TO_BLK_STS(result));
cas_free_blk_data(master);
}
static void blkdev_complete_data(struct ocf_io *io, int error)
{
struct bio *bio = io->priv1;
struct blk_data *master = io->priv2;
struct blk_data *data = ocf_io_get_data(io);
ocf_io_put(io);
if (data != master)
cas_free_blk_data(data);
if (bio != master->bio)
bio_put(bio);
blkdev_complete_data_master(master, error);
}
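
/* Tracks the original (master) bio across the sub-IOs it is split into. */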
struct blkdev_data_master_ctx {
struct blk_data *data;
struct bio *bio;
uint32_t master_size;
unsigned long long start_time;
};
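
/*
 * Build and submit one OCF IO for a bio (or a fragment of one). The
 * blk_data of the first fragment becomes the master that tracks
 * completion of the whole request; every submitted fragment takes a
 * reference on it.
 */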
static int blkdev_handle_data_single(struct bd_object *bvol, struct bio *bio,
struct blkdev_data_master_ctx *master_ctx)
{
ocf_cache_t cache = ocf_volume_get_cache(bvol->front_volume);
struct cache_priv *cache_priv = ocf_cache_get_priv(cache);
ocf_queue_t queue = cache_priv->io_queues[smp_processor_id()];
struct ocf_io *io;
struct blk_data *data;
uint64_t flags = CAS_BIO_OP_FLAGS(bio);
int ret;
data = cas_alloc_blk_data(bio_segments(bio), GFP_NOIO);
if (!data) {
CAS_PRINT_RL(KERN_CRIT "BIO data vector allocation error\n");
return -ENOMEM;
}
blkdev_set_bio_data(data, bio);
io = ocf_volume_new_io(bvol->front_volume, queue,
CAS_BIO_BISECTOR(bio) << SECTOR_SHIFT,
CAS_BIO_BISIZE(bio), (bio_data_dir(bio) == READ) ?
OCF_READ : OCF_WRITE,
cas_cls_classify(cache, bio), CAS_CLEAR_FLUSH(flags));
if (!io) {
printk(KERN_CRIT "Out of memory. Ending IO processing.\n");
cas_free_blk_data(data);
return -ENOMEM;
}
ret = ocf_io_set_data(io, data, 0);
if (ret < 0) {
ocf_io_put(io);
cas_free_blk_data(data);
return -EINVAL;
}
if (!master_ctx->data) {
atomic_set(&data->master_remaining, 1);
data->bio = master_ctx->bio;
data->master_size = master_ctx->master_size;
data->start_time = master_ctx->start_time;
master_ctx->data = data;
}
atomic_inc(&master_ctx->data->master_remaining);
ocf_io_set_cmpl(io, bio, master_ctx->data, blkdev_complete_data);
ocf_volume_submit_io(io);
return 0;
}
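
/*
 * Split bios larger than 32MiB into chunks with 128KiB-aligned
 * boundaries and submit each chunk separately. The initial master
 * reference taken in blkdev_handle_data_single() is dropped here once
 * all chunks have been issued.
 */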
static void blkdev_handle_data(struct bd_object *bvol, struct bio *bio)
{
const uint32_t max_io_sectors = (32*MiB) >> SECTOR_SHIFT;
const uint32_t align_sectors = (128*KiB) >> SECTOR_SHIFT;
struct bio *split = NULL;
uint32_t sectors, to_submit;
int error;
struct blkdev_data_master_ctx master_ctx = {
.bio = bio,
.master_size = CAS_BIO_BISIZE(bio),
};
if (unlikely(CAS_BIO_BISIZE(bio) == 0)) {
CAS_PRINT_RL(KERN_ERR
"Not able to handle empty BIO, flags = "
CAS_BIO_OP_FLAGS_FORMAT "\n", CAS_BIO_OP_FLAGS(bio));
CAS_BIO_ENDIO(bio, CAS_BIO_BISIZE(bio),
CAS_ERRNO_TO_BLK_STS(-EINVAL));
return;
}
master_ctx.start_time = cas_generic_start_io_acct(bio);
for (sectors = bio_sectors(bio); sectors > 0;) {
if (sectors <= max_io_sectors) {
split = bio;
sectors = 0;
} else {
to_submit = max_io_sectors -
CAS_BIO_BISECTOR(bio) % align_sectors;
split = cas_bio_split(bio, to_submit);
sectors -= to_submit;
}
error = blkdev_handle_data_single(bvol, split, &master_ctx);
if (error)
goto err;
}
blkdev_complete_data_master(master_ctx.data, 0);
return;
err:
if (split != bio)
bio_put(split);
if (master_ctx.data)
blkdev_complete_data_master(master_ctx.data, error);
else
CAS_BIO_ENDIO(bio, CAS_BIO_BISIZE(bio), CAS_ERRNO_TO_BLK_STS(error));
}
static void blkdev_complete_discard(struct ocf_io *io, int error)
{
struct bio *bio = io->priv1;
int result = map_cas_err_to_generic(error);
CAS_BIO_ENDIO(bio, CAS_BIO_BISIZE(bio), CAS_ERRNO_TO_BLK_STS(result));
ocf_io_put(io);
}
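
/* Translate a discard bio into an OCF discard request. */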
static void blkdev_handle_discard(struct bd_object *bvol, struct bio *bio)
{
ocf_cache_t cache = ocf_volume_get_cache(bvol->front_volume);
struct cache_priv *cache_priv = ocf_cache_get_priv(cache);
ocf_queue_t queue = cache_priv->io_queues[smp_processor_id()];
struct ocf_io *io;
io = ocf_volume_new_io(bvol->front_volume, queue,
CAS_BIO_BISECTOR(bio) << SECTOR_SHIFT,
CAS_BIO_BISIZE(bio), OCF_WRITE, 0, 0);
if (!io) {
CAS_PRINT_RL(KERN_CRIT
"Out of memory. Ending IO processing.\n");
CAS_BIO_ENDIO(bio, CAS_BIO_BISIZE(bio), CAS_ERRNO_TO_BLK_STS(-ENOMEM));
return;
}
ocf_io_set_cmpl(io, bio, NULL, blkdev_complete_discard);
ocf_volume_submit_discard(io);
}
static void blkdev_handle_bio_noflush(struct bd_object *bvol, struct bio *bio)
{
if (CAS_IS_DISCARD(bio))
blkdev_handle_discard(bvol, bio);
else
blkdev_handle_data(bvol, bio);
}
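
/*
 * Flush completion. A pure flush (or a failed one) is ended here; a
 * flush that carries data is re-queued to the workqueue rather than
 * being submitted directly from completion context.
 */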
static void blkdev_complete_flush(struct ocf_io *io, int error)
{
struct bio *bio = io->priv1;
struct bd_object *bvol = io->priv2;
int result = map_cas_err_to_generic(error);
ocf_io_put(io);
if (CAS_BIO_BISIZE(bio) == 0 || error) {
CAS_BIO_ENDIO(bio, CAS_BIO_BISIZE(bio),
CAS_ERRNO_TO_BLK_STS(result));
return;
}
blkdev_defer_bio(bvol, bio, blkdev_handle_bio_noflush);
}
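
/*
 * Issue the flush part of a bio as a zero-length OCF write with the
 * flush flag set; any data it carries is handled on completion.
 */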
static void blkdev_handle_flush(struct bd_object *bvol, struct bio *bio)
{
ocf_cache_t cache = ocf_volume_get_cache(bvol->front_volume);
struct cache_priv *cache_priv = ocf_cache_get_priv(cache);
ocf_queue_t queue = cache_priv->io_queues[smp_processor_id()];
struct ocf_io *io;
io = ocf_volume_new_io(bvol->front_volume, queue, 0, 0, OCF_WRITE, 0,
CAS_SET_FLUSH(0));
if (!io) {
CAS_PRINT_RL(KERN_CRIT
"Out of memory. Ending IO processing.\n");
CAS_BIO_ENDIO(bio, CAS_BIO_BISIZE(bio), CAS_ERRNO_TO_BLK_STS(-ENOMEM));
return;
}
ocf_io_set_cmpl(io, bio, bvol, blkdev_complete_flush);
ocf_volume_submit_flush(io);
}
static void blkdev_handle_bio(struct bd_object *bvol, struct bio *bio)
{
if (CAS_IS_SET_FLUSH(CAS_BIO_OP_FLAGS(bio)))
blkdev_handle_flush(bvol, bio);
else
blkdev_handle_bio_noflush(bvol, bio);
}
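
/*
 * Entry point for bios arriving at the exported object. Handling is
 * deferred to the workqueue when called from interrupt context.
 */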
static void blkdev_submit_bio(struct bd_object *bvol, struct bio *bio)
{
if (in_interrupt())
blkdev_defer_bio(bvol, bio, blkdev_handle_bio);
else
blkdev_handle_bio(bvol, bio);
}
static void blkdev_core_submit_bio(struct cas_disk *dsk,
struct bio *bio, void *private)
{
ocf_core_t core = private;
struct bd_object *bvol;
BUG_ON(!core);
bvol = bd_object(ocf_core_get_volume(core));
blkdev_submit_bio(bvol, bio);
}
static struct cas_exp_obj_ops kcas_core_exp_obj_ops = {
.set_geometry = blkdev_core_set_geometry,
.submit_bio = blkdev_core_submit_bio,
};
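
/*
 * Map the geometry of the cache device to the geometry of the exported
 * object created on top of the cache volume.
 */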
static int blkdev_cache_set_geometry(struct cas_disk *dsk, void *private)
{
ocf_cache_t cache;
ocf_volume_t volume;
struct bd_object *bvol;
struct request_queue *cache_q, *exp_q;
struct block_device *bd;
sector_t sectors;
BUG_ON(!private);
cache = private;
volume = ocf_cache_get_volume(cache);
bvol = bd_object(volume);
bd = cas_disk_get_blkdev(bvol->dsk);
BUG_ON(!bd);
cache_q = bd->bd_disk->queue;
exp_q = cas_exp_obj_get_queue(dsk);
sectors = ocf_volume_get_length(volume) >> SECTOR_SHIFT;
set_capacity(cas_exp_obj_get_gendisk(dsk), sectors);
cas_copy_queue_limits(exp_q, &cache_q->limits, cache_q);
cas_cache_set_no_merges_flag(cache_q);
blk_stack_limits(&exp_q->limits, &cache_q->limits, 0);
/* We don't want to receive split requests */
CAS_SET_QUEUE_CHUNK_SECTORS(exp_q, 0);
cas_set_queue_flush_fua(exp_q, CAS_CHECK_QUEUE_FLUSH(cache_q),
CAS_CHECK_QUEUE_FUA(cache_q));
return 0;
}
static void blkdev_cache_submit_bio(struct cas_disk *dsk,
struct bio *bio, void *private)
{
ocf_cache_t cache = private;
struct bd_object *bvol;
BUG_ON(!cache);
bvol = bd_object(ocf_cache_get_volume(cache));
blkdev_submit_bio(bvol, bio);
}
static struct cas_exp_obj_ops kcas_cache_exp_obj_ops = {
.set_geometry = blkdev_cache_set_geometry,
.submit_bio = blkdev_cache_submit_bio,
};
/****************************************
* Exported object management functions *
****************************************/
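
/* Strip the "cache"/"core" name prefix, leaving only the numeric id. */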
static const char *get_cache_id_string(ocf_cache_t cache)
{
return ocf_cache_get_name(cache) + sizeof("cache") - 1;
}
static const char *get_core_id_string(ocf_core_t core)
{
return ocf_core_get_name(core) + sizeof("core") - 1;
}
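
/*
 * Create the exported block device for a volume together with its
 * dedicated workqueue; on failure everything allocated here is freed.
 */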
static int kcas_volume_create_exported_object(ocf_volume_t volume,
const char *name, void *priv, struct cas_exp_obj_ops *ops)
{
struct bd_object *bvol = bd_object(volume);
int result;
bvol->expobj_wq = alloc_workqueue("expobj_wq_%s",
WQ_MEM_RECLAIM | WQ_HIGHPRI, 0,
name);
if (!bvol->expobj_wq) {
result = -ENOMEM;
goto end;
}
result = cas_exp_obj_create(bvol->dsk, name,
THIS_MODULE, ops, priv);
if (result) {
destroy_workqueue(bvol->expobj_wq);
goto end;
}
bvol->expobj_valid = true;
end:
if (result) {
printk(KERN_ERR "Cannot create exported object %s. Error code %d\n",
name, result);
}
return result;
}
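
/*
 * Tear down an exported object: lock it, destroy it along with its
 * workqueue, then release the lock. -EBUSY from the lock means the
 * device is still open.
 */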
static int kcas_volume_destroy_exported_object(ocf_volume_t volume)
{
struct bd_object *bvol;
int result;
BUG_ON(!volume);
bvol = bd_object(volume);
BUG_ON(!bvol);
if (!bvol->expobj_valid)
return 0;
result = cas_exp_obj_lock(bvol->dsk);
if (result == -EBUSY)
return -KCAS_ERR_DEV_PENDING;
else if (result)
return result;
result = cas_exp_obj_destroy(bvol->dsk);
if (result)
goto err;
bvol->expobj_valid = false;
destroy_workqueue(bvol->expobj_wq);
cas_exp_obj_unlock(bvol->dsk);
cas_exp_obj_cleanup(bvol->dsk);
return 0;
err:
cas_exp_obj_unlock(bvol->dsk);
return result;
}

/**
 * @brief Create the /dev/casM-N device node for a core
 */
int kcas_core_create_exported_object(ocf_core_t core)
{
ocf_cache_t cache = ocf_core_get_cache(core);
ocf_volume_t volume = ocf_core_get_volume(core);
struct bd_object *bvol = bd_object(volume);
char dev_name[DISK_NAME_LEN];
snprintf(dev_name, DISK_NAME_LEN, "cas%s-%s",
get_cache_id_string(cache),
get_core_id_string(core));
bvol->front_volume = ocf_core_get_front_volume(core);
return kcas_volume_create_exported_object(volume, dev_name, core,
&kcas_core_exp_obj_ops);
}
int kcas_core_destroy_exported_object(ocf_core_t core)
{
ocf_volume_t volume = ocf_core_get_volume(core);
return kcas_volume_destroy_exported_object(volume);
}
int kcas_cache_create_exported_object(ocf_cache_t cache)
{
ocf_volume_t volume = ocf_cache_get_volume(cache);
struct bd_object *bvol = bd_object(volume);
char dev_name[DISK_NAME_LEN];
snprintf(dev_name, DISK_NAME_LEN, "cas-cache-%s",
get_cache_id_string(cache));
bvol->front_volume = ocf_cache_get_front_volume(cache);
return kcas_volume_create_exported_object(volume, dev_name, cache,
&kcas_cache_exp_obj_ops);
}
int kcas_cache_destroy_exported_object(ocf_cache_t cache)
{
ocf_volume_t volume = ocf_cache_get_volume(cache);
return kcas_volume_destroy_exported_object(volume);
}
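
/*
 * Lock a core's exported object so that it cannot be opened while it is
 * being stopped.
 */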
static int kcas_core_lock_exported_object(ocf_core_t core, void *cntx)
{
int result;
struct bd_object *bvol = bd_object(
ocf_core_get_volume(core));
if (!bvol->expobj_valid)
return 0;
result = cas_exp_obj_lock(bvol->dsk);
if (-EBUSY == result) {
printk(KERN_WARNING "Stopping %s failed - device in use\n",
cas_exp_obj_get_gendisk(bvol->dsk)->disk_name);
return -KCAS_ERR_DEV_PENDING;
} else if (result) {
printk(KERN_WARNING "Stopping %s failed - device unavailable\n",
cas_exp_obj_get_gendisk(bvol->dsk)->disk_name);
return -OCF_ERR_CORE_NOT_AVAIL;
}
bvol->expobj_locked = true;
return 0;
}
static int kcas_core_unlock_exported_object(ocf_core_t core, void *cntx)
{
struct bd_object *bvol = bd_object(ocf_core_get_volume(core));
if (bvol->expobj_locked) {
cas_exp_obj_unlock(bvol->dsk);
bvol->expobj_locked = false;
}
return 0;
}
static int kcas_core_stop_exported_object(ocf_core_t core, void *cntx)
{
struct bd_object *bvol = bd_object(
ocf_core_get_volume(core));
int ret;
if (bvol->expobj_valid) {
BUG_ON(!bvol->expobj_locked);
printk(KERN_INFO "Stopping device %s\n",
cas_exp_obj_get_gendisk(bvol->dsk)->disk_name);
ret = cas_exp_obj_destroy(bvol->dsk);
if (!ret) {
bvol->expobj_valid = false;
destroy_workqueue(bvol->expobj_wq);
}
}
if (bvol->expobj_locked) {
cas_exp_obj_unlock(bvol->dsk);
bvol->expobj_locked = false;
}
return 0;
}
static int kcas_core_cleanup_exported_object(ocf_core_t core, void *cntx)
{
struct bd_object *bvol = bd_object(ocf_core_get_volume(core));
cas_exp_obj_cleanup(bvol->dsk);
return 0;
}
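
/*
 * Destroy the exported objects of all cores in the cache: first lock all
 * of them (rolling back on failure), then stop and clean them up.
 */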
int kcas_cache_destroy_all_core_exported_objects(ocf_cache_t cache)
{
int result;
/* Try lock exported objects */
result = ocf_core_visit(cache, kcas_core_lock_exported_object, NULL,
true);
if (result) {
/* Failure, unlock already locked exported objects */
ocf_core_visit(cache, kcas_core_unlock_exported_object, NULL,
true);
return result;
}
ocf_core_visit(cache, kcas_core_stop_exported_object, NULL, true);
ocf_core_visit(cache, kcas_core_cleanup_exported_object, NULL, true);
return 0;
}