/* * Copyright(c) 2012-2021 Intel Corporation * SPDX-License-Identifier: BSD-3-Clause-Clear */ #include "cas_cache.h" #include "utils/cas_err.h" static void _blockdev_set_bio_data(struct blk_data *data, struct bio *bio) { #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) struct bio_vec *bvec; uint32_t iter = 0, i = 0; bio_for_each_segment(bvec, bio, iter) { BUG_ON(i >= data->size); data->vec[i] = *bvec; i++; } #else struct bio_vec bvec; struct bvec_iter iter; uint32_t i = 0; bio_for_each_segment(bvec, bio, iter) { BUG_ON(i >= data->size); data->vec[i] = bvec; i++; } #endif } static inline int _blkdev_can_hndl_bio(struct bio *bio) { if (CAS_CHECK_BARRIER(bio)) { CAS_PRINT_RL(KERN_WARNING "special bio was sent, not supported!\n"); CAS_BIO_ENDIO(bio, CAS_BIO_BISIZE(bio), CAS_ERRNO_TO_BLK_STS(-EOPNOTSUPP)); return -ENOTSUPP; } return 0; } void _blockdev_set_exported_object_flush_fua(ocf_core_t core) { ocf_cache_t cache = ocf_core_get_cache(core); ocf_volume_t core_vol = ocf_core_get_volume(core); ocf_volume_t cache_vol = ocf_cache_get_volume(cache); struct bd_object *bd_core_vol, *bd_cache_vol; struct request_queue *core_q, *exp_q, *cache_q; bool flush, fua; BUG_ON(!cache_vol); bd_core_vol = bd_object(core_vol); bd_cache_vol = bd_object(cache_vol); core_q = casdisk_functions.casdsk_disk_get_queue(bd_core_vol->dsk); exp_q = casdisk_functions.casdsk_exp_obj_get_queue(bd_core_vol->dsk); cache_q = casdisk_functions.casdsk_disk_get_queue(bd_cache_vol->dsk); flush = (CAS_CHECK_QUEUE_FLUSH(core_q) || CAS_CHECK_QUEUE_FLUSH(cache_q)); fua = (CAS_CHECK_QUEUE_FUA(core_q) || CAS_CHECK_QUEUE_FUA(cache_q)); cas_set_queue_flush_fua(exp_q, flush, fua); } static void _blockdev_set_discard_properties(ocf_cache_t cache, struct request_queue *exp_q, struct block_device *cache_bd, struct block_device *core_bd, sector_t core_sectors) { struct request_queue *core_q; struct request_queue *cache_q; core_q = bdev_get_queue(core_bd); cache_q = bdev_get_queue(cache_bd); CAS_QUEUE_FLAG_SET(QUEUE_FLAG_DISCARD, exp_q); CAS_SET_DISCARD_ZEROES_DATA(exp_q->limits, 0); if (core_q && blk_queue_discard(core_q)) { blk_queue_max_discard_sectors(exp_q, core_q->limits.max_discard_sectors); exp_q->limits.discard_alignment = bdev_discard_alignment(core_bd); exp_q->limits.discard_granularity = core_q->limits.discard_granularity; } else { blk_queue_max_discard_sectors(exp_q, min((uint64_t)core_sectors, (uint64_t)UINT_MAX)); exp_q->limits.discard_granularity = ocf_cache_get_line_size(cache); exp_q->limits.discard_alignment = 0; } } /** * Map geometry of underlying (core) object geometry (sectors etc.) * to geometry of exported object. */ static int _blockdev_set_geometry(struct casdsk_disk *dsk, void *private) { ocf_core_t core; ocf_cache_t cache; ocf_volume_t core_vol; ocf_volume_t cache_vol; struct bd_object *bd_cache_vol; struct request_queue *core_q, *cache_q, *exp_q; struct block_device *core_bd, *cache_bd; sector_t sectors; const char *path; BUG_ON(!private); core = private; cache = ocf_core_get_cache(core); core_vol = ocf_core_get_volume(core); cache_vol = ocf_cache_get_volume(cache); BUG_ON(!cache_vol); bd_cache_vol = bd_object(cache_vol); path = ocf_volume_get_uuid(core_vol)->data; core_bd = casdisk_functions.casdsk_disk_get_blkdev(dsk); BUG_ON(!core_bd); cache_bd = casdisk_functions.casdsk_disk_get_blkdev(bd_cache_vol->dsk); BUG_ON(!cache_bd); core_q = cas_bdev_whole(core_bd)->bd_disk->queue; cache_q = cache_bd->bd_disk->queue; exp_q = casdisk_functions.casdsk_exp_obj_get_queue(dsk); sectors = ocf_volume_get_length(core_vol) >> SECTOR_SHIFT; set_capacity(casdisk_functions.casdsk_exp_obj_get_gendisk(dsk), sectors); cas_copy_queue_limits(exp_q, cache_q, core_q); if (exp_q->limits.logical_block_size > core_q->limits.logical_block_size) { printk(KERN_ERR "Cache device logical sector size is " "greater than core device %s logical sector size.\n", path); return -KCAS_ERR_UNALIGNED; } blk_stack_limits(&exp_q->limits, &core_q->limits, 0); /* We don't want to receive splitted requests*/ CAS_SET_QUEUE_CHUNK_SECTORS(exp_q, 0); _blockdev_set_exported_object_flush_fua(core); _blockdev_set_discard_properties(cache, exp_q, cache_bd, core_bd, sectors); return 0; } struct defer_bio_context { struct work_struct io_work; void (*cb)(ocf_core_t core, struct bio *bio); ocf_core_t core; struct bio *bio; }; static void _blockdev_defer_bio_work(struct work_struct *work) { struct defer_bio_context *context; context = container_of(work, struct defer_bio_context, io_work); context->cb(context->core, context->bio); kfree(context); } static void _blockdev_defer_bio(ocf_core_t core, struct bio *bio, void (*cb)(ocf_core_t core, struct bio *bio)) { struct defer_bio_context *context; ocf_volume_t volume = ocf_core_get_volume(core); struct bd_object *bvol = bd_object(volume); BUG_ON(!bvol->expobj_wq); context = kmalloc(sizeof(*context), GFP_ATOMIC); if (!context) { CAS_BIO_ENDIO(bio, CAS_BIO_BISIZE(bio), CAS_ERRNO_TO_BLK_STS(-ENOMEM)); return; } context->cb = cb; context->bio = bio; context->core = core; INIT_WORK(&context->io_work, _blockdev_defer_bio_work); queue_work(bvol->expobj_wq, &context->io_work); } static void _block_dev_complete_data(struct blk_data *master, int error) { master->error = master->error ?: error; if (atomic_dec_return(&master->master_remaining)) return; cas_generic_end_io_acct(master->bio, master->start_time); CAS_BIO_ENDIO(master->bio, master->master_size, CAS_ERRNO_TO_BLK_STS(error)); cas_free_blk_data(master); } static void block_dev_complete_data(struct ocf_io *io, int error) { struct bio *bio = io->priv1; struct blk_data *master = io->priv2; struct blk_data *data = ocf_io_get_data(io); ocf_io_put(io); if (data != master) cas_free_blk_data(data); if (bio != master->bio) bio_put(bio); _block_dev_complete_data(master, error); } struct blockdev_data_master_ctx { struct blk_data *data; struct bio *bio; uint32_t master_size; unsigned long long start_time; }; static int _blockdev_handle_data_single(ocf_core_t core, struct bio *bio, struct blockdev_data_master_ctx *master_ctx) { ocf_cache_t cache; struct cache_priv *cache_priv; struct ocf_io *io; struct blk_data *data; uint64_t flags = CAS_BIO_OP_FLAGS(bio); int ret; cache = ocf_core_get_cache(core); cache_priv = ocf_cache_get_priv(cache); data = cas_alloc_blk_data(bio_segments(bio), GFP_NOIO); if (!data) { CAS_PRINT_RL(KERN_CRIT "BIO data vector allocation error\n"); return -ENOMEM; } _blockdev_set_bio_data(data, bio); io = ocf_core_new_io(core, cache_priv->io_queues[smp_processor_id()], CAS_BIO_BISECTOR(bio) << SECTOR_SHIFT, CAS_BIO_BISIZE(bio), (bio_data_dir(bio) == READ) ? OCF_READ : OCF_WRITE, cas_cls_classify(cache, bio), CAS_CLEAR_FLUSH(flags)); if (!io) { printk(KERN_CRIT "Out of memory. Ending IO processing.\n"); cas_free_blk_data(data); return -ENOMEM; } ret = ocf_io_set_data(io, data, 0); if (ret < 0) { ocf_io_put(io); cas_free_blk_data(data); return -EINVAL; } if (!master_ctx->data) { atomic_set(&data->master_remaining, 1); data->bio = master_ctx->bio; data->master_size = master_ctx->master_size; data->start_time = master_ctx->start_time; master_ctx->data = data; } atomic_inc(&master_ctx->data->master_remaining); ocf_io_set_cmpl(io, bio, master_ctx->data, block_dev_complete_data); ocf_core_submit_io(io); return 0; } static void _blockdev_handle_data(ocf_core_t core, struct bio *bio) { const uint32_t max_io_sectors = (32*MiB) >> SECTOR_SHIFT; const uint32_t align_sectors = (128*KiB) >> SECTOR_SHIFT; struct bio *split = NULL; uint32_t sectors, to_submit; int error; struct blockdev_data_master_ctx master_ctx = { .bio = bio, .master_size = CAS_BIO_BISIZE(bio), }; if (unlikely(CAS_BIO_BISIZE(bio) == 0)) { CAS_PRINT_RL(KERN_ERR "Not able to handle empty BIO, flags = " CAS_BIO_OP_FLAGS_FORMAT "\n", CAS_BIO_OP_FLAGS(bio)); CAS_BIO_ENDIO(bio, CAS_BIO_BISIZE(bio), CAS_ERRNO_TO_BLK_STS(-EINVAL)); return; } master_ctx.start_time = cas_generic_start_io_acct(bio); for (sectors = bio_sectors(bio); sectors > 0;) { if (sectors <= max_io_sectors) { split = bio; sectors = 0; } else { to_submit = max_io_sectors - CAS_BIO_BISECTOR(bio) % align_sectors; split = cas_bio_split(bio, to_submit); sectors -= to_submit; } error = _blockdev_handle_data_single(core, split, &master_ctx); if (error) goto err; } _block_dev_complete_data(master_ctx.data, 0); return; err: if (split != bio) bio_put(split); if (master_ctx.data) _block_dev_complete_data(master_ctx.data, error); else CAS_BIO_ENDIO(bio, CAS_BIO_BISIZE(bio), CAS_ERRNO_TO_BLK_STS(error)); } static void block_dev_complete_discard(struct ocf_io *io, int error) { struct bio *bio = io->priv1; CAS_BIO_ENDIO(bio, CAS_BIO_BISIZE(bio), CAS_ERRNO_TO_BLK_STS(error)); ocf_io_put(io); } static void _blockdev_handle_discard(ocf_core_t core, struct bio *bio) { ocf_cache_t cache = ocf_core_get_cache(core); struct cache_priv *cache_priv = ocf_cache_get_priv(cache); struct ocf_io *io; io = ocf_core_new_io(core, cache_priv->io_queues[smp_processor_id()], CAS_BIO_BISECTOR(bio) << SECTOR_SHIFT, CAS_BIO_BISIZE(bio), OCF_WRITE, 0, CAS_CLEAR_FLUSH(CAS_BIO_OP_FLAGS(bio))); if (!io) { CAS_PRINT_RL(KERN_CRIT "Out of memory. Ending IO processing.\n"); CAS_BIO_ENDIO(bio, CAS_BIO_BISIZE(bio), CAS_ERRNO_TO_BLK_STS(-ENOMEM)); return; } ocf_io_set_cmpl(io, bio, NULL, block_dev_complete_discard); ocf_core_submit_discard(io); } static void _blockdev_handle_bio_noflush(ocf_core_t core, struct bio *bio) { if (CAS_IS_DISCARD(bio)) _blockdev_handle_discard(core, bio); else _blockdev_handle_data(core, bio); } static void block_dev_complete_flush(struct ocf_io *io, int error) { struct bio *bio = io->priv1; ocf_core_t core = io->priv2; ocf_io_put(io); if (CAS_BIO_BISIZE(bio) == 0 || error) { CAS_BIO_ENDIO(bio, CAS_BIO_BISIZE(bio), CAS_ERRNO_TO_BLK_STS(error)); return; } if (in_interrupt()) _blockdev_defer_bio(core, bio, _blockdev_handle_bio_noflush); else _blockdev_handle_bio_noflush(core, bio); } static void _blkdev_handle_flush(ocf_core_t core, struct bio *bio) { struct ocf_io *io; ocf_cache_t cache = ocf_core_get_cache(core); struct cache_priv *cache_priv = ocf_cache_get_priv(cache); io = ocf_core_new_io(core, cache_priv->io_queues[smp_processor_id()], 0, 0, OCF_WRITE, 0, CAS_SET_FLUSH(0)); if (!io) { CAS_PRINT_RL(KERN_CRIT "Out of memory. Ending IO processing.\n"); CAS_BIO_ENDIO(bio, CAS_BIO_BISIZE(bio), CAS_ERRNO_TO_BLK_STS(-ENOMEM)); return; } ocf_io_set_cmpl(io, bio, core, block_dev_complete_flush); ocf_core_submit_flush(io); } static void _blockdev_handle_bio(ocf_core_t core, struct bio *bio) { if (CAS_IS_SET_FLUSH(CAS_BIO_OP_FLAGS(bio))) _blkdev_handle_flush(core, bio); else _blockdev_handle_bio_noflush(core, bio); } static void _blockdev_submit_bio(struct casdsk_disk *dsk, struct bio *bio, void *private) { ocf_core_t core = private; BUG_ON(!core); if (_blkdev_can_hndl_bio(bio)) { CAS_BIO_ENDIO(bio, CAS_BIO_BISIZE(bio), CAS_ERRNO_TO_BLK_STS(-ENOTSUPP)); return; } if (in_interrupt()) _blockdev_defer_bio(core, bio, _blockdev_handle_bio); else _blockdev_handle_bio(core, bio); } static struct casdsk_exp_obj_ops _blockdev_exp_obj_ops = { .set_geometry = _blockdev_set_geometry, .submit_bio = _blockdev_submit_bio, }; /** * @brief this routine actually adds /dev/casM-N inode */ int block_dev_activate_exported_object(ocf_core_t core) { int ret; ocf_volume_t obj = ocf_core_get_volume(core); ocf_cache_t cache = ocf_core_get_cache(core); struct bd_object *bvol = bd_object(obj); ret = casdisk_functions.casdsk_exp_obj_activate(bvol->dsk); if (ret) { if (-EEXIST == ret) ret = KCAS_ERR_FILE_EXISTS; printk(KERN_ERR "Cannot activate exported object, %s.%s. " "Error code %d\n", ocf_cache_get_name(cache), ocf_core_get_name(core), ret); } return ret; } static const char *get_cache_id_string(ocf_cache_t cache) { return ocf_cache_get_name(cache) + sizeof("cache") - 1; } static const char *get_core_id_string(ocf_core_t core) { return ocf_core_get_name(core) + sizeof("core") - 1; } int block_dev_create_exported_object(ocf_core_t core) { ocf_volume_t obj = ocf_core_get_volume(core); ocf_cache_t cache = ocf_core_get_cache(core); struct bd_object *bvol = bd_object(obj); const struct ocf_volume_uuid *uuid = ocf_volume_get_uuid(obj); char dev_name[DISK_NAME_LEN]; struct casdsk_disk *dsk; int result; snprintf(dev_name, DISK_NAME_LEN, "cas%s-%s", get_cache_id_string(cache), get_core_id_string(core)); dsk = casdisk_functions.casdsk_disk_claim(uuid->data, core); if (dsk != bvol->dsk) { result = -KCAS_ERR_SYSTEM; goto end; } bvol->expobj_wq = alloc_workqueue("expobj_wq%s-%s", WQ_MEM_RECLAIM | WQ_HIGHPRI, 0, get_cache_id_string(cache), get_core_id_string(core)); if (!bvol->expobj_wq) { result = -ENOMEM; goto end; } result = casdisk_functions.casdsk_exp_obj_create(dsk, dev_name, THIS_MODULE, &_blockdev_exp_obj_ops); if (result) { destroy_workqueue(bvol->expobj_wq); goto end; } bvol->expobj_valid = true; end: if (result) { printk(KERN_ERR "Cannot create exported object %s. Error code %d\n", dev_name, result); } return result; } int block_dev_destroy_exported_object(ocf_core_t core) { int ret = 0; ocf_volume_t obj = ocf_core_get_volume(core); struct bd_object *bvol = bd_object(obj); if (!bvol->expobj_valid) return 0; ret = casdisk_functions.casdsk_exp_obj_lock(bvol->dsk); if (ret) { if (-EBUSY == ret) ret = -KCAS_ERR_DEV_PENDING; return ret; } ret = casdisk_functions.casdsk_exp_obj_destroy(bvol->dsk); if (!ret) bvol->expobj_valid = false; destroy_workqueue(bvol->expobj_wq); casdisk_functions.casdsk_exp_obj_unlock(bvol->dsk); return ret; } static int _block_dev_lock_exported_object(ocf_core_t core, void *cntx) { int result; struct bd_object *bvol = bd_object( ocf_core_get_volume(core)); result = casdisk_functions.casdsk_exp_obj_lock(bvol->dsk); if (-EBUSY == result) { printk(KERN_WARNING "Stopping %s failed - device in use\n", casdisk_functions.casdsk_exp_obj_get_gendisk(bvol->dsk)->disk_name); return -KCAS_ERR_DEV_PENDING; } else if (result) { printk(KERN_WARNING "Stopping %s failed - device unavailable\n", casdisk_functions.casdsk_exp_obj_get_gendisk(bvol->dsk)->disk_name); return -OCF_ERR_CORE_NOT_AVAIL; } bvol->expobj_locked = true; return 0; } static int _block_dev_unlock_exported_object(ocf_core_t core, void *cntx) { struct bd_object *bvol = bd_object( ocf_core_get_volume(core)); if (bvol->expobj_locked) { casdisk_functions.casdsk_exp_obj_unlock(bvol->dsk); bvol->expobj_locked = false; } return 0; } static int _block_dev_stop_exported_object(ocf_core_t core, void *cntx) { struct bd_object *bvol = bd_object( ocf_core_get_volume(core)); int ret; if (bvol->expobj_valid) { BUG_ON(!bvol->expobj_locked); printk(KERN_INFO "Stopping device %s\n", casdisk_functions.casdsk_exp_obj_get_gendisk(bvol->dsk)->disk_name); ret = casdisk_functions.casdsk_exp_obj_destroy(bvol->dsk); if (!ret) bvol->expobj_valid = false; } if (bvol->expobj_locked) { casdisk_functions.casdsk_exp_obj_unlock(bvol->dsk); bvol->expobj_locked = false; } return 0; } static int _block_dev_free_exported_object(ocf_core_t core, void *cntx) { struct bd_object *bvol = bd_object( ocf_core_get_volume(core)); casdisk_functions.casdsk_exp_obj_free(bvol->dsk); return 0; } int block_dev_destroy_all_exported_objects(ocf_cache_t cache) { int result; /* Try lock exported objects */ result = ocf_core_visit(cache, _block_dev_lock_exported_object, NULL, true); if (result) { /* Failure, unlock already locked exported objects */ ocf_core_visit(cache, _block_dev_unlock_exported_object, NULL, true); return result; } ocf_core_visit(cache, _block_dev_stop_exported_object, NULL, true); return ocf_core_visit(cache, _block_dev_free_exported_object, NULL, true); }