open-cas-linux/modules/cas_cache/volume/vol_block_dev_bottom.c
/*
 * Copyright(c) 2012-2021 Intel Corporation
 * SPDX-License-Identifier: BSD-3-Clause-Clear
 */
#include "cas_cache.h"

#define CAS_DEBUG_IO 0

#if CAS_DEBUG_IO == 1
#define CAS_DEBUG_TRACE() printk(KERN_DEBUG \
		"[IO] %s:%d\n", __func__, __LINE__)

#define CAS_DEBUG_MSG(msg) printk(KERN_DEBUG \
		"[IO] %s:%d - %s\n", __func__, __LINE__, msg)

#define CAS_DEBUG_PARAM(format, ...) printk(KERN_DEBUG \
		"[IO] %s:%d - "format"\n", __func__, __LINE__, ##__VA_ARGS__)
#else
#define CAS_DEBUG_TRACE()
#define CAS_DEBUG_MSG(msg)
#define CAS_DEBUG_PARAM(format, ...)
#endif
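
/*
 * Open the underlying block device for this volume. If the bdev was
 * set manually there is nothing to do; during an upgrade the already
 * attached disk is claimed, otherwise it is opened through cas_disk.
 */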
int block_dev_open_object(ocf_volume_t vol, void *volume_params)
{
	struct bd_object *bdobj = bd_object(vol);
	const struct ocf_volume_uuid *uuid = ocf_volume_get_uuid(vol);
	struct casdsk_disk *dsk;

	if (bdobj->opened_by_bdev) {
		/* Bdev has been set manually, so there is nothing to do. */
		return 0;
	}

	if (unlikely(cas_upgrade_is_in_upgrade())) {
		dsk = casdisk_functions.casdsk_disk_claim(uuid->data, NULL);
		casdisk_functions.casdsk_disk_set_attached(dsk);
	} else {
		dsk = casdisk_functions.casdsk_disk_open(uuid->data, NULL);
	}
	if (IS_ERR_OR_NULL(dsk)) {
		int error = PTR_ERR(dsk) ?: -EINVAL;

		if (error == -EBUSY)
			error = -OCF_ERR_NOT_OPEN_EXC;

		return error;
	}

	bdobj->dsk = dsk;
	bdobj->btm_bd = casdisk_functions.casdsk_disk_get_blkdev(dsk);

	return 0;
}
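
/*
 * Close the underlying block device unless it was set manually.
 * During an upgrade the disk is switched to pass-through mode and
 * detached instead of being closed.
 */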
void block_dev_close_object(ocf_volume_t vol)
{
	struct bd_object *bdobj = bd_object(vol);

	if (bdobj->opened_by_bdev)
		return;

	if (likely(!cas_upgrade_is_in_upgrade())) {
		casdisk_functions.casdsk_disk_close(bdobj->dsk);
	} else {
		casdisk_functions.casdsk_disk_set_pt(bdobj->dsk);
		casdisk_functions.casdsk_disk_dettach(bdobj->dsk);
	}
}
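
/*
 * Maximum size of a single IO in bytes, derived from the request
 * queue's max_sectors limit.
 */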
unsigned int block_dev_get_max_io_size(ocf_volume_t vol)
{
	struct bd_object *bdobj = bd_object(vol);
	struct block_device *bd = bdobj->btm_bd;

	return queue_max_sectors(bd->bd_disk->queue) << SECTOR_SHIFT;
}
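
/*
 * Volume length in bytes: whole-disk capacity when the bdev is the
 * whole device, the partition size otherwise.
 */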
uint64_t block_dev_get_byte_length(ocf_volume_t vol)
{
	struct bd_object *bdobj = bd_object(vol);
	struct block_device *bd = bdobj->btm_bd;
	uint64_t sector_length;

	sector_length = (bd->bd_contains == bd) ?
			get_capacity(bd->bd_disk) :
			bd->bd_part->nr_sects;

	return sector_length << SECTOR_SHIFT;
}
#if LINUX_VERSION_CODE <= KERNEL_VERSION(3, 3, 0)
static const char *__block_dev_get_elevator_name(struct request_queue *q)
{
	if (q->elevator->elevator_type == NULL)
		return NULL;

	if (q->elevator->elevator_type->elevator_name == NULL)
		return NULL;

	if (q->elevator->elevator_type->elevator_name[0] == 0)
		return NULL;

	return q->elevator->elevator_type->elevator_name;
}
#else
static const char *__block_dev_get_elevator_name(struct request_queue *q)
{
	if (q->elevator->type == NULL)
		return NULL;

	if (q->elevator->type->elevator_name == NULL)
		return NULL;

	if (q->elevator->type->elevator_name[0] == 0)
		return NULL;

	return q->elevator->type->elevator_name;
}
#endif

/*
 * Return the name of the IO scheduler (elevator) attached to the
 * request queue, or NULL if there is none.
 */
const char *block_dev_get_elevator_name(struct request_queue *q)
{
	if (!q)
		return NULL;

	if (q->elevator == NULL)
		return NULL;

	return __block_dev_get_elevator_name(q);
}

/*
 * Check whether the configured metadata mode is optimal for the device.
 * For a block device volume this matters only when the device is
 * capable of atomic writes.
 */
int block_dev_is_metadata_mode_optimal(struct atomic_dev_params *atomic_params,
		uint8_t type)
{
	if (type == BLOCK_DEVICE_VOLUME) {
		if (atomic_params->is_atomic_capable)
			return atomic_params->is_mode_optimal;
	} else if (type == ATOMIC_DEVICE_VOLUME) {
		return atomic_params->is_mode_optimal;
	}

	return 1;
}

/*
 * Allocate a BIO big enough for the remaining part of the IO vector,
 * falling back to a smaller BIO when the large allocation fails.
 */
static inline struct bio *cas_bd_io_alloc_bio(struct blkio *bdio)
{
	struct bio *bio
		= bio_alloc(GFP_NOIO, cas_io_iter_size_left(&bdio->iter));

	if (bio)
		return bio;

	if (cas_io_iter_size_left(&bdio->iter) < MAX_LINES_PER_IO) {
		/* The requested BIO vector was already small, so this
		 * is a genuine out-of-memory condition.
		 */
		return NULL;
	}

	/* Retry with a smaller BIO vector */
	return bio_alloc(GFP_NOIO, MAX_LINES_PER_IO);
}

/*
 * Count down an outstanding sub-request; when the last one finishes,
 * complete the OCF IO and drop its reference.
 */
static void cas_bd_io_end(struct ocf_io *io, int error)
{
	struct blkio *bdio = cas_io_to_blkio(io);

	if (error)
		bdio->error |= error;

	if (atomic_dec_return(&bdio->rq_remaning))
		return;

	CAS_DEBUG_MSG("Completion");

	/* Send completion to caller */
	io->end(io, bdio->error);

	/* Free allocated structures */
	ocf_io_put(io);
}

/*
 * BIO completion callback: recover the OCF IO from bi_private and
 * finish the corresponding sub-request.
 */
CAS_DECLARE_BLOCK_CALLBACK(cas_bd_io_end, struct bio *bio,
		unsigned int bytes_done, int error)
{
	struct ocf_io *io;
	struct blkio *bdio;
	struct bd_object *bdobj;
	int err;

	BUG_ON(!bio);
	BUG_ON(!bio->bi_private);
	CAS_BLOCK_CALLBACK_INIT(bio);
	io = bio->bi_private;
	bdobj = bd_object(ocf_io_get_volume(io));
	BUG_ON(!bdobj);
	err = CAS_BLOCK_CALLBACK_ERROR(bio, error);
	bdio = cas_io_to_blkio(io);
	BUG_ON(!bdio);

	CAS_DEBUG_TRACE();

	if (err == -EOPNOTSUPP && (CAS_BIO_OP_FLAGS(bio) & CAS_BIO_DISCARD))
		err = 0;

	cas_bd_io_end(io, err);

	bio_put(bio);
	CAS_BLOCK_CALLBACK_RETURN();
}
static void block_dev_submit_flush(struct ocf_io *io)
{
#ifdef CAS_FLUSH_SUPPORTED
	struct blkio *blkio = cas_io_to_blkio(io);
	struct bd_object *bdobj = bd_object(ocf_io_get_volume(io));
	struct block_device *bdev = bdobj->btm_bd;
	struct request_queue *q = bdev_get_queue(bdev);
	struct bio *bio = NULL;

	/* Prevent races of completing IO */
	atomic_set(&blkio->rq_remaning, 1);

	/* Increase IO reference counter for FLUSH IO */
	ocf_io_get(io);

	if (q == NULL) {
		/* No queue, error */
		blkio->error = -EINVAL;
		goto out;
	}

	if (!CAS_CHECK_QUEUE_FLUSH(q)) {
		/* This block device does not support flush,
		 * send completion to caller
		 */
		goto out;
	}

	bio = bio_alloc(GFP_NOIO, 0);
	if (bio == NULL) {
		CAS_PRINT_RL(KERN_ERR "Couldn't allocate memory for BIO\n");
		blkio->error = -ENOMEM;
		goto out;
	}

	blkio->dir = io->dir;

	bio->bi_end_io = CAS_REFER_BLOCK_CALLBACK(cas_bd_io_end);
	CAS_BIO_SET_DEV(bio, bdev);
	bio->bi_private = io;

	atomic_inc(&blkio->rq_remaning);
	cas_submit_bio(CAS_WRITE_FLUSH, bio);

out:
	cas_bd_io_end(io, blkio->error);
#else
	/* On an operating system without support for REQ_FLUSH
	 * (e.g. SLES 11 SP1) CAS cannot use flush requests to
	 * implement power-fail safe Write-Back. There, power-fail
	 * safety can only be achieved by disabling the volatile
	 * write cache of the disk itself.
	 */
	io->end(io, -ENOTSUPP);
#endif
}
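
/*
 * Submit a discard to the bottom device, splitting it into BIOs no
 * larger than max_discard_sectors and trimming each split point back
 * to the device's discard granularity/alignment. For example (values
 * chosen for illustration): with granularity = 8 sectors and
 * alignment = 0, a split that would end at sector 21 is trimmed back
 * to sector 16, so only the final BIO may end off a granule boundary.
 */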
void block_dev_submit_discard(struct ocf_io *io)
{
	struct blkio *blkio = cas_io_to_blkio(io);
	struct bd_object *bdobj = bd_object(ocf_io_get_volume(io));
	struct block_device *bd = bdobj->btm_bd;
	struct request_queue *q = bdev_get_queue(bd);
	struct bio *bio = NULL;
	unsigned int max_discard_sectors, granularity, bio_sects;
	int alignment;
	sector_t sects, start, end, tmp;

	/* Prevent races of completing IO */
	atomic_set(&blkio->rq_remaning, 1);

	/* Increase IO reference counter for DISCARD IO */
	ocf_io_get(io);

	if (!q) {
		/* No queue, error */
		blkio->error = -ENXIO;
		goto out;
	}

	if (!blk_queue_discard(q)) {
		/* Discard is not supported by bottom device, send completion
		 * to caller
		 */
		goto out;
	}

	granularity = max(q->limits.discard_granularity >> SECTOR_SHIFT, 1U);
	alignment = (bdev_discard_alignment(bd) >> SECTOR_SHIFT) % granularity;
	max_discard_sectors =
		min(q->limits.max_discard_sectors, UINT_MAX >> SECTOR_SHIFT);
	max_discard_sectors -= max_discard_sectors % granularity;

	if (unlikely(!max_discard_sectors))
		goto out;

	sects = io->bytes >> SECTOR_SHIFT;
	start = io->addr >> SECTOR_SHIFT;

	while (sects) {
		bio = bio_alloc(GFP_NOIO, 1);
		if (!bio) {
			CAS_PRINT_RL(CAS_KERN_ERR "Couldn't allocate memory for BIO\n");
			blkio->error = -ENOMEM;
			break;
		}

		bio_sects = min_t(sector_t, sects, max_discard_sectors);
		end = start + bio_sects;
		tmp = end;

		if (bio_sects < sects &&
				sector_div(tmp, granularity) != alignment) {
			end = end - alignment;
			sector_div(end, granularity);
			end = end * granularity + alignment;
			bio_sects = end - start;
		}

		CAS_BIO_SET_DEV(bio, bd);
		CAS_BIO_BISECTOR(bio) = start;
		CAS_BIO_BISIZE(bio) = bio_sects << SECTOR_SHIFT;
		bio->bi_next = NULL;
		bio->bi_private = io;
		bio->bi_end_io = CAS_REFER_BLOCK_CALLBACK(cas_bd_io_end);

		atomic_inc(&blkio->rq_remaning);
		cas_submit_bio(CAS_BIO_DISCARD, bio);

		sects -= bio_sects;
		start = end;

		cond_resched();
	}

out:
	cas_bd_io_end(io, blkio->error);
}
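
/*
 * Translate the OCF IO direction into the kernel READ/WRITE value and
 * reject zero-length requests.
 */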
static inline bool cas_bd_io_prepare(int *dir, struct ocf_io *io)
{
	struct blkio *bdio = cas_io_to_blkio(io);

	/* Setup DIR */
	bdio->dir = *dir;

	/* Convert CAS direction into kernel values */
	switch (bdio->dir) {
	case OCF_READ:
		*dir = READ;
		break;

	case OCF_WRITE:
		*dir = WRITE;
		break;

	default:
		bdio->error = -EINVAL;
		break;
	}

	if (!io->bytes) {
		/* Do not accept zero-length requests */
		CAS_PRINT_RL(KERN_ERR "Invalid zero size IO\n");
		bdio->error = -EINVAL;
	}

	if (bdio->error)
		return false;

	return true;
}

/*
 * Submit a data IO to the bottom device, splitting the IO vector into
 * as many BIOs as needed. Pure flush requests are diverted to
 * block_dev_submit_flush().
 */
static void block_dev_submit_io(struct ocf_io *io)
{
	struct blkio *bdio = cas_io_to_blkio(io);
	struct bd_object *bdobj = bd_object(ocf_io_get_volume(io));
	struct bio_vec_iter *iter = &bdio->iter;
	uint64_t addr = io->addr;
	uint32_t bytes = io->bytes;
	int dir = io->dir;

	if (!CAS_IS_WRITE_FLUSH_FUA(io->flags) &&
			CAS_IS_WRITE_FLUSH(io->flags)) {
		CAS_DEBUG_MSG("Flush request");
		/* It is a flush request, handle it */
		block_dev_submit_flush(io);
		return;
	}

	CAS_DEBUG_PARAM("Address = %llu, bytes = %u\n", io->addr,
			io->bytes);

	/* Increase IO reference */
	ocf_io_get(io);

	/* Prevent races of completing IO */
	atomic_set(&bdio->rq_remaning, 1);

	if (!cas_bd_io_prepare(&dir, io)) {
		CAS_DEBUG_MSG("Invalid request");
		cas_bd_io_end(io, -EINVAL);
		return;
	}

	while (cas_io_iter_is_next(iter) && bytes) {
		/* Still IO vectors to be sent */

		/* Allocate BIO */
		struct bio *bio = cas_bd_io_alloc_bio(bdio);

		if (!bio) {
			bdio->error = -ENOMEM;
			break;
		}

		/* Setup BIO */
		CAS_BIO_SET_DEV(bio, bdobj->btm_bd);
		CAS_BIO_BISECTOR(bio) = addr / SECTOR_SIZE;
		bio->bi_next = NULL;
		bio->bi_private = io;
		CAS_BIO_OP_FLAGS(bio) |= io->flags;
		bio->bi_end_io = CAS_REFER_BLOCK_CALLBACK(cas_bd_io_end);

		/* Add pages */
		while (cas_io_iter_is_next(iter) && bytes) {
			struct page *page = cas_io_iter_current_page(iter);
			uint32_t offset = cas_io_iter_current_offset(iter);
			uint32_t length = cas_io_iter_current_length(iter);
			int added;

			if (length > bytes)
				length = bytes;

			added = bio_add_page(bio, page, length, offset);
			BUG_ON(added < 0);

			if (added == 0) {
				/* No more space in BIO, stop adding pages */
				break;
			}

			/* Update address and bytes sent */
			bytes -= added;
			addr += added;

			/* Update BIO vector iterator */
			if (added != cas_io_iter_move(iter, added)) {
				bdio->error = -ENOBUFS;
				break;
			}
		}

		if (bdio->error == 0) {
			/* Increase IO reference for sending this IO */
			atomic_inc(&bdio->rq_remaning);

			/* Send BIO */
			CAS_DEBUG_MSG("Submit IO");
			cas_submit_bio(dir, bio);
			bio = NULL;
		} else {
			if (bio) {
				bio_put(bio);
				bio = NULL;
			}

			/* Error, stop processing */
			break;
		}
	}

	if (bytes && bdio->error == 0) {
		/* Not all bytes were sent, mark an error */
		bdio->error = -ENOBUFS;
	}

	/* Drop the initial reference; this prevents completing the IO
	 * while child BIOs are still being submitted.
	 */
	cas_bd_io_end(io, 0);
}
const struct ocf_volume_properties cas_object_blk_properties = {
	.name = "Block_Device",
	.io_priv_size = sizeof(struct blkio),
	.volume_priv_size = sizeof(struct bd_object),
	.caps = {
		.atomic_writes = 0, /* Atomic writes not supported */
	},
	.ops = {
		.submit_io = block_dev_submit_io,
		.submit_flush = block_dev_submit_flush,
		.submit_metadata = NULL,
		.submit_discard = block_dev_submit_discard,
		.open = block_dev_open_object,
		.close = block_dev_close_object,
		.get_max_io_size = block_dev_get_max_io_size,
		.get_length = block_dev_get_byte_length,
	},
	.io_ops = {
		.set_data = cas_blk_io_set_data,
		.get_data = cas_blk_io_get_data,
	},
	.deinit = NULL,
};
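
/*
 * Register the block device volume type with the OCF context.
 */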
int block_dev_init(void)
{
	int ret;

	ret = ocf_ctx_register_volume_type(cas_ctx, BLOCK_DEVICE_VOLUME,
			&cas_object_blk_properties);
	if (ret < 0)
		return ret;

	return 0;
}
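
/*
 * If the BIO was submitted by this module (recognized by its end_io
 * callback), retrieve the IO class of the owning OCF IO. Returns -1
 * for foreign BIOs, 0 on success.
 */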
int block_dev_try_get_io_class(struct bio *bio, int *io_class)
{
	struct ocf_io *io;

	if (bio->bi_end_io != CAS_REFER_BLOCK_CALLBACK(cas_bd_io_end))
		return -1;

	io = bio->bi_private;
	*io_class = io->io_class;

	return 0;
}