open-cas-linux/modules/cas_cache/volume/vol_block_dev_bottom.c

/*
 * Copyright(c) 2012-2019 Intel Corporation
 * SPDX-License-Identifier: BSD-3-Clause-Clear
 */

#include "cas_cache.h"

#define CAS_DEBUG_IO 0

#if CAS_DEBUG_IO == 1
#define CAS_DEBUG_TRACE() printk(KERN_DEBUG \
		"[IO] %s:%d\n", __func__, __LINE__)

#define CAS_DEBUG_MSG(msg) printk(KERN_DEBUG \
		"[IO] %s:%d - %s\n", __func__, __LINE__, msg)

#define CAS_DEBUG_PARAM(format, ...) printk(KERN_DEBUG \
		"[IO] %s:%d - "format"\n", __func__, __LINE__, ##__VA_ARGS__)
#else
#define CAS_DEBUG_TRACE()
#define CAS_DEBUG_MSG(msg)
#define CAS_DEBUG_PARAM(format, ...)
#endif

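/*
 * Open the bottom block device for a volume. During an upgrade the device
 * is claimed and marked attached instead of being opened; otherwise it is
 * opened by path. -EBUSY from the open is translated to OCF's "not opened
 * exclusively" error code.
 */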
int block_dev_open_object(ocf_volume_t vol, void *volume_params)
{
	struct bd_object *bdobj = bd_object(vol);
	const struct ocf_volume_uuid *uuid = ocf_volume_get_uuid(vol);
	struct casdsk_disk *dsk;

	if (bdobj->opened_by_bdev) {
		/* Bdev has been set manually, so there is nothing to do. */
		return 0;
	}

	if (unlikely(cas_upgrade_is_in_upgrade())) {
		dsk = casdisk_functions.casdsk_disk_claim(uuid->data, NULL);
		casdisk_functions.casdsk_disk_set_attached(dsk);
	} else {
		dsk = casdisk_functions.casdsk_disk_open(uuid->data, NULL);
	}

	if (IS_ERR_OR_NULL(dsk)) {
		int error = PTR_ERR(dsk) ?: -EINVAL;

		if (error == -EBUSY)
			error = -OCF_ERR_NOT_OPEN_EXC;

		return error;
	}

	bdobj->dsk = dsk;
	bdobj->btm_bd = casdisk_functions.casdsk_disk_get_blkdev(dsk);

	return 0;
}

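/*
 * Close the bottom block device. Devices attached manually via bdev are
 * left untouched. During an upgrade the device is switched back to
 * pass-through mode and detached rather than closed.
 */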
void block_dev_close_object(ocf_volume_t vol)
{
	struct bd_object *bdobj = bd_object(vol);

	if (bdobj->opened_by_bdev)
		return;

	if (likely(!cas_upgrade_is_in_upgrade())) {
		casdisk_functions.casdsk_disk_close(bdobj->dsk);
	} else {
		casdisk_functions.casdsk_disk_set_pt(bdobj->dsk);
		casdisk_functions.casdsk_disk_dettach(bdobj->dsk);
	}
}

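/*
 * Report the largest single I/O the bottom device accepts, derived from
 * the request queue's max_sectors limit and converted to bytes.
 */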
unsigned int block_dev_get_max_io_size(ocf_volume_t vol)
{
	struct bd_object *bdobj = bd_object(vol);
	struct block_device *bd = bdobj->btm_bd;

	return queue_max_sectors(bd->bd_disk->queue) << SECTOR_SHIFT;
}

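/*
 * Volume length in bytes. For a whole disk (bd_contains points back at
 * the bdev itself) this is the disk capacity; for a partition it is the
 * partition's sector count.
 */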
uint64_t block_dev_get_byte_length(ocf_volume_t vol)
{
	struct bd_object *bdobj = bd_object(vol);
	struct block_device *bd = bdobj->btm_bd;
	uint64_t sector_length;

	sector_length = (bd->bd_contains == bd) ?
			get_capacity(bd->bd_disk) :
			bd->bd_part->nr_sects;

	return sector_length << SECTOR_SHIFT;
}

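/*
 * The field holding the elevator type was renamed in the kernel (the old
 * elevator->elevator_type pointer became elevator->type), hence the two
 * variants below, selected by kernel version.
 */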
#if LINUX_VERSION_CODE <= KERNEL_VERSION(3, 3, 0)
static char *__block_dev_get_elevator_name(struct request_queue *q)
{
	if (q->elevator->elevator_type == NULL)
		return NULL;

	if (q->elevator->elevator_type->elevator_name == NULL)
		return NULL;

	if (q->elevator->elevator_type->elevator_name[0] == 0)
		return NULL;

	return q->elevator->elevator_type->elevator_name;
}
#else
static char *__block_dev_get_elevator_name(struct request_queue *q)
{
	if (q->elevator->type == NULL)
		return NULL;

	if (q->elevator->type->elevator_name == NULL)
		return NULL;

	if (q->elevator->type->elevator_name[0] == 0)
		return NULL;

	return q->elevator->type->elevator_name;
}
#endif

/*
 * Return the name of the I/O scheduler attached to the given request
 * queue, or NULL if there is no queue, no elevator, or the name is empty.
 */
const char *block_dev_get_elevator_name(struct request_queue *q)
{
	if (!q)
		return NULL;

	if (q->elevator == NULL)
		return NULL;

	return __block_dev_get_elevator_name(q);
}

/*
 * Check whether the metadata mode is optimal for the given volume type.
 * For a regular block device the mode is suboptimal only when the device
 * is atomic-capable but not running in the optimal mode; for an atomic
 * device the flag is reported directly.
 */
int block_dev_is_metadata_mode_optimal(struct atomic_dev_params *atomic_params,
		uint8_t type)
{
	if (type == BLOCK_DEVICE_VOLUME) {
		if (atomic_params->is_atomic_capable)
			return atomic_params->is_mode_optimal;
	} else if (type == ATOMIC_DEVICE_VOLUME) {
		return atomic_params->is_mode_optimal;
	}

	return 1;
}

/*
 * Allocate a bio sized to the remaining part of the I/O vector iterator.
 * If that allocation fails and the requested vector was already small,
 * the failure is a genuine out-of-memory condition; otherwise retry with
 * a smaller, fixed-size vector.
 */
static inline struct bio *cas_bd_io_alloc_bio(struct blkio *bdio)
{
	struct bio *bio
		= bio_alloc(GFP_NOIO, cas_io_iter_size_left(&bdio->iter));

	if (bio)
		return bio;

	if (cas_io_iter_size_left(&bdio->iter) < MAX_LINES_PER_IO) {
		/* BIO vector was already small, so this is a plain
		 * out-of-memory condition
		 */
		return NULL;
	}

	/* Retry with a smaller BIO vector */
	return bio_alloc(GFP_NOIO, MAX_LINES_PER_IO);
}

/*
 * Drop one completion reference. The submitter and each in-flight child
 * bio hold one reference; the caller's completion runs only when the last
 * reference is dropped, with the accumulated error status.
 */
static void cas_bd_io_end(struct ocf_io *io, int error)
{
	struct blkio *bdio = cas_io_to_blkio(io);

	if (error)
		bdio->error |= error;

	if (atomic_dec_return(&bdio->rq_remaning))
		return;

	CAS_DEBUG_MSG("Completion");

	/* Send completion to caller */
	io->end(io, bdio->error);

	/* Free allocated structures */
	ocf_io_put(io);
}

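/*
 * Completion accounting pattern used throughout this file: the submit
 * path initializes rq_remaning to 1 (the submitter's own reference),
 * takes one extra reference per child bio it sends, and finally drops
 * its own reference. A rough sketch of the lifetime:
 *
 *	atomic_set(&bdio->rq_remaning, 1);	// submitter reference
 *	for each child bio:
 *		atomic_inc(&bdio->rq_remaning);	// child reference
 *		cas_submit_bio(...);		// child drops it in end_io
 *	cas_bd_io_end(io, 0);			// drop submitter reference
 *
 * This guarantees io->end() cannot fire before submission has finished,
 * even if all child bios complete immediately.
 */
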
/*
 * Bottom device bio completion callback. Tracks the potentially-dirty
 * state of the device cache for writes and flushes, then drops this
 * bio's completion reference.
 */
CAS_DECLARE_BLOCK_CALLBACK(cas_bd_io_end, struct bio *bio,
		unsigned int bytes_done, int error)
{
	struct ocf_io *io;
	struct blkio *bdio;
	struct bd_object *bdobj;
	int err;

	BUG_ON(!bio);
	BUG_ON(!bio->bi_private);
	CAS_BLOCK_CALLBACK_INIT(bio);
	io = bio->bi_private;
	bdobj = bd_object(io->volume);
	BUG_ON(!bdobj);
	err = CAS_BLOCK_CALLBACK_ERROR(bio, error);
	bdio = cas_io_to_blkio(io);
	BUG_ON(!bdio);

	CAS_DEBUG_TRACE();

	if (err)
		goto out;

	if (bdio->dir == OCF_WRITE) {
		/* IO was a write */
		if (!cas_blk_is_flush_io(io->flags)) {
			/* Device cache is dirty, mark it */
			atomic_inc(&bdobj->potentially_dirty);
		} else {
			/* IO flush finished, update potential
			 * dirty state
			 */
			atomic_sub(bdio->dirty, &bdobj->potentially_dirty);
		}
	}
out:
	if (err == -EOPNOTSUPP && (CAS_BIO_OP_FLAGS(bio) & CAS_BIO_DISCARD))
		err = 0;

	cas_bd_io_end(io, err);

	bio_put(bio);
	CAS_BLOCK_CALLBACK_RETURN();
}

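/*
 * Submit a flush to the bottom device. The flush is skipped when nothing
 * has been written since the last one (potentially_dirty is zero) or when
 * the device queue does not advertise flush support; in the latter case
 * the dirty counter is simply rewound.
 */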
static void block_dev_submit_flush(struct ocf_io *io)
{
#ifdef CAS_FLUSH_SUPPORTED
	struct blkio *blkio = cas_io_to_blkio(io);
	struct bd_object *bdobj = bd_object(io->volume);
	struct block_device *bdev = bdobj->btm_bd;
	struct request_queue *q = bdev_get_queue(bdev);
	struct bio *bio = NULL;

	blkio->dirty = atomic_read(&bdobj->potentially_dirty);

	/* Prevent races of completing IO */
	atomic_set(&blkio->rq_remaning, 1);

	/* Increase IO reference counter for FLUSH IO */
	ocf_io_get(io);

	if (!blkio->dirty) {
		/* Didn't write anything to underlying disk;
		 * no need to send a flush request
		 */
		goto out;
	}

	if (q == NULL) {
		/* No queue, error */
		blkio->error = -EINVAL;
		goto out;
	}

	if (!CAS_CHECK_QUEUE_FLUSH(q)) {
		/* This block device does not support flush; complete
		 * back to the caller
		 */
		atomic_sub(blkio->dirty, &bdobj->potentially_dirty);
		goto out;
	}

	bio = bio_alloc(GFP_NOIO, 0);
	if (bio == NULL) {
		CAS_PRINT_RL(KERN_ERR "Couldn't allocate memory for BIO\n");
		blkio->error = -ENOMEM;
		goto out;
	}

	blkio->dir = io->dir;

	bio->bi_end_io = CAS_REFER_BLOCK_CALLBACK(cas_bd_io_end);
	CAS_BIO_SET_DEV(bio, bdev);
	bio->bi_private = io;

	atomic_inc(&blkio->rq_remaning);
	cas_submit_bio(CAS_WRITE_FLUSH, bio);
out:
	cas_bd_io_end(io, blkio->error);
#else
	/* On an operating system without support for REQ_FLUSH
	 * (e.g. SLES 11 SP 1) CAS cannot use flush requests to
	 * implement power-fail safe Write-Back
	 */
	io->end(io, -ENOTSUPP);

	/* On SLES 11 SP 1 power-fail safety can only be achieved
	 * by disabling the disk's own volatile write cache
	 */
#endif
}

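/*
 * Submit a discard to the bottom device, splitting it into bios that
 * respect the queue's discard granularity, alignment and maximum discard
 * size. Devices without discard support complete the request successfully
 * without doing anything.
 */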
void block_dev_submit_discard(struct ocf_io *io)
{
	struct blkio *blkio = cas_io_to_blkio(io);
	struct bd_object *bdobj = bd_object(io->volume);
	struct block_device *bd = bdobj->btm_bd;
	struct request_queue *q = bdev_get_queue(bd);
	struct bio *bio = NULL;
	unsigned int max_discard_sectors, granularity, bio_sects;
	int alignment;
	sector_t sects, start, end, tmp;

	/* Prevent races of completing IO */
	atomic_set(&blkio->rq_remaning, 1);

	/* Increase IO reference counter for DISCARD IO */
	ocf_io_get(io);

	if (!q) {
		/* No queue, error */
		blkio->error = -ENXIO;
		goto out;
	}

	if (!blk_queue_discard(q)) {
		/* Discard is not supported by bottom device, send completion
		 * to caller
		 */
		goto out;
	}

	granularity = max(q->limits.discard_granularity >> SECTOR_SHIFT, 1U);
	alignment = (bdev_discard_alignment(bd) >> SECTOR_SHIFT) % granularity;
	max_discard_sectors =
		min(q->limits.max_discard_sectors, UINT_MAX >> SECTOR_SHIFT);
	max_discard_sectors -= max_discard_sectors % granularity;

	if (unlikely(!max_discard_sectors))
		goto out;

	sects = io->bytes >> SECTOR_SHIFT;
	start = io->addr >> SECTOR_SHIFT;

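	/* Each bio except the last is trimmed so that it ends on a
	 * granularity boundary (offset by the device's discard alignment):
	 * end is rounded down to the largest value not above it with
	 * end % granularity == alignment. For example, with granularity = 8,
	 * alignment = 2 and a candidate end of 27: 27 % 8 == 3, so end is
	 * rounded down to 26 (26 % 8 == 2) and the next bio starts aligned
	 * at sector 26.
	 */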
	while (sects) {
		bio = bio_alloc(GFP_NOIO, 1);
		if (!bio) {
			CAS_PRINT_RL(CAS_KERN_ERR "Couldn't allocate memory for BIO\n");
			blkio->error = -ENOMEM;
			break;
		}

		bio_sects = min_t(sector_t, sects, max_discard_sectors);
		end = start + bio_sects;
		tmp = end;
		if (bio_sects < sects &&
				sector_div(tmp, granularity) != alignment) {
			end = end - alignment;
			sector_div(end, granularity);
			end = end * granularity + alignment;
			bio_sects = end - start;
		}

		CAS_BIO_SET_DEV(bio, bd);
		CAS_BIO_BISECTOR(bio) = start;
		CAS_BIO_BISIZE(bio) = bio_sects << SECTOR_SHIFT;
		bio->bi_next = NULL;
		bio->bi_private = io;
		bio->bi_end_io = CAS_REFER_BLOCK_CALLBACK(cas_bd_io_end);

		atomic_inc(&blkio->rq_remaning);
		cas_submit_bio(CAS_BIO_DISCARD, bio);

		sects -= bio_sects;
		start = end;

		cond_resched();
	}

out:
	cas_bd_io_end(io, blkio->error);
}

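/*
 * Validate the request and translate the OCF direction into the kernel's
 * READ/WRITE value. Also records the current potentially-dirty count for
 * use by the completion path.
 */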
static inline bool cas_bd_io_prepare(int *dir, struct ocf_io *io)
{
	struct blkio *bdio = cas_io_to_blkio(io);
	struct bd_object *bdobj = bd_object(io->volume);

	/* Setup DIR */
	bdio->dir = *dir;

	/* Save dirty counter */
	bdio->dirty = atomic_read(&bdobj->potentially_dirty);

	/* Convert CAS direction into kernel values */
	switch (bdio->dir) {
	case OCF_READ:
		*dir = READ;
		break;
	case OCF_WRITE:
		*dir = WRITE;
		break;
	default:
		bdio->error = -EINVAL;
		break;
	}

	if (!io->bytes) {
		/* Do not accept empty requests */
		CAS_PRINT_RL(KERN_ERR "Invalid zero size IO\n");
		bdio->error = -EINVAL;
	}

	if (bdio->error)
		return false;

	return true;
}

/*
 * Submit a data IO to the bottom device. Pure flush requests are routed
 * to block_dev_submit_flush(); everything else is split into as many bios
 * as the IO vector requires and submitted with the translated direction
 * and flags.
 */
static void block_dev_submit_io(struct ocf_io *io)
{
	struct blkio *bdio = cas_io_to_blkio(io);
	struct bd_object *bdobj = bd_object(io->volume);
	struct bio_vec_iter *iter = &bdio->iter;
	uint64_t addr = io->addr;
	uint32_t bytes = io->bytes;
	int dir = io->dir;

	if (!CAS_IS_WRITE_FLUSH_FUA(io->flags) &&
			CAS_IS_WRITE_FLUSH(io->flags)) {
		CAS_DEBUG_MSG("Flush request");
		/* It is a flush request, handle it */
		block_dev_submit_flush(io);
		return;
	}

	CAS_DEBUG_PARAM("Address = %llu, bytes = %u\n", addr, bytes);

	/* Increase IO reference */
	ocf_io_get(io);

	/* Prevent races of completing IO */
	atomic_set(&bdio->rq_remaning, 1);

	if (!cas_bd_io_prepare(&dir, io)) {
		CAS_DEBUG_MSG("Invalid request");
		cas_bd_io_end(io, -EINVAL);
		return;
	}

	while (cas_io_iter_is_next(iter) && bytes) {
		/* Still IO vectors to be sent */

		/* Allocate BIO */
		struct bio *bio = cas_bd_io_alloc_bio(bdio);

		if (!bio) {
			bdio->error = -ENOMEM;
			break;
		}

		/* Setup BIO */
		CAS_BIO_SET_DEV(bio, bdobj->btm_bd);
		CAS_BIO_BISECTOR(bio) = addr / SECTOR_SIZE;
		bio->bi_next = NULL;
		bio->bi_private = io;
		CAS_BIO_OP_FLAGS(bio) |= io->flags;
		bio->bi_end_io = CAS_REFER_BLOCK_CALLBACK(cas_bd_io_end);

		/* Add pages */
		while (cas_io_iter_is_next(iter) && bytes) {
			struct page *page = cas_io_iter_current_page(iter);
			uint32_t offset = cas_io_iter_current_offset(iter);
			uint32_t length = cas_io_iter_current_length(iter);
			int added;

			if (length > bytes)
				length = bytes;

			added = bio_add_page(bio, page, length, offset);
			BUG_ON(added < 0);

			if (added == 0) {
				/* No more space in BIO, stop adding pages */
				break;
			}

			/* Update address and bytes sent */
			bytes -= added;
			addr += added;

			/* Update BIO vector iterator */
			if (added != cas_io_iter_move(iter, added)) {
				bdio->error = -ENOBUFS;
				break;
			}
		}

		if (bdio->error == 0) {
			/* Increase IO reference for sending this IO */
			atomic_inc(&bdio->rq_remaning);

			/* Send BIO */
			CAS_DEBUG_MSG("Submit IO");
			cas_submit_bio(dir, bio);
			bio = NULL;
		} else {
			if (bio) {
				bio_put(bio);
				bio = NULL;
			}

			/* Error, stop processing */
			break;
		}
	}

	if (bytes && bdio->error == 0) {
		/* Not all bytes sent, mark error */
		bdio->error = -ENOBUFS;
	}

	/* Prevent races of completing IO when there are still
	 * child IOs that have not been sent yet.
	 */
	cas_bd_io_end(io, 0);
}

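/*
 * Volume properties registered for BLOCK_DEVICE_VOLUME. Atomic writes and
 * metadata submission are not supported by plain block devices, so those
 * capabilities are left unset.
 */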
const struct ocf_volume_properties cas_object_blk_properties = {
	.name = "Block Device",
	.io_priv_size = sizeof(struct blkio),
	.volume_priv_size = sizeof(struct bd_object),
	.caps = {
		.atomic_writes = 0, /* Atomic writes not supported */
	},
	.ops = {
		.submit_io = block_dev_submit_io,
		.submit_flush = block_dev_submit_flush,
		.submit_metadata = NULL,
		.submit_discard = block_dev_submit_discard,
		.open = block_dev_open_object,
		.close = block_dev_close_object,
		.get_max_io_size = block_dev_get_max_io_size,
		.get_length = block_dev_get_byte_length,
	},
	.io_ops = {
		.set_data = cas_blk_io_set_data,
		.get_data = cas_blk_io_get_data,
	},
};

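/*
 * Register/unregister the block device volume type with the OCF context
 * under the BLOCK_DEVICE_VOLUME id.
 */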
int block_dev_init(void)
{
	int ret;

	ret = ocf_ctx_register_volume_type(cas_ctx, BLOCK_DEVICE_VOLUME,
			&cas_object_blk_properties);
	if (ret < 0)
		return ret;

	return 0;
}

void block_dev_deinit(void)
{
	ocf_ctx_unregister_volume_type(cas_ctx, BLOCK_DEVICE_VOLUME);
}

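/*
 * Recover the OCF io class from a bio, but only if the bio was issued by
 * this file (recognized by its end_io callback). Returns 0 on success,
 * -1 if the bio does not belong to CAS.
 */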
int block_dev_try_get_io_class(struct bio *bio, int *io_class)
{
	struct ocf_io *io;

	if (bio->bi_end_io != CAS_REFER_BLOCK_CALLBACK(cas_bd_io_end))
		return -1;

	io = bio->bi_private;
	*io_class = io->io_class;

	return 0;
}
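
/*
 * A minimal usage sketch for block_dev_try_get_io_class(), assuming a
 * hypothetical caller sitting in the bio submission path (the caller
 * context and helper name below are illustrative, not part of this file):
 *
 *	int io_class;
 *
 *	if (block_dev_try_get_io_class(bio, &io_class) == 0) {
 *		// bio was generated by CAS; io_class is valid
 *		account_io_class(io_class);	// hypothetical helper
 *	} else {
 *		// bio originated elsewhere; no class information
 *	}
 */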