/**
 * submit_bio - account for a bio and pass it on for submission
 * @bio: The &struct bio which describes the I/O; it must already be fully
 *       set up and ready for I/O.
 *
 * submit_bio() is a thin front end to generic_make_request(). For a bio
 * that carries data it first bumps the PGPGIN/PGPGOUT vm event counters,
 * charges reads to the submitting task, and prints a debug line when
 * block_dump is set. The bio is then passed to generic_make_request(),
 * which does the rest of the work.
 *
 * Return: the value returned by generic_make_request() for @bio.
 */
blk_qc_t submit_bio(struct bio *bio)
{
	unsigned int nr_sectors;
	bool is_write;

	/* A bio without a data payload is submitted with no accounting. */
	if (!bio_has_data(bio))
		return generic_make_request(bio);

	/*
	 * WRITE SAME is accounted as one logical block (converted to
	 * 512-byte units); everything else by the bio's size in sectors.
	 */
	nr_sectors = unlikely(bio_op(bio) == REQ_OP_WRITE_SAME) ?
			bdev_logical_block_size(bio->bi_bdev) >> 9 :
			bio_sectors(bio);

	is_write = op_is_write(bio_op(bio));
	if (is_write) {
		count_vm_events(PGPGOUT, nr_sectors);
	} else {
		/* Reads are additionally charged to the current task. */
		task_io_account_read(bio->bi_iter.bi_size);
		count_vm_events(PGPGIN, nr_sectors);
	}

	if (unlikely(block_dump)) {
		char name[BDEVNAME_SIZE];

		printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
		       current->comm, task_pid_nr(current),
		       is_write ? "WRITE" : "READ",
		       (unsigned long long)bio->bi_iter.bi_sector,
		       bdevname(bio->bi_bdev, name),
		       nr_sectors);
	}

	return generic_make_request(bio);
}
/**
* generic_make_request - hand a buffer to its device driver for I/O
* @bio: The bio describing the location in memory and on the device.
*
* generic_make_request() is used to make I/O requests of block
* devices. It is passed a &struct bio, which describes the I/O that needs
* to be done.
*
* generic_make_request() does not return any status. The
* success/failure status of the request, along with notification of
* completion, is delivered asynchronously through the bio->bi_end_io
* function described (one day) else where.
*
* The caller of generic_make_request must make sure that bi_io_vec
* are set to describe the memory buffer, and that bi_dev and bi_sector are
* set to describe the device address, and the
* bi_end_io and optionally bi_private are set to describe how
* completion notification should be signaled.
*
* generic_make_request and the drivers it calls may use bi_next if this
* bio happens to be merged with someone else, and may resubmit the bio to
* a lower device by calling into generic_make_request recursively, which
* means the bio should NOT be touched after the call to ->make_request_fn.
*/
blk_qc_t generic_make_request(struct bio *bio)
{
struct bio_list bio_list_on_stack;
blk_qc_t ret = BLK_QC_T_NONE;
if (!generic_make_request_checks(bio))
goto out;
/*
* We only want one ->make_request_fn to be active at a time, else
* stack usage with stacked devices could be a problem. So use
* current->bio_list to keep a list of requests submited by a
* make_request_fn function. current->bio_list is also used as a
* flag to say if generic_make_request is currently active in this
* task or not. If it is NULL, then no make_request is active. If
* it is non-NULL, then a make_request is active, and new requests
* should be added at the tail
*/
if (current->bio_list) {
bio_list_add(current->bio_list, bio);
goto out;
}
/* following loop may be a bit non-obvious, and so deserves some
* explanation.
* Before entering the loop, bio->bi_next is NULL (as all callers
* ensure that) so we have a list with a single bio.
* We pretend that we have just taken it off a longer list, so
* we assign bio_list to a pointer to the bio_list_on_stack,
* thus initialising the bio_list of new bios to be
* added. ->make_request() may indeed add some more bios
* through a recursive call to generic_make_request. If it
* did, we find a non-NULL value in bio_list and re-enter the loop
* from the top. In this case we really did just take the bio
* of the top of the list (no pretending) and so remove it from
* bio_list, and call into ->make_request() again.
*/
BUG_ON(bio->bi_next);
bio_list_init(&bio_list_on_stack);
current->bio_list = &bio_list_on_stack;
do {
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
if (likely(blk_queue_enter(q, false) == 0)) {
ret = q->make_request_fn(q, bio);
blk_queue_exit(q);
bio = bio_list_pop(current->bio_list);
} else {
struct bio *bio_next = bio_list_pop(current->bio_list);
bio_io_error(bio);
bio = bio_next;
}
} while (bio);
current->bio_list = NULL; /* deactivate */
out:
return ret;
}
/*
* blk_queue_bio - merge a bio into an existing request or queue a new one
* @q: request queue the bio is being submitted to
* @bio: bio to process
*
* The bio is bounced and split as required and, when integrity is enabled
* for it, prepared for integrity (a failure there ends the bio with -EIO).
* The function then tries, in order, to merge the bio into a request on
* the plug list and into a request picked by the elevator. If neither
* works, a new request is obtained and initialised from the bio; it is
* appended to current->plug's list when the task has a plug, otherwise it
* is added to the queue under q->queue_lock and the queue is run.
*
* Every path returns BLK_QC_T_NONE.
*/
static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
{
const bool sync = !!(bio->bi_opf & REQ_SYNC);
struct blk_plug *plug;
int el_ret, rw_flags = 0, where = ELEVATOR_INSERT_SORT;
struct request *req;
unsigned int request_count = 0;
/*
* low level driver can indicate that it wants pages above a
* certain limit bounced to low memory (ie for highmem, or even
* ISA dma in theory)
*/
blk_queue_bounce(q, &bio);
/* Like the bounce call above, this takes &bio and may update the pointer. */
blk_queue_split(q, &bio, q->bio_split);
/* Integrity is enabled but preparing it failed: end the bio with -EIO. */
if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
bio->bi_error = -EIO;
bio_endio(bio);
return BLK_QC_T_NONE;
}
/*
* PREFLUSH/FUA bios skip all merge attempts. The lock is taken here
* so that get_rq is reached with q->queue_lock held, exactly as on
* the merge path below.
*/
if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) {
spin_lock_irq(q->queue_lock);
where = ELEVATOR_INSERT_FLUSH;
goto get_rq;
}
/*
* Check if we can merge with the plugged list before grabbing
* any locks. When merging is disabled for the queue, only the
* number of requests already plugged is looked up.
*/
if (!blk_queue_nomerges(q)) {
if (blk_attempt_plug_merge(q, bio, &request_count, NULL))
return BLK_QC_T_NONE;
} else
request_count = blk_plug_queued_count(q);
spin_lock_irq(q->queue_lock);
/*
* Ask the elevator for a merge candidate. On a successful back or
* front merge the elevator is notified and, unless the grown request
* could itself be merged with a neighbouring request, told that the
* request changed. A failed bio merge falls through to get_rq.
*/
el_ret = elv_merge(q, &req, bio);
if (el_ret == ELEVATOR_BACK_MERGE) {
if (bio_attempt_back_merge(q, req, bio)) {
elv_bio_merged(q, req, bio);
if (!attempt_back_merge(q, req))
elv_merged_request(q, req, el_ret);
goto out_unlock;
}
} else if (el_ret == ELEVATOR_FRONT_MERGE) {
if (bio_attempt_front_merge(q, req, bio)) {
elv_bio_merged(q, req, bio);
if (!attempt_front_merge(q, req))
elv_merged_request(q, req, el_ret);
goto out_unlock;
}
}
get_rq:
/*
* This sync check and mask will be re-done in init_request_from_bio(),
* but we need to set it earlier to expose the sync flag to the
* rq allocator and io schedulers.
*/
if (sync)
rw_flags |= REQ_SYNC;
/*
* Add in META/PRIO flags, if set, before we get to the IO scheduler
*/
rw_flags |= (bio->bi_opf & (REQ_META | REQ_PRIO));
/*
* Grab a free request. This may sleep. The result is checked with
* IS_ERR(): on failure the bio is ended with that error and we jump
* to out_unlock, which releases q->queue_lock. On success the
* non-plug path below takes q->queue_lock again.
* NOTE(review): this implies get_request() returns with the lock
* still held on failure and dropped on success - confirm against
* get_request() itself.
*/
req = get_request(q, bio_data_dir(bio), rw_flags, bio, GFP_NOIO);
if (IS_ERR(req)) {
bio->bi_error = PTR_ERR(req);
bio_endio(bio);
goto out_unlock;
}
/*
* After dropping the lock and possibly sleeping here, our request
* may now be mergeable after it had proven unmergeable (above).
* We don't worry about that case for efficiency. It won't happen
* often, and the elevators are able to handle it.
*/
init_request_from_bio(req, bio);
/* With QUEUE_FLAG_SAME_COMP set, record the submitting CPU in the request. */
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
req->cpu = raw_smp_processor_id();
plug = current->plug;
if (plug) {
/*
* If this is the first request added after a plug, fire
* off a plug trace. If the plug already holds
* BLK_MAX_REQUEST_COUNT requests or more, flush it first and
* then emit the plug trace.
*/
if (!request_count)
trace_block_plug(q);
else {
if (request_count >= BLK_MAX_REQUEST_COUNT) {
blk_flush_plug_list(plug, false);
trace_block_plug(q);
}
}
list_add_tail(&req->queuelist, &plug->list);
blk_account_io_start(req, true);
} else {
/* No plug: insert the request under the queue lock and run the queue. */
spin_lock_irq(q->queue_lock);
add_acct_request(q, req, where);
__blk_run_queue(q);
/*
* The merge and request-allocation-failure paths jump here; they
* arrive with q->queue_lock held.
*/
out_unlock:
spin_unlock_irq(q->queue_lock);
}
return BLK_QC_T_NONE;
}