X-Git-Url: http://git.monstr.eu/?p=linux-2.6-microblaze.git;a=blobdiff_plain;f=block%2Fbfq-iosched.h;h=a73488eec8a47b0e66028243163b302165ecc51a;hp=ad8e513d7e8764dfa39f6649e8363f1cd5ac642c;hb=HEAD;hpb=b8dcef877ab5f2637fccd3efb6fe169c8211961a diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index ad8e513d7e87..467e8cfc41a2 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -20,7 +20,6 @@ #define BFQ_DEFAULT_QUEUE_IOPRIO 4 -#define BFQ_WEIGHT_LEGACY_DFL 100 #define BFQ_DEFAULT_GRP_IOPRIO 0 #define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE @@ -33,6 +32,14 @@ */ #define BFQ_SOFTRT_WEIGHT_FACTOR 100 +/* + * Maximum number of actuators supported. This constant is used simply + * to define the size of the static array that will contain + * per-actuator data. The current value is hopefully a good upper + * bound to the possible number of actuators of any actual drive. + */ +#define BFQ_MAX_ACTUATORS 8 + struct bfq_entity; /** @@ -197,8 +204,10 @@ struct bfq_entity { /* flag, set to request a weight, ioprio or ioprio_class change */ int prio_changed; +#ifdef CONFIG_BFQ_GROUP_IOSCHED /* flag, set if the entity is counted in groups_with_pending_reqs */ bool in_groups_with_pending_reqs; +#endif /* last child queue of entity created (for non-leaf entities) */ struct bfq_queue *last_bfqq_created; @@ -225,12 +234,14 @@ struct bfq_ttime { * struct bfq_queue - leaf schedulable entity. * * A bfq_queue is a leaf request queue; it can be associated with an - * io_context or more, if it is async or shared between cooperating - * processes. @cgroup holds a reference to the cgroup, to be sure that it - * does not disappear while a bfqq still references it (mostly to avoid - * races between request issuing and task migration followed by cgroup - * destruction). - * All the fields are protected by the queue lock of the containing bfqd. + * io_context or more, if it is async or shared between cooperating + * processes. Besides, it contains I/O requests for only one actuator + * (an io_context is associated with a different bfq_queue for each + * actuator it generates I/O for). @cgroup holds a reference to the + * cgroup, to be sure that it does not disappear while a bfqq still + * references it (mostly to avoid races between request issuing and + * task migration followed by cgroup destruction). All the fields are + * protected by the queue lock of the containing bfqd. */ struct bfq_queue { /* reference counter */ @@ -369,12 +380,8 @@ struct bfq_queue { unsigned long split_time; /* time of last split */ unsigned long first_IO_time; /* time of first I/O for this queue */ - unsigned long creation_time; /* when this queue is created */ - /* max service rate measured so far */ - u32 max_service_rate; - /* * Pointer to the waker queue for this queue, i.e., to the * queue Q such that this queue happens to get new I/O right @@ -399,24 +406,18 @@ struct bfq_queue { * the woken queues when this queue exits. */ struct hlist_head woken_list; + + /* index of the actuator this queue is associated with */ + unsigned int actuator_idx; }; /** - * struct bfq_io_cq - per (request_queue, io_context) structure. - */ -struct bfq_io_cq { - /* associated io_cq structure */ - struct io_cq icq; /* must be the first member */ - /* array of two process queues, the sync and the async */ - struct bfq_queue *bfqq[2]; - /* per (request_queue, blkcg) ioprio */ - int ioprio; -#ifdef CONFIG_BFQ_GROUP_IOSCHED - uint64_t blkcg_serial_nr; /* the current blkcg serial */ -#endif +* struct bfq_data - bfqq data unique and persistent for associated bfq_io_cq +*/ +struct bfq_iocq_bfqq_data { /* * Snapshot of the has_short_time flag before merging; taken - * to remember its value while the queue is merged, so as to + * to remember its values while the queue is merged, so as to * be able to restore it in case of split. */ bool saved_has_short_ttime; @@ -430,7 +431,7 @@ struct bfq_io_cq { u64 saved_tot_idle_time; /* - * Same purpose as the previous fields for the value of the + * Same purpose as the previous fields for the values of the * field keeping the queue's belonging to a large burst */ bool saved_in_large_burst; @@ -468,6 +469,38 @@ struct bfq_io_cq { struct bfq_queue *stable_merge_bfqq; bool stably_merged; /* non splittable if true */ +}; + +/** + * struct bfq_io_cq - per (request_queue, io_context) structure. + */ +struct bfq_io_cq { + /* associated io_cq structure */ + struct io_cq icq; /* must be the first member */ + /* + * Matrix of associated process queues: first row for async + * queues, second row sync queues. Each row contains one + * column for each actuator. An I/O request generated by the + * process is inserted into the queue pointed by bfqq[i][j] if + * the request is to be served by the j-th actuator of the + * drive, where i==0 or i==1, depending on whether the request + * is async or sync. So there is a distinct queue for each + * actuator. + */ + struct bfq_queue *bfqq[2][BFQ_MAX_ACTUATORS]; + /* per (request_queue, blkcg) ioprio */ + int ioprio; +#ifdef CONFIG_BFQ_GROUP_IOSCHED + uint64_t blkcg_serial_nr; /* the current blkcg serial */ +#endif + + /* + * Persistent data for associated synchronous process queues + * (one queue per actuator, see field bfqq above). In + * particular, each of these queues may undergo a merge. + */ + struct bfq_iocq_bfqq_data bfqq_data[BFQ_MAX_ACTUATORS]; + unsigned int requests; /* Number of requests this process has in flight */ }; @@ -495,28 +528,29 @@ struct bfq_data { */ struct rb_root_cached queue_weights_tree; +#ifdef CONFIG_BFQ_GROUP_IOSCHED /* - * Number of groups with at least one descendant process that + * Number of groups with at least one process that * has at least one request waiting for completion. Note that * this accounts for also requests already dispatched, but not * yet completed. Therefore this number of groups may differ * (be larger) than the number of active groups, as a group is * considered active only if its corresponding entity has - * descendant queues with at least one request queued. This + * queues with at least one request queued. This * number is used to decide whether a scenario is symmetric. * For a detailed explanation see comments on the computation * of the variable asymmetric_scenario in the function * bfq_better_to_idle(). * * However, it is hard to compute this number exactly, for - * groups with multiple descendant processes. Consider a group - * that is inactive, i.e., that has no descendant process with + * groups with multiple processes. Consider a group + * that is inactive, i.e., that has no process with * pending I/O inside BFQ queues. Then suppose that * num_groups_with_pending_reqs is still accounting for this - * group, because the group has descendant processes with some + * group, because the group has processes with some * I/O request still in flight. num_groups_with_pending_reqs * should be decremented when the in-flight request of the - * last descendant process is finally completed (assuming that + * last process is finally completed (assuming that * nothing else has changed for the group in the meantime, in * terms of composition of the group and active/inactive state of child * groups and processes). To accomplish this, an additional @@ -525,7 +559,7 @@ struct bfq_data { * we resort to the following tradeoff between simplicity and * accuracy: for an inactive group that is still counted in * num_groups_with_pending_reqs, we decrement - * num_groups_with_pending_reqs when the first descendant + * num_groups_with_pending_reqs when the first * process of the group remains with no request waiting for * completion. * @@ -533,15 +567,16 @@ struct bfq_data { * carefulness: to avoid multiple decrements, we flag a group, * more precisely an entity representing a group, as still * counted in num_groups_with_pending_reqs when it becomes - * inactive. Then, when the first descendant queue of the + * inactive. Then, when the first queue of the * entity remains with no request waiting for completion, * num_groups_with_pending_reqs is decremented, and this flag * is reset. After this flag is reset for the entity, * num_groups_with_pending_reqs won't be decremented any - * longer in case a new descendant queue of the entity remains + * longer in case a new queue of the entity remains * with no request waiting for completion. */ unsigned int num_groups_with_pending_reqs; +#endif /* * Per-class (RT, BE, IDLE) number of bfq_queues containing @@ -554,7 +589,12 @@ struct bfq_data { /* number of queued requests */ int queued; /* number of requests dispatched and waiting for completion */ - int rq_in_driver; + int tot_rq_in_driver; + /* + * number of requests dispatched and waiting for completion + * for each actuator + */ + int rq_in_driver[BFQ_MAX_ACTUATORS]; /* true if the device is non rotational and performs queueing */ bool nonrot_with_queueing; @@ -648,8 +688,13 @@ struct bfq_data { /* maximum budget allotted to a bfq_queue before rescheduling */ int bfq_max_budget; - /* list of all the bfq_queues active on the device */ - struct list_head active_list; + /* + * List of all the bfq_queues active for a specific actuator + * on the device. Keeping active queues separate on a + * per-actuator basis helps implementing per-actuator + * injection more efficiently. + */ + struct list_head active_list[BFQ_MAX_ACTUATORS]; /* list of all the bfq_queues idle on the device */ struct list_head idle_list; @@ -723,8 +768,6 @@ struct bfq_data { * is multiplied. */ unsigned int bfq_wr_coeff; - /* maximum duration of a weight-raising period (jiffies) */ - unsigned int bfq_wr_max_time; /* Maximum weight-raising duration for soft real-time processes */ unsigned int bfq_wr_rt_max_time; @@ -772,6 +815,42 @@ struct bfq_data { */ unsigned int word_depths[2][2]; unsigned int full_depth_shift; + + /* + * Number of independent actuators. This is equal to 1 in + * case of single-actuator drives. + */ + unsigned int num_actuators; + /* + * Disk independent access ranges for each actuator + * in this device. + */ + sector_t sector[BFQ_MAX_ACTUATORS]; + sector_t nr_sectors[BFQ_MAX_ACTUATORS]; + struct blk_independent_access_range ia_ranges[BFQ_MAX_ACTUATORS]; + + /* + * If the number of I/O requests queued in the device for a + * given actuator is below next threshold, then the actuator + * is deemed as underutilized. If this condition is found to + * hold for some actuator upon a dispatch, but (i) the + * in-service queue does not contain I/O for that actuator, + * while (ii) some other queue does contain I/O for that + * actuator, then the head I/O request of the latter queue is + * returned (injected), instead of the head request of the + * currently in-service queue. + * + * We set the threshold, empirically, to the minimum possible + * value for which an actuator is fully utilized, or close to + * be fully utilized. By doing so, injected I/O 'steals' as + * few drive-queue slots as possibile to the in-service + * queue. This reduces as much as possible the probability + * that the service of I/O from the in-service bfq_queue gets + * delayed because of slot exhaustion, i.e., because all the + * slots of the drive queue are filled with I/O injected from + * other queues (NCQ provides for 32 slots). + */ + unsigned int actuator_load_threshold; }; enum bfqq_state_flags { @@ -928,21 +1007,20 @@ struct bfq_group { char blkg_path[128]; /* reference counter (see comments in bfq_bic_update_cgroup) */ - int ref; - /* Is bfq_group still online? */ - bool online; + refcount_t ref; struct bfq_entity entity; struct bfq_sched_data sched_data; - void *bfqd; + struct bfq_data *bfqd; - struct bfq_queue *async_bfqq[2][IOPRIO_NR_LEVELS]; - struct bfq_queue *async_idle_bfqq; + struct bfq_queue *async_bfqq[2][IOPRIO_NR_LEVELS][BFQ_MAX_ACTUATORS]; + struct bfq_queue *async_idle_bfqq[BFQ_MAX_ACTUATORS]; struct bfq_entity *my_entity; int active_entities; + int num_queues_with_pending_reqs; struct rb_root rq_pos_tree; @@ -954,8 +1032,8 @@ struct bfq_group { struct bfq_entity entity; struct bfq_sched_data sched_data; - struct bfq_queue *async_bfqq[2][IOPRIO_NR_LEVELS]; - struct bfq_queue *async_idle_bfqq; + struct bfq_queue *async_bfqq[2][IOPRIO_NR_LEVELS][BFQ_MAX_ACTUATORS]; + struct bfq_queue *async_idle_bfqq[BFQ_MAX_ACTUATORS]; struct rb_root rq_pos_tree; }; @@ -968,17 +1046,14 @@ struct bfq_group { extern const int bfq_timeout; -struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync); -void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync); +struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync, + unsigned int actuator_idx); +void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync, + unsigned int actuator_idx); struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic); void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq); -void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq, - struct rb_root_cached *root); -void __bfq_weights_tree_remove(struct bfq_data *bfqd, - struct bfq_queue *bfqq, - struct rb_root_cached *root); -void bfq_weights_tree_remove(struct bfq_data *bfqd, - struct bfq_queue *bfqq); +void bfq_weights_tree_add(struct bfq_queue *bfqq); +void bfq_weights_tree_remove(struct bfq_queue *bfqq); void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq, bool compensate, enum bfqq_expiration reason); void bfq_put_queue(struct bfq_queue *bfqq); @@ -993,20 +1068,23 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); /* ---------------- cgroups-support interface ---------------- */ void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq); -void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, - blk_opf_t opf); void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf); void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf); void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns, u64 io_start_time_ns, blk_opf_t opf); void bfqg_stats_update_dequeue(struct bfq_group *bfqg); -void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg); -void bfqg_stats_update_idle_time(struct bfq_group *bfqg); void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg); -void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg); void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct bfq_group *bfqg); +#ifdef CONFIG_BFQ_CGROUP_DEBUG +void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, + blk_opf_t opf); +void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg); +void bfqg_stats_update_idle_time(struct bfq_group *bfqg); +void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg); +#endif + void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg); void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio); void bfq_end_wr_async(struct bfq_data *bfqd); @@ -1077,9 +1155,10 @@ void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq); void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, bool expiration); -void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, - bool expiration); -void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq); +void bfq_del_bfqq_busy(struct bfq_queue *bfqq, bool expiration); +void bfq_add_bfqq_busy(struct bfq_queue *bfqq); +void bfq_add_bfqq_in_groups_with_pending_reqs(struct bfq_queue *bfqq); +void bfq_del_bfqq_in_groups_with_pending_reqs(struct bfq_queue *bfqq); /* --------------- end of interface of B-WF2Q+ ---------------- */