// SPDX-License-Identifier: GPL-2.0
/*
 * Data Access Monitor
 *
 * Author: SeongJae Park <sjpark@amazon.de>
 */
#define pr_fmt(fmt) "damon: " fmt

#include <linux/damon.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/random.h>
#include <linux/slab.h>

/* Get a random number in [l, r) */
#define damon_rand(l, r) (l + prandom_u32_max(r - l))

static DEFINE_MUTEX(damon_lock);
static int nr_running_ctxs;
/*
 * Construct a damon_region struct
 *
 * Returns the pointer to the new struct on success, or NULL otherwise
 */
struct damon_region *damon_new_region(unsigned long start, unsigned long end)
{
	struct damon_region *region;

	region = kmalloc(sizeof(*region), GFP_KERNEL);
	if (!region)
		return NULL;

	region->ar.start = start;
	region->ar.end = end;
	region->nr_accesses = 0;
	INIT_LIST_HEAD(&region->list);

	return region;
}
/*
 * Add a region between two other regions
 */
inline void damon_insert_region(struct damon_region *r,
		struct damon_region *prev, struct damon_region *next,
		struct damon_target *t)
{
	__list_add(&r->list, &prev->list, &next->list);
	t->nr_regions++;
}

void damon_add_region(struct damon_region *r, struct damon_target *t)
{
	list_add_tail(&r->list, &t->regions_list);
	t->nr_regions++;
}
static void damon_del_region(struct damon_region *r, struct damon_target *t)
{
	list_del(&r->list);
	t->nr_regions--;
}

static void damon_free_region(struct damon_region *r)
{
	kfree(r);
}

void damon_destroy_region(struct damon_region *r, struct damon_target *t)
{
	damon_del_region(r, t);
	damon_free_region(r);
}
/*
 * Construct a damon_target struct
 *
 * Returns the pointer to the new struct on success, or NULL otherwise
 */
struct damon_target *damon_new_target(unsigned long id)
{
	struct damon_target *t;

	t = kmalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		return NULL;

	t->id = id;
	t->nr_regions = 0;
	INIT_LIST_HEAD(&t->regions_list);

	return t;
}
void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
{
	list_add_tail(&t->list, &ctx->adaptive_targets);
}

static void damon_del_target(struct damon_target *t)
{
	list_del(&t->list);
}

void damon_free_target(struct damon_target *t)
{
	struct damon_region *r, *next;

	damon_for_each_region_safe(r, next, t)
		damon_free_region(r);
	kfree(t);
}

void damon_destroy_target(struct damon_target *t)
{
	damon_del_target(t);
	damon_free_target(t);
}

unsigned int damon_nr_regions(struct damon_target *t)
{
	return t->nr_regions;
}
struct damon_ctx *damon_new_ctx(void)
{
	struct damon_ctx *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return NULL;

	/* Default intervals, in microseconds */
	ctx->sample_interval = 5 * 1000;
	ctx->aggr_interval = 100 * 1000;
	ctx->primitive_update_interval = 60 * 1000 * 1000;

	ktime_get_coarse_ts64(&ctx->last_aggregation);
	ctx->last_primitive_update = ctx->last_aggregation;

	mutex_init(&ctx->kdamond_lock);

	ctx->min_nr_regions = 10;
	ctx->max_nr_regions = 1000;

	INIT_LIST_HEAD(&ctx->adaptive_targets);

	return ctx;
}
static void damon_destroy_targets(struct damon_ctx *ctx)
{
	struct damon_target *t, *next_t;

	if (ctx->primitive.cleanup) {
		ctx->primitive.cleanup(ctx);
		return;
	}

	damon_for_each_target_safe(t, next_t, ctx)
		damon_destroy_target(t);
}

void damon_destroy_ctx(struct damon_ctx *ctx)
{
	damon_destroy_targets(ctx);
	kfree(ctx);
}
/**
 * damon_set_attrs() - Set attributes for the monitoring.
 * @ctx:		monitoring context
 * @sample_int:		time interval between samplings
 * @aggr_int:		time interval between aggregations
 * @primitive_upd_int:	time interval between monitoring primitive updates
 * @min_nr_reg:		minimal number of regions
 * @max_nr_reg:		maximum number of regions
 *
 * This function should not be called while the kdamond is running.
 * Every time interval is in microseconds.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
		    unsigned long aggr_int, unsigned long primitive_upd_int,
		    unsigned long min_nr_reg, unsigned long max_nr_reg)
{
	if (min_nr_reg < 3) {
		pr_err("min_nr_regions (%lu) must be at least 3\n",
				min_nr_reg);
		return -EINVAL;
	}
	if (min_nr_reg > max_nr_reg) {
		pr_err("invalid nr_regions. min (%lu) > max (%lu)\n",
				min_nr_reg, max_nr_reg);
		return -EINVAL;
	}

	ctx->sample_interval = sample_int;
	ctx->aggr_interval = aggr_int;
	ctx->primitive_update_interval = primitive_upd_int;
	ctx->min_nr_regions = min_nr_reg;
	ctx->max_nr_regions = max_nr_reg;

	return 0;
}
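/*
 * Example usage (a minimal sketch; the values and the caller are
 * hypothetical): a caller owning a fresh context could request 5ms sampling,
 * 100ms aggregation, 1s primitive updates, and 10-1000 regions as below.
 *
 *	struct damon_ctx *ctx = damon_new_ctx();
 *
 *	if (!ctx || damon_set_attrs(ctx, 5000, 100000, 1000000, 10, 1000))
 *		pr_err("monitoring attributes setup failed\n");
 */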
/* Returns the size upper limit for each monitoring region */
static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
{
	struct damon_target *t;
	struct damon_region *r;
	unsigned long sz = 0;

	damon_for_each_target(t, ctx) {
		damon_for_each_region(r, t)
			sz += r->ar.end - r->ar.start;
	}

	if (ctx->min_nr_regions)
		sz /= ctx->min_nr_regions;
	if (sz < DAMON_MIN_REGION)
		sz = DAMON_MIN_REGION;

	return sz;
}
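/*
 * For example (illustrative numbers): with 1 GiB of total monitoring target
 * regions and the default 'min_nr_regions' of 10, the limit becomes roughly
 * 100 MiB per region, so merging alone can never leave fewer than about ten
 * regions.
 */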
static bool damon_kdamond_running(struct damon_ctx *ctx)
{
	bool running;

	mutex_lock(&ctx->kdamond_lock);
	running = ctx->kdamond != NULL;
	mutex_unlock(&ctx->kdamond_lock);

	return running;
}

static int kdamond_fn(void *data);
/*
 * __damon_start() - Starts monitoring with given context.
 * @ctx:	monitoring context
 *
 * This function should be called while damon_lock is held.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int __damon_start(struct damon_ctx *ctx)
{
	int err = -EBUSY;

	mutex_lock(&ctx->kdamond_lock);
	if (!ctx->kdamond) {
		err = 0;
		ctx->kdamond_stop = false;
		ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
				nr_running_ctxs);
		if (IS_ERR(ctx->kdamond)) {
			err = PTR_ERR(ctx->kdamond);
			ctx->kdamond = NULL;
		}
	}
	mutex_unlock(&ctx->kdamond_lock);

	return err;
}
/**
 * damon_start() - Starts the monitorings for a given group of contexts.
 * @ctxs:	an array of the pointers for contexts to start monitoring
 * @nr_ctxs:	size of @ctxs
 *
 * This function starts a group of monitoring threads for a group of monitoring
 * contexts. One thread per each context is created and run in parallel. The
 * caller should handle synchronization between the threads by itself. If a
 * group of threads created by another 'damon_start()' call is currently
 * running, this function does nothing but returns -EBUSY.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_start(struct damon_ctx **ctxs, int nr_ctxs)
{
	int i;
	int err = 0;

	mutex_lock(&damon_lock);
	if (nr_running_ctxs) {
		mutex_unlock(&damon_lock);
		return -EBUSY;
	}

	for (i = 0; i < nr_ctxs; i++) {
		err = __damon_start(ctxs[i]);
		if (err)
			break;
		nr_running_ctxs++;
	}
	mutex_unlock(&damon_lock);

	return err;
}
/*
 * __damon_stop() - Stops monitoring of given context.
 * @ctx:	monitoring context
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int __damon_stop(struct damon_ctx *ctx)
{
	mutex_lock(&ctx->kdamond_lock);
	if (ctx->kdamond) {
		ctx->kdamond_stop = true;
		mutex_unlock(&ctx->kdamond_lock);
		while (damon_kdamond_running(ctx))
			usleep_range(ctx->sample_interval,
					ctx->sample_interval * 2);
		return 0;
	}
	mutex_unlock(&ctx->kdamond_lock);

	return -EPERM;
}
/**
 * damon_stop() - Stops the monitorings for a given group of contexts.
 * @ctxs:	an array of the pointers for contexts to stop monitoring
 * @nr_ctxs:	size of @ctxs
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
{
	int i, err = 0;

	for (i = 0; i < nr_ctxs; i++) {
		/* nr_running_ctxs is decremented in kdamond_fn */
		err = __damon_stop(ctxs[i]);
		if (err)
			return err;
	}

	return err;
}
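/*
 * Example usage (a minimal sketch; the workload function is hypothetical):
 * a caller could bracket the section of interest with damon_start() and
 * damon_stop() as below.
 *
 *	struct damon_ctx *ctxs[] = {ctx};
 *
 *	if (!damon_start(ctxs, 1)) {
 *		run_workload_of_interest();
 *		damon_stop(ctxs, 1);
 *	}
 */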
/**
 * damon_check_reset_time_interval() - Check if a time interval is elapsed.
 * @baseline:	the time to check whether the interval has elapsed since
 * @interval:	the time interval (microseconds)
 *
 * See whether the given time interval has passed since the given baseline
 * time. If so, it also updates the baseline to current time for next check.
 *
 * Return:	true if the time interval has passed, or false otherwise.
 */
static bool damon_check_reset_time_interval(struct timespec64 *baseline,
		unsigned long interval)
{
	struct timespec64 now;

	ktime_get_coarse_ts64(&now);
	if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) <
			interval * 1000)
		return false;
	*baseline = now;
	return true;
}
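/*
 * For example (illustrative numbers): with an 'interval' of 100000 (100ms),
 * this returns false until 100000 * 1000 nanoseconds have elapsed since
 * 'baseline', then resets 'baseline' to now so that the next 100ms window
 * starts from this check.
 */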
/*
 * Check whether it is time to flush the aggregated information
 */
static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx)
{
	return damon_check_reset_time_interval(&ctx->last_aggregation,
			ctx->aggr_interval);
}

/*
 * Reset the aggregated monitoring results ('nr_accesses' of each region).
 */
static void kdamond_reset_aggregated(struct damon_ctx *c)
{
	struct damon_target *t;

	damon_for_each_target(t, c) {
		struct damon_region *r;

		damon_for_each_region(r, t)
			r->nr_accesses = 0;
	}
}

#define sz_damon_region(r) (r->ar.end - r->ar.start)
/*
 * Merge two adjacent regions into one region
 */
static void damon_merge_two_regions(struct damon_target *t,
		struct damon_region *l, struct damon_region *r)
{
	unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r);

	/* Weight the merged access count by each region's size */
	l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
			(sz_l + sz_r);
	l->ar.end = r->ar.end;
	damon_destroy_region(r, t);
}
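/*
 * For example (illustrative numbers): merging an 8192-byte region with
 * 'nr_accesses' of 10 and an adjacent 4096-byte region with 'nr_accesses'
 * of 4 results in (10 * 8192 + 4 * 4096) / 12288 = 8 for the merged region.
 */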
#define diff_of(a, b) (a > b ? a - b : b - a)

/*
 * Merge adjacent regions having similar access frequencies
 *
 * t		target affected by this merge operation
 * thres	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 */
static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
				   unsigned long sz_limit)
{
	struct damon_region *r, *prev = NULL, *next;

	damon_for_each_region_safe(r, next, t) {
		if (prev && prev->ar.end == r->ar.start &&
		    diff_of(prev->nr_accesses, r->nr_accesses) <= thres &&
		    sz_damon_region(prev) + sz_damon_region(r) <= sz_limit)
			damon_merge_two_regions(t, prev, r);
		else
			prev = r;
	}
}
/*
 * Merge adjacent regions having similar access frequencies
 *
 * threshold	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 *
 * This function merges monitoring target regions which are adjacent and their
 * access frequencies are similar. This is for minimizing the monitoring
 * overhead under the dynamically changeable access pattern. If a merge was
 * unnecessarily made, later 'kdamond_split_regions()' will revert it.
 */
static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
				  unsigned long sz_limit)
{
	struct damon_target *t;

	damon_for_each_target(t, c)
		damon_merge_regions_of(t, threshold, sz_limit);
}
/*
 * Split a region in two
 *
 * r		the region to be split
 * sz_r		size of the first sub-region that will be made
 */
static void damon_split_region_at(struct damon_ctx *ctx,
		struct damon_target *t, struct damon_region *r,
		unsigned long sz_r)
{
	struct damon_region *new;

	new = damon_new_region(r->ar.start + sz_r, r->ar.end);
	if (!new)
		return;

	r->ar.end = new->ar.start;

	damon_insert_region(new, r, damon_next_region(r), t);
}
/* Split every region in the given target into 'nr_subs' regions */
static void damon_split_regions_of(struct damon_ctx *ctx,
				   struct damon_target *t, int nr_subs)
{
	struct damon_region *r, *next;
	unsigned long sz_region, sz_sub = 0;
	int i;

	damon_for_each_region_safe(r, next, t) {
		sz_region = r->ar.end - r->ar.start;

		for (i = 0; i < nr_subs - 1 &&
				sz_region > 2 * DAMON_MIN_REGION; i++) {
			/*
			 * Randomly select size of left sub-region to be at
			 * least 10 percent and at most 90% of original region
			 */
			sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
					sz_region / 10, DAMON_MIN_REGION);
			/* Do not allow blank region */
			if (sz_sub == 0 || sz_sub >= sz_region)
				continue;

			damon_split_region_at(ctx, t, r, sz_sub);
			sz_region = sz_sub;
		}
	}
}
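/*
 * For example (illustrative numbers): for a 100 MiB region, a damon_rand(1,
 * 10) outcome of 3 splits off the first ALIGN_DOWN(30 MiB, DAMON_MIN_REGION)
 * bytes as the left sub-region; when 'nr_subs' is 3, the next iteration
 * splits that left sub-region once more.
 */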
/*
 * Split every target region into randomly-sized small regions
 *
 * This function splits every target region into random-sized small regions if
 * current total number of the regions is equal or smaller than half of the
 * user-specified maximum number of regions. This is for maximizing the
 * monitoring accuracy under the dynamically changeable access patterns. If a
 * split was unnecessarily made, later 'kdamond_merge_regions()' will revert
 * it.
 */
static void kdamond_split_regions(struct damon_ctx *ctx)
{
	struct damon_target *t;
	unsigned int nr_regions = 0;
	static unsigned int last_nr_regions;
	int nr_subregions = 2;

	damon_for_each_target(t, ctx)
		nr_regions += damon_nr_regions(t);

	if (nr_regions > ctx->max_nr_regions / 2)
		return;

	/* Maybe the middle of the region has different access frequency */
	if (last_nr_regions == nr_regions &&
			nr_regions < ctx->max_nr_regions / 3)
		nr_subregions = 3;

	damon_for_each_target(t, ctx)
		damon_split_regions_of(ctx, t, nr_subregions);

	last_nr_regions = nr_regions;
}
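/*
 * For example (illustrative numbers): with the default 'max_nr_regions' of
 * 1000, splitting happens only while the total number of regions is at most
 * 500, and each region is split into three instead of two sub-regions when
 * the count has not changed since the last split and is still below 333.
 */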
/*
 * Check whether it is time to check and apply the target monitoring regions
 *
 * Returns true if it is.
 */
static bool kdamond_need_update_primitive(struct damon_ctx *ctx)
{
	return damon_check_reset_time_interval(&ctx->last_primitive_update,
			ctx->primitive_update_interval);
}
/*
 * Check whether current monitoring should be stopped
 *
 * The monitoring is stopped when either the user requested to stop, or all
 * monitoring targets are invalid.
 *
 * Returns true if the current monitoring needs to stop.
 */
static bool kdamond_need_stop(struct damon_ctx *ctx)
{
	struct damon_target *t;
	bool stop;

	mutex_lock(&ctx->kdamond_lock);
	stop = ctx->kdamond_stop;
	mutex_unlock(&ctx->kdamond_lock);
	if (stop)
		return true;

	if (!ctx->primitive.target_valid)
		return false;

	damon_for_each_target(t, ctx) {
		if (ctx->primitive.target_valid(t))
			return false;
	}

	return true;
}
static void set_kdamond_stop(struct damon_ctx *ctx)
{
	mutex_lock(&ctx->kdamond_lock);
	ctx->kdamond_stop = true;
	mutex_unlock(&ctx->kdamond_lock);
}
/*
 * The monitoring daemon that runs as a kernel thread
 */
static int kdamond_fn(void *data)
{
	struct damon_ctx *ctx = (struct damon_ctx *)data;
	struct damon_target *t;
	struct damon_region *r, *next;
	unsigned int max_nr_accesses = 0;
	unsigned long sz_limit = 0;

	mutex_lock(&ctx->kdamond_lock);
	pr_info("kdamond (%d) starts\n", ctx->kdamond->pid);
	mutex_unlock(&ctx->kdamond_lock);

	if (ctx->primitive.init)
		ctx->primitive.init(ctx);
	if (ctx->callback.before_start && ctx->callback.before_start(ctx))
		set_kdamond_stop(ctx);

	sz_limit = damon_region_sz_limit(ctx);

	while (!kdamond_need_stop(ctx)) {
		if (ctx->primitive.prepare_access_checks)
			ctx->primitive.prepare_access_checks(ctx);
		if (ctx->callback.after_sampling &&
				ctx->callback.after_sampling(ctx))
			set_kdamond_stop(ctx);

		usleep_range(ctx->sample_interval, ctx->sample_interval + 1);

		if (ctx->primitive.check_accesses)
			max_nr_accesses = ctx->primitive.check_accesses(ctx);

		if (kdamond_aggregate_interval_passed(ctx)) {
			kdamond_merge_regions(ctx,
					max_nr_accesses / 10,
					sz_limit);
			if (ctx->callback.after_aggregation &&
					ctx->callback.after_aggregation(ctx))
				set_kdamond_stop(ctx);
			kdamond_reset_aggregated(ctx);
			kdamond_split_regions(ctx);
			if (ctx->primitive.reset_aggregated)
				ctx->primitive.reset_aggregated(ctx);
		}

		if (kdamond_need_update_primitive(ctx)) {
			if (ctx->primitive.update)
				ctx->primitive.update(ctx);
			sz_limit = damon_region_sz_limit(ctx);
		}
	}
	/* The monitoring is terminating; release all regions */
	damon_for_each_target(t, ctx) {
		damon_for_each_region_safe(r, next, t)
			damon_destroy_region(r, t);
	}

	if (ctx->callback.before_terminate &&
			ctx->callback.before_terminate(ctx))
		set_kdamond_stop(ctx);
	if (ctx->primitive.cleanup)
		ctx->primitive.cleanup(ctx);

	pr_debug("kdamond (%d) finishes\n", ctx->kdamond->pid);
	mutex_lock(&ctx->kdamond_lock);
	ctx->kdamond = NULL;
	mutex_unlock(&ctx->kdamond_lock);

	mutex_lock(&damon_lock);
	nr_running_ctxs--;
	mutex_unlock(&damon_lock);

	do_exit(0);
}