mm/damon/core.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Data Access Monitor
   4  *
   5  * Author: SeongJae Park <sjpark@amazon.de>
   6  */
   7
   8 #define pr_fmt(fmt) "damon: " fmt
   9
  10 #include <linux/damon.h>
  11 #include <linux/delay.h>
  12 #include <linux/kthread.h>
  13 #include <linux/mm.h>
  14 #include <linux/slab.h>
  15 #include <linux/string.h>
  16
  17 #define CREATE_TRACE_POINTS
  18 #include <trace/events/damon.h>
  19
  20 #ifdef CONFIG_DAMON_KUNIT_TEST
  21 #undef DAMON_MIN_REGION
  22 #define DAMON_MIN_REGION 1
  23 #endif
  24
  25 static DEFINE_MUTEX(damon_lock);
  26 static int nr_running_ctxs;
  27 static bool running_exclusive_ctxs;
  28
  29 static DEFINE_MUTEX(damon_ops_lock);
  30 static struct damon_operations damon_registered_ops[NR_DAMON_OPS];
  31
  32 /* Should be called under damon_ops_lock with id smaller than NR_DAMON_OPS */
  33 static bool __damon_is_registered_ops(enum damon_ops_id id)
  34 {
  35         struct damon_operations empty_ops = {};
  36
  37         if (!memcmp(&empty_ops, &damon_registered_ops[id], sizeof(empty_ops)))
  38                 return false;
  39         return true;
  40 }
  41
  42 /**
  43  * damon_is_registered_ops() - Check if a given damon_operations is registered.
  44  * @id: Id of the damon_operations to check if registered.
  45  *
  46  * Return: true if the ops is set, false otherwise.
  47  */
  48 bool damon_is_registered_ops(enum damon_ops_id id)
  49 {
  50         bool registered;
  51
  52         if (id >= NR_DAMON_OPS)
  53                 return false;
  54         mutex_lock(&damon_ops_lock);
  55         registered = __damon_is_registered_ops(id);
  56         mutex_unlock(&damon_ops_lock);
  57         return registered;
  58 }
  59
  60 /**
  61  * damon_register_ops() - Register a monitoring operations set to DAMON.
  62  * @ops:        monitoring operations set to register.
  63  *
  64  * This function registers a monitoring operations set of valid &struct
  65  * damon_operations->id so that others can find and use them later.
  66  *
  67  * Return: 0 on success, negative error code otherwise.
  68  */
  69 int damon_register_ops(struct damon_operations *ops)
  70 {
  71         int err = 0;
  72
  73         if (ops->id >= NR_DAMON_OPS)
  74                 return -EINVAL;
  75         mutex_lock(&damon_ops_lock);
  76         /* Fail for already registered ops */
  77         if (__damon_is_registered_ops(ops->id)) {
  78                 err = -EINVAL;
  79                 goto out;
  80         }
  81         damon_registered_ops[ops->id] = *ops;
  82 out:
  83         mutex_unlock(&damon_ops_lock);
  84         return err;
  85 }
  86
  87 /**
  88  * damon_select_ops() - Select a monitoring operations to use with the context.
  89  * @ctx:        monitoring context to use the operations.
  90  * @id:         id of the registered monitoring operations to select.
  91  *
  92  * This function finds registered monitoring operations set of @id and make
  93  * @ctx to use it.
  94  *
  95  * Return: 0 on success, negative error code otherwise.
  96  */
  97 int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id)
  98 {
  99         int err = 0;
 100
 101         if (id >= NR_DAMON_OPS)
 102                 return -EINVAL;
 103
 104         mutex_lock(&damon_ops_lock);
 105         if (!__damon_is_registered_ops(id))
 106                 err = -EINVAL;
 107         else
 108                 ctx->ops = damon_registered_ops[id];
 109         mutex_unlock(&damon_ops_lock);
 110         return err;
 111 }
 112
 113 /*
 114  * Construct a damon_region struct
 115  *
 116  * Returns the pointer to the new struct if success, or NULL otherwise
 117  */
 118 struct damon_region *damon_new_region(unsigned long start, unsigned long end)
 119 {
 120         struct damon_region *region;
 121
 122         region = kmalloc(sizeof(*region), GFP_KERNEL);
 123         if (!region)
 124                 return NULL;
 125
 126         region->ar.start = start;
 127         region->ar.end = end;
 128         region->nr_accesses = 0;
 129         INIT_LIST_HEAD(&region->list);
 130
 131         region->age = 0;
 132         region->last_nr_accesses = 0;
 133
 134         return region;
 135 }
 136
 137 void damon_add_region(struct damon_region *r, struct damon_target *t)
 138 {
 139         list_add_tail(&r->list, &t->regions_list);
 140         t->nr_regions++;
 141 }
 142
 143 static void damon_del_region(struct damon_region *r, struct damon_target *t)
 144 {
 145         list_del(&r->list);
 146         t->nr_regions--;
 147 }
 148
 149 static void damon_free_region(struct damon_region *r)
 150 {
 151         kfree(r);
 152 }
 153
 154 void damon_destroy_region(struct damon_region *r, struct damon_target *t)
 155 {
 156         damon_del_region(r, t);
 157         damon_free_region(r);
 158 }
 159
 160 /*
 161  * Check whether a region is intersecting an address range
 162  *
 163  * Returns true if it is.
 164  */
 165 static bool damon_intersect(struct damon_region *r,
 166                 struct damon_addr_range *re)
 167 {
 168         return !(r->ar.end <= re->start || re->end <= r->ar.start);
 169 }
 170
 171 /*
 172  * damon_set_regions() - Set regions of a target for given address ranges.
 173  * @t:          the given target.
 174  * @ranges:     array of new monitoring target ranges.
 175  * @nr_ranges:  length of @ranges.
 176  *
 177  * This function adds new regions to, or modify existing regions of a
 178  * monitoring target to fit in specific ranges.
 179  *
 180  * Return: 0 if success, or negative error code otherwise.
 181  */
 182 int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges,
 183                 unsigned int nr_ranges)
 184 {
 185         struct damon_region *r, *next;
 186         unsigned int i;
 187
 188         /* Remove regions which are not in the new ranges */
 189         damon_for_each_region_safe(r, next, t) {
 190                 for (i = 0; i < nr_ranges; i++) {
 191                         if (damon_intersect(r, &ranges[i]))
 192                                 break;
 193                 }
 194                 if (i == nr_ranges)
 195                         damon_destroy_region(r, t);
 196         }
 197
 198         /* Add new regions or resize existing regions to fit in the ranges */
 199         for (i = 0; i < nr_ranges; i++) {
 200                 struct damon_region *first = NULL, *last, *newr;
 201                 struct damon_addr_range *range;
 202
 203                 range = &ranges[i];
 204                 /* Get the first/last regions intersecting with the range */
 205                 damon_for_each_region(r, t) {
 206                         if (damon_intersect(r, range)) {
 207                                 if (!first)
 208                                         first = r;
 209                                 last = r;
 210                         }
 211                         if (r->ar.start >= range->end)
 212                                 break;
 213                 }
 214                 if (!first) {
 215                         /* no region intersects with this range */
 216                         newr = damon_new_region(
 217                                         ALIGN_DOWN(range->start,
 218                                                 DAMON_MIN_REGION),
 219                                         ALIGN(range->end, DAMON_MIN_REGION));
 220                         if (!newr)
 221                                 return -ENOMEM;
 222                         damon_insert_region(newr, damon_prev_region(r), r, t);
 223                 } else {
 224                         /* resize intersecting regions to fit in this range */
 225                         first->ar.start = ALIGN_DOWN(range->start,
 226                                         DAMON_MIN_REGION);
 227                         last->ar.end = ALIGN(range->end, DAMON_MIN_REGION);
 228                 }
 229         }
 230         return 0;
 231 }
 232
 233 struct damos *damon_new_scheme(
 234                 unsigned long min_sz_region, unsigned long max_sz_region,
 235                 unsigned int min_nr_accesses, unsigned int max_nr_accesses,
 236                 unsigned int min_age_region, unsigned int max_age_region,
 237                 enum damos_action action, struct damos_quota *quota,
 238                 struct damos_watermarks *wmarks)
 239 {
 240         struct damos *scheme;
 241
 242         scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
 243         if (!scheme)
 244                 return NULL;
 245         scheme->min_sz_region = min_sz_region;
 246         scheme->max_sz_region = max_sz_region;
 247         scheme->min_nr_accesses = min_nr_accesses;
 248         scheme->max_nr_accesses = max_nr_accesses;
 249         scheme->min_age_region = min_age_region;
 250         scheme->max_age_region = max_age_region;
 251         scheme->action = action;
 252         scheme->stat = (struct damos_stat){};
 253         INIT_LIST_HEAD(&scheme->list);
 254
 255         scheme->quota.ms = quota->ms;
 256         scheme->quota.sz = quota->sz;
 257         scheme->quota.reset_interval = quota->reset_interval;
 258         scheme->quota.weight_sz = quota->weight_sz;
 259         scheme->quota.weight_nr_accesses = quota->weight_nr_accesses;
 260         scheme->quota.weight_age = quota->weight_age;
 261         scheme->quota.total_charged_sz = 0;
 262         scheme->quota.total_charged_ns = 0;
 263         scheme->quota.esz = 0;
 264         scheme->quota.charged_sz = 0;
 265         scheme->quota.charged_from = 0;
 266         scheme->quota.charge_target_from = NULL;
 267         scheme->quota.charge_addr_from = 0;
 268
 269         scheme->wmarks.metric = wmarks->metric;
 270         scheme->wmarks.interval = wmarks->interval;
 271         scheme->wmarks.high = wmarks->high;
 272         scheme->wmarks.mid = wmarks->mid;
 273         scheme->wmarks.low = wmarks->low;
 274         scheme->wmarks.activated = true;
 275
 276         return scheme;
 277 }
 278
 279 void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
 280 {
 281         list_add_tail(&s->list, &ctx->schemes);
 282 }
 283
 284 static void damon_del_scheme(struct damos *s)
 285 {
 286         list_del(&s->list);
 287 }
 288
 289 static void damon_free_scheme(struct damos *s)
 290 {
 291         kfree(s);
 292 }
 293
 294 void damon_destroy_scheme(struct damos *s)
 295 {
 296         damon_del_scheme(s);
 297         damon_free_scheme(s);
 298 }
 299
 300 /*
 301  * Construct a damon_target struct
 302  *
 303  * Returns the pointer to the new struct if success, or NULL otherwise
 304  */
 305 struct damon_target *damon_new_target(void)
 306 {
 307         struct damon_target *t;
 308
 309         t = kmalloc(sizeof(*t), GFP_KERNEL);
 310         if (!t)
 311                 return NULL;
 312
 313         t->pid = NULL;
 314         t->nr_regions = 0;
 315         INIT_LIST_HEAD(&t->regions_list);
 316
 317         return t;
 318 }
 319
 320 void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
 321 {
 322         list_add_tail(&t->list, &ctx->adaptive_targets);
 323 }
 324
 325 bool damon_targets_empty(struct damon_ctx *ctx)
 326 {
 327         return list_empty(&ctx->adaptive_targets);
 328 }
 329
 330 static void damon_del_target(struct damon_target *t)
 331 {
 332         list_del(&t->list);
 333 }
 334
 335 void damon_free_target(struct damon_target *t)
 336 {
 337         struct damon_region *r, *next;
 338
 339         damon_for_each_region_safe(r, next, t)
 340                 damon_free_region(r);
 341         kfree(t);
 342 }
 343
 344 void damon_destroy_target(struct damon_target *t)
 345 {
 346         damon_del_target(t);
 347         damon_free_target(t);
 348 }
 349
 350 unsigned int damon_nr_regions(struct damon_target *t)
 351 {
 352         return t->nr_regions;
 353 }
 354
 355 struct damon_ctx *damon_new_ctx(void)
 356 {
 357         struct damon_ctx *ctx;
 358
 359         ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 360         if (!ctx)
 361                 return NULL;
 362
 363         ctx->sample_interval = 5 * 1000;
 364         ctx->aggr_interval = 100 * 1000;
 365         ctx->ops_update_interval = 60 * 1000 * 1000;
 366
 367         ktime_get_coarse_ts64(&ctx->last_aggregation);
 368         ctx->last_ops_update = ctx->last_aggregation;
 369
 370         mutex_init(&ctx->kdamond_lock);
 371
 372         ctx->min_nr_regions = 10;
 373         ctx->max_nr_regions = 1000;
 374
 375         INIT_LIST_HEAD(&ctx->adaptive_targets);
 376         INIT_LIST_HEAD(&ctx->schemes);
 377
 378         return ctx;
 379 }
 380
 381 static void damon_destroy_targets(struct damon_ctx *ctx)
 382 {
 383         struct damon_target *t, *next_t;
 384
 385         if (ctx->ops.cleanup) {
 386                 ctx->ops.cleanup(ctx);
 387                 return;
 388         }
 389
 390         damon_for_each_target_safe(t, next_t, ctx)
 391                 damon_destroy_target(t);
 392 }
 393
 394 void damon_destroy_ctx(struct damon_ctx *ctx)
 395 {
 396         struct damos *s, *next_s;
 397
 398         damon_destroy_targets(ctx);
 399
 400         damon_for_each_scheme_safe(s, next_s, ctx)
 401                 damon_destroy_scheme(s);
 402
 403         kfree(ctx);
 404 }
 405
 406 /**
 407  * damon_set_attrs() - Set attributes for the monitoring.
 408  * @ctx:                monitoring context
 409  * @sample_int:         time interval between samplings
 410  * @aggr_int:           time interval between aggregations
 411  * @ops_upd_int:        time interval between monitoring operations updates
 412  * @min_nr_reg:         minimal number of regions
 413  * @max_nr_reg:         maximum number of regions
 414  *
 415  * This function should not be called while the kdamond is running.
 416  * Every time interval is in micro-seconds.
 417  *
 418  * Return: 0 on success, negative error code otherwise.
 419  */
 420 int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
 421                     unsigned long aggr_int, unsigned long ops_upd_int,
 422                     unsigned long min_nr_reg, unsigned long max_nr_reg)
 423 {
 424         if (min_nr_reg < 3)
 425                 return -EINVAL;
 426         if (min_nr_reg > max_nr_reg)
 427                 return -EINVAL;
 428
 429         ctx->sample_interval = sample_int;
 430         ctx->aggr_interval = aggr_int;
 431         ctx->ops_update_interval = ops_upd_int;
 432         ctx->min_nr_regions = min_nr_reg;
 433         ctx->max_nr_regions = max_nr_reg;
 434
 435         return 0;
 436 }
 437
 438 /**
 439  * damon_set_schemes() - Set data access monitoring based operation schemes.
 440  * @ctx:        monitoring context
 441  * @schemes:    array of the schemes
 442  * @nr_schemes: number of entries in @schemes
 443  *
 444  * This function should not be called while the kdamond of the context is
 445  * running.
 446  *
 447  * Return: 0 if success, or negative error code otherwise.
 448  */
 449 int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes,
 450                         ssize_t nr_schemes)
 451 {
 452         struct damos *s, *next;
 453         ssize_t i;
 454
 455         damon_for_each_scheme_safe(s, next, ctx)
 456                 damon_destroy_scheme(s);
 457         for (i = 0; i < nr_schemes; i++)
 458                 damon_add_scheme(ctx, schemes[i]);
 459         return 0;
 460 }
 461
 462 /**
 463  * damon_nr_running_ctxs() - Return number of currently running contexts.
 464  */
 465 int damon_nr_running_ctxs(void)
 466 {
 467         int nr_ctxs;
 468
 469         mutex_lock(&damon_lock);
 470         nr_ctxs = nr_running_ctxs;
 471         mutex_unlock(&damon_lock);
 472
 473         return nr_ctxs;
 474 }
 475
 476 /* Returns the size upper limit for each monitoring region */
 477 static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
 478 {
 479         struct damon_target *t;
 480         struct damon_region *r;
 481         unsigned long sz = 0;
 482
 483         damon_for_each_target(t, ctx) {
 484                 damon_for_each_region(r, t)
 485                         sz += r->ar.end - r->ar.start;
 486         }
 487
 488         if (ctx->min_nr_regions)
 489                 sz /= ctx->min_nr_regions;
 490         if (sz < DAMON_MIN_REGION)
 491                 sz = DAMON_MIN_REGION;
 492
 493         return sz;
 494 }
 495
 496 static int kdamond_fn(void *data);
 497
 498 /*
 499  * __damon_start() - Starts monitoring with given context.
 500  * @ctx:        monitoring context
 501  *
 502  * This function should be called while damon_lock is hold.
 503  *
 504  * Return: 0 on success, negative error code otherwise.
 505  */
 506 static int __damon_start(struct damon_ctx *ctx)
 507 {
 508         int err = -EBUSY;
 509
 510         mutex_lock(&ctx->kdamond_lock);
 511         if (!ctx->kdamond) {
 512                 err = 0;
 513                 ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
 514                                 nr_running_ctxs);
 515                 if (IS_ERR(ctx->kdamond)) {
 516                         err = PTR_ERR(ctx->kdamond);
 517                         ctx->kdamond = NULL;
 518                 }
 519         }
 520         mutex_unlock(&ctx->kdamond_lock);
 521
 522         return err;
 523 }
 524
 525 /**
 526  * damon_start() - Starts the monitorings for a given group of contexts.
 527  * @ctxs:       an array of the pointers for contexts to start monitoring
 528  * @nr_ctxs:    size of @ctxs
 529  * @exclusive:  exclusiveness of this contexts group
 530  *
 531  * This function starts a group of monitoring threads for a group of monitoring
 532  * contexts.  One thread per each context is created and run in parallel.  The
 533  * caller should handle synchronization between the threads by itself.  If
 534  * @exclusive is true and a group of threads that created by other
 535  * 'damon_start()' call is currently running, this function does nothing but
 536  * returns -EBUSY.
 537  *
 538  * Return: 0 on success, negative error code otherwise.
 539  */
 540 int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive)
 541 {
 542         int i;
 543         int err = 0;
 544
 545         mutex_lock(&damon_lock);
 546         if ((exclusive && nr_running_ctxs) ||
 547                         (!exclusive && running_exclusive_ctxs)) {
 548                 mutex_unlock(&damon_lock);
 549                 return -EBUSY;
 550         }
 551
 552         for (i = 0; i < nr_ctxs; i++) {
 553                 err = __damon_start(ctxs[i]);
 554                 if (err)
 555                         break;
 556                 nr_running_ctxs++;
 557         }
 558         if (exclusive && nr_running_ctxs)
 559                 running_exclusive_ctxs = true;
 560         mutex_unlock(&damon_lock);
 561
 562         return err;
 563 }
 564
 565 /*
 566  * __damon_stop() - Stops monitoring of a given context.
 567  * @ctx:        monitoring context
 568  *
 569  * Return: 0 on success, negative error code otherwise.
 570  */
 571 static int __damon_stop(struct damon_ctx *ctx)
 572 {
 573         struct task_struct *tsk;
 574
 575         mutex_lock(&ctx->kdamond_lock);
 576         tsk = ctx->kdamond;
 577         if (tsk) {
 578                 get_task_struct(tsk);
 579                 mutex_unlock(&ctx->kdamond_lock);
 580                 kthread_stop(tsk);
 581                 put_task_struct(tsk);
 582                 return 0;
 583         }
 584         mutex_unlock(&ctx->kdamond_lock);
 585
 586         return -EPERM;
 587 }
 588
 589 /**
 590  * damon_stop() - Stops the monitorings for a given group of contexts.
 591  * @ctxs:       an array of the pointers for contexts to stop monitoring
 592  * @nr_ctxs:    size of @ctxs
 593  *
 594  * Return: 0 on success, negative error code otherwise.
 595  */
 596 int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
 597 {
 598         int i, err = 0;
 599
 600         for (i = 0; i < nr_ctxs; i++) {
 601                 /* nr_running_ctxs is decremented in kdamond_fn */
 602                 err = __damon_stop(ctxs[i]);
 603                 if (err)
 604                         break;
 605         }
 606         return err;
 607 }
 608
 609 /*
 610  * damon_check_reset_time_interval() - Check if a time interval is elapsed.
 611  * @baseline:   the time to check whether the interval has elapsed since
 612  * @interval:   the time interval (microseconds)
 613  *
 614  * See whether the given time interval has passed since the given baseline
 615  * time.  If so, it also updates the baseline to current time for next check.
 616  *
 617  * Return:      true if the time interval has passed, or false otherwise.
 618  */
 619 static bool damon_check_reset_time_interval(struct timespec64 *baseline,
 620                 unsigned long interval)
 621 {
 622         struct timespec64 now;
 623
 624         ktime_get_coarse_ts64(&now);
 625         if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) <
 626                         interval * 1000)
 627                 return false;
 628         *baseline = now;
 629         return true;
 630 }
 631
 632 /*
 633  * Check whether it is time to flush the aggregated information
 634  */
 635 static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx)
 636 {
 637         return damon_check_reset_time_interval(&ctx->last_aggregation,
 638                         ctx->aggr_interval);
 639 }
 640
 641 /*
 642  * Reset the aggregated monitoring results ('nr_accesses' of each region).
 643  */
 644 static void kdamond_reset_aggregated(struct damon_ctx *c)
 645 {
 646         struct damon_target *t;
 647         unsigned int ti = 0;    /* target's index */
 648
 649         damon_for_each_target(t, c) {
 650                 struct damon_region *r;
 651
 652                 damon_for_each_region(r, t) {
 653                         trace_damon_aggregated(t, ti, r, damon_nr_regions(t));
 654                         r->last_nr_accesses = r->nr_accesses;
 655                         r->nr_accesses = 0;
 656                 }
 657                 ti++;
 658         }
 659 }
 660
 661 static void damon_split_region_at(struct damon_ctx *ctx,
 662                 struct damon_target *t, struct damon_region *r,
 663                 unsigned long sz_r);
 664
 665 static bool __damos_valid_target(struct damon_region *r, struct damos *s)
 666 {
 667         unsigned long sz;
 668
 669         sz = r->ar.end - r->ar.start;
 670         return s->min_sz_region <= sz && sz <= s->max_sz_region &&
 671                 s->min_nr_accesses <= r->nr_accesses &&
 672                 r->nr_accesses <= s->max_nr_accesses &&
 673                 s->min_age_region <= r->age && r->age <= s->max_age_region;
 674 }
 675
 676 static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
 677                 struct damon_region *r, struct damos *s)
 678 {
 679         bool ret = __damos_valid_target(r, s);
 680
 681         if (!ret || !s->quota.esz || !c->ops.get_scheme_score)
 682                 return ret;
 683
 684         return c->ops.get_scheme_score(c, t, r, s) >= s->quota.min_score;
 685 }
 686
 687 static void damon_do_apply_schemes(struct damon_ctx *c,
 688                                    struct damon_target *t,
 689                                    struct damon_region *r)
 690 {
 691         struct damos *s;
 692
 693         damon_for_each_scheme(s, c) {
 694                 struct damos_quota *quota = &s->quota;
 695                 unsigned long sz = r->ar.end - r->ar.start;
 696                 struct timespec64 begin, end;
 697                 unsigned long sz_applied = 0;
 698
 699                 if (!s->wmarks.activated)
 700                         continue;
 701
 702                 /* Check the quota */
 703                 if (quota->esz && quota->charged_sz >= quota->esz)
 704                         continue;
 705
 706                 /* Skip previously charged regions */
 707                 if (quota->charge_target_from) {
 708                         if (t != quota->charge_target_from)
 709                                 continue;
 710                         if (r == damon_last_region(t)) {
 711                                 quota->charge_target_from = NULL;
 712                                 quota->charge_addr_from = 0;
 713                                 continue;
 714                         }
 715                         if (quota->charge_addr_from &&
 716                                         r->ar.end <= quota->charge_addr_from)
 717                                 continue;
 718
 719                         if (quota->charge_addr_from && r->ar.start <
 720                                         quota->charge_addr_from) {
 721                                 sz = ALIGN_DOWN(quota->charge_addr_from -
 722                                                 r->ar.start, DAMON_MIN_REGION);
 723                                 if (!sz) {
 724                                         if (r->ar.end - r->ar.start <=
 725                                                         DAMON_MIN_REGION)
 726                                                 continue;
 727                                         sz = DAMON_MIN_REGION;
 728                                 }
 729                                 damon_split_region_at(c, t, r, sz);
 730                                 r = damon_next_region(r);
 731                                 sz = r->ar.end - r->ar.start;
 732                         }
 733                         quota->charge_target_from = NULL;
 734                         quota->charge_addr_from = 0;
 735                 }
 736
 737                 if (!damos_valid_target(c, t, r, s))
 738                         continue;
 739
 740                 /* Apply the scheme */
 741                 if (c->ops.apply_scheme) {
 742                         if (quota->esz &&
 743                                         quota->charged_sz + sz > quota->esz) {
 744                                 sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
 745                                                 DAMON_MIN_REGION);
 746                                 if (!sz)
 747                                         goto update_stat;
 748                                 damon_split_region_at(c, t, r, sz);
 749                         }
 750                         ktime_get_coarse_ts64(&begin);
 751                         sz_applied = c->ops.apply_scheme(c, t, r, s);
 752                         ktime_get_coarse_ts64(&end);
 753                         quota->total_charged_ns += timespec64_to_ns(&end) -
 754                                 timespec64_to_ns(&begin);
 755                         quota->charged_sz += sz;
 756                         if (quota->esz && quota->charged_sz >= quota->esz) {
 757                                 quota->charge_target_from = t;
 758                                 quota->charge_addr_from = r->ar.end + 1;
 759                         }
 760                 }
 761                 if (s->action != DAMOS_STAT)
 762                         r->age = 0;
 763
 764 update_stat:
 765                 s->stat.nr_tried++;
 766                 s->stat.sz_tried += sz;
 767                 if (sz_applied)
 768                         s->stat.nr_applied++;
 769                 s->stat.sz_applied += sz_applied;
 770         }
 771 }
 772
 773 /* Shouldn't be called if quota->ms and quota->sz are zero */
 774 static void damos_set_effective_quota(struct damos_quota *quota)
 775 {
 776         unsigned long throughput;
 777         unsigned long esz;
 778
 779         if (!quota->ms) {
 780                 quota->esz = quota->sz;
 781                 return;
 782         }
 783
 784         if (quota->total_charged_ns)
 785                 throughput = quota->total_charged_sz * 1000000 /
 786                         quota->total_charged_ns;
 787         else
 788                 throughput = PAGE_SIZE * 1024;
 789         esz = throughput * quota->ms;
 790
 791         if (quota->sz && quota->sz < esz)
 792                 esz = quota->sz;
 793         quota->esz = esz;
 794 }
 795
 796 static void kdamond_apply_schemes(struct damon_ctx *c)
 797 {
 798         struct damon_target *t;
 799         struct damon_region *r, *next_r;
 800         struct damos *s;
 801
 802         damon_for_each_scheme(s, c) {
 803                 struct damos_quota *quota = &s->quota;
 804                 unsigned long cumulated_sz;
 805                 unsigned int score, max_score = 0;
 806
 807                 if (!s->wmarks.activated)
 808                         continue;
 809
 810                 if (!quota->ms && !quota->sz)
 811                         continue;
 812
 813                 /* New charge window starts */
 814                 if (time_after_eq(jiffies, quota->charged_from +
 815                                         msecs_to_jiffies(
 816                                                 quota->reset_interval))) {
 817                         if (quota->esz && quota->charged_sz >= quota->esz)
 818                                 s->stat.qt_exceeds++;
 819                         quota->total_charged_sz += quota->charged_sz;
 820                         quota->charged_from = jiffies;
 821                         quota->charged_sz = 0;
 822                         damos_set_effective_quota(quota);
 823                 }
 824
 825                 if (!c->ops.get_scheme_score)
 826                         continue;
 827
 828                 /* Fill up the score histogram */
 829                 memset(quota->histogram, 0, sizeof(quota->histogram));
 830                 damon_for_each_target(t, c) {
 831                         damon_for_each_region(r, t) {
 832                                 if (!__damos_valid_target(r, s))
 833                                         continue;
 834                                 score = c->ops.get_scheme_score(
 835                                                 c, t, r, s);
 836                                 quota->histogram[score] +=
 837                                         r->ar.end - r->ar.start;
 838                                 if (score > max_score)
 839                                         max_score = score;
 840                         }
 841                 }
 842
 843                 /* Set the min score limit */
 844                 for (cumulated_sz = 0, score = max_score; ; score--) {
 845                         cumulated_sz += quota->histogram[score];
 846                         if (cumulated_sz >= quota->esz || !score)
 847                                 break;
 848                 }
 849                 quota->min_score = score;
 850         }
 851
 852         damon_for_each_target(t, c) {
 853                 damon_for_each_region_safe(r, next_r, t)
 854                         damon_do_apply_schemes(c, t, r);
 855         }
 856 }
 857
 858 static inline unsigned long sz_damon_region(struct damon_region *r)
 859 {
 860         return r->ar.end - r->ar.start;
 861 }
 862
 863 /*
 864  * Merge two adjacent regions into one region
 865  */
 866 static void damon_merge_two_regions(struct damon_target *t,
 867                 struct damon_region *l, struct damon_region *r)
 868 {
 869         unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r);
 870
 871         l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
 872                         (sz_l + sz_r);
 873         l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r);
 874         l->ar.end = r->ar.end;
 875         damon_destroy_region(r, t);
 876 }
 877
 878 /*
 879  * Merge adjacent regions having similar access frequencies
 880  *
 881  * t            target affected by this merge operation
 882  * thres        '->nr_accesses' diff threshold for the merge
 883  * sz_limit     size upper limit of each region
 884  */
 885 static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
 886                                    unsigned long sz_limit)
 887 {
 888         struct damon_region *r, *prev = NULL, *next;
 889
 890         damon_for_each_region_safe(r, next, t) {
 891                 if (abs(r->nr_accesses - r->last_nr_accesses) > thres)
 892                         r->age = 0;
 893                 else
 894                         r->age++;
 895
 896                 if (prev && prev->ar.end == r->ar.start &&
 897                     abs(prev->nr_accesses - r->nr_accesses) <= thres &&
 898                     sz_damon_region(prev) + sz_damon_region(r) <= sz_limit)
 899                         damon_merge_two_regions(t, prev, r);
 900                 else
 901                         prev = r;
 902         }
 903 }
 904
 905 /*
 906  * Merge adjacent regions having similar access frequencies
 907  *
 908  * threshold    '->nr_accesses' diff threshold for the merge
 909  * sz_limit     size upper limit of each region
 910  *
 911  * This function merges monitoring target regions which are adjacent and their
 912  * access frequencies are similar.  This is for minimizing the monitoring
 913  * overhead under the dynamically changeable access pattern.  If a merge was
 914  * unnecessarily made, later 'kdamond_split_regions()' will revert it.
 915  */
 916 static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
 917                                   unsigned long sz_limit)
 918 {
 919         struct damon_target *t;
 920
 921         damon_for_each_target(t, c)
 922                 damon_merge_regions_of(t, threshold, sz_limit);
 923 }
 924
 925 /*
 926  * Split a region in two
 927  *
 928  * r            the region to be split
 929  * sz_r         size of the first sub-region that will be made
 930  */
 931 static void damon_split_region_at(struct damon_ctx *ctx,
 932                 struct damon_target *t, struct damon_region *r,
 933                 unsigned long sz_r)
 934 {
 935         struct damon_region *new;
 936
 937         new = damon_new_region(r->ar.start + sz_r, r->ar.end);
 938         if (!new)
 939                 return;
 940
 941         r->ar.end = new->ar.start;
 942
 943         new->age = r->age;
 944         new->last_nr_accesses = r->last_nr_accesses;
 945
 946         damon_insert_region(new, r, damon_next_region(r), t);
 947 }
 948
 949 /* Split every region in the given target into 'nr_subs' regions */
 950 static void damon_split_regions_of(struct damon_ctx *ctx,
 951                                      struct damon_target *t, int nr_subs)
 952 {
 953         struct damon_region *r, *next;
 954         unsigned long sz_region, sz_sub = 0;
 955         int i;
 956
 957         damon_for_each_region_safe(r, next, t) {
 958                 sz_region = r->ar.end - r->ar.start;
 959
 960                 for (i = 0; i < nr_subs - 1 &&
 961                                 sz_region > 2 * DAMON_MIN_REGION; i++) {
 962                         /*
 963                          * Randomly select size of left sub-region to be at
 964                          * least 10 percent and at most 90% of original region
 965                          */
 966                         sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
 967                                         sz_region / 10, DAMON_MIN_REGION);
 968                         /* Do not allow blank region */
 969                         if (sz_sub == 0 || sz_sub >= sz_region)
 970                                 continue;
 971
 972                         damon_split_region_at(ctx, t, r, sz_sub);
 973                         sz_region = sz_sub;
 974                 }
 975         }
 976 }
 977
 978 /*
 979  * Split every target region into randomly-sized small regions
 980  *
 981  * This function splits every target region into random-sized small regions if
 982  * current total number of the regions is equal or smaller than half of the
 983  * user-specified maximum number of regions.  This is for maximizing the
 984  * monitoring accuracy under the dynamically changeable access patterns.  If a
 985  * split was unnecessarily made, later 'kdamond_merge_regions()' will revert
 986  * it.
 987  */
 988 static void kdamond_split_regions(struct damon_ctx *ctx)
 989 {
 990         struct damon_target *t;
 991         unsigned int nr_regions = 0;
 992         static unsigned int last_nr_regions;
 993         int nr_subregions = 2;
 994
 995         damon_for_each_target(t, ctx)
 996                 nr_regions += damon_nr_regions(t);
 997
 998         if (nr_regions > ctx->max_nr_regions / 2)
 999                 return;
1000
1001         /* Maybe the middle of the region has different access frequency */
1002         if (last_nr_regions == nr_regions &&
1003                         nr_regions < ctx->max_nr_regions / 3)
1004                 nr_subregions = 3;
1005
1006         damon_for_each_target(t, ctx)
1007                 damon_split_regions_of(ctx, t, nr_subregions);
1008
1009         last_nr_regions = nr_regions;
1010 }
1011
1012 /*
1013  * Check whether it is time to check and apply the operations-related data
1014  * structures.
1015  *
1016  * Returns true if it is.
1017  */
1018 static bool kdamond_need_update_operations(struct damon_ctx *ctx)
1019 {
1020         return damon_check_reset_time_interval(&ctx->last_ops_update,
1021                         ctx->ops_update_interval);
1022 }
1023
1024 /*
1025  * Check whether current monitoring should be stopped
1026  *
1027  * The monitoring is stopped when either the user requested to stop, or all
1028  * monitoring targets are invalid.
1029  *
1030  * Returns true if need to stop current monitoring.
1031  */
1032 static bool kdamond_need_stop(struct damon_ctx *ctx)
1033 {
1034         struct damon_target *t;
1035
1036         if (kthread_should_stop())
1037                 return true;
1038
1039         if (!ctx->ops.target_valid)
1040                 return false;
1041
1042         damon_for_each_target(t, ctx) {
1043                 if (ctx->ops.target_valid(t))
1044                         return false;
1045         }
1046
1047         return true;
1048 }
1049
1050 static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric)
1051 {
1052         struct sysinfo i;
1053
1054         switch (metric) {
1055         case DAMOS_WMARK_FREE_MEM_RATE:
1056                 si_meminfo(&i);
1057                 return i.freeram * 1000 / i.totalram;
1058         default:
1059                 break;
1060         }
1061         return -EINVAL;
1062 }
1063
1064 /*
1065  * Returns zero if the scheme is active.  Else, returns time to wait for next
1066  * watermark check in micro-seconds.
1067  */
1068 static unsigned long damos_wmark_wait_us(struct damos *scheme)
1069 {
1070         unsigned long metric;
1071
1072         if (scheme->wmarks.metric == DAMOS_WMARK_NONE)
1073                 return 0;
1074
1075         metric = damos_wmark_metric_value(scheme->wmarks.metric);
1076         /* higher than high watermark or lower than low watermark */
1077         if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) {
1078                 if (scheme->wmarks.activated)
1079                         pr_debug("deactivate a scheme (%d) for %s wmark\n",
1080                                         scheme->action,
1081                                         metric > scheme->wmarks.high ?
1082                                         "high" : "low");
1083                 scheme->wmarks.activated = false;
1084                 return scheme->wmarks.interval;
1085         }
1086
1087         /* inactive and higher than middle watermark */
1088         if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) &&
1089                         !scheme->wmarks.activated)
1090                 return scheme->wmarks.interval;
1091
1092         if (!scheme->wmarks.activated)
1093                 pr_debug("activate a scheme (%d)\n", scheme->action);
1094         scheme->wmarks.activated = true;
1095         return 0;
1096 }
1097
1098 static void kdamond_usleep(unsigned long usecs)
1099 {
1100         /* See Documentation/timers/timers-howto.rst for the thresholds */
1101         if (usecs > 20 * USEC_PER_MSEC)
1102                 schedule_timeout_idle(usecs_to_jiffies(usecs));
1103         else
1104                 usleep_idle_range(usecs, usecs + 1);
1105 }
1106
1107 /* Returns negative error code if it's not activated but should return */
1108 static int kdamond_wait_activation(struct damon_ctx *ctx)
1109 {
1110         struct damos *s;
1111         unsigned long wait_time;
1112         unsigned long min_wait_time = 0;
1113         bool init_wait_time = false;
1114
1115         while (!kdamond_need_stop(ctx)) {
1116                 damon_for_each_scheme(s, ctx) {
1117                         wait_time = damos_wmark_wait_us(s);
1118                         if (!init_wait_time || wait_time < min_wait_time) {
1119                                 init_wait_time = true;
1120                                 min_wait_time = wait_time;
1121                         }
1122                 }
1123                 if (!min_wait_time)
1124                         return 0;
1125
1126                 kdamond_usleep(min_wait_time);
1127
1128                 if (ctx->callback.after_wmarks_check &&
1129                                 ctx->callback.after_wmarks_check(ctx))
1130                         break;
1131         }
1132         return -EBUSY;
1133 }
1134
1135 /*
1136  * The monitoring daemon that runs as a kernel thread
1137  */
1138 static int kdamond_fn(void *data)
1139 {
1140         struct damon_ctx *ctx = data;
1141         struct damon_target *t;
1142         struct damon_region *r, *next;
1143         unsigned int max_nr_accesses = 0;
1144         unsigned long sz_limit = 0;
1145         bool done = false;
1146
1147         pr_debug("kdamond (%d) starts\n", current->pid);
1148
1149         if (ctx->ops.init)
1150                 ctx->ops.init(ctx);
1151         if (ctx->callback.before_start && ctx->callback.before_start(ctx))
1152                 done = true;
1153
1154         sz_limit = damon_region_sz_limit(ctx);
1155
1156         while (!kdamond_need_stop(ctx) && !done) {
1157                 if (kdamond_wait_activation(ctx)) {
1158                         done = true;
1159                         continue;
1160                 }
1161
1162                 if (ctx->ops.prepare_access_checks)
1163                         ctx->ops.prepare_access_checks(ctx);
1164                 if (ctx->callback.after_sampling &&
1165                                 ctx->callback.after_sampling(ctx)) {
1166                         done = true;
1167                         continue;
1168                 }
1169
1170                 kdamond_usleep(ctx->sample_interval);
1171
1172                 if (ctx->ops.check_accesses)
1173                         max_nr_accesses = ctx->ops.check_accesses(ctx);
1174
1175                 if (kdamond_aggregate_interval_passed(ctx)) {
1176                         kdamond_merge_regions(ctx,
1177                                         max_nr_accesses / 10,
1178                                         sz_limit);
1179                         if (ctx->callback.after_aggregation &&
1180                                         ctx->callback.after_aggregation(ctx)) {
1181                                 done = true;
1182                                 continue;
1183                         }
1184                         kdamond_apply_schemes(ctx);
1185                         kdamond_reset_aggregated(ctx);
1186                         kdamond_split_regions(ctx);
1187                         if (ctx->ops.reset_aggregated)
1188                                 ctx->ops.reset_aggregated(ctx);
1189                 }
1190
1191                 if (kdamond_need_update_operations(ctx)) {
1192                         if (ctx->ops.update)
1193                                 ctx->ops.update(ctx);
1194                         sz_limit = damon_region_sz_limit(ctx);
1195                 }
1196         }
1197         damon_for_each_target(t, ctx) {
1198                 damon_for_each_region_safe(r, next, t)
1199                         damon_destroy_region(r, t);
1200         }
1201
1202         if (ctx->callback.before_terminate)
1203                 ctx->callback.before_terminate(ctx);
1204         if (ctx->ops.cleanup)
1205                 ctx->ops.cleanup(ctx);
1206
1207         pr_debug("kdamond (%d) finishes\n", current->pid);
1208         mutex_lock(&ctx->kdamond_lock);
1209         ctx->kdamond = NULL;
1210         mutex_unlock(&ctx->kdamond_lock);
1211
1212         mutex_lock(&damon_lock);
1213         nr_running_ctxs--;
1214         if (!nr_running_ctxs && running_exclusive_ctxs)
1215                 running_exclusive_ctxs = false;
1216         mutex_unlock(&damon_lock);
1217
1218         return 0;
1219 }
1220
1221 #include "core-test.h"