mm/damon/core.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Data Access Monitor
   4  *
   5  * Author: SeongJae Park <sjpark@amazon.de>
   6  */
   7
   8 #define pr_fmt(fmt) "damon: " fmt
   9
  10 #include <linux/damon.h>
  11 #include <linux/delay.h>
  12 #include <linux/kthread.h>
  13 #include <linux/mm.h>
  14 #include <linux/slab.h>
  15 #include <linux/string.h>
  16
  17 #define CREATE_TRACE_POINTS
  18 #include <trace/events/damon.h>
  19
  20 #ifdef CONFIG_DAMON_KUNIT_TEST
  21 #undef DAMON_MIN_REGION
  22 #define DAMON_MIN_REGION 1
  23 #endif
  24
  25 static DEFINE_MUTEX(damon_lock);
  26 static int nr_running_ctxs;
  27 static bool running_exclusive_ctxs;
  28
  29 static DEFINE_MUTEX(damon_ops_lock);
  30 static struct damon_operations damon_registered_ops[NR_DAMON_OPS];
  31
  32 /* Should be called under damon_ops_lock with id smaller than NR_DAMON_OPS */
  33 static bool damon_registered_ops_id(enum damon_ops_id id)
  34 {
  35         struct damon_operations empty_ops = {};
  36
  37         if (!memcmp(&empty_ops, &damon_registered_ops[id], sizeof(empty_ops)))
  38                 return false;
  39         return true;
  40 }
  41
  42 /**
  43  * damon_register_ops() - Register a monitoring operations set to DAMON.
  44  * @ops:        monitoring operations set to register.
  45  *
  46  * This function registers a monitoring operations set of valid &struct
  47  * damon_operations->id so that others can find and use them later.
  48  *
  49  * Return: 0 on success, negative error code otherwise.
  50  */
  51 int damon_register_ops(struct damon_operations *ops)
  52 {
  53         int err = 0;
  54
  55         if (ops->id >= NR_DAMON_OPS)
  56                 return -EINVAL;
  57         mutex_lock(&damon_ops_lock);
  58         /* Fail for already registered ops */
  59         if (damon_registered_ops_id(ops->id)) {
  60                 err = -EINVAL;
  61                 goto out;
  62         }
  63         damon_registered_ops[ops->id] = *ops;
  64 out:
  65         mutex_unlock(&damon_ops_lock);
  66         return err;
  67 }
  68
  69 /**
  70  * damon_select_ops() - Select a monitoring operations to use with the context.
  71  * @ctx:        monitoring context to use the operations.
  72  * @id:         id of the registered monitoring operations to select.
  73  *
  74  * This function finds registered monitoring operations set of @id and make
  75  * @ctx to use it.
  76  *
  77  * Return: 0 on success, negative error code otherwise.
  78  */
  79 int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id)
  80 {
  81         int err = 0;
  82
  83         if (id >= NR_DAMON_OPS)
  84                 return -EINVAL;
  85
  86         mutex_lock(&damon_ops_lock);
  87         if (!damon_registered_ops_id(id))
  88                 err = -EINVAL;
  89         else
  90                 ctx->ops = damon_registered_ops[id];
  91         mutex_unlock(&damon_ops_lock);
  92         return err;
  93 }
  94
  95 /*
  96  * Construct a damon_region struct
  97  *
  98  * Returns the pointer to the new struct if success, or NULL otherwise
  99  */
 100 struct damon_region *damon_new_region(unsigned long start, unsigned long end)
 101 {
 102         struct damon_region *region;
 103
 104         region = kmalloc(sizeof(*region), GFP_KERNEL);
 105         if (!region)
 106                 return NULL;
 107
 108         region->ar.start = start;
 109         region->ar.end = end;
 110         region->nr_accesses = 0;
 111         INIT_LIST_HEAD(&region->list);
 112
 113         region->age = 0;
 114         region->last_nr_accesses = 0;
 115
 116         return region;
 117 }
 118
 119 void damon_add_region(struct damon_region *r, struct damon_target *t)
 120 {
 121         list_add_tail(&r->list, &t->regions_list);
 122         t->nr_regions++;
 123 }
 124
 125 static void damon_del_region(struct damon_region *r, struct damon_target *t)
 126 {
 127         list_del(&r->list);
 128         t->nr_regions--;
 129 }
 130
 131 static void damon_free_region(struct damon_region *r)
 132 {
 133         kfree(r);
 134 }
 135
 136 void damon_destroy_region(struct damon_region *r, struct damon_target *t)
 137 {
 138         damon_del_region(r, t);
 139         damon_free_region(r);
 140 }
 141
 142 struct damos *damon_new_scheme(
 143                 unsigned long min_sz_region, unsigned long max_sz_region,
 144                 unsigned int min_nr_accesses, unsigned int max_nr_accesses,
 145                 unsigned int min_age_region, unsigned int max_age_region,
 146                 enum damos_action action, struct damos_quota *quota,
 147                 struct damos_watermarks *wmarks)
 148 {
 149         struct damos *scheme;
 150
 151         scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
 152         if (!scheme)
 153                 return NULL;
 154         scheme->min_sz_region = min_sz_region;
 155         scheme->max_sz_region = max_sz_region;
 156         scheme->min_nr_accesses = min_nr_accesses;
 157         scheme->max_nr_accesses = max_nr_accesses;
 158         scheme->min_age_region = min_age_region;
 159         scheme->max_age_region = max_age_region;
 160         scheme->action = action;
 161         scheme->stat = (struct damos_stat){};
 162         INIT_LIST_HEAD(&scheme->list);
 163
 164         scheme->quota.ms = quota->ms;
 165         scheme->quota.sz = quota->sz;
 166         scheme->quota.reset_interval = quota->reset_interval;
 167         scheme->quota.weight_sz = quota->weight_sz;
 168         scheme->quota.weight_nr_accesses = quota->weight_nr_accesses;
 169         scheme->quota.weight_age = quota->weight_age;
 170         scheme->quota.total_charged_sz = 0;
 171         scheme->quota.total_charged_ns = 0;
 172         scheme->quota.esz = 0;
 173         scheme->quota.charged_sz = 0;
 174         scheme->quota.charged_from = 0;
 175         scheme->quota.charge_target_from = NULL;
 176         scheme->quota.charge_addr_from = 0;
 177
 178         scheme->wmarks.metric = wmarks->metric;
 179         scheme->wmarks.interval = wmarks->interval;
 180         scheme->wmarks.high = wmarks->high;
 181         scheme->wmarks.mid = wmarks->mid;
 182         scheme->wmarks.low = wmarks->low;
 183         scheme->wmarks.activated = true;
 184
 185         return scheme;
 186 }
 187
 188 void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
 189 {
 190         list_add_tail(&s->list, &ctx->schemes);
 191 }
 192
 193 static void damon_del_scheme(struct damos *s)
 194 {
 195         list_del(&s->list);
 196 }
 197
 198 static void damon_free_scheme(struct damos *s)
 199 {
 200         kfree(s);
 201 }
 202
 203 void damon_destroy_scheme(struct damos *s)
 204 {
 205         damon_del_scheme(s);
 206         damon_free_scheme(s);
 207 }
 208
 209 /*
 210  * Construct a damon_target struct
 211  *
 212  * Returns the pointer to the new struct if success, or NULL otherwise
 213  */
 214 struct damon_target *damon_new_target(void)
 215 {
 216         struct damon_target *t;
 217
 218         t = kmalloc(sizeof(*t), GFP_KERNEL);
 219         if (!t)
 220                 return NULL;
 221
 222         t->pid = NULL;
 223         t->nr_regions = 0;
 224         INIT_LIST_HEAD(&t->regions_list);
 225
 226         return t;
 227 }
 228
 229 void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
 230 {
 231         list_add_tail(&t->list, &ctx->adaptive_targets);
 232 }
 233
 234 bool damon_targets_empty(struct damon_ctx *ctx)
 235 {
 236         return list_empty(&ctx->adaptive_targets);
 237 }
 238
 239 static void damon_del_target(struct damon_target *t)
 240 {
 241         list_del(&t->list);
 242 }
 243
 244 void damon_free_target(struct damon_target *t)
 245 {
 246         struct damon_region *r, *next;
 247
 248         damon_for_each_region_safe(r, next, t)
 249                 damon_free_region(r);
 250         kfree(t);
 251 }
 252
 253 void damon_destroy_target(struct damon_target *t)
 254 {
 255         damon_del_target(t);
 256         damon_free_target(t);
 257 }
 258
 259 unsigned int damon_nr_regions(struct damon_target *t)
 260 {
 261         return t->nr_regions;
 262 }
 263
 264 struct damon_ctx *damon_new_ctx(void)
 265 {
 266         struct damon_ctx *ctx;
 267
 268         ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 269         if (!ctx)
 270                 return NULL;
 271
 272         ctx->sample_interval = 5 * 1000;
 273         ctx->aggr_interval = 100 * 1000;
 274         ctx->ops_update_interval = 60 * 1000 * 1000;
 275
 276         ktime_get_coarse_ts64(&ctx->last_aggregation);
 277         ctx->last_ops_update = ctx->last_aggregation;
 278
 279         mutex_init(&ctx->kdamond_lock);
 280
 281         ctx->min_nr_regions = 10;
 282         ctx->max_nr_regions = 1000;
 283
 284         INIT_LIST_HEAD(&ctx->adaptive_targets);
 285         INIT_LIST_HEAD(&ctx->schemes);
 286
 287         return ctx;
 288 }
 289
 290 static void damon_destroy_targets(struct damon_ctx *ctx)
 291 {
 292         struct damon_target *t, *next_t;
 293
 294         if (ctx->ops.cleanup) {
 295                 ctx->ops.cleanup(ctx);
 296                 return;
 297         }
 298
 299         damon_for_each_target_safe(t, next_t, ctx)
 300                 damon_destroy_target(t);
 301 }
 302
 303 void damon_destroy_ctx(struct damon_ctx *ctx)
 304 {
 305         struct damos *s, *next_s;
 306
 307         damon_destroy_targets(ctx);
 308
 309         damon_for_each_scheme_safe(s, next_s, ctx)
 310                 damon_destroy_scheme(s);
 311
 312         kfree(ctx);
 313 }
 314
 315 /**
 316  * damon_set_attrs() - Set attributes for the monitoring.
 317  * @ctx:                monitoring context
 318  * @sample_int:         time interval between samplings
 319  * @aggr_int:           time interval between aggregations
 320  * @ops_upd_int:        time interval between monitoring operations updates
 321  * @min_nr_reg:         minimal number of regions
 322  * @max_nr_reg:         maximum number of regions
 323  *
 324  * This function should not be called while the kdamond is running.
 325  * Every time interval is in micro-seconds.
 326  *
 327  * Return: 0 on success, negative error code otherwise.
 328  */
 329 int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
 330                     unsigned long aggr_int, unsigned long ops_upd_int,
 331                     unsigned long min_nr_reg, unsigned long max_nr_reg)
 332 {
 333         if (min_nr_reg < 3)
 334                 return -EINVAL;
 335         if (min_nr_reg > max_nr_reg)
 336                 return -EINVAL;
 337
 338         ctx->sample_interval = sample_int;
 339         ctx->aggr_interval = aggr_int;
 340         ctx->ops_update_interval = ops_upd_int;
 341         ctx->min_nr_regions = min_nr_reg;
 342         ctx->max_nr_regions = max_nr_reg;
 343
 344         return 0;
 345 }
 346
 347 /**
 348  * damon_set_schemes() - Set data access monitoring based operation schemes.
 349  * @ctx:        monitoring context
 350  * @schemes:    array of the schemes
 351  * @nr_schemes: number of entries in @schemes
 352  *
 353  * This function should not be called while the kdamond of the context is
 354  * running.
 355  *
 356  * Return: 0 if success, or negative error code otherwise.
 357  */
 358 int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes,
 359                         ssize_t nr_schemes)
 360 {
 361         struct damos *s, *next;
 362         ssize_t i;
 363
 364         damon_for_each_scheme_safe(s, next, ctx)
 365                 damon_destroy_scheme(s);
 366         for (i = 0; i < nr_schemes; i++)
 367                 damon_add_scheme(ctx, schemes[i]);
 368         return 0;
 369 }
 370
 371 /**
 372  * damon_nr_running_ctxs() - Return number of currently running contexts.
 373  */
 374 int damon_nr_running_ctxs(void)
 375 {
 376         int nr_ctxs;
 377
 378         mutex_lock(&damon_lock);
 379         nr_ctxs = nr_running_ctxs;
 380         mutex_unlock(&damon_lock);
 381
 382         return nr_ctxs;
 383 }
 384
 385 /* Returns the size upper limit for each monitoring region */
 386 static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
 387 {
 388         struct damon_target *t;
 389         struct damon_region *r;
 390         unsigned long sz = 0;
 391
 392         damon_for_each_target(t, ctx) {
 393                 damon_for_each_region(r, t)
 394                         sz += r->ar.end - r->ar.start;
 395         }
 396
 397         if (ctx->min_nr_regions)
 398                 sz /= ctx->min_nr_regions;
 399         if (sz < DAMON_MIN_REGION)
 400                 sz = DAMON_MIN_REGION;
 401
 402         return sz;
 403 }
 404
 405 static int kdamond_fn(void *data);
 406
 407 /*
 408  * __damon_start() - Starts monitoring with given context.
 409  * @ctx:        monitoring context
 410  *
 411  * This function should be called while damon_lock is hold.
 412  *
 413  * Return: 0 on success, negative error code otherwise.
 414  */
 415 static int __damon_start(struct damon_ctx *ctx)
 416 {
 417         int err = -EBUSY;
 418
 419         mutex_lock(&ctx->kdamond_lock);
 420         if (!ctx->kdamond) {
 421                 err = 0;
 422                 ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
 423                                 nr_running_ctxs);
 424                 if (IS_ERR(ctx->kdamond)) {
 425                         err = PTR_ERR(ctx->kdamond);
 426                         ctx->kdamond = NULL;
 427                 }
 428         }
 429         mutex_unlock(&ctx->kdamond_lock);
 430
 431         return err;
 432 }
 433
 434 /**
 435  * damon_start() - Starts the monitorings for a given group of contexts.
 436  * @ctxs:       an array of the pointers for contexts to start monitoring
 437  * @nr_ctxs:    size of @ctxs
 438  * @exclusive:  exclusiveness of this contexts group
 439  *
 440  * This function starts a group of monitoring threads for a group of monitoring
 441  * contexts.  One thread per each context is created and run in parallel.  The
 442  * caller should handle synchronization between the threads by itself.  If
 443  * @exclusive is true and a group of threads that created by other
 444  * 'damon_start()' call is currently running, this function does nothing but
 445  * returns -EBUSY.
 446  *
 447  * Return: 0 on success, negative error code otherwise.
 448  */
 449 int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive)
 450 {
 451         int i;
 452         int err = 0;
 453
 454         mutex_lock(&damon_lock);
 455         if ((exclusive && nr_running_ctxs) ||
 456                         (!exclusive && running_exclusive_ctxs)) {
 457                 mutex_unlock(&damon_lock);
 458                 return -EBUSY;
 459         }
 460
 461         for (i = 0; i < nr_ctxs; i++) {
 462                 err = __damon_start(ctxs[i]);
 463                 if (err)
 464                         break;
 465                 nr_running_ctxs++;
 466         }
 467         if (exclusive && nr_running_ctxs)
 468                 running_exclusive_ctxs = true;
 469         mutex_unlock(&damon_lock);
 470
 471         return err;
 472 }
 473
 474 /*
 475  * __damon_stop() - Stops monitoring of a given context.
 476  * @ctx:        monitoring context
 477  *
 478  * Return: 0 on success, negative error code otherwise.
 479  */
 480 static int __damon_stop(struct damon_ctx *ctx)
 481 {
 482         struct task_struct *tsk;
 483
 484         mutex_lock(&ctx->kdamond_lock);
 485         tsk = ctx->kdamond;
 486         if (tsk) {
 487                 get_task_struct(tsk);
 488                 mutex_unlock(&ctx->kdamond_lock);
 489                 kthread_stop(tsk);
 490                 put_task_struct(tsk);
 491                 return 0;
 492         }
 493         mutex_unlock(&ctx->kdamond_lock);
 494
 495         return -EPERM;
 496 }
 497
 498 /**
 499  * damon_stop() - Stops the monitorings for a given group of contexts.
 500  * @ctxs:       an array of the pointers for contexts to stop monitoring
 501  * @nr_ctxs:    size of @ctxs
 502  *
 503  * Return: 0 on success, negative error code otherwise.
 504  */
 505 int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
 506 {
 507         int i, err = 0;
 508
 509         for (i = 0; i < nr_ctxs; i++) {
 510                 /* nr_running_ctxs is decremented in kdamond_fn */
 511                 err = __damon_stop(ctxs[i]);
 512                 if (err)
 513                         break;
 514         }
 515         return err;
 516 }
 517
 518 /*
 519  * damon_check_reset_time_interval() - Check if a time interval is elapsed.
 520  * @baseline:   the time to check whether the interval has elapsed since
 521  * @interval:   the time interval (microseconds)
 522  *
 523  * See whether the given time interval has passed since the given baseline
 524  * time.  If so, it also updates the baseline to current time for next check.
 525  *
 526  * Return:      true if the time interval has passed, or false otherwise.
 527  */
 528 static bool damon_check_reset_time_interval(struct timespec64 *baseline,
 529                 unsigned long interval)
 530 {
 531         struct timespec64 now;
 532
 533         ktime_get_coarse_ts64(&now);
 534         if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) <
 535                         interval * 1000)
 536                 return false;
 537         *baseline = now;
 538         return true;
 539 }
 540
 541 /*
 542  * Check whether it is time to flush the aggregated information
 543  */
 544 static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx)
 545 {
 546         return damon_check_reset_time_interval(&ctx->last_aggregation,
 547                         ctx->aggr_interval);
 548 }
 549
 550 /*
 551  * Reset the aggregated monitoring results ('nr_accesses' of each region).
 552  */
 553 static void kdamond_reset_aggregated(struct damon_ctx *c)
 554 {
 555         struct damon_target *t;
 556         unsigned int ti = 0;    /* target's index */
 557
 558         damon_for_each_target(t, c) {
 559                 struct damon_region *r;
 560
 561                 damon_for_each_region(r, t) {
 562                         trace_damon_aggregated(t, ti, r, damon_nr_regions(t));
 563                         r->last_nr_accesses = r->nr_accesses;
 564                         r->nr_accesses = 0;
 565                 }
 566                 ti++;
 567         }
 568 }
 569
 570 static void damon_split_region_at(struct damon_ctx *ctx,
 571                 struct damon_target *t, struct damon_region *r,
 572                 unsigned long sz_r);
 573
 574 static bool __damos_valid_target(struct damon_region *r, struct damos *s)
 575 {
 576         unsigned long sz;
 577
 578         sz = r->ar.end - r->ar.start;
 579         return s->min_sz_region <= sz && sz <= s->max_sz_region &&
 580                 s->min_nr_accesses <= r->nr_accesses &&
 581                 r->nr_accesses <= s->max_nr_accesses &&
 582                 s->min_age_region <= r->age && r->age <= s->max_age_region;
 583 }
 584
 585 static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
 586                 struct damon_region *r, struct damos *s)
 587 {
 588         bool ret = __damos_valid_target(r, s);
 589
 590         if (!ret || !s->quota.esz || !c->ops.get_scheme_score)
 591                 return ret;
 592
 593         return c->ops.get_scheme_score(c, t, r, s) >= s->quota.min_score;
 594 }
 595
 596 static void damon_do_apply_schemes(struct damon_ctx *c,
 597                                    struct damon_target *t,
 598                                    struct damon_region *r)
 599 {
 600         struct damos *s;
 601
 602         damon_for_each_scheme(s, c) {
 603                 struct damos_quota *quota = &s->quota;
 604                 unsigned long sz = r->ar.end - r->ar.start;
 605                 struct timespec64 begin, end;
 606                 unsigned long sz_applied = 0;
 607
 608                 if (!s->wmarks.activated)
 609                         continue;
 610
 611                 /* Check the quota */
 612                 if (quota->esz && quota->charged_sz >= quota->esz)
 613                         continue;
 614
 615                 /* Skip previously charged regions */
 616                 if (quota->charge_target_from) {
 617                         if (t != quota->charge_target_from)
 618                                 continue;
 619                         if (r == damon_last_region(t)) {
 620                                 quota->charge_target_from = NULL;
 621                                 quota->charge_addr_from = 0;
 622                                 continue;
 623                         }
 624                         if (quota->charge_addr_from &&
 625                                         r->ar.end <= quota->charge_addr_from)
 626                                 continue;
 627
 628                         if (quota->charge_addr_from && r->ar.start <
 629                                         quota->charge_addr_from) {
 630                                 sz = ALIGN_DOWN(quota->charge_addr_from -
 631                                                 r->ar.start, DAMON_MIN_REGION);
 632                                 if (!sz) {
 633                                         if (r->ar.end - r->ar.start <=
 634                                                         DAMON_MIN_REGION)
 635                                                 continue;
 636                                         sz = DAMON_MIN_REGION;
 637                                 }
 638                                 damon_split_region_at(c, t, r, sz);
 639                                 r = damon_next_region(r);
 640                                 sz = r->ar.end - r->ar.start;
 641                         }
 642                         quota->charge_target_from = NULL;
 643                         quota->charge_addr_from = 0;
 644                 }
 645
 646                 if (!damos_valid_target(c, t, r, s))
 647                         continue;
 648
 649                 /* Apply the scheme */
 650                 if (c->ops.apply_scheme) {
 651                         if (quota->esz &&
 652                                         quota->charged_sz + sz > quota->esz) {
 653                                 sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
 654                                                 DAMON_MIN_REGION);
 655                                 if (!sz)
 656                                         goto update_stat;
 657                                 damon_split_region_at(c, t, r, sz);
 658                         }
 659                         ktime_get_coarse_ts64(&begin);
 660                         sz_applied = c->ops.apply_scheme(c, t, r, s);
 661                         ktime_get_coarse_ts64(&end);
 662                         quota->total_charged_ns += timespec64_to_ns(&end) -
 663                                 timespec64_to_ns(&begin);
 664                         quota->charged_sz += sz;
 665                         if (quota->esz && quota->charged_sz >= quota->esz) {
 666                                 quota->charge_target_from = t;
 667                                 quota->charge_addr_from = r->ar.end + 1;
 668                         }
 669                 }
 670                 if (s->action != DAMOS_STAT)
 671                         r->age = 0;
 672
 673 update_stat:
 674                 s->stat.nr_tried++;
 675                 s->stat.sz_tried += sz;
 676                 if (sz_applied)
 677                         s->stat.nr_applied++;
 678                 s->stat.sz_applied += sz_applied;
 679         }
 680 }
 681
 682 /* Shouldn't be called if quota->ms and quota->sz are zero */
 683 static void damos_set_effective_quota(struct damos_quota *quota)
 684 {
 685         unsigned long throughput;
 686         unsigned long esz;
 687
 688         if (!quota->ms) {
 689                 quota->esz = quota->sz;
 690                 return;
 691         }
 692
 693         if (quota->total_charged_ns)
 694                 throughput = quota->total_charged_sz * 1000000 /
 695                         quota->total_charged_ns;
 696         else
 697                 throughput = PAGE_SIZE * 1024;
 698         esz = throughput * quota->ms;
 699
 700         if (quota->sz && quota->sz < esz)
 701                 esz = quota->sz;
 702         quota->esz = esz;
 703 }
 704
 705 static void kdamond_apply_schemes(struct damon_ctx *c)
 706 {
 707         struct damon_target *t;
 708         struct damon_region *r, *next_r;
 709         struct damos *s;
 710
 711         damon_for_each_scheme(s, c) {
 712                 struct damos_quota *quota = &s->quota;
 713                 unsigned long cumulated_sz;
 714                 unsigned int score, max_score = 0;
 715
 716                 if (!s->wmarks.activated)
 717                         continue;
 718
 719                 if (!quota->ms && !quota->sz)
 720                         continue;
 721
 722                 /* New charge window starts */
 723                 if (time_after_eq(jiffies, quota->charged_from +
 724                                         msecs_to_jiffies(
 725                                                 quota->reset_interval))) {
 726                         if (quota->esz && quota->charged_sz >= quota->esz)
 727                                 s->stat.qt_exceeds++;
 728                         quota->total_charged_sz += quota->charged_sz;
 729                         quota->charged_from = jiffies;
 730                         quota->charged_sz = 0;
 731                         damos_set_effective_quota(quota);
 732                 }
 733
 734                 if (!c->ops.get_scheme_score)
 735                         continue;
 736
 737                 /* Fill up the score histogram */
 738                 memset(quota->histogram, 0, sizeof(quota->histogram));
 739                 damon_for_each_target(t, c) {
 740                         damon_for_each_region(r, t) {
 741                                 if (!__damos_valid_target(r, s))
 742                                         continue;
 743                                 score = c->ops.get_scheme_score(
 744                                                 c, t, r, s);
 745                                 quota->histogram[score] +=
 746                                         r->ar.end - r->ar.start;
 747                                 if (score > max_score)
 748                                         max_score = score;
 749                         }
 750                 }
 751
 752                 /* Set the min score limit */
 753                 for (cumulated_sz = 0, score = max_score; ; score--) {
 754                         cumulated_sz += quota->histogram[score];
 755                         if (cumulated_sz >= quota->esz || !score)
 756                                 break;
 757                 }
 758                 quota->min_score = score;
 759         }
 760
 761         damon_for_each_target(t, c) {
 762                 damon_for_each_region_safe(r, next_r, t)
 763                         damon_do_apply_schemes(c, t, r);
 764         }
 765 }
 766
 767 static inline unsigned long sz_damon_region(struct damon_region *r)
 768 {
 769         return r->ar.end - r->ar.start;
 770 }
 771
 772 /*
 773  * Merge two adjacent regions into one region
 774  */
 775 static void damon_merge_two_regions(struct damon_target *t,
 776                 struct damon_region *l, struct damon_region *r)
 777 {
 778         unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r);
 779
 780         l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
 781                         (sz_l + sz_r);
 782         l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r);
 783         l->ar.end = r->ar.end;
 784         damon_destroy_region(r, t);
 785 }
 786
 787 /*
 788  * Merge adjacent regions having similar access frequencies
 789  *
 790  * t            target affected by this merge operation
 791  * thres        '->nr_accesses' diff threshold for the merge
 792  * sz_limit     size upper limit of each region
 793  */
 794 static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
 795                                    unsigned long sz_limit)
 796 {
 797         struct damon_region *r, *prev = NULL, *next;
 798
 799         damon_for_each_region_safe(r, next, t) {
 800                 if (abs(r->nr_accesses - r->last_nr_accesses) > thres)
 801                         r->age = 0;
 802                 else
 803                         r->age++;
 804
 805                 if (prev && prev->ar.end == r->ar.start &&
 806                     abs(prev->nr_accesses - r->nr_accesses) <= thres &&
 807                     sz_damon_region(prev) + sz_damon_region(r) <= sz_limit)
 808                         damon_merge_two_regions(t, prev, r);
 809                 else
 810                         prev = r;
 811         }
 812 }
 813
 814 /*
 815  * Merge adjacent regions having similar access frequencies
 816  *
 817  * threshold    '->nr_accesses' diff threshold for the merge
 818  * sz_limit     size upper limit of each region
 819  *
 820  * This function merges monitoring target regions which are adjacent and their
 821  * access frequencies are similar.  This is for minimizing the monitoring
 822  * overhead under the dynamically changeable access pattern.  If a merge was
 823  * unnecessarily made, later 'kdamond_split_regions()' will revert it.
 824  */
 825 static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
 826                                   unsigned long sz_limit)
 827 {
 828         struct damon_target *t;
 829
 830         damon_for_each_target(t, c)
 831                 damon_merge_regions_of(t, threshold, sz_limit);
 832 }
 833
 834 /*
 835  * Split a region in two
 836  *
 837  * r            the region to be split
 838  * sz_r         size of the first sub-region that will be made
 839  */
 840 static void damon_split_region_at(struct damon_ctx *ctx,
 841                 struct damon_target *t, struct damon_region *r,
 842                 unsigned long sz_r)
 843 {
 844         struct damon_region *new;
 845
 846         new = damon_new_region(r->ar.start + sz_r, r->ar.end);
 847         if (!new)
 848                 return;
 849
 850         r->ar.end = new->ar.start;
 851
 852         new->age = r->age;
 853         new->last_nr_accesses = r->last_nr_accesses;
 854
 855         damon_insert_region(new, r, damon_next_region(r), t);
 856 }
 857
 858 /* Split every region in the given target into 'nr_subs' regions */
 859 static void damon_split_regions_of(struct damon_ctx *ctx,
 860                                      struct damon_target *t, int nr_subs)
 861 {
 862         struct damon_region *r, *next;
 863         unsigned long sz_region, sz_sub = 0;
 864         int i;
 865
 866         damon_for_each_region_safe(r, next, t) {
 867                 sz_region = r->ar.end - r->ar.start;
 868
 869                 for (i = 0; i < nr_subs - 1 &&
 870                                 sz_region > 2 * DAMON_MIN_REGION; i++) {
 871                         /*
 872                          * Randomly select size of left sub-region to be at
 873                          * least 10 percent and at most 90% of original region
 874                          */
 875                         sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
 876                                         sz_region / 10, DAMON_MIN_REGION);
 877                         /* Do not allow blank region */
 878                         if (sz_sub == 0 || sz_sub >= sz_region)
 879                                 continue;
 880
 881                         damon_split_region_at(ctx, t, r, sz_sub);
 882                         sz_region = sz_sub;
 883                 }
 884         }
 885 }
 886
 887 /*
 888  * Split every target region into randomly-sized small regions
 889  *
 890  * This function splits every target region into random-sized small regions if
 891  * current total number of the regions is equal or smaller than half of the
 892  * user-specified maximum number of regions.  This is for maximizing the
 893  * monitoring accuracy under the dynamically changeable access patterns.  If a
 894  * split was unnecessarily made, later 'kdamond_merge_regions()' will revert
 895  * it.
 896  */
 897 static void kdamond_split_regions(struct damon_ctx *ctx)
 898 {
 899         struct damon_target *t;
 900         unsigned int nr_regions = 0;
 901         static unsigned int last_nr_regions;
 902         int nr_subregions = 2;
 903
 904         damon_for_each_target(t, ctx)
 905                 nr_regions += damon_nr_regions(t);
 906
 907         if (nr_regions > ctx->max_nr_regions / 2)
 908                 return;
 909
 910         /* Maybe the middle of the region has different access frequency */
 911         if (last_nr_regions == nr_regions &&
 912                         nr_regions < ctx->max_nr_regions / 3)
 913                 nr_subregions = 3;
 914
 915         damon_for_each_target(t, ctx)
 916                 damon_split_regions_of(ctx, t, nr_subregions);
 917
 918         last_nr_regions = nr_regions;
 919 }
 920
 921 /*
 922  * Check whether it is time to check and apply the operations-related data
 923  * structures.
 924  *
 925  * Returns true if it is.
 926  */
 927 static bool kdamond_need_update_operations(struct damon_ctx *ctx)
 928 {
 929         return damon_check_reset_time_interval(&ctx->last_ops_update,
 930                         ctx->ops_update_interval);
 931 }
 932
 933 /*
 934  * Check whether current monitoring should be stopped
 935  *
 936  * The monitoring is stopped when either the user requested to stop, or all
 937  * monitoring targets are invalid.
 938  *
 939  * Returns true if need to stop current monitoring.
 940  */
 941 static bool kdamond_need_stop(struct damon_ctx *ctx)
 942 {
 943         struct damon_target *t;
 944
 945         if (kthread_should_stop())
 946                 return true;
 947
 948         if (!ctx->ops.target_valid)
 949                 return false;
 950
 951         damon_for_each_target(t, ctx) {
 952                 if (ctx->ops.target_valid(t))
 953                         return false;
 954         }
 955
 956         return true;
 957 }
 958
 959 static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric)
 960 {
 961         struct sysinfo i;
 962
 963         switch (metric) {
 964         case DAMOS_WMARK_FREE_MEM_RATE:
 965                 si_meminfo(&i);
 966                 return i.freeram * 1000 / i.totalram;
 967         default:
 968                 break;
 969         }
 970         return -EINVAL;
 971 }
 972
 973 /*
 974  * Returns zero if the scheme is active.  Else, returns time to wait for next
 975  * watermark check in micro-seconds.
 976  */
 977 static unsigned long damos_wmark_wait_us(struct damos *scheme)
 978 {
 979         unsigned long metric;
 980
 981         if (scheme->wmarks.metric == DAMOS_WMARK_NONE)
 982                 return 0;
 983
 984         metric = damos_wmark_metric_value(scheme->wmarks.metric);
 985         /* higher than high watermark or lower than low watermark */
 986         if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) {
 987                 if (scheme->wmarks.activated)
 988                         pr_debug("deactivate a scheme (%d) for %s wmark\n",
 989                                         scheme->action,
 990                                         metric > scheme->wmarks.high ?
 991                                         "high" : "low");
 992                 scheme->wmarks.activated = false;
 993                 return scheme->wmarks.interval;
 994         }
 995
 996         /* inactive and higher than middle watermark */
 997         if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) &&
 998                         !scheme->wmarks.activated)
 999                 return scheme->wmarks.interval;
1000
1001         if (!scheme->wmarks.activated)
1002                 pr_debug("activate a scheme (%d)\n", scheme->action);
1003         scheme->wmarks.activated = true;
1004         return 0;
1005 }
1006
1007 static void kdamond_usleep(unsigned long usecs)
1008 {
1009         /* See Documentation/timers/timers-howto.rst for the thresholds */
1010         if (usecs > 20 * USEC_PER_MSEC)
1011                 schedule_timeout_idle(usecs_to_jiffies(usecs));
1012         else
1013                 usleep_idle_range(usecs, usecs + 1);
1014 }
1015
1016 /* Returns negative error code if it's not activated but should return */
1017 static int kdamond_wait_activation(struct damon_ctx *ctx)
1018 {
1019         struct damos *s;
1020         unsigned long wait_time;
1021         unsigned long min_wait_time = 0;
1022         bool init_wait_time = false;
1023
1024         while (!kdamond_need_stop(ctx)) {
1025                 damon_for_each_scheme(s, ctx) {
1026                         wait_time = damos_wmark_wait_us(s);
1027                         if (!init_wait_time || wait_time < min_wait_time) {
1028                                 init_wait_time = true;
1029                                 min_wait_time = wait_time;
1030                         }
1031                 }
1032                 if (!min_wait_time)
1033                         return 0;
1034
1035                 kdamond_usleep(min_wait_time);
1036         }
1037         return -EBUSY;
1038 }
1039
1040 /*
1041  * The monitoring daemon that runs as a kernel thread
1042  */
1043 static int kdamond_fn(void *data)
1044 {
1045         struct damon_ctx *ctx = (struct damon_ctx *)data;
1046         struct damon_target *t;
1047         struct damon_region *r, *next;
1048         unsigned int max_nr_accesses = 0;
1049         unsigned long sz_limit = 0;
1050         bool done = false;
1051
1052         pr_debug("kdamond (%d) starts\n", current->pid);
1053
1054         if (ctx->ops.init)
1055                 ctx->ops.init(ctx);
1056         if (ctx->callback.before_start && ctx->callback.before_start(ctx))
1057                 done = true;
1058
1059         sz_limit = damon_region_sz_limit(ctx);
1060
1061         while (!kdamond_need_stop(ctx) && !done) {
1062                 if (kdamond_wait_activation(ctx))
1063                         continue;
1064
1065                 if (ctx->ops.prepare_access_checks)
1066                         ctx->ops.prepare_access_checks(ctx);
1067                 if (ctx->callback.after_sampling &&
1068                                 ctx->callback.after_sampling(ctx))
1069                         done = true;
1070
1071                 kdamond_usleep(ctx->sample_interval);
1072
1073                 if (ctx->ops.check_accesses)
1074                         max_nr_accesses = ctx->ops.check_accesses(ctx);
1075
1076                 if (kdamond_aggregate_interval_passed(ctx)) {
1077                         kdamond_merge_regions(ctx,
1078                                         max_nr_accesses / 10,
1079                                         sz_limit);
1080                         if (ctx->callback.after_aggregation &&
1081                                         ctx->callback.after_aggregation(ctx))
1082                                 done = true;
1083                         kdamond_apply_schemes(ctx);
1084                         kdamond_reset_aggregated(ctx);
1085                         kdamond_split_regions(ctx);
1086                         if (ctx->ops.reset_aggregated)
1087                                 ctx->ops.reset_aggregated(ctx);
1088                 }
1089
1090                 if (kdamond_need_update_operations(ctx)) {
1091                         if (ctx->ops.update)
1092                                 ctx->ops.update(ctx);
1093                         sz_limit = damon_region_sz_limit(ctx);
1094                 }
1095         }
1096         damon_for_each_target(t, ctx) {
1097                 damon_for_each_region_safe(r, next, t)
1098                         damon_destroy_region(r, t);
1099         }
1100
1101         if (ctx->callback.before_terminate)
1102                 ctx->callback.before_terminate(ctx);
1103         if (ctx->ops.cleanup)
1104                 ctx->ops.cleanup(ctx);
1105
1106         pr_debug("kdamond (%d) finishes\n", current->pid);
1107         mutex_lock(&ctx->kdamond_lock);
1108         ctx->kdamond = NULL;
1109         mutex_unlock(&ctx->kdamond_lock);
1110
1111         mutex_lock(&damon_lock);
1112         nr_running_ctxs--;
1113         if (!nr_running_ctxs && running_exclusive_ctxs)
1114                 running_exclusive_ctxs = false;
1115         mutex_unlock(&damon_lock);
1116
1117         return 0;
1118 }
1119
1120 #include "core-test.h"