mm/damon/core.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Data Access Monitor
   4  *
   5  * Author: SeongJae Park <sjpark@amazon.de>
   6  */
   7
   8 #define pr_fmt(fmt) "damon: " fmt
   9
  10 #include <linux/damon.h>
  11 #include <linux/delay.h>
  12 #include <linux/kthread.h>
  13 #include <linux/mm.h>
  14 #include <linux/slab.h>
  15 #include <linux/string.h>
  16
  17 #define CREATE_TRACE_POINTS
  18 #include <trace/events/damon.h>
  19
  20 #ifdef CONFIG_DAMON_KUNIT_TEST
  21 #undef DAMON_MIN_REGION
  22 #define DAMON_MIN_REGION 1
  23 #endif
  24
  25 static DEFINE_MUTEX(damon_lock);
  26 static int nr_running_ctxs;
  27
  28 /*
  29  * Construct a damon_region struct
  30  *
  31  * Returns the pointer to the new struct if success, or NULL otherwise
  32  */
  33 struct damon_region *damon_new_region(unsigned long start, unsigned long end)
  34 {
  35         struct damon_region *region;
  36
  37         region = kmalloc(sizeof(*region), GFP_KERNEL);
  38         if (!region)
  39                 return NULL;
  40
  41         region->ar.start = start;
  42         region->ar.end = end;
  43         region->nr_accesses = 0;
  44         INIT_LIST_HEAD(&region->list);
  45
  46         region->age = 0;
  47         region->last_nr_accesses = 0;
  48
  49         return region;
  50 }
  51
  52 void damon_add_region(struct damon_region *r, struct damon_target *t)
  53 {
  54         list_add_tail(&r->list, &t->regions_list);
  55         t->nr_regions++;
  56 }
  57
  58 static void damon_del_region(struct damon_region *r, struct damon_target *t)
  59 {
  60         list_del(&r->list);
  61         t->nr_regions--;
  62 }
  63
  64 static void damon_free_region(struct damon_region *r)
  65 {
  66         kfree(r);
  67 }
  68
  69 void damon_destroy_region(struct damon_region *r, struct damon_target *t)
  70 {
  71         damon_del_region(r, t);
  72         damon_free_region(r);
  73 }
  74
  75 struct damos *damon_new_scheme(
  76                 unsigned long min_sz_region, unsigned long max_sz_region,
  77                 unsigned int min_nr_accesses, unsigned int max_nr_accesses,
  78                 unsigned int min_age_region, unsigned int max_age_region,
  79                 enum damos_action action, struct damos_quota *quota,
  80                 struct damos_watermarks *wmarks)
  81 {
  82         struct damos *scheme;
  83
  84         scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
  85         if (!scheme)
  86                 return NULL;
  87         scheme->min_sz_region = min_sz_region;
  88         scheme->max_sz_region = max_sz_region;
  89         scheme->min_nr_accesses = min_nr_accesses;
  90         scheme->max_nr_accesses = max_nr_accesses;
  91         scheme->min_age_region = min_age_region;
  92         scheme->max_age_region = max_age_region;
  93         scheme->action = action;
  94         scheme->stat = (struct damos_stat){};
  95         INIT_LIST_HEAD(&scheme->list);
  96
  97         scheme->quota.ms = quota->ms;
  98         scheme->quota.sz = quota->sz;
  99         scheme->quota.reset_interval = quota->reset_interval;
 100         scheme->quota.weight_sz = quota->weight_sz;
 101         scheme->quota.weight_nr_accesses = quota->weight_nr_accesses;
 102         scheme->quota.weight_age = quota->weight_age;
 103         scheme->quota.total_charged_sz = 0;
 104         scheme->quota.total_charged_ns = 0;
 105         scheme->quota.esz = 0;
 106         scheme->quota.charged_sz = 0;
 107         scheme->quota.charged_from = 0;
 108         scheme->quota.charge_target_from = NULL;
 109         scheme->quota.charge_addr_from = 0;
 110
 111         scheme->wmarks.metric = wmarks->metric;
 112         scheme->wmarks.interval = wmarks->interval;
 113         scheme->wmarks.high = wmarks->high;
 114         scheme->wmarks.mid = wmarks->mid;
 115         scheme->wmarks.low = wmarks->low;
 116         scheme->wmarks.activated = true;
 117
 118         return scheme;
 119 }
 120
 121 void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
 122 {
 123         list_add_tail(&s->list, &ctx->schemes);
 124 }
 125
 126 static void damon_del_scheme(struct damos *s)
 127 {
 128         list_del(&s->list);
 129 }
 130
 131 static void damon_free_scheme(struct damos *s)
 132 {
 133         kfree(s);
 134 }
 135
 136 void damon_destroy_scheme(struct damos *s)
 137 {
 138         damon_del_scheme(s);
 139         damon_free_scheme(s);
 140 }
 141
 142 /*
 143  * Construct a damon_target struct
 144  *
 145  * Returns the pointer to the new struct if success, or NULL otherwise
 146  */
 147 struct damon_target *damon_new_target(unsigned long id)
 148 {
 149         struct damon_target *t;
 150
 151         t = kmalloc(sizeof(*t), GFP_KERNEL);
 152         if (!t)
 153                 return NULL;
 154
 155         t->id = id;
 156         t->nr_regions = 0;
 157         INIT_LIST_HEAD(&t->regions_list);
 158
 159         return t;
 160 }
 161
 162 void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
 163 {
 164         list_add_tail(&t->list, &ctx->adaptive_targets);
 165 }
 166
 167 bool damon_targets_empty(struct damon_ctx *ctx)
 168 {
 169         return list_empty(&ctx->adaptive_targets);
 170 }
 171
 172 static void damon_del_target(struct damon_target *t)
 173 {
 174         list_del(&t->list);
 175 }
 176
 177 void damon_free_target(struct damon_target *t)
 178 {
 179         struct damon_region *r, *next;
 180
 181         damon_for_each_region_safe(r, next, t)
 182                 damon_free_region(r);
 183         kfree(t);
 184 }
 185
 186 void damon_destroy_target(struct damon_target *t)
 187 {
 188         damon_del_target(t);
 189         damon_free_target(t);
 190 }
 191
 192 unsigned int damon_nr_regions(struct damon_target *t)
 193 {
 194         return t->nr_regions;
 195 }
 196
 197 struct damon_ctx *damon_new_ctx(void)
 198 {
 199         struct damon_ctx *ctx;
 200
 201         ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 202         if (!ctx)
 203                 return NULL;
 204
 205         ctx->sample_interval = 5 * 1000;
 206         ctx->aggr_interval = 100 * 1000;
 207         ctx->primitive_update_interval = 60 * 1000 * 1000;
 208
 209         ktime_get_coarse_ts64(&ctx->last_aggregation);
 210         ctx->last_primitive_update = ctx->last_aggregation;
 211
 212         mutex_init(&ctx->kdamond_lock);
 213
 214         ctx->min_nr_regions = 10;
 215         ctx->max_nr_regions = 1000;
 216
 217         INIT_LIST_HEAD(&ctx->adaptive_targets);
 218         INIT_LIST_HEAD(&ctx->schemes);
 219
 220         return ctx;
 221 }
 222
 223 static void damon_destroy_targets(struct damon_ctx *ctx)
 224 {
 225         struct damon_target *t, *next_t;
 226
 227         if (ctx->primitive.cleanup) {
 228                 ctx->primitive.cleanup(ctx);
 229                 return;
 230         }
 231
 232         damon_for_each_target_safe(t, next_t, ctx)
 233                 damon_destroy_target(t);
 234 }
 235
 236 void damon_destroy_ctx(struct damon_ctx *ctx)
 237 {
 238         struct damos *s, *next_s;
 239
 240         damon_destroy_targets(ctx);
 241
 242         damon_for_each_scheme_safe(s, next_s, ctx)
 243                 damon_destroy_scheme(s);
 244
 245         kfree(ctx);
 246 }
 247
 248 /**
 249  * damon_set_targets() - Set monitoring targets.
 250  * @ctx:        monitoring context
 251  * @ids:        array of target ids
 252  * @nr_ids:     number of entries in @ids
 253  *
 254  * This function should not be called while the kdamond is running.
 255  *
 256  * Return: 0 on success, negative error code otherwise.
 257  */
 258 int damon_set_targets(struct damon_ctx *ctx,
 259                       unsigned long *ids, ssize_t nr_ids)
 260 {
 261         ssize_t i;
 262         struct damon_target *t, *next;
 263
 264         damon_destroy_targets(ctx);
 265
 266         for (i = 0; i < nr_ids; i++) {
 267                 t = damon_new_target(ids[i]);
 268                 if (!t) {
 269                         /* The caller should do cleanup of the ids itself */
 270                         damon_for_each_target_safe(t, next, ctx)
 271                                 damon_destroy_target(t);
 272                         return -ENOMEM;
 273                 }
 274                 damon_add_target(ctx, t);
 275         }
 276
 277         return 0;
 278 }
 279
 280 /**
 281  * damon_set_attrs() - Set attributes for the monitoring.
 282  * @ctx:                monitoring context
 283  * @sample_int:         time interval between samplings
 284  * @aggr_int:           time interval between aggregations
 285  * @primitive_upd_int:  time interval between monitoring primitive updates
 286  * @min_nr_reg:         minimal number of regions
 287  * @max_nr_reg:         maximum number of regions
 288  *
 289  * This function should not be called while the kdamond is running.
 290  * Every time interval is in micro-seconds.
 291  *
 292  * Return: 0 on success, negative error code otherwise.
 293  */
 294 int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
 295                     unsigned long aggr_int, unsigned long primitive_upd_int,
 296                     unsigned long min_nr_reg, unsigned long max_nr_reg)
 297 {
 298         if (min_nr_reg < 3)
 299                 return -EINVAL;
 300         if (min_nr_reg > max_nr_reg)
 301                 return -EINVAL;
 302
 303         ctx->sample_interval = sample_int;
 304         ctx->aggr_interval = aggr_int;
 305         ctx->primitive_update_interval = primitive_upd_int;
 306         ctx->min_nr_regions = min_nr_reg;
 307         ctx->max_nr_regions = max_nr_reg;
 308
 309         return 0;
 310 }
 311
 312 /**
 313  * damon_set_schemes() - Set data access monitoring based operation schemes.
 314  * @ctx:        monitoring context
 315  * @schemes:    array of the schemes
 316  * @nr_schemes: number of entries in @schemes
 317  *
 318  * This function should not be called while the kdamond of the context is
 319  * running.
 320  *
 321  * Return: 0 if success, or negative error code otherwise.
 322  */
 323 int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes,
 324                         ssize_t nr_schemes)
 325 {
 326         struct damos *s, *next;
 327         ssize_t i;
 328
 329         damon_for_each_scheme_safe(s, next, ctx)
 330                 damon_destroy_scheme(s);
 331         for (i = 0; i < nr_schemes; i++)
 332                 damon_add_scheme(ctx, schemes[i]);
 333         return 0;
 334 }
 335
 336 /**
 337  * damon_nr_running_ctxs() - Return number of currently running contexts.
 338  */
 339 int damon_nr_running_ctxs(void)
 340 {
 341         int nr_ctxs;
 342
 343         mutex_lock(&damon_lock);
 344         nr_ctxs = nr_running_ctxs;
 345         mutex_unlock(&damon_lock);
 346
 347         return nr_ctxs;
 348 }
 349
 350 /* Returns the size upper limit for each monitoring region */
 351 static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
 352 {
 353         struct damon_target *t;
 354         struct damon_region *r;
 355         unsigned long sz = 0;
 356
 357         damon_for_each_target(t, ctx) {
 358                 damon_for_each_region(r, t)
 359                         sz += r->ar.end - r->ar.start;
 360         }
 361
 362         if (ctx->min_nr_regions)
 363                 sz /= ctx->min_nr_regions;
 364         if (sz < DAMON_MIN_REGION)
 365                 sz = DAMON_MIN_REGION;
 366
 367         return sz;
 368 }
 369
 370 static int kdamond_fn(void *data);
 371
 372 /*
 373  * __damon_start() - Starts monitoring with given context.
 374  * @ctx:        monitoring context
 375  *
 376  * This function should be called while damon_lock is hold.
 377  *
 378  * Return: 0 on success, negative error code otherwise.
 379  */
 380 static int __damon_start(struct damon_ctx *ctx)
 381 {
 382         int err = -EBUSY;
 383
 384         mutex_lock(&ctx->kdamond_lock);
 385         if (!ctx->kdamond) {
 386                 err = 0;
 387                 ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
 388                                 nr_running_ctxs);
 389                 if (IS_ERR(ctx->kdamond)) {
 390                         err = PTR_ERR(ctx->kdamond);
 391                         ctx->kdamond = NULL;
 392                 }
 393         }
 394         mutex_unlock(&ctx->kdamond_lock);
 395
 396         return err;
 397 }
 398
 399 /**
 400  * damon_start() - Starts the monitorings for a given group of contexts.
 401  * @ctxs:       an array of the pointers for contexts to start monitoring
 402  * @nr_ctxs:    size of @ctxs
 403  *
 404  * This function starts a group of monitoring threads for a group of monitoring
 405  * contexts.  One thread per each context is created and run in parallel.  The
 406  * caller should handle synchronization between the threads by itself.  If a
 407  * group of threads that created by other 'damon_start()' call is currently
 408  * running, this function does nothing but returns -EBUSY.
 409  *
 410  * Return: 0 on success, negative error code otherwise.
 411  */
 412 int damon_start(struct damon_ctx **ctxs, int nr_ctxs)
 413 {
 414         int i;
 415         int err = 0;
 416
 417         mutex_lock(&damon_lock);
 418         if (nr_running_ctxs) {
 419                 mutex_unlock(&damon_lock);
 420                 return -EBUSY;
 421         }
 422
 423         for (i = 0; i < nr_ctxs; i++) {
 424                 err = __damon_start(ctxs[i]);
 425                 if (err)
 426                         break;
 427                 nr_running_ctxs++;
 428         }
 429         mutex_unlock(&damon_lock);
 430
 431         return err;
 432 }
 433
 434 /*
 435  * __damon_stop() - Stops monitoring of given context.
 436  * @ctx:        monitoring context
 437  *
 438  * Return: 0 on success, negative error code otherwise.
 439  */
 440 static int __damon_stop(struct damon_ctx *ctx)
 441 {
 442         struct task_struct *tsk;
 443
 444         mutex_lock(&ctx->kdamond_lock);
 445         tsk = ctx->kdamond;
 446         if (tsk) {
 447                 get_task_struct(tsk);
 448                 mutex_unlock(&ctx->kdamond_lock);
 449                 kthread_stop(tsk);
 450                 put_task_struct(tsk);
 451                 return 0;
 452         }
 453         mutex_unlock(&ctx->kdamond_lock);
 454
 455         return -EPERM;
 456 }
 457
 458 /**
 459  * damon_stop() - Stops the monitorings for a given group of contexts.
 460  * @ctxs:       an array of the pointers for contexts to stop monitoring
 461  * @nr_ctxs:    size of @ctxs
 462  *
 463  * Return: 0 on success, negative error code otherwise.
 464  */
 465 int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
 466 {
 467         int i, err = 0;
 468
 469         for (i = 0; i < nr_ctxs; i++) {
 470                 /* nr_running_ctxs is decremented in kdamond_fn */
 471                 err = __damon_stop(ctxs[i]);
 472                 if (err)
 473                         return err;
 474         }
 475
 476         return err;
 477 }
 478
 479 /*
 480  * damon_check_reset_time_interval() - Check if a time interval is elapsed.
 481  * @baseline:   the time to check whether the interval has elapsed since
 482  * @interval:   the time interval (microseconds)
 483  *
 484  * See whether the given time interval has passed since the given baseline
 485  * time.  If so, it also updates the baseline to current time for next check.
 486  *
 487  * Return:      true if the time interval has passed, or false otherwise.
 488  */
 489 static bool damon_check_reset_time_interval(struct timespec64 *baseline,
 490                 unsigned long interval)
 491 {
 492         struct timespec64 now;
 493
 494         ktime_get_coarse_ts64(&now);
 495         if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) <
 496                         interval * 1000)
 497                 return false;
 498         *baseline = now;
 499         return true;
 500 }
 501
 502 /*
 503  * Check whether it is time to flush the aggregated information
 504  */
 505 static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx)
 506 {
 507         return damon_check_reset_time_interval(&ctx->last_aggregation,
 508                         ctx->aggr_interval);
 509 }
 510
 511 /*
 512  * Reset the aggregated monitoring results ('nr_accesses' of each region).
 513  */
 514 static void kdamond_reset_aggregated(struct damon_ctx *c)
 515 {
 516         struct damon_target *t;
 517         unsigned int ti = 0;    /* target's index */
 518
 519         damon_for_each_target(t, c) {
 520                 struct damon_region *r;
 521
 522                 damon_for_each_region(r, t) {
 523                         trace_damon_aggregated(t, ti, r, damon_nr_regions(t));
 524                         r->last_nr_accesses = r->nr_accesses;
 525                         r->nr_accesses = 0;
 526                 }
 527                 ti++;
 528         }
 529 }
 530
 531 static void damon_split_region_at(struct damon_ctx *ctx,
 532                 struct damon_target *t, struct damon_region *r,
 533                 unsigned long sz_r);
 534
 535 static bool __damos_valid_target(struct damon_region *r, struct damos *s)
 536 {
 537         unsigned long sz;
 538
 539         sz = r->ar.end - r->ar.start;
 540         return s->min_sz_region <= sz && sz <= s->max_sz_region &&
 541                 s->min_nr_accesses <= r->nr_accesses &&
 542                 r->nr_accesses <= s->max_nr_accesses &&
 543                 s->min_age_region <= r->age && r->age <= s->max_age_region;
 544 }
 545
 546 static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
 547                 struct damon_region *r, struct damos *s)
 548 {
 549         bool ret = __damos_valid_target(r, s);
 550
 551         if (!ret || !s->quota.esz || !c->primitive.get_scheme_score)
 552                 return ret;
 553
 554         return c->primitive.get_scheme_score(c, t, r, s) >= s->quota.min_score;
 555 }
 556
 557 static void damon_do_apply_schemes(struct damon_ctx *c,
 558                                    struct damon_target *t,
 559                                    struct damon_region *r)
 560 {
 561         struct damos *s;
 562
 563         damon_for_each_scheme(s, c) {
 564                 struct damos_quota *quota = &s->quota;
 565                 unsigned long sz = r->ar.end - r->ar.start;
 566                 struct timespec64 begin, end;
 567                 unsigned long sz_applied = 0;
 568
 569                 if (!s->wmarks.activated)
 570                         continue;
 571
 572                 /* Check the quota */
 573                 if (quota->esz && quota->charged_sz >= quota->esz)
 574                         continue;
 575
 576                 /* Skip previously charged regions */
 577                 if (quota->charge_target_from) {
 578                         if (t != quota->charge_target_from)
 579                                 continue;
 580                         if (r == damon_last_region(t)) {
 581                                 quota->charge_target_from = NULL;
 582                                 quota->charge_addr_from = 0;
 583                                 continue;
 584                         }
 585                         if (quota->charge_addr_from &&
 586                                         r->ar.end <= quota->charge_addr_from)
 587                                 continue;
 588
 589                         if (quota->charge_addr_from && r->ar.start <
 590                                         quota->charge_addr_from) {
 591                                 sz = ALIGN_DOWN(quota->charge_addr_from -
 592                                                 r->ar.start, DAMON_MIN_REGION);
 593                                 if (!sz) {
 594                                         if (r->ar.end - r->ar.start <=
 595                                                         DAMON_MIN_REGION)
 596                                                 continue;
 597                                         sz = DAMON_MIN_REGION;
 598                                 }
 599                                 damon_split_region_at(c, t, r, sz);
 600                                 r = damon_next_region(r);
 601                                 sz = r->ar.end - r->ar.start;
 602                         }
 603                         quota->charge_target_from = NULL;
 604                         quota->charge_addr_from = 0;
 605                 }
 606
 607                 if (!damos_valid_target(c, t, r, s))
 608                         continue;
 609
 610                 /* Apply the scheme */
 611                 if (c->primitive.apply_scheme) {
 612                         if (quota->esz &&
 613                                         quota->charged_sz + sz > quota->esz) {
 614                                 sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
 615                                                 DAMON_MIN_REGION);
 616                                 if (!sz)
 617                                         goto update_stat;
 618                                 damon_split_region_at(c, t, r, sz);
 619                         }
 620                         ktime_get_coarse_ts64(&begin);
 621                         sz_applied = c->primitive.apply_scheme(c, t, r, s);
 622                         ktime_get_coarse_ts64(&end);
 623                         quota->total_charged_ns += timespec64_to_ns(&end) -
 624                                 timespec64_to_ns(&begin);
 625                         quota->charged_sz += sz;
 626                         if (quota->esz && quota->charged_sz >= quota->esz) {
 627                                 quota->charge_target_from = t;
 628                                 quota->charge_addr_from = r->ar.end + 1;
 629                         }
 630                 }
 631                 if (s->action != DAMOS_STAT)
 632                         r->age = 0;
 633
 634 update_stat:
 635                 s->stat.nr_tried++;
 636                 s->stat.sz_tried += sz;
 637                 if (sz_applied)
 638                         s->stat.nr_applied++;
 639                 s->stat.sz_applied += sz_applied;
 640         }
 641 }
 642
 643 /* Shouldn't be called if quota->ms and quota->sz are zero */
 644 static void damos_set_effective_quota(struct damos_quota *quota)
 645 {
 646         unsigned long throughput;
 647         unsigned long esz;
 648
 649         if (!quota->ms) {
 650                 quota->esz = quota->sz;
 651                 return;
 652         }
 653
 654         if (quota->total_charged_ns)
 655                 throughput = quota->total_charged_sz * 1000000 /
 656                         quota->total_charged_ns;
 657         else
 658                 throughput = PAGE_SIZE * 1024;
 659         esz = throughput * quota->ms;
 660
 661         if (quota->sz && quota->sz < esz)
 662                 esz = quota->sz;
 663         quota->esz = esz;
 664 }
 665
 666 static void kdamond_apply_schemes(struct damon_ctx *c)
 667 {
 668         struct damon_target *t;
 669         struct damon_region *r, *next_r;
 670         struct damos *s;
 671
 672         damon_for_each_scheme(s, c) {
 673                 struct damos_quota *quota = &s->quota;
 674                 unsigned long cumulated_sz;
 675                 unsigned int score, max_score = 0;
 676
 677                 if (!s->wmarks.activated)
 678                         continue;
 679
 680                 if (!quota->ms && !quota->sz)
 681                         continue;
 682
 683                 /* New charge window starts */
 684                 if (time_after_eq(jiffies, quota->charged_from +
 685                                         msecs_to_jiffies(
 686                                                 quota->reset_interval))) {
 687                         if (quota->esz && quota->charged_sz >= quota->esz)
 688                                 s->stat.qt_exceeds++;
 689                         quota->total_charged_sz += quota->charged_sz;
 690                         quota->charged_from = jiffies;
 691                         quota->charged_sz = 0;
 692                         damos_set_effective_quota(quota);
 693                 }
 694
 695                 if (!c->primitive.get_scheme_score)
 696                         continue;
 697
 698                 /* Fill up the score histogram */
 699                 memset(quota->histogram, 0, sizeof(quota->histogram));
 700                 damon_for_each_target(t, c) {
 701                         damon_for_each_region(r, t) {
 702                                 if (!__damos_valid_target(r, s))
 703                                         continue;
 704                                 score = c->primitive.get_scheme_score(
 705                                                 c, t, r, s);
 706                                 quota->histogram[score] +=
 707                                         r->ar.end - r->ar.start;
 708                                 if (score > max_score)
 709                                         max_score = score;
 710                         }
 711                 }
 712
 713                 /* Set the min score limit */
 714                 for (cumulated_sz = 0, score = max_score; ; score--) {
 715                         cumulated_sz += quota->histogram[score];
 716                         if (cumulated_sz >= quota->esz || !score)
 717                                 break;
 718                 }
 719                 quota->min_score = score;
 720         }
 721
 722         damon_for_each_target(t, c) {
 723                 damon_for_each_region_safe(r, next_r, t)
 724                         damon_do_apply_schemes(c, t, r);
 725         }
 726 }
 727
 728 static inline unsigned long sz_damon_region(struct damon_region *r)
 729 {
 730         return r->ar.end - r->ar.start;
 731 }
 732
 733 /*
 734  * Merge two adjacent regions into one region
 735  */
 736 static void damon_merge_two_regions(struct damon_target *t,
 737                 struct damon_region *l, struct damon_region *r)
 738 {
 739         unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r);
 740
 741         l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
 742                         (sz_l + sz_r);
 743         l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r);
 744         l->ar.end = r->ar.end;
 745         damon_destroy_region(r, t);
 746 }
 747
 748 /*
 749  * Merge adjacent regions having similar access frequencies
 750  *
 751  * t            target affected by this merge operation
 752  * thres        '->nr_accesses' diff threshold for the merge
 753  * sz_limit     size upper limit of each region
 754  */
 755 static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
 756                                    unsigned long sz_limit)
 757 {
 758         struct damon_region *r, *prev = NULL, *next;
 759
 760         damon_for_each_region_safe(r, next, t) {
 761                 if (abs(r->nr_accesses - r->last_nr_accesses) > thres)
 762                         r->age = 0;
 763                 else
 764                         r->age++;
 765
 766                 if (prev && prev->ar.end == r->ar.start &&
 767                     abs(prev->nr_accesses - r->nr_accesses) <= thres &&
 768                     sz_damon_region(prev) + sz_damon_region(r) <= sz_limit)
 769                         damon_merge_two_regions(t, prev, r);
 770                 else
 771                         prev = r;
 772         }
 773 }
 774
 775 /*
 776  * Merge adjacent regions having similar access frequencies
 777  *
 778  * threshold    '->nr_accesses' diff threshold for the merge
 779  * sz_limit     size upper limit of each region
 780  *
 781  * This function merges monitoring target regions which are adjacent and their
 782  * access frequencies are similar.  This is for minimizing the monitoring
 783  * overhead under the dynamically changeable access pattern.  If a merge was
 784  * unnecessarily made, later 'kdamond_split_regions()' will revert it.
 785  */
 786 static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
 787                                   unsigned long sz_limit)
 788 {
 789         struct damon_target *t;
 790
 791         damon_for_each_target(t, c)
 792                 damon_merge_regions_of(t, threshold, sz_limit);
 793 }
 794
 795 /*
 796  * Split a region in two
 797  *
 798  * r            the region to be split
 799  * sz_r         size of the first sub-region that will be made
 800  */
 801 static void damon_split_region_at(struct damon_ctx *ctx,
 802                 struct damon_target *t, struct damon_region *r,
 803                 unsigned long sz_r)
 804 {
 805         struct damon_region *new;
 806
 807         new = damon_new_region(r->ar.start + sz_r, r->ar.end);
 808         if (!new)
 809                 return;
 810
 811         r->ar.end = new->ar.start;
 812
 813         new->age = r->age;
 814         new->last_nr_accesses = r->last_nr_accesses;
 815
 816         damon_insert_region(new, r, damon_next_region(r), t);
 817 }
 818
 819 /* Split every region in the given target into 'nr_subs' regions */
 820 static void damon_split_regions_of(struct damon_ctx *ctx,
 821                                      struct damon_target *t, int nr_subs)
 822 {
 823         struct damon_region *r, *next;
 824         unsigned long sz_region, sz_sub = 0;
 825         int i;
 826
 827         damon_for_each_region_safe(r, next, t) {
 828                 sz_region = r->ar.end - r->ar.start;
 829
 830                 for (i = 0; i < nr_subs - 1 &&
 831                                 sz_region > 2 * DAMON_MIN_REGION; i++) {
 832                         /*
 833                          * Randomly select size of left sub-region to be at
 834                          * least 10 percent and at most 90% of original region
 835                          */
 836                         sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
 837                                         sz_region / 10, DAMON_MIN_REGION);
 838                         /* Do not allow blank region */
 839                         if (sz_sub == 0 || sz_sub >= sz_region)
 840                                 continue;
 841
 842                         damon_split_region_at(ctx, t, r, sz_sub);
 843                         sz_region = sz_sub;
 844                 }
 845         }
 846 }
 847
 848 /*
 849  * Split every target region into randomly-sized small regions
 850  *
 851  * This function splits every target region into random-sized small regions if
 852  * current total number of the regions is equal or smaller than half of the
 853  * user-specified maximum number of regions.  This is for maximizing the
 854  * monitoring accuracy under the dynamically changeable access patterns.  If a
 855  * split was unnecessarily made, later 'kdamond_merge_regions()' will revert
 856  * it.
 857  */
 858 static void kdamond_split_regions(struct damon_ctx *ctx)
 859 {
 860         struct damon_target *t;
 861         unsigned int nr_regions = 0;
 862         static unsigned int last_nr_regions;
 863         int nr_subregions = 2;
 864
 865         damon_for_each_target(t, ctx)
 866                 nr_regions += damon_nr_regions(t);
 867
 868         if (nr_regions > ctx->max_nr_regions / 2)
 869                 return;
 870
 871         /* Maybe the middle of the region has different access frequency */
 872         if (last_nr_regions == nr_regions &&
 873                         nr_regions < ctx->max_nr_regions / 3)
 874                 nr_subregions = 3;
 875
 876         damon_for_each_target(t, ctx)
 877                 damon_split_regions_of(ctx, t, nr_subregions);
 878
 879         last_nr_regions = nr_regions;
 880 }
 881
 882 /*
 883  * Check whether it is time to check and apply the target monitoring regions
 884  *
 885  * Returns true if it is.
 886  */
 887 static bool kdamond_need_update_primitive(struct damon_ctx *ctx)
 888 {
 889         return damon_check_reset_time_interval(&ctx->last_primitive_update,
 890                         ctx->primitive_update_interval);
 891 }
 892
 893 /*
 894  * Check whether current monitoring should be stopped
 895  *
 896  * The monitoring is stopped when either the user requested to stop, or all
 897  * monitoring targets are invalid.
 898  *
 899  * Returns true if need to stop current monitoring.
 900  */
 901 static bool kdamond_need_stop(struct damon_ctx *ctx)
 902 {
 903         struct damon_target *t;
 904
 905         if (kthread_should_stop())
 906                 return true;
 907
 908         if (!ctx->primitive.target_valid)
 909                 return false;
 910
 911         damon_for_each_target(t, ctx) {
 912                 if (ctx->primitive.target_valid(t))
 913                         return false;
 914         }
 915
 916         return true;
 917 }
 918
 919 static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric)
 920 {
 921         struct sysinfo i;
 922
 923         switch (metric) {
 924         case DAMOS_WMARK_FREE_MEM_RATE:
 925                 si_meminfo(&i);
 926                 return i.freeram * 1000 / i.totalram;
 927         default:
 928                 break;
 929         }
 930         return -EINVAL;
 931 }
 932
 933 /*
 934  * Returns zero if the scheme is active.  Else, returns time to wait for next
 935  * watermark check in micro-seconds.
 936  */
 937 static unsigned long damos_wmark_wait_us(struct damos *scheme)
 938 {
 939         unsigned long metric;
 940
 941         if (scheme->wmarks.metric == DAMOS_WMARK_NONE)
 942                 return 0;
 943
 944         metric = damos_wmark_metric_value(scheme->wmarks.metric);
 945         /* higher than high watermark or lower than low watermark */
 946         if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) {
 947                 if (scheme->wmarks.activated)
 948                         pr_debug("deactivate a scheme (%d) for %s wmark\n",
 949                                         scheme->action,
 950                                         metric > scheme->wmarks.high ?
 951                                         "high" : "low");
 952                 scheme->wmarks.activated = false;
 953                 return scheme->wmarks.interval;
 954         }
 955
 956         /* inactive and higher than middle watermark */
 957         if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) &&
 958                         !scheme->wmarks.activated)
 959                 return scheme->wmarks.interval;
 960
 961         if (!scheme->wmarks.activated)
 962                 pr_debug("activate a scheme (%d)\n", scheme->action);
 963         scheme->wmarks.activated = true;
 964         return 0;
 965 }
 966
 967 static void kdamond_usleep(unsigned long usecs)
 968 {
 969         /* See Documentation/timers/timers-howto.rst for the thresholds */
 970         if (usecs > 20 * USEC_PER_MSEC)
 971                 schedule_timeout_idle(usecs_to_jiffies(usecs));
 972         else
 973                 usleep_idle_range(usecs, usecs + 1);
 974 }
 975
 976 /* Returns negative error code if it's not activated but should return */
 977 static int kdamond_wait_activation(struct damon_ctx *ctx)
 978 {
 979         struct damos *s;
 980         unsigned long wait_time;
 981         unsigned long min_wait_time = 0;
 982
 983         while (!kdamond_need_stop(ctx)) {
 984                 damon_for_each_scheme(s, ctx) {
 985                         wait_time = damos_wmark_wait_us(s);
 986                         if (!min_wait_time || wait_time < min_wait_time)
 987                                 min_wait_time = wait_time;
 988                 }
 989                 if (!min_wait_time)
 990                         return 0;
 991
 992                 kdamond_usleep(min_wait_time);
 993         }
 994         return -EBUSY;
 995 }
 996
 997 /*
 998  * The monitoring daemon that runs as a kernel thread
 999  */
1000 static int kdamond_fn(void *data)
1001 {
1002         struct damon_ctx *ctx = (struct damon_ctx *)data;
1003         struct damon_target *t;
1004         struct damon_region *r, *next;
1005         unsigned int max_nr_accesses = 0;
1006         unsigned long sz_limit = 0;
1007         bool done = false;
1008
1009         pr_debug("kdamond (%d) starts\n", current->pid);
1010
1011         if (ctx->primitive.init)
1012                 ctx->primitive.init(ctx);
1013         if (ctx->callback.before_start && ctx->callback.before_start(ctx))
1014                 done = true;
1015
1016         sz_limit = damon_region_sz_limit(ctx);
1017
1018         while (!kdamond_need_stop(ctx) && !done) {
1019                 if (kdamond_wait_activation(ctx))
1020                         continue;
1021
1022                 if (ctx->primitive.prepare_access_checks)
1023                         ctx->primitive.prepare_access_checks(ctx);
1024                 if (ctx->callback.after_sampling &&
1025                                 ctx->callback.after_sampling(ctx))
1026                         done = true;
1027
1028                 kdamond_usleep(ctx->sample_interval);
1029
1030                 if (ctx->primitive.check_accesses)
1031                         max_nr_accesses = ctx->primitive.check_accesses(ctx);
1032
1033                 if (kdamond_aggregate_interval_passed(ctx)) {
1034                         kdamond_merge_regions(ctx,
1035                                         max_nr_accesses / 10,
1036                                         sz_limit);
1037                         if (ctx->callback.after_aggregation &&
1038                                         ctx->callback.after_aggregation(ctx))
1039                                 done = true;
1040                         kdamond_apply_schemes(ctx);
1041                         kdamond_reset_aggregated(ctx);
1042                         kdamond_split_regions(ctx);
1043                         if (ctx->primitive.reset_aggregated)
1044                                 ctx->primitive.reset_aggregated(ctx);
1045                 }
1046
1047                 if (kdamond_need_update_primitive(ctx)) {
1048                         if (ctx->primitive.update)
1049                                 ctx->primitive.update(ctx);
1050                         sz_limit = damon_region_sz_limit(ctx);
1051                 }
1052         }
1053         damon_for_each_target(t, ctx) {
1054                 damon_for_each_region_safe(r, next, t)
1055                         damon_destroy_region(r, t);
1056         }
1057
1058         if (ctx->callback.before_terminate)
1059                 ctx->callback.before_terminate(ctx);
1060         if (ctx->primitive.cleanup)
1061                 ctx->primitive.cleanup(ctx);
1062
1063         pr_debug("kdamond (%d) finishes\n", current->pid);
1064         mutex_lock(&ctx->kdamond_lock);
1065         ctx->kdamond = NULL;
1066         mutex_unlock(&ctx->kdamond_lock);
1067
1068         mutex_lock(&damon_lock);
1069         nr_running_ctxs--;
1070         mutex_unlock(&damon_lock);
1071
1072         return 0;
1073 }
1074
1075 #include "core-test.h"