audit: Provide helper for dropping mark's chunk reference
[linux-2.6-microblaze.git] / kernel / audit_tree.c
1 // SPDX-License-Identifier: GPL-2.0
2 #include "audit.h"
3 #include <linux/fsnotify_backend.h>
4 #include <linux/namei.h>
5 #include <linux/mount.h>
6 #include <linux/kthread.h>
7 #include <linux/refcount.h>
8 #include <linux/slab.h>
9
/*
 * Forward declarations: struct audit_tree holds a pointer to its root
 * struct audit_chunk, and each chunk's owner slots point back at trees.
 */
10 struct audit_tree;
11 struct audit_chunk;
12
/*
 * One audited directory tree, identified by its path string.
 *
 * Reference counted through .count; put_tree() frees the object with
 * kfree_rcu() when the count drops to zero.
 */
13 struct audit_tree {
14         refcount_t count;               /* set to 1 by alloc_tree() */
15         int goner;                      /* set to 1 once the tree is being torn down */
16         struct audit_chunk *root;       /* chunk of the first tagged inode, or NULL */
17         struct list_head chunks;        /* anchors chunk.owners[].list */
18         struct list_head rules;         /* anchors rule.rlist */
19         struct list_head list;          /* entry in tree_list, prune_list or a kill list */
20         struct list_head same_root;     /* entry in the root chunk's .trees list */
21         struct rcu_head head;           /* used by kfree_rcu() in put_tree() */
22         char pathname[];                /* NUL-terminated copy of the tree's path */
23 };
24
/*
 * Tagging record carrying an embedded fsnotify mark and one owner slot per
 * tree that tags the watched object. A chunk is never resized in place:
 * tagging and untagging allocate a new chunk and swap it in.
 */
25 struct audit_chunk {
26         struct list_head hash;          /* entry in a chunk_hash_heads[] bucket */
27         unsigned long key;              /* lookup key, see inode_to_key() */
28         struct fsnotify_mark mark;      /* embedded fsnotify mark */
29         struct list_head trees;         /* with root here */
30         int dead;                       /* set to 1 when the chunk is replaced or torn down */
31         int count;                      /* number of entries in owners[] */
32         atomic_long_t refs;             /* set to 1 by alloc_chunk(); chunk freed at 0 */
33         struct rcu_head head;           /* used by audit_mark_put_chunk() */
34         struct node {
35                 struct list_head list;  /* entry in owner->chunks */
36                 struct audit_tree *owner;       /* tagging tree; NULL after a fallback untag */
37                 unsigned index;         /* index; upper bit indicates 'will prune' */
38         } owners[];
39 };
40
/* Trees that currently have rules; linked through audit_tree.list. */
41 static LIST_HEAD(tree_list);
/* Trees queued for prune_tree_thread(); linked through audit_tree.list. */
42 static LIST_HEAD(prune_list);
/* Pruning kernel thread, NULL until audit_launch_prune() has started it. */
43 static struct task_struct *prune_thread;
44
45 /*
46  * One struct chunk is attached to each inode of interest.
47  * We replace struct chunk on tagging/untagging.
48  * Rules have pointer to struct audit_tree.
49  * Rules have struct list_head rlist forming a list of rules over
50  * the same tree.
51  * References to struct chunk are collected at audit_inode{,_child}()
52  * time and used in AUDIT_TREE rule matching.
53  * These references are dropped at the same time we are calling
54  * audit_free_names(), etc.
55  *
56  * Cyclic lists galore:
57  * tree.chunks anchors chunk.owners[].list                      hash_lock
58  * tree.rules anchors rule.rlist                                audit_filter_mutex
59  * chunk.trees anchors tree.same_root                           hash_lock
60  * chunk.hash is a hash with middle bits of watch.inode as
61  * a hash function.                                             RCU, hash_lock
62  *
63  * tree is refcounted; one reference for "some rules on rules_list refer to
64  * it", one for each chunk with pointer to it.
65  *
66  * chunk is refcounted by embedded fsnotify_mark + .refs (non-zero refcount
67  * of watch contributes 1 to .refs).
68  *
69  * node.index allows to get from node.list to containing chunk.
70  * MSB of that sucker is stolen to mark taggings that we might have to
71  * revert - several operations have very unpleasant cleanup logic and
72  * that makes a difference.  Some.
73  */
74
/*
 * fsnotify group that every chunk's mark is initialised with (see
 * alloc_chunk()). Its mark_mutex is taken around mark attach/detach in the
 * tagging code below. The assignment of this pointer is not in this part
 * of the file.
 */
75 static struct fsnotify_group *audit_tree_group;
76
77 static struct audit_tree *alloc_tree(const char *s)
78 {
79         struct audit_tree *tree;
80
81         tree = kmalloc(sizeof(struct audit_tree) + strlen(s) + 1, GFP_KERNEL);
82         if (tree) {
83                 refcount_set(&tree->count, 1);
84                 tree->goner = 0;
85                 INIT_LIST_HEAD(&tree->chunks);
86                 INIT_LIST_HEAD(&tree->rules);
87                 INIT_LIST_HEAD(&tree->list);
88                 INIT_LIST_HEAD(&tree->same_root);
89                 tree->root = NULL;
90                 strcpy(tree->pathname, s);
91         }
92         return tree;
93 }
94
95 static inline void get_tree(struct audit_tree *tree)
96 {
97         refcount_inc(&tree->count);
98 }
99
100 static inline void put_tree(struct audit_tree *tree)
101 {
102         if (refcount_dec_and_test(&tree->count))
103                 kfree_rcu(tree, head);
104 }
105
106 /* to avoid bringing the entire thing in audit.h */
107 const char *audit_tree_path(struct audit_tree *tree)
108 {
109         return tree->pathname;
110 }
111
112 static void free_chunk(struct audit_chunk *chunk)
113 {
114         int i;
115
116         for (i = 0; i < chunk->count; i++) {
117                 if (chunk->owners[i].owner)
118                         put_tree(chunk->owners[i].owner);
119         }
120         kfree(chunk);
121 }
122
123 void audit_put_chunk(struct audit_chunk *chunk)
124 {
125         if (atomic_long_dec_and_test(&chunk->refs))
126                 free_chunk(chunk);
127 }
128
129 static void __put_chunk(struct rcu_head *rcu)
130 {
131         struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
132         audit_put_chunk(chunk);
133 }
134
135 /*
136  * Drop reference to the chunk that was held by the mark. This is the reference
137  * that gets dropped after we've removed the chunk from the hash table and we
138  * use it to make sure chunk cannot be freed before RCU grace period expires.
139  */
140 static void audit_mark_put_chunk(struct audit_chunk *chunk)
141 {
142         call_rcu(&chunk->head, __put_chunk);
143 }
144
145 static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
146 {
147         struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
148         audit_mark_put_chunk(chunk);
149 }
150
151 static struct audit_chunk *alloc_chunk(int count)
152 {
153         struct audit_chunk *chunk;
154         size_t size;
155         int i;
156
157         size = offsetof(struct audit_chunk, owners) + count * sizeof(struct node);
158         chunk = kzalloc(size, GFP_KERNEL);
159         if (!chunk)
160                 return NULL;
161
162         INIT_LIST_HEAD(&chunk->hash);
163         INIT_LIST_HEAD(&chunk->trees);
164         chunk->count = count;
165         atomic_long_set(&chunk->refs, 1);
166         for (i = 0; i < count; i++) {
167                 INIT_LIST_HEAD(&chunk->owners[i].list);
168                 chunk->owners[i].index = i;
169         }
170         fsnotify_init_mark(&chunk->mark, audit_tree_group);
171         chunk->mark.mask = FS_IN_IGNORED;
172         return chunk;
173 }
174
/* Number of buckets in the chunk hash table. */
175 enum {HASH_SIZE = 128};
/* Hash buckets; chunks are linked in through audit_chunk.hash. */
176 static struct list_head chunk_hash_heads[HASH_SIZE];
/* Spinlock taken around updates of the hash and of the tree/chunk lists. */
177 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(hash_lock);
178
179 /* Function to return search key in our hash from inode. */
180 static unsigned long inode_to_key(const struct inode *inode)
181 {
182         /* Use address pointed to by connector->obj as the key */
183         return (unsigned long)&inode->i_fsnotify_marks;
184 }
185
186 static inline struct list_head *chunk_hash(unsigned long key)
187 {
188         unsigned long n = key / L1_CACHE_BYTES;
189         return chunk_hash_heads + n % HASH_SIZE;
190 }
191
192 /* hash_lock & entry->group->mark_mutex is held by caller */
193 static void insert_hash(struct audit_chunk *chunk)
194 {
195         struct list_head *list;
196
197         /*
198          * Make sure chunk is fully initialized before making it visible in the
199          * hash. Pairs with a data dependency barrier in READ_ONCE() in
200          * audit_tree_lookup().
201          */
202         smp_wmb();
203         WARN_ON_ONCE(!chunk->key);
204         list = chunk_hash(chunk->key);
205         list_add_rcu(&chunk->hash, list);
206 }
207
208 /* called under rcu_read_lock */
209 struct audit_chunk *audit_tree_lookup(const struct inode *inode)
210 {
211         unsigned long key = inode_to_key(inode);
212         struct list_head *list = chunk_hash(key);
213         struct audit_chunk *p;
214
215         list_for_each_entry_rcu(p, list, hash) {
216                 /*
217                  * We use a data dependency barrier in READ_ONCE() to make sure
218                  * the chunk we see is fully initialized.
219                  */
220                 if (READ_ONCE(p->key) == key) {
221                         atomic_long_inc(&p->refs);
222                         return p;
223                 }
224         }
225         return NULL;
226 }
227
228 bool audit_tree_match(struct audit_chunk *chunk, struct audit_tree *tree)
229 {
230         int n;
231         for (n = 0; n < chunk->count; n++)
232                 if (chunk->owners[n].owner == tree)
233                         return true;
234         return false;
235 }
236
237 /* tagging and untagging inodes with trees */
238
239 static struct audit_chunk *find_chunk(struct node *p)
240 {
241         int index = p->index & ~(1U<<31);
242         p -= index;
243         return container_of(p, struct audit_chunk, owners[0]);
244 }
245
/*
 * Make @new take over from @old: copy the key, move the list of trees
 * rooted at @old, transfer every owner slot except @skip, and finally swap
 * @new into the hash in @old's place.
 *
 * @new:  replacement chunk, allocated by the caller
 * @old:  chunk being replaced
 * @skip: slot of @old that is left out, or NULL to transfer every slot
 *
 * Both callers visible in this file (untag_chunk(), tag_chunk()) call this
 * with hash_lock held.
 */
246 static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
247                           struct node *skip)
248 {
249         struct audit_tree *owner;
250         int i, j;
251
252         new->key = old->key;
253         list_splice_init(&old->trees, &new->trees);
            /* every tree now on new->trees gets new as its root */
254         list_for_each_entry(owner, &new->trees, same_root)
255                 owner->root = new;
            /* j walks the slots of old, i is the destination slot in new */
256         for (i = j = 0; j < old->count; i++, j++) {
257                 if (&old->owners[j] == skip) {
                            /* cancel the i++ of the loop header: no slot of new is used */
258                         i--;
259                         continue;
260                 }
261                 owner = old->owners[j].owner;
262                 new->owners[i].owner = owner;
                    /* rebase the position from j to i; the top flag bit is carried over */
263                 new->owners[i].index = old->owners[j].index - j + i;
264                 if (!owner) /* result of earlier fallback */
265                         continue;
                    /* the new slot holds its own tree reference (free_chunk() drops one per owner) */
266                 get_tree(owner);
267                 list_replace_init(&old->owners[j].list, &new->owners[i].list);
268         }
269         /*
270          * Make sure chunk is fully initialized before making it visible in the
271          * hash. Pairs with a data dependency barrier in READ_ONCE() in
272          * audit_tree_lookup().
273          */
274         smp_wmb();
275         list_replace_rcu(&old->hash, &new->hash);
276 }
277
/*
 * Remove the tagging described by owner slot @p from its chunk.
 *
 * Locking: entered with hash_lock held. The lock is dropped almost
 * immediately and is held again when the function returns (see the "out"
 * label), so callers can keep iterating under hash_lock.
 *
 * Outcomes visible in the code:
 *  - chunk already dead or its mark no longer attached: nothing is changed;
 *  - the chunk had a single slot: it is unhashed and its mark is detached
 *    and freed;
 *  - otherwise a chunk with one slot fewer is allocated, attached to the
 *    same object and swapped in with replace_chunk();
 *  - if that allocation or attach fails ("Fallback"): the slot is cleared in
 *    place (owner set to NULL, its tree reference dropped) and the old
 *    chunk stays hashed.
 */
278 static void untag_chunk(struct node *p)
279 {
280         struct audit_chunk *chunk = find_chunk(p);
281         struct fsnotify_mark *entry = &chunk->mark;
282         struct audit_chunk *new = NULL;
283         struct audit_tree *owner;
284         int size = chunk->count - 1;
285
            /* take a mark reference before hash_lock is dropped; released at "out" */
286         fsnotify_get_mark(entry);
287
288         spin_unlock(&hash_lock);
289
            /* size == 0 means the chunk disappears entirely; no replacement needed */
290         if (size)
291                 new = alloc_chunk(size);
292
293         mutex_lock(&entry->group->mark_mutex);
294         /*
295          * mark_mutex protects mark from getting detached and thus also from
296          * mark->connector->obj getting NULL.
297          */
298         if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
299                 mutex_unlock(&entry->group->mark_mutex);
                    /* the replacement was never used: drop its initial reference */
300                 if (new)
301                         fsnotify_put_mark(&new->mark);
302                 goto out;
303         }
304
305         owner = p->owner;
306
            /* last slot: remove the chunk altogether */
307         if (!size) {
308                 chunk->dead = 1;
309                 spin_lock(&hash_lock);
310                 list_del_init(&chunk->trees);
311                 if (owner->root == chunk)
312                         owner->root = NULL;
313                 list_del_init(&p->list);
314                 list_del_rcu(&chunk->hash);
315                 spin_unlock(&hash_lock);
316                 fsnotify_detach_mark(entry);
317                 mutex_unlock(&entry->group->mark_mutex);
318                 fsnotify_free_mark(entry);
319                 goto out;
320         }
321
            /* allocation of the smaller chunk failed earlier */
322         if (!new)
323                 goto Fallback;
324
325         if (fsnotify_add_mark_locked(&new->mark, entry->connector->obj,
326                                      FSNOTIFY_OBJ_TYPE_INODE, 1)) {
327                 fsnotify_put_mark(&new->mark);
328                 goto Fallback;
329         }
330
331         chunk->dead = 1;
332         spin_lock(&hash_lock);
333         if (owner->root == chunk) {
334                 list_del_init(&owner->same_root);
335                 owner->root = NULL;
336         }
337         list_del_init(&p->list);
338         /*
339          * This has to go last when updating chunk as once replace_chunk() is
340          * called, new RCU readers can see the new chunk.
341          */
342         replace_chunk(new, chunk, p);
343         spin_unlock(&hash_lock);
344         fsnotify_detach_mark(entry);
345         mutex_unlock(&entry->group->mark_mutex);
346         fsnotify_free_mark(entry);
347         fsnotify_put_mark(&new->mark);  /* drop initial reference */
348         goto out;
349
350 Fallback:
351         // do the best we can
352         spin_lock(&hash_lock);
353         if (owner->root == chunk) {
354                 list_del_init(&owner->same_root);
355                 owner->root = NULL;
356         }
357         list_del_init(&p->list);
            /* leave an empty slot behind; replace_chunk() and free_chunk() skip NULL owners */
358         p->owner = NULL;
359         put_tree(owner);
360         spin_unlock(&hash_lock);
361         mutex_unlock(&entry->group->mark_mutex);
362 out:
            /* pairs with fsnotify_get_mark() at the top */
363         fsnotify_put_mark(entry);
            /* re-take the lock the caller expects to still hold */
364         spin_lock(&hash_lock);
365 }
366
/*
 * Create a one-slot chunk for @inode owned by @tree and hash it.
 *
 * Locking: audit_tree_group->mark_mutex is held on entry and released on
 * every return path.
 *
 * Returns 0 on success and also when @tree turned out to be a goner (the
 * fresh chunk is torn down again in that case), -ENOMEM if the chunk could
 * not be allocated, -ENOSPC if adding the mark to the inode failed.
 */
367 /* Call with group->mark_mutex held, releases it */
368 static int create_chunk(struct inode *inode, struct audit_tree *tree)
369 {
370         struct fsnotify_mark *entry;
371         struct audit_chunk *chunk = alloc_chunk(1);
372
373         if (!chunk) {
374                 mutex_unlock(&audit_tree_group->mark_mutex);
375                 return -ENOMEM;
376         }
377
378         entry = &chunk->mark;
379         if (fsnotify_add_inode_mark_locked(entry, inode, 0)) {
380                 mutex_unlock(&audit_tree_group->mark_mutex);
                    /* drop the initial reference on the mark that was never attached */
381                 fsnotify_put_mark(entry);
382                 return -ENOSPC;
383         }
384
385         spin_lock(&hash_lock);
            /* the tree is being torn down: undo the attach and report success */
386         if (tree->goner) {
387                 spin_unlock(&hash_lock);
388                 chunk->dead = 1;
389                 fsnotify_detach_mark(entry);
390                 mutex_unlock(&audit_tree_group->mark_mutex);
391                 fsnotify_free_mark(entry);
392                 fsnotify_put_mark(entry);
393                 return 0;
394         }
            /* slot 0 with the top bit set: tagging is provisional until the bit is cleared */
395         chunk->owners[0].index = (1U << 31);
396         chunk->owners[0].owner = tree;
            /* the slot holds a tree reference */
397         get_tree(tree);
398         list_add(&chunk->owners[0].list, &tree->chunks);
399         if (!tree->root) {
400                 tree->root = chunk;
401                 list_add(&tree->same_root, &chunk->trees);
402         }
403         chunk->key = inode_to_key(inode);
404         /*
405          * Inserting into the hash table has to go last as once we do that RCU
406          * readers can see the chunk.
407          */
408         insert_hash(chunk);
409         spin_unlock(&hash_lock);
410         mutex_unlock(&audit_tree_group->mark_mutex);
411         fsnotify_put_mark(entry);       /* drop initial reference */
412         return 0;
413 }
414
/*
 * Tag @inode with @tree.
 *
 * If the inode has no mark in audit_tree_group yet, the work is delegated
 * to create_chunk(). Otherwise, unless @tree already owns a slot in the
 * existing chunk, a chunk with one more slot is allocated, attached to the
 * same object and swapped in for the old one; the new tagging goes into the
 * last slot with the top index bit set.
 *
 * Locking: takes audit_tree_group->mark_mutex itself and releases it on
 * every path (create_chunk() releases it on the delegated path).
 *
 * Returns 0 on success, when already tagged, or when @tree is a goner;
 * -ENOMEM if the new chunk cannot be allocated; -ENOENT if the old mark is
 * no longer attached; -ENOSPC if attaching the new mark fails; or whatever
 * create_chunk() returns.
 */
415 /* the first tagged inode becomes root of tree */
416 static int tag_chunk(struct inode *inode, struct audit_tree *tree)
417 {
418         struct fsnotify_mark *old_entry, *chunk_entry;
419         struct audit_chunk *chunk, *old;
420         struct node *p;
421         int n;
422
423         mutex_lock(&audit_tree_group->mark_mutex);
424         old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks,
425                                        audit_tree_group);
426         if (!old_entry)
427                 return create_chunk(inode, tree);
428
429         old = container_of(old_entry, struct audit_chunk, mark);
430
431         /* are we already there? */
432         spin_lock(&hash_lock);
433         for (n = 0; n < old->count; n++) {
434                 if (old->owners[n].owner == tree) {
435                         spin_unlock(&hash_lock);
436                         mutex_unlock(&audit_tree_group->mark_mutex);
437                         fsnotify_put_mark(old_entry);
438                         return 0;
439                 }
440         }
441         spin_unlock(&hash_lock);
442
            /* room for every existing owner plus the new one */
443         chunk = alloc_chunk(old->count + 1);
444         if (!chunk) {
445                 mutex_unlock(&audit_tree_group->mark_mutex);
446                 fsnotify_put_mark(old_entry);
447                 return -ENOMEM;
448         }
449
450         chunk_entry = &chunk->mark;
451
452         /*
453          * mark_mutex protects mark from getting detached and thus also from
454          * mark->connector->obj getting NULL.
455          */
456         if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
457                 /* old_entry is being shot, lets just lie */
458                 mutex_unlock(&audit_tree_group->mark_mutex);
459                 fsnotify_put_mark(old_entry);
460                 fsnotify_put_mark(&chunk->mark);
461                 return -ENOENT;
462         }
463
464         if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj,
465                                      FSNOTIFY_OBJ_TYPE_INODE, 1)) {
466                 mutex_unlock(&audit_tree_group->mark_mutex);
467                 fsnotify_put_mark(chunk_entry);
468                 fsnotify_put_mark(old_entry);
469                 return -ENOSPC;
470         }
471
472         spin_lock(&hash_lock);
            /* the tree is being torn down: undo the attach of the new chunk, keep the old one */
473         if (tree->goner) {
474                 spin_unlock(&hash_lock);
475                 chunk->dead = 1;
476                 fsnotify_detach_mark(chunk_entry);
477                 mutex_unlock(&audit_tree_group->mark_mutex);
478                 fsnotify_free_mark(chunk_entry);
479                 fsnotify_put_mark(chunk_entry);
480                 fsnotify_put_mark(old_entry);
481                 return 0;
482         }
            /* the extra (last) slot receives the new tagging, flagged with the top bit */
483         p = &chunk->owners[chunk->count - 1];
484         p->index = (chunk->count - 1) | (1U<<31);
485         p->owner = tree;
486         get_tree(tree);
487         list_add(&p->list, &tree->chunks);
488         old->dead = 1;
489         if (!tree->root) {
490                 tree->root = chunk;
491                 list_add(&tree->same_root, &chunk->trees);
492         }
493         /*
494          * This has to go last when updating chunk as once replace_chunk() is
495          * called, new RCU readers can see the new chunk.
496          */
497         replace_chunk(chunk, old, NULL);
498         spin_unlock(&hash_lock);
499         fsnotify_detach_mark(old_entry);
500         mutex_unlock(&audit_tree_group->mark_mutex);
501         fsnotify_free_mark(old_entry);
502         fsnotify_put_mark(chunk_entry); /* drop initial reference */
503         fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
504         return 0;
505 }
506
507 static void audit_tree_log_remove_rule(struct audit_krule *rule)
508 {
509         struct audit_buffer *ab;
510
511         if (!audit_enabled)
512                 return;
513         ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
514         if (unlikely(!ab))
515                 return;
516         audit_log_format(ab, "op=remove_rule");
517         audit_log_format(ab, " dir=");
518         audit_log_untrustedstring(ab, rule->tree->pathname);
519         audit_log_key(ab, rule->filterkey);
520         audit_log_format(ab, " list=%d res=1", rule->listnr);
521         audit_log_end(ab);
522 }
523
524 static void kill_rules(struct audit_tree *tree)
525 {
526         struct audit_krule *rule, *next;
527         struct audit_entry *entry;
528
529         list_for_each_entry_safe(rule, next, &tree->rules, rlist) {
530                 entry = container_of(rule, struct audit_entry, rule);
531
532                 list_del_init(&rule->rlist);
533                 if (rule->tree) {
534                         /* not a half-baked one */
535                         audit_tree_log_remove_rule(rule);
536                         if (entry->rule.exe)
537                                 audit_remove_mark(entry->rule.exe);
538                         rule->tree = NULL;
539                         list_del_rcu(&entry->list);
540                         list_del(&entry->rule.list);
541                         call_rcu(&entry->rcu, audit_free_rule_rcu);
542                 }
543         }
544 }
545
546 /*
547  * finish killing struct audit_tree
548  */
549 static void prune_one(struct audit_tree *victim)
550 {
551         spin_lock(&hash_lock);
552         while (!list_empty(&victim->chunks)) {
553                 struct node *p;
554
555                 p = list_entry(victim->chunks.next, struct node, list);
556
557                 untag_chunk(p);
558         }
559         spin_unlock(&hash_lock);
560         put_tree(victim);
561 }
562
/*
 * Untag every node of @tree whose index has the top bit set.
 *
 * Does nothing if the tree is already a goner. If the tree ends up without
 * a root it is turned into a goner here: its rules are killed, it is taken
 * off its list and prune_one() is called on it.
 *
 * Locking: takes and releases hash_lock itself; audit_filter_mutex is taken
 * only around the rule teardown.
 */
563 /* trim the uncommitted chunks from tree */
564
565 static void trim_marked(struct audit_tree *tree)
566 {
567         struct list_head *p, *q;
568         spin_lock(&hash_lock);
569         if (tree->goner) {
570                 spin_unlock(&hash_lock);
571                 return;
572         }
573         /* reorder */
            /* move every marked node to the front so the marked ones form a prefix */
574         for (p = tree->chunks.next; p != &tree->chunks; p = q) {
575                 struct node *node = list_entry(p, struct node, list);
                    /* remember the successor before p is possibly moved */
576                 q = p->next;
577                 if (node->index & (1U<<31)) {
578                         list_del_init(p);
579                         list_add(p, &tree->chunks);
580                 }
581         }
582
            /* untag_chunk() drops and re-takes hash_lock, so re-read the head each time */
583         while (!list_empty(&tree->chunks)) {
584                 struct node *node;
585
586                 node = list_entry(tree->chunks.next, struct node, list);
587
588                 /* have we run out of marked? */
589                 if (!(node->index & (1U<<31)))
590                         break;
591
592                 untag_chunk(node);
593         }
            /* goner is re-checked because hash_lock was dropped inside untag_chunk() */
594         if (!tree->root && !tree->goner) {
595                 tree->goner = 1;
596                 spin_unlock(&hash_lock);
597                 mutex_lock(&audit_filter_mutex);
598                 kill_rules(tree);
599                 list_del_init(&tree->list);
600                 mutex_unlock(&audit_filter_mutex);
601                 prune_one(tree);
602         } else {
603                 spin_unlock(&hash_lock);
604         }
605 }
606
/* Forward declaration; the definition later in this file wakes prune_thread. */
607 static void audit_schedule_prune(void);
608
609 /* called with audit_filter_mutex */
610 int audit_remove_tree_rule(struct audit_krule *rule)
611 {
612         struct audit_tree *tree;
613         tree = rule->tree;
614         if (tree) {
615                 spin_lock(&hash_lock);
616                 list_del_init(&rule->rlist);
617                 if (list_empty(&tree->rules) && !tree->goner) {
618                         tree->root = NULL;
619                         list_del_init(&tree->same_root);
620                         tree->goner = 1;
621                         list_move(&tree->list, &prune_list);
622                         rule->tree = NULL;
623                         spin_unlock(&hash_lock);
624                         audit_schedule_prune();
625                         return 1;
626                 }
627                 rule->tree = NULL;
628                 spin_unlock(&hash_lock);
629                 return 1;
630         }
631         return 0;
632 }
633
634 static int compare_root(struct vfsmount *mnt, void *arg)
635 {
636         return inode_to_key(d_backing_inode(mnt->mnt_root)) ==
637                (unsigned long)arg;
638 }
639
/*
 * Walk all trees on tree_list and trim taggings that no longer correspond
 * to the root of a mount collected under the tree's path.
 *
 * For each tree the pathname is resolved and its mounts are collected; each
 * node of the tree is flagged (top index bit) unless compare_root() matches
 * its chunk key against one of the collected mounts, and trim_marked() then
 * untags the flagged nodes. Trees whose path cannot be resolved or whose
 * mounts cannot be collected are skipped.
 *
 * An on-stack list head ("cursor") is kept in tree_list to remember the
 * position while audit_filter_mutex is dropped for the per-tree work.
 */
640 void audit_trim_trees(void)
641 {
642         struct list_head cursor;
643
644         mutex_lock(&audit_filter_mutex);
645         list_add(&cursor, &tree_list);
646         while (cursor.next != &tree_list) {
647                 struct audit_tree *tree;
648                 struct path path;
649                 struct vfsmount *root_mnt;
650                 struct node *node;
651                 int err;
652
653                 tree = container_of(cursor.next, struct audit_tree, list);
                    /* hold a tree reference for the time the mutex is dropped */
654                 get_tree(tree);
                    /* advance the cursor past this tree before dropping the mutex */
655                 list_del(&cursor);
656                 list_add(&cursor, &tree->list);
657                 mutex_unlock(&audit_filter_mutex);
658
659                 err = kern_path(tree->pathname, 0, &path);
660                 if (err)
661                         goto skip_it;
662
663                 root_mnt = collect_mounts(&path);
664                 path_put(&path);
665                 if (IS_ERR(root_mnt))
666                         goto skip_it;
667
668                 spin_lock(&hash_lock);
669                 list_for_each_entry(node, &tree->chunks, list) {
670                         struct audit_chunk *chunk = find_chunk(node);
671                         /* this could be NULL if the watch is dying elsewhere... */
                            /* flag the node first, unflag it if a mount root matches */
672                         node->index |= 1U<<31;
673                         if (iterate_mounts(compare_root,
674                                            (void *)(chunk->key),
675                                            root_mnt))
676                                 node->index &= ~(1U<<31);
677                 }
678                 spin_unlock(&hash_lock);
679                 trim_marked(tree);
680                 drop_collected_mounts(root_mnt);
681 skip_it:
682                 put_tree(tree);
683                 mutex_lock(&audit_filter_mutex);
684         }
685         list_del(&cursor);
686         mutex_unlock(&audit_filter_mutex);
687 }
688
689 int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op)
690 {
691
692         if (pathname[0] != '/' ||
693             rule->listnr != AUDIT_FILTER_EXIT ||
694             op != Audit_equal ||
695             rule->inode_f || rule->watch || rule->tree)
696                 return -EINVAL;
697         rule->tree = alloc_tree(pathname);
698         if (!rule->tree)
699                 return -ENOMEM;
700         return 0;
701 }
702
/* Non-static wrapper around put_tree(): drop one reference on @tree. */
void audit_put_tree(struct audit_tree *tree)
{
	struct audit_tree *victim = tree;

	put_tree(victim);
}
707
708 static int tag_mount(struct vfsmount *mnt, void *arg)
709 {
710         return tag_chunk(d_backing_inode(mnt->mnt_root), arg);
711 }
712
713 /*
714  * That gets run when evict_chunk() ends up needing to kill audit_tree.
715  * Runs from a separate thread.
716  */
717 static int prune_tree_thread(void *unused)
718 {
719         for (;;) {
720                 if (list_empty(&prune_list)) {
721                         set_current_state(TASK_INTERRUPTIBLE);
722                         schedule();
723                 }
724
725                 audit_ctl_lock();
726                 mutex_lock(&audit_filter_mutex);
727
728                 while (!list_empty(&prune_list)) {
729                         struct audit_tree *victim;
730
731                         victim = list_entry(prune_list.next,
732                                         struct audit_tree, list);
733                         list_del_init(&victim->list);
734
735                         mutex_unlock(&audit_filter_mutex);
736
737                         prune_one(victim);
738
739                         mutex_lock(&audit_filter_mutex);
740                 }
741
742                 mutex_unlock(&audit_filter_mutex);
743                 audit_ctl_unlock();
744         }
745         return 0;
746 }
747
748 static int audit_launch_prune(void)
749 {
750         if (prune_thread)
751                 return 0;
752         prune_thread = kthread_run(prune_tree_thread, NULL,
753                                 "audit_prune_tree");
754         if (IS_ERR(prune_thread)) {
755                 pr_err("cannot start thread audit_prune_tree");
756                 prune_thread = NULL;
757                 return -ENOMEM;
758         }
759         return 0;
760 }
761
/*
 * Attach @rule to the tree for the path stored in rule->tree (the "seed").
 *
 * If a tree with the same pathname is already on tree_list, the seed is
 * dropped and the rule joins the existing tree. Otherwise the seed becomes
 * a new tree: it is put on tree_list, the pruning thread is started if
 * needed, and the roots of all mounts collected under the path are tagged.
 *
 * Locking: called with audit_filter_mutex held and returns with it held;
 * the mutex is dropped while the path is resolved and the mounts are
 * tagged.
 *
 * Returns 0 on success, -ENOENT if the rule was unlinked from the tree
 * while the mutex was dropped, or the error from audit_launch_prune(),
 * kern_path(), collect_mounts() or iterate_mounts().
 */
762 /* called with audit_filter_mutex */
763 int audit_add_tree_rule(struct audit_krule *rule)
764 {
765         struct audit_tree *seed = rule->tree, *tree;
766         struct path path;
767         struct vfsmount *mnt;
768         int err;
769
770         rule->tree = NULL;
771         list_for_each_entry(tree, &tree_list, list) {
772                 if (!strcmp(seed->pathname, tree->pathname)) {
                            /* reuse the existing tree; the seed is no longer needed */
773                         put_tree(seed);
774                         rule->tree = tree;
775                         list_add(&rule->rlist, &tree->rules);
776                         return 0;
777                 }
778         }
779         tree = seed;
780         list_add(&tree->list, &tree_list);
781         list_add(&rule->rlist, &tree->rules);
782         /* do not set rule->tree yet */
783         mutex_unlock(&audit_filter_mutex);
784
785         if (unlikely(!prune_thread)) {
786                 err = audit_launch_prune();
787                 if (err)
788                         goto Err;
789         }
790
791         err = kern_path(tree->pathname, 0, &path);
792         if (err)
793                 goto Err;
794         mnt = collect_mounts(&path);
795         path_put(&path);
796         if (IS_ERR(mnt)) {
797                 err = PTR_ERR(mnt);
798                 goto Err;
799         }
800
            /* extra reference for the tagging pass; dropped on each exit path below */
801         get_tree(tree);
802         err = iterate_mounts(tag_mount, tree, mnt);
803         drop_collected_mounts(mnt);
804
805         if (!err) {
806                 struct node *node;
                    /* commit: clear the top index bit on every tagging of this tree */
807                 spin_lock(&hash_lock);
808                 list_for_each_entry(node, &tree->chunks, list)
809                         node->index &= ~(1U<<31);
810                 spin_unlock(&hash_lock);
811         } else {
                    /* roll back the taggings that still carry the top index bit */
812                 trim_marked(tree);
813                 goto Err;
814         }
815
816         mutex_lock(&audit_filter_mutex);
            /* the rule was taken off the tree's rule list while the mutex was dropped */
817         if (list_empty(&rule->rlist)) {
818                 put_tree(tree);
819                 return -ENOENT;
820         }
821         rule->tree = tree;
822         put_tree(tree);
823
824         return 0;
825 Err:
826         mutex_lock(&audit_filter_mutex);
827         list_del_init(&tree->list);
828         list_del_init(&tree->rules);
829         put_tree(tree);
830         return err;
831 }
832
/*
 * Tag the mounts collected under path @new with every tree on tree_list
 * for which path_is_under() reports that @old lies under the tree's path.
 *
 * Two on-stack list heads are threaded into tree_list: "cursor" marks the
 * position of the first pass and "barrier" separates the trees that were
 * tagged successfully (moved to the front, before the barrier) from the
 * rest. The second pass walks the trees before the barrier and either
 * commits their taggings (clears the top index bit) or, if tagging failed
 * for some tree, rolls them back with trim_marked().
 *
 * audit_filter_mutex is taken here and dropped around the per-tree work.
 *
 * Returns the error from resolving @new or @old or from collecting the
 * mounts, otherwise the result of the failing iterate_mounts() call, or 0.
 */
833 int audit_tag_tree(char *old, char *new)
834 {
835         struct list_head cursor, barrier;
836         int failed = 0;
837         struct path path1, path2;
838         struct vfsmount *tagged;
839         int err;
840
841         err = kern_path(new, 0, &path2);
842         if (err)
843                 return err;
844         tagged = collect_mounts(&path2);
845         path_put(&path2);
846         if (IS_ERR(tagged))
847                 return PTR_ERR(tagged);
848
849         err = kern_path(old, 0, &path1);
850         if (err) {
851                 drop_collected_mounts(tagged);
852                 return err;
853         }
854
855         mutex_lock(&audit_filter_mutex);
            /* list order is now: tree_list head, barrier, cursor, trees... */
856         list_add(&barrier, &tree_list);
857         list_add(&cursor, &barrier);
858
859         while (cursor.next != &tree_list) {
860                 struct audit_tree *tree;
861                 int good_one = 0;
862
863                 tree = container_of(cursor.next, struct audit_tree, list);
864                 get_tree(tree);
                    /* advance the cursor past this tree before dropping the mutex */
865                 list_del(&cursor);
866                 list_add(&cursor, &tree->list);
867                 mutex_unlock(&audit_filter_mutex);
868
869                 err = kern_path(tree->pathname, 0, &path2);
870                 if (!err) {
871                         good_one = path_is_under(&path1, &path2);
872                         path_put(&path2);
873                 }
874
875                 if (!good_one) {
876                         put_tree(tree);
877                         mutex_lock(&audit_filter_mutex);
878                         continue;
879                 }
880
881                 failed = iterate_mounts(tag_mount, tree, tagged);
882                 if (failed) {
883                         put_tree(tree);
884                         mutex_lock(&audit_filter_mutex);
885                         break;
886                 }
887
888                 mutex_lock(&audit_filter_mutex);
889                 spin_lock(&hash_lock);
                    /* move the tagged tree to the front, i.e. before the barrier */
890                 if (!tree->goner) {
891                         list_del(&tree->list);
892                         list_add(&tree->list, &tree_list);
893                 }
894                 spin_unlock(&hash_lock);
895                 put_tree(tree);
896         }
897
            /* second pass: handle every tree that was moved in front of the barrier */
898         while (barrier.prev != &tree_list) {
899                 struct audit_tree *tree;
900
901                 tree = container_of(barrier.prev, struct audit_tree, list);
902                 get_tree(tree);
                    /* move the tree behind the barrier so the loop makes progress */
903                 list_del(&tree->list);
904                 list_add(&tree->list, &barrier);
905                 mutex_unlock(&audit_filter_mutex);
906
907                 if (!failed) {
908                         struct node *node;
                            /* commit: clear the top index bit on every tagging */
909                         spin_lock(&hash_lock);
910                         list_for_each_entry(node, &tree->chunks, list)
911                                 node->index &= ~(1U<<31);
912                         spin_unlock(&hash_lock);
913                 } else {
                            /* roll back the taggings that still carry the top index bit */
914                         trim_marked(tree);
915                 }
916
917                 put_tree(tree);
918                 mutex_lock(&audit_filter_mutex);
919         }
920         list_del(&barrier);
921         list_del(&cursor);
922         mutex_unlock(&audit_filter_mutex);
923         path_put(&path1);
924         drop_collected_mounts(tagged);
925         return failed;
926 }
927
928
929 static void audit_schedule_prune(void)
930 {
931         wake_up_process(prune_thread);
932 }
933
934 /*
935  * ... and that one is done if evict_chunk() decides to delay until the end
936  * of syscall.  Runs synchronously.
937  */
938 void audit_kill_trees(struct list_head *list)
939 {
940         audit_ctl_lock();
941         mutex_lock(&audit_filter_mutex);
942
943         while (!list_empty(list)) {
944                 struct audit_tree *victim;
945
946                 victim = list_entry(list->next, struct audit_tree, list);
947                 kill_rules(victim);
948                 list_del_init(&victim->list);
949
950                 mutex_unlock(&audit_filter_mutex);
951
952                 prune_one(victim);
953
954                 mutex_lock(&audit_filter_mutex);
955         }
956
957         mutex_unlock(&audit_filter_mutex);
958         audit_ctl_unlock();
959 }
960
961 /*
962  *  Here comes the stuff asynchronous to auditctl operations
963  */
964
965 static void evict_chunk(struct audit_chunk *chunk)
966 {
967         struct audit_tree *owner;
968         struct list_head *postponed = audit_killed_trees();
969         int need_prune = 0;
970         int n;
971
972         if (chunk->dead)
973                 return;
974
975         chunk->dead = 1;
976         mutex_lock(&audit_filter_mutex);
977         spin_lock(&hash_lock);
978         while (!list_empty(&chunk->trees)) {
979                 owner = list_entry(chunk->trees.next,
980                                    struct audit_tree, same_root);
981                 owner->goner = 1;
982                 owner->root = NULL;
983                 list_del_init(&owner->same_root);
984                 spin_unlock(&hash_lock);
985                 if (!postponed) {
986                         kill_rules(owner);
987                         list_move(&owner->list, &prune_list);
988                         need_prune = 1;
989                 } else {
990                         list_move(&owner->list, postponed);
991                 }
992                 spin_lock(&hash_lock);
993         }
994         list_del_rcu(&chunk->hash);
995         for (n = 0; n < chunk->count; n++)
996                 list_del_init(&chunk->owners[n].list);
997         spin_unlock(&hash_lock);
998         mutex_unlock(&audit_filter_mutex);
999         if (need_prune)
1000                 audit_schedule_prune();
1001 }
1002
1003 static int audit_tree_handle_event(struct fsnotify_group *group,
1004                                    struct inode *to_tell,
1005                                    u32 mask, const void *data, int data_type,
1006                                    const unsigned char *file_name, u32 cookie,
1007                                    struct fsnotify_iter_info *iter_info)
1008 {
1009         return 0;
1010 }
1011
1012 static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
1013 {
1014         struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
1015
1016         evict_chunk(chunk);
1017
1018         /*
1019          * We are guaranteed to have at least one reference to the mark from
1020          * either the inode or the caller of fsnotify_destroy_mark().
1021          */
1022         BUG_ON(refcount_read(&entry->refcnt) < 1);
1023 }
1024
1025 static const struct fsnotify_ops audit_tree_ops = {
1026         .handle_event = audit_tree_handle_event,
1027         .freeing_mark = audit_tree_freeing_mark,
1028         .free_mark = audit_tree_destroy_watch,
1029 };
1030
1031 static int __init audit_tree_init(void)
1032 {
1033         int i;
1034
1035         audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
1036         if (IS_ERR(audit_tree_group))
1037                 audit_panic("cannot initialize fsnotify group for rectree watches");
1038
1039         for (i = 0; i < HASH_SIZE; i++)
1040                 INIT_LIST_HEAD(&chunk_hash_heads[i]);
1041
1042         return 0;
1043 }
1044 __initcall(audit_tree_init);