afs: Make callback processing more efficient.
[linux-2.6-microblaze.git] / fs / afs / server.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* AFS server record management
3  *
4  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  */
7
8 #include <linux/sched.h>
9 #include <linux/slab.h>
10 #include "afs_fs.h"
11 #include "internal.h"
12 #include "protocol_yfs.h"
13
14 static unsigned afs_server_gc_delay = 10;       /* Server record timeout in seconds */
15 static atomic_t afs_server_debug_id;
16
17 static struct afs_server *afs_maybe_use_server(struct afs_server *,
18                                                enum afs_server_trace);
19 static void __afs_put_server(struct afs_net *, struct afs_server *);
20
21 /*
22  * Find a server by one of its addresses.
23  */
24 struct afs_server *afs_find_server(struct afs_net *net,
25                                    const struct sockaddr_rxrpc *srx)
26 {
27         const struct afs_addr_list *alist;
28         struct afs_server *server = NULL;
29         unsigned int i;
30         int seq = 0, diff;
31
32         rcu_read_lock();
33
34         do {
35                 if (server)
36                         afs_unuse_server_notime(net, server, afs_server_trace_put_find_rsq);
37                 server = NULL;
38                 read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
39
40                 if (srx->transport.family == AF_INET6) {
41                         const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
42                         hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
43                                 alist = rcu_dereference(server->addresses);
44                                 for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
45                                         b = &alist->addrs[i].transport.sin6;
46                                         diff = ((u16 __force)a->sin6_port -
47                                                 (u16 __force)b->sin6_port);
48                                         if (diff == 0)
49                                                 diff = memcmp(&a->sin6_addr,
50                                                               &b->sin6_addr,
51                                                               sizeof(struct in6_addr));
52                                         if (diff == 0)
53                                                 goto found;
54                                 }
55                         }
56                 } else {
57                         const struct sockaddr_in *a = &srx->transport.sin, *b;
58                         hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
59                                 alist = rcu_dereference(server->addresses);
60                                 for (i = 0; i < alist->nr_ipv4; i++) {
61                                         b = &alist->addrs[i].transport.sin;
62                                         diff = ((u16 __force)a->sin_port -
63                                                 (u16 __force)b->sin_port);
64                                         if (diff == 0)
65                                                 diff = ((u32 __force)a->sin_addr.s_addr -
66                                                         (u32 __force)b->sin_addr.s_addr);
67                                         if (diff == 0)
68                                                 goto found;
69                                 }
70                         }
71                 }
72
73                 server = NULL;
74                 continue;
75         found:
76                 server = afs_maybe_use_server(server, afs_server_trace_get_by_addr);
77
78         } while (need_seqretry(&net->fs_addr_lock, seq));
79
80         done_seqretry(&net->fs_addr_lock, seq);
81
82         rcu_read_unlock();
83         return server;
84 }
85
86 /*
87  * Look up a server by its UUID and mark it active.
88  */
89 struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
90 {
91         struct afs_server *server = NULL;
92         struct rb_node *p;
93         int diff, seq = 0;
94
95         _enter("%pU", uuid);
96
97         do {
98                 /* Unfortunately, rbtree walking doesn't give reliable results
99                  * under just the RCU read lock, so we have to check for
100                  * changes.
101                  */
102                 if (server)
103                         afs_unuse_server(net, server, afs_server_trace_put_uuid_rsq);
104                 server = NULL;
105
106                 read_seqbegin_or_lock(&net->fs_lock, &seq);
107
108                 p = net->fs_servers.rb_node;
109                 while (p) {
110                         server = rb_entry(p, struct afs_server, uuid_rb);
111
112                         diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
113                         if (diff < 0) {
114                                 p = p->rb_left;
115                         } else if (diff > 0) {
116                                 p = p->rb_right;
117                         } else {
118                                 afs_use_server(server, afs_server_trace_get_by_uuid);
119                                 break;
120                         }
121
122                         server = NULL;
123                 }
124         } while (need_seqretry(&net->fs_lock, seq));
125
126         done_seqretry(&net->fs_lock, seq);
127
128         _leave(" = %p", server);
129         return server;
130 }
131
132 /*
133  * Install a server record in the namespace tree
134  */
135 static struct afs_server *afs_install_server(struct afs_net *net,
136                                              struct afs_server *candidate)
137 {
138         const struct afs_addr_list *alist;
139         struct afs_server *server;
140         struct rb_node **pp, *p;
141         int diff;
142
143         _enter("%p", candidate);
144
145         write_seqlock(&net->fs_lock);
146
147         /* Firstly install the server in the UUID lookup tree */
148         pp = &net->fs_servers.rb_node;
149         p = NULL;
150         while (*pp) {
151                 p = *pp;
152                 _debug("- consider %p", p);
153                 server = rb_entry(p, struct afs_server, uuid_rb);
154                 diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
155                 if (diff < 0)
156                         pp = &(*pp)->rb_left;
157                 else if (diff > 0)
158                         pp = &(*pp)->rb_right;
159                 else
160                         goto exists;
161         }
162
163         server = candidate;
164         rb_link_node(&server->uuid_rb, p, pp);
165         rb_insert_color(&server->uuid_rb, &net->fs_servers);
166         hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
167
168         write_seqlock(&net->fs_addr_lock);
169         alist = rcu_dereference_protected(server->addresses,
170                                           lockdep_is_held(&net->fs_addr_lock.lock));
171
172         /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
173          * it in the IPv4 and/or IPv6 reverse-map lists.
174          *
175          * TODO: For speed we want to use something other than a flat list
176          * here; even sorting the list in terms of lowest address would help a
177          * bit, but anything we might want to do gets messy and memory
178          * intensive.
179          */
180         if (alist->nr_ipv4 > 0)
181                 hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
182         if (alist->nr_addrs > alist->nr_ipv4)
183                 hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
184
185         write_sequnlock(&net->fs_addr_lock);
186
187 exists:
188         afs_get_server(server, afs_server_trace_get_install);
189         write_sequnlock(&net->fs_lock);
190         return server;
191 }
192
193 /*
194  * Allocate a new server record and mark it active.
195  */
196 static struct afs_server *afs_alloc_server(struct afs_net *net,
197                                            const uuid_t *uuid,
198                                            struct afs_addr_list *alist)
199 {
200         struct afs_server *server;
201
202         _enter("");
203
204         server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
205         if (!server)
206                 goto enomem;
207
208         atomic_set(&server->ref, 1);
209         atomic_set(&server->active, 1);
210         server->debug_id = atomic_inc_return(&afs_server_debug_id);
211         RCU_INIT_POINTER(server->addresses, alist);
212         server->addr_version = alist->version;
213         server->uuid = *uuid;
214         rwlock_init(&server->fs_lock);
215         server->cb_volumes = RB_ROOT;
216         seqlock_init(&server->cb_break_lock);
217         init_waitqueue_head(&server->probe_wq);
218         INIT_LIST_HEAD(&server->probe_link);
219         spin_lock_init(&server->probe_lock);
220
221         afs_inc_servers_outstanding(net);
222         trace_afs_server(server, 1, 1, afs_server_trace_alloc);
223         _leave(" = %p", server);
224         return server;
225
226 enomem:
227         _leave(" = NULL [nomem]");
228         return NULL;
229 }
230
231 /*
232  * Look up an address record for a server
233  */
234 static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
235                                                  struct key *key, const uuid_t *uuid)
236 {
237         struct afs_vl_cursor vc;
238         struct afs_addr_list *alist = NULL;
239         int ret;
240
241         ret = -ERESTARTSYS;
242         if (afs_begin_vlserver_operation(&vc, cell, key)) {
243                 while (afs_select_vlserver(&vc)) {
244                         if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
245                                 alist = afs_yfsvl_get_endpoints(&vc, uuid);
246                         else
247                                 alist = afs_vl_get_addrs_u(&vc, uuid);
248                 }
249
250                 ret = afs_end_vlserver_operation(&vc);
251         }
252
253         return ret < 0 ? ERR_PTR(ret) : alist;
254 }
255
256 /*
257  * Get or create a fileserver record.
258  */
259 struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
260                                      const uuid_t *uuid, u32 addr_version)
261 {
262         struct afs_addr_list *alist;
263         struct afs_server *server, *candidate;
264
265         _enter("%p,%pU", cell->net, uuid);
266
267         server = afs_find_server_by_uuid(cell->net, uuid);
268         if (server) {
269                 if (server->addr_version != addr_version)
270                         set_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags);
271                 return server;
272         }
273
274         alist = afs_vl_lookup_addrs(cell, key, uuid);
275         if (IS_ERR(alist))
276                 return ERR_CAST(alist);
277
278         candidate = afs_alloc_server(cell->net, uuid, alist);
279         if (!candidate) {
280                 afs_put_addrlist(alist);
281                 return ERR_PTR(-ENOMEM);
282         }
283
284         server = afs_install_server(cell->net, candidate);
285         if (server != candidate) {
286                 afs_put_addrlist(alist);
287                 kfree(candidate);
288         } else {
289                 /* Immediately dispatch an asynchronous probe to each interface
290                  * on the fileserver.  This will make sure the repeat-probing
291                  * service is started.
292                  */
293                 afs_fs_probe_fileserver(cell->net, server, key, true);
294         }
295
296         return server;
297 }
298
299 /*
300  * Set the server timer to fire after a given delay, assuming it's not already
301  * set for an earlier time.
302  */
303 static void afs_set_server_timer(struct afs_net *net, time64_t delay)
304 {
305         if (net->live) {
306                 afs_inc_servers_outstanding(net);
307                 if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
308                         afs_dec_servers_outstanding(net);
309         }
310 }
311
312 /*
313  * Server management timer.  We have an increment on fs_outstanding that we
314  * need to pass along to the work item.
315  */
316 void afs_servers_timer(struct timer_list *timer)
317 {
318         struct afs_net *net = container_of(timer, struct afs_net, fs_timer);
319
320         _enter("");
321         if (!queue_work(afs_wq, &net->fs_manager))
322                 afs_dec_servers_outstanding(net);
323 }
324
325 /*
326  * Get a reference on a server object.
327  */
328 struct afs_server *afs_get_server(struct afs_server *server,
329                                   enum afs_server_trace reason)
330 {
331         unsigned int u = atomic_inc_return(&server->ref);
332
333         trace_afs_server(server, u, atomic_read(&server->active), reason);
334         return server;
335 }
336
337 /*
338  * Try to get a reference on a server object.
339  */
340 static struct afs_server *afs_maybe_use_server(struct afs_server *server,
341                                                enum afs_server_trace reason)
342 {
343         unsigned int r = atomic_fetch_add_unless(&server->ref, 1, 0);
344         unsigned int a;
345
346         if (r == 0)
347                 return NULL;
348
349         a = atomic_inc_return(&server->active);
350         trace_afs_server(server, r, a, reason);
351         return server;
352 }
353
354 /*
355  * Get an active count on a server object.
356  */
357 struct afs_server *afs_use_server(struct afs_server *server, enum afs_server_trace reason)
358 {
359         unsigned int r = atomic_inc_return(&server->ref);
360         unsigned int a = atomic_inc_return(&server->active);
361
362         trace_afs_server(server, r, a, reason);
363         return server;
364 }
365
366 /*
367  * Release a reference on a server record.
368  */
369 void afs_put_server(struct afs_net *net, struct afs_server *server,
370                     enum afs_server_trace reason)
371 {
372         unsigned int usage;
373
374         if (!server)
375                 return;
376
377         usage = atomic_dec_return(&server->ref);
378         trace_afs_server(server, usage, atomic_read(&server->active), reason);
379         if (unlikely(usage == 0))
380                 __afs_put_server(net, server);
381 }
382
383 /*
384  * Drop an active count on a server object without updating the last-unused
385  * time.
386  */
387 void afs_unuse_server_notime(struct afs_net *net, struct afs_server *server,
388                              enum afs_server_trace reason)
389 {
390         if (server) {
391                 unsigned int active = atomic_dec_return(&server->active);
392
393                 if (active == 0)
394                         afs_set_server_timer(net, afs_server_gc_delay);
395                 afs_put_server(net, server, reason);
396         }
397 }
398
399 /*
400  * Drop an active count on a server object.
401  */
402 void afs_unuse_server(struct afs_net *net, struct afs_server *server,
403                       enum afs_server_trace reason)
404 {
405         if (server) {
406                 server->unuse_time = ktime_get_real_seconds();
407                 afs_unuse_server_notime(net, server, reason);
408         }
409 }
410
411 static void afs_server_rcu(struct rcu_head *rcu)
412 {
413         struct afs_server *server = container_of(rcu, struct afs_server, rcu);
414
415         trace_afs_server(server, atomic_read(&server->ref),
416                          atomic_read(&server->active), afs_server_trace_free);
417         afs_put_addrlist(rcu_access_pointer(server->addresses));
418         kfree(server);
419 }
420
421 static void __afs_put_server(struct afs_net *net, struct afs_server *server)
422 {
423         call_rcu(&server->rcu, afs_server_rcu);
424         afs_dec_servers_outstanding(net);
425 }
426
427 /*
428  * destroy a dead server
429  */
430 static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
431 {
432         struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
433         struct afs_addr_cursor ac = {
434                 .alist  = alist,
435                 .index  = alist->preferred,
436                 .error  = 0,
437         };
438
439         if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
440                 afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
441
442         afs_put_server(net, server, afs_server_trace_destroy);
443 }
444
445 /*
446  * Garbage collect any expired servers.
447  */
448 static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
449 {
450         struct afs_server *server;
451         int active;
452
453         while ((server = gc_list)) {
454                 gc_list = server->gc_next;
455
456                 write_seqlock(&net->fs_lock);
457
458                 active = atomic_read(&server->active);
459                 if (active == 0) {
460                         trace_afs_server(server, atomic_read(&server->ref),
461                                          active, afs_server_trace_gc);
462                         rb_erase(&server->uuid_rb, &net->fs_servers);
463                         list_del(&server->probe_link);
464                         hlist_del_rcu(&server->proc_link);
465                         if (!hlist_unhashed(&server->addr4_link))
466                                 hlist_del_rcu(&server->addr4_link);
467                         if (!hlist_unhashed(&server->addr6_link))
468                                 hlist_del_rcu(&server->addr6_link);
469                 }
470                 write_sequnlock(&net->fs_lock);
471
472                 if (active == 0)
473                         afs_destroy_server(net, server);
474         }
475 }
476
477 /*
478  * Manage the records of servers known to be within a network namespace.  This
479  * includes garbage collecting unused servers.
480  *
481  * Note also that we were given an increment on net->servers_outstanding by
482  * whoever queued us that we need to deal with before returning.
483  */
484 void afs_manage_servers(struct work_struct *work)
485 {
486         struct afs_net *net = container_of(work, struct afs_net, fs_manager);
487         struct afs_server *gc_list = NULL;
488         struct rb_node *cursor;
489         time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
490         bool purging = !net->live;
491
492         _enter("");
493
494         /* Trawl the server list looking for servers that have expired from
495          * lack of use.
496          */
497         read_seqlock_excl(&net->fs_lock);
498
499         for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
500                 struct afs_server *server =
501                         rb_entry(cursor, struct afs_server, uuid_rb);
502                 int active = atomic_read(&server->active);
503
504                 _debug("manage %pU %u", &server->uuid, active);
505
506                 ASSERTIFCMP(purging, active, ==, 0);
507
508                 if (active == 0) {
509                         time64_t expire_at = server->unuse_time;
510
511                         if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
512                             !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
513                                 expire_at += afs_server_gc_delay;
514                         if (purging || expire_at <= now) {
515                                 server->gc_next = gc_list;
516                                 gc_list = server;
517                         } else if (expire_at < next_manage) {
518                                 next_manage = expire_at;
519                         }
520                 }
521         }
522
523         read_sequnlock_excl(&net->fs_lock);
524
525         /* Update the timer on the way out.  We have to pass an increment on
526          * servers_outstanding in the namespace that we are in to the timer or
527          * the work scheduler.
528          */
529         if (!purging && next_manage < TIME64_MAX) {
530                 now = ktime_get_real_seconds();
531
532                 if (next_manage - now <= 0) {
533                         if (queue_work(afs_wq, &net->fs_manager))
534                                 afs_inc_servers_outstanding(net);
535                 } else {
536                         afs_set_server_timer(net, next_manage - now);
537                 }
538         }
539
540         afs_gc_servers(net, gc_list);
541
542         afs_dec_servers_outstanding(net);
543         _leave(" [%d]", atomic_read(&net->servers_outstanding));
544 }
545
546 static void afs_queue_server_manager(struct afs_net *net)
547 {
548         afs_inc_servers_outstanding(net);
549         if (!queue_work(afs_wq, &net->fs_manager))
550                 afs_dec_servers_outstanding(net);
551 }
552
553 /*
554  * Purge list of servers.
555  */
556 void afs_purge_servers(struct afs_net *net)
557 {
558         _enter("");
559
560         if (del_timer_sync(&net->fs_timer))
561                 atomic_dec(&net->servers_outstanding);
562
563         afs_queue_server_manager(net);
564
565         _debug("wait");
566         wait_var_event(&net->servers_outstanding,
567                        !atomic_read(&net->servers_outstanding));
568         _leave("");
569 }
570
571 /*
572  * Get an update for a server's address list.
573  */
574 static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
575 {
576         struct afs_addr_list *alist, *discard;
577
578         _enter("");
579
580         trace_afs_server(server, atomic_read(&server->ref), atomic_read(&server->active),
581                          afs_server_trace_update);
582
583         alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
584                                     &server->uuid);
585         if (IS_ERR(alist)) {
586                 if ((PTR_ERR(alist) == -ERESTARTSYS ||
587                      PTR_ERR(alist) == -EINTR) &&
588                     !(fc->flags & AFS_FS_CURSOR_INTR) &&
589                     server->addresses) {
590                         _leave(" = t [intr]");
591                         return true;
592                 }
593                 fc->error = PTR_ERR(alist);
594                 _leave(" = f [%d]", fc->error);
595                 return false;
596         }
597
598         discard = alist;
599         if (server->addr_version != alist->version) {
600                 write_lock(&server->fs_lock);
601                 discard = rcu_dereference_protected(server->addresses,
602                                                     lockdep_is_held(&server->fs_lock));
603                 rcu_assign_pointer(server->addresses, alist);
604                 server->addr_version = alist->version;
605                 write_unlock(&server->fs_lock);
606         }
607
608         afs_put_addrlist(discard);
609         _leave(" = t");
610         return true;
611 }
612
613 /*
614  * See if a server's address list needs updating.
615  */
616 bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
617 {
618         bool success;
619         int ret, retries = 0;
620
621         _enter("");
622
623         ASSERT(server);
624
625 retry:
626         if (test_bit(AFS_SERVER_FL_UPDATING, &server->flags))
627                 goto wait;
628         if (test_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags))
629                 goto update;
630         _leave(" = t [good]");
631         return true;
632
633 update:
634         if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
635                 clear_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags);
636                 success = afs_update_server_record(fc, server);
637                 clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
638                 wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
639                 _leave(" = %d", success);
640                 return success;
641         }
642
643 wait:
644         ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
645                           (fc->flags & AFS_FS_CURSOR_INTR) ?
646                           TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
647         if (ret == -ERESTARTSYS) {
648                 fc->error = ret;
649                 _leave(" = f [intr]");
650                 return false;
651         }
652
653         retries++;
654         if (retries == 4) {
655                 _leave(" = f [stale]");
656                 ret = -ESTALE;
657                 return false;
658         }
659         goto retry;
660 }