afs: Use the serverUnique field in the UVLDB record to reduce rpc ops
fs/afs/server.c
// SPDX-License-Identifier: GPL-2.0-or-later
/* AFS server record management
 *
 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include "afs_fs.h"
#include "internal.h"
#include "protocol_yfs.h"

static unsigned afs_server_gc_delay = 10;       /* Server record timeout in seconds */
static atomic_t afs_server_debug_id;

static void afs_inc_servers_outstanding(struct afs_net *net)
{
        atomic_inc(&net->servers_outstanding);
}

static void afs_dec_servers_outstanding(struct afs_net *net)
{
        if (atomic_dec_and_test(&net->servers_outstanding))
                wake_up_var(&net->servers_outstanding);
}

/*
 * Find a server by one of its addresses.
 */
struct afs_server *afs_find_server(struct afs_net *net,
                                   const struct sockaddr_rxrpc *srx)
{
        const struct afs_addr_list *alist;
        struct afs_server *server = NULL;
        unsigned int i;
        int seq = 0, diff;

        rcu_read_lock();

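        /* The loop below makes a lockless pass over the address lists under
         * the RCU read lock and, if the seqlock shows that the lists changed
         * whilst it was walking them, goes round again.  A server reference
         * picked up on a pass that gets discarded is dropped at the top of
         * the next iteration, and the final reference is only taken with
         * atomic_inc_not_zero() so that a record already being torn down is
         * skipped.
         */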
        do {
                if (server)
                        afs_put_server(net, server, afs_server_trace_put_find_rsq);
                server = NULL;
                read_seqbegin_or_lock(&net->fs_addr_lock, &seq);

                if (srx->transport.family == AF_INET6) {
                        const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
                        hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
                                alist = rcu_dereference(server->addresses);
                                for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
                                        b = &alist->addrs[i].transport.sin6;
                                        diff = ((u16 __force)a->sin6_port -
                                                (u16 __force)b->sin6_port);
                                        if (diff == 0)
                                                diff = memcmp(&a->sin6_addr,
                                                              &b->sin6_addr,
                                                              sizeof(struct in6_addr));
                                        if (diff == 0)
                                                goto found;
                                }
                        }
                } else {
                        const struct sockaddr_in *a = &srx->transport.sin, *b;
                        hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
                                alist = rcu_dereference(server->addresses);
                                for (i = 0; i < alist->nr_ipv4; i++) {
                                        b = &alist->addrs[i].transport.sin;
                                        diff = ((u16 __force)a->sin_port -
                                                (u16 __force)b->sin_port);
                                        if (diff == 0)
                                                diff = ((u32 __force)a->sin_addr.s_addr -
                                                        (u32 __force)b->sin_addr.s_addr);
                                        if (diff == 0)
                                                goto found;
                                }
                        }
                }

                server = NULL;
        found:
                if (server && !atomic_inc_not_zero(&server->usage))
                        server = NULL;

        } while (need_seqretry(&net->fs_addr_lock, seq));

        done_seqretry(&net->fs_addr_lock, seq);

        rcu_read_unlock();
        return server;
}

/*
 * Look up a server by its UUID
 */
struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
{
        struct afs_server *server = NULL;
        struct rb_node *p;
        int diff, seq = 0;

        _enter("%pU", uuid);

        do {
                /* Unfortunately, rbtree walking doesn't give reliable results
                 * under just the RCU read lock, so we have to check for
                 * changes.
                 */
                if (server)
                        afs_put_server(net, server, afs_server_trace_put_uuid_rsq);
                server = NULL;

                read_seqbegin_or_lock(&net->fs_lock, &seq);

                p = net->fs_servers.rb_node;
                while (p) {
                        server = rb_entry(p, struct afs_server, uuid_rb);

                        diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
                        if (diff < 0) {
                                p = p->rb_left;
                        } else if (diff > 0) {
                                p = p->rb_right;
                        } else {
                                afs_get_server(server, afs_server_trace_get_by_uuid);
                                break;
                        }

                        server = NULL;
                }
        } while (need_seqretry(&net->fs_lock, seq));

        done_seqretry(&net->fs_lock, seq);

        _leave(" = %p", server);
        return server;
}

/*
 * Install a server record in the namespace tree
 */
static struct afs_server *afs_install_server(struct afs_net *net,
                                             struct afs_server *candidate)
{
        const struct afs_addr_list *alist;
        struct afs_server *server;
        struct rb_node **pp, *p;
        int diff;

        _enter("%p", candidate);

        write_seqlock(&net->fs_lock);

        /* Firstly install the server in the UUID lookup tree */
        pp = &net->fs_servers.rb_node;
        p = NULL;
        while (*pp) {
                p = *pp;
                _debug("- consider %p", p);
                server = rb_entry(p, struct afs_server, uuid_rb);
                diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
                if (diff < 0)
                        pp = &(*pp)->rb_left;
                else if (diff > 0)
                        pp = &(*pp)->rb_right;
                else
                        goto exists;
        }

        server = candidate;
        rb_link_node(&server->uuid_rb, p, pp);
        rb_insert_color(&server->uuid_rb, &net->fs_servers);
        hlist_add_head_rcu(&server->proc_link, &net->fs_proc);

        write_seqlock(&net->fs_addr_lock);
        alist = rcu_dereference_protected(server->addresses,
                                          lockdep_is_held(&net->fs_addr_lock.lock));

        /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
         * it in the IPv4 and/or IPv6 reverse-map lists.
         *
         * TODO: For speed we want to use something other than a flat list
         * here; even sorting the list in terms of lowest address would help a
         * bit, but anything we might want to do gets messy and memory
         * intensive.
         */
        if (alist->nr_ipv4 > 0)
                hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
        if (alist->nr_addrs > alist->nr_ipv4)
                hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);

        write_sequnlock(&net->fs_addr_lock);

exists:
        afs_get_server(server, afs_server_trace_get_install);
        write_sequnlock(&net->fs_lock);
        return server;
}

/*
 * allocate a new server record
 */
static struct afs_server *afs_alloc_server(struct afs_net *net,
                                           const uuid_t *uuid,
                                           struct afs_addr_list *alist)
{
        struct afs_server *server;

        _enter("");

        server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
        if (!server)
                goto enomem;

        atomic_set(&server->usage, 1);
        server->debug_id = atomic_inc_return(&afs_server_debug_id);
        RCU_INIT_POINTER(server->addresses, alist);
        server->addr_version = alist->version;
        server->uuid = *uuid;
        rwlock_init(&server->fs_lock);
        INIT_HLIST_HEAD(&server->cb_volumes);
        rwlock_init(&server->cb_break_lock);
        init_waitqueue_head(&server->probe_wq);
        spin_lock_init(&server->probe_lock);

        afs_inc_servers_outstanding(net);
        trace_afs_server(server, 1, afs_server_trace_alloc);
        _leave(" = %p", server);
        return server;

enomem:
        _leave(" = NULL [nomem]");
        return NULL;
}

/*
 * Look up an address record for a server
 */
static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
                                                 struct key *key, const uuid_t *uuid)
{
        struct afs_vl_cursor vc;
        struct afs_addr_list *alist = NULL;
        int ret;

        ret = -ERESTARTSYS;
        if (afs_begin_vlserver_operation(&vc, cell, key)) {
                while (afs_select_vlserver(&vc)) {
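                        /* Rotate through the cell's VL servers until one
                         * gives us an answer.  A YFS-capable VL server is
                         * asked for the fileserver's endpoints with the YFS
                         * variant of the query; otherwise the plain
                         * UUID-keyed lookup (afs_vl_get_addrs_u()) is used.
                         */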
                        if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
                                alist = afs_yfsvl_get_endpoints(&vc, uuid);
                        else
                                alist = afs_vl_get_addrs_u(&vc, uuid);
                }

                ret = afs_end_vlserver_operation(&vc);
        }

        return ret < 0 ? ERR_PTR(ret) : alist;
}

/*
 * Get or create a fileserver record.
 */
struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
                                     const uuid_t *uuid, u32 addr_version)
{
        struct afs_addr_list *alist;
        struct afs_server *server, *candidate;

        _enter("%p,%pU", cell->net, uuid);

        server = afs_find_server_by_uuid(cell->net, uuid);
        if (server) {
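                /* The caller passed in the address-list version number it
                 * got from the VLDB (the serverUnique field); if that still
                 * matches what's cached here, the existing record can be
                 * used as-is without another VL RPC, otherwise it's just
                 * flagged for an update.
                 */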
                if (server->addr_version != addr_version)
                        set_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags);
                return server;
        }

        alist = afs_vl_lookup_addrs(cell, key, uuid);
        if (IS_ERR(alist))
                return ERR_CAST(alist);

        candidate = afs_alloc_server(cell->net, uuid, alist);
        if (!candidate) {
                afs_put_addrlist(alist);
                return ERR_PTR(-ENOMEM);
        }

        server = afs_install_server(cell->net, candidate);
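        /* If we raced with someone else installing a record for this UUID,
         * afs_install_server() returns their record with a reference held
         * and our candidate and address list get discarded below.
         */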
        if (server != candidate) {
                afs_put_addrlist(alist);
                kfree(candidate);
        }

        _leave(" = %p{%d}", server, atomic_read(&server->usage));
        return server;
}

/*
 * Set the server timer to fire after a given delay, assuming it's not already
 * set for an earlier time.
 */
static void afs_set_server_timer(struct afs_net *net, time64_t delay)
{
        if (net->live) {
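                /* A queued timer holds a count on servers_outstanding.  Take
                 * one here and, if timer_reduce() reports that a timer was
                 * already pending (and so already owns a count), give the
                 * extra one back.
                 */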
                afs_inc_servers_outstanding(net);
                if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
                        afs_dec_servers_outstanding(net);
        }
}

/*
 * Server management timer.  We have an increment on net->servers_outstanding
 * that we need to pass along to the work item.
 */
void afs_servers_timer(struct timer_list *timer)
{
        struct afs_net *net = container_of(timer, struct afs_net, fs_timer);

        _enter("");
        if (!queue_work(afs_wq, &net->fs_manager))
                afs_dec_servers_outstanding(net);
}

/*
 * Get a reference on a server object.
 */
struct afs_server *afs_get_server(struct afs_server *server,
                                  enum afs_server_trace reason)
{
        unsigned int u = atomic_inc_return(&server->usage);

        trace_afs_server(server, u, reason);
        return server;
}

/*
 * Release a reference on a server record.
 */
void afs_put_server(struct afs_net *net, struct afs_server *server,
                    enum afs_server_trace reason)
{
        unsigned int usage;

        if (!server)
                return;

        server->put_time = ktime_get_real_seconds();

        usage = atomic_dec_return(&server->usage);

        trace_afs_server(server, usage, reason);

        if (likely(usage > 0))
                return;

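        /* The usage count has hit zero.  Rather than destroying the record
         * straight away, leave it in place and arm the management timer so
         * that it gets garbage-collected about afs_server_gc_delay seconds
         * after the last put (see afs_manage_servers()), giving it a chance
         * to be reused in the meantime.
         */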
        afs_set_server_timer(net, afs_server_gc_delay);
}

static void afs_server_rcu(struct rcu_head *rcu)
{
        struct afs_server *server = container_of(rcu, struct afs_server, rcu);

        trace_afs_server(server, atomic_read(&server->usage),
                         afs_server_trace_free);
        afs_put_addrlist(rcu_access_pointer(server->addresses));
        kfree(server);
}

/*
 * destroy a dead server
 */
static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
{
        struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
        struct afs_addr_cursor ac = {
                .alist  = alist,
                .index  = alist->preferred,
                .error  = 0,
        };

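        /* Tell the server, on its preferred address, that we're discarding
         * all the callback promises it holds for us, then wait for any
         * outstanding probes to complete before the record is freed by RCU.
         */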
        trace_afs_server(server, atomic_read(&server->usage),
                         afs_server_trace_give_up_cb);

        if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
                afs_fs_give_up_all_callbacks(net, server, &ac, NULL);

        wait_var_event(&server->probe_outstanding,
                       atomic_read(&server->probe_outstanding) == 0);

        trace_afs_server(server, atomic_read(&server->usage),
                         afs_server_trace_destroy);
        call_rcu(&server->rcu, afs_server_rcu);
        afs_dec_servers_outstanding(net);
}

/*
 * Garbage collect any expired servers.
 */
static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
{
        struct afs_server *server;
        bool deleted;
        int usage;

        while ((server = gc_list)) {
                gc_list = server->gc_next;

                write_seqlock(&net->fs_lock);
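                /* Only reap the record if no one took a new reference whilst
                 * it was sitting on the gc list: the cmpxchg only drops the
                 * count from 1 to 0 if it is still exactly 1.
                 */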
                usage = 1;
                deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
                trace_afs_server(server, usage, afs_server_trace_gc);
                if (deleted) {
                        rb_erase(&server->uuid_rb, &net->fs_servers);
                        hlist_del_rcu(&server->proc_link);
                }
                write_sequnlock(&net->fs_lock);

                if (deleted) {
                        write_seqlock(&net->fs_addr_lock);
                        if (!hlist_unhashed(&server->addr4_link))
                                hlist_del_rcu(&server->addr4_link);
                        if (!hlist_unhashed(&server->addr6_link))
                                hlist_del_rcu(&server->addr6_link);
                        write_sequnlock(&net->fs_addr_lock);
                        afs_destroy_server(net, server);
                }
        }
}

/*
 * Manage the records of servers known to be within a network namespace.  This
 * includes garbage collecting unused servers.
 *
 * Note also that we were given an increment on net->servers_outstanding by
 * whoever queued us that we need to deal with before returning.
 */
void afs_manage_servers(struct work_struct *work)
{
        struct afs_net *net = container_of(work, struct afs_net, fs_manager);
        struct afs_server *gc_list = NULL;
        struct rb_node *cursor;
        time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
        bool purging = !net->live;

        _enter("");

        /* Trawl the server list looking for servers that have expired from
         * lack of use.
         */
        read_seqlock_excl(&net->fs_lock);

        for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
                struct afs_server *server =
                        rb_entry(cursor, struct afs_server, uuid_rb);
                int usage = atomic_read(&server->usage);

                _debug("manage %pU %u", &server->uuid, usage);

                ASSERTCMP(usage, >=, 1);
                ASSERTIFCMP(purging, usage, ==, 1);

                if (usage == 1) {
                        time64_t expire_at = server->put_time;

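                        /* Records for servers whose VL lookup failed or that
                         * weren't found at all are expired immediately;
                         * anything else is kept for afs_server_gc_delay
                         * seconds after its last put, and everything goes if
                         * the namespace is being purged.
                         */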
                        if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
                            !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
                                expire_at += afs_server_gc_delay;
                        if (purging || expire_at <= now) {
                                server->gc_next = gc_list;
                                gc_list = server;
                        } else if (expire_at < next_manage) {
                                next_manage = expire_at;
                        }
                }
        }

        read_sequnlock_excl(&net->fs_lock);

        /* Update the timer on the way out.  We have to pass an increment on
         * servers_outstanding in the namespace that we are in to the timer or
         * the work scheduler.
         */
        if (!purging && next_manage < TIME64_MAX) {
                now = ktime_get_real_seconds();

                if (next_manage - now <= 0) {
                        if (queue_work(afs_wq, &net->fs_manager))
                                afs_inc_servers_outstanding(net);
                } else {
                        afs_set_server_timer(net, next_manage - now);
                }
        }

        afs_gc_servers(net, gc_list);

        afs_dec_servers_outstanding(net);
        _leave(" [%d]", atomic_read(&net->servers_outstanding));
}

static void afs_queue_server_manager(struct afs_net *net)
{
        afs_inc_servers_outstanding(net);
        if (!queue_work(afs_wq, &net->fs_manager))
                afs_dec_servers_outstanding(net);
}

/*
 * Purge list of servers.
 */
void afs_purge_servers(struct afs_net *net)
{
        _enter("");

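        /* Cancel any pending management timer; if it was queued, it owned a
         * count on servers_outstanding, so drop that too.  Then kick the
         * manager one last time to garbage-collect what's left and wait for
         * all outstanding server records to be cleaned up.
         */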
        if (del_timer_sync(&net->fs_timer))
                atomic_dec(&net->servers_outstanding);

        afs_queue_server_manager(net);

        _debug("wait");
        wait_var_event(&net->servers_outstanding,
                       !atomic_read(&net->servers_outstanding));
        _leave("");
}

/*
 * Get an update for a server's address list.
 */
static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
{
        struct afs_addr_list *alist, *discard;

        _enter("");

        trace_afs_server(server, atomic_read(&server->usage), afs_server_trace_update);

        alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
                                    &server->uuid);
        if (IS_ERR(alist)) {
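                /* If the lookup was interrupted, but this operation isn't
                 * meant to be interruptible and we still have a usable
                 * address list, carry on with what we've already got rather
                 * than failing the operation.
                 */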
                if ((PTR_ERR(alist) == -ERESTARTSYS ||
                     PTR_ERR(alist) == -EINTR) &&
                    !(fc->flags & AFS_FS_CURSOR_INTR) &&
                    server->addresses) {
                        _leave(" = t [intr]");
                        return true;
                }
                fc->error = PTR_ERR(alist);
                _leave(" = f [%d]", fc->error);
                return false;
        }

        discard = alist;
        if (server->addr_version != alist->version) {
                write_lock(&server->fs_lock);
                discard = rcu_dereference_protected(server->addresses,
                                                    lockdep_is_held(&server->fs_lock));
                rcu_assign_pointer(server->addresses, alist);
                server->addr_version = alist->version;
                write_unlock(&server->fs_lock);
        }

        afs_put_addrlist(discard);
        _leave(" = t");
        return true;
}

/*
 * See if a server's address list needs updating.
 */
bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
{
        bool success;
        int ret, retries = 0;

        _enter("");

        ASSERT(server);

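        /* Only one thread may perform the update at a time: whoever wins the
         * UPDATING bit-lock does the VL lookup whilst everyone else waits on
         * the bit and then rechecks, giving up as stale after a few rounds.
         */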
retry:
        if (test_bit(AFS_SERVER_FL_UPDATING, &server->flags))
                goto wait;
        if (test_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags))
                goto update;
        _leave(" = t [good]");
        return true;

update:
        if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
                clear_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags);
                success = afs_update_server_record(fc, server);
                clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
                wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
                _leave(" = %d", success);
                return success;
        }

wait:
        ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
                          (fc->flags & AFS_FS_CURSOR_INTR) ?
                          TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
        if (ret == -ERESTARTSYS) {
                fc->error = ret;
                _leave(" = f [intr]");
                return false;
        }

        retries++;
        if (retries == 4) {
                _leave(" = f [stale]");
                fc->error = -ESTALE;
                return false;
        }
        goto retry;
}