perf beauty sockaddr: Fix augmented syscall format warning
[linux-2.6-microblaze.git] / fs / afs / server.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* AFS server record management
3  *
4  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  */
7
8 #include <linux/sched.h>
9 #include <linux/slab.h>
10 #include "afs_fs.h"
11 #include "internal.h"
12 #include "protocol_yfs.h"
13
14 static unsigned afs_server_gc_delay = 10;       /* Server record timeout in seconds */
15 static unsigned afs_server_update_delay = 30;   /* Time till VLDB recheck in secs */
16 static atomic_t afs_server_debug_id;
17
18 static void afs_inc_servers_outstanding(struct afs_net *net)
19 {
20         atomic_inc(&net->servers_outstanding);
21 }
22
23 static void afs_dec_servers_outstanding(struct afs_net *net)
24 {
25         if (atomic_dec_and_test(&net->servers_outstanding))
26                 wake_up_var(&net->servers_outstanding);
27 }
28
29 /*
30  * Find a server by one of its addresses.
31  */
32 struct afs_server *afs_find_server(struct afs_net *net,
33                                    const struct sockaddr_rxrpc *srx)
34 {
35         const struct afs_addr_list *alist;
36         struct afs_server *server = NULL;
37         unsigned int i;
38         int seq = 0, diff;
39
40         rcu_read_lock();
41
42         do {
43                 if (server)
44                         afs_put_server(net, server, afs_server_trace_put_find_rsq);
45                 server = NULL;
46                 read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
47
48                 if (srx->transport.family == AF_INET6) {
49                         const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
50                         hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
51                                 alist = rcu_dereference(server->addresses);
52                                 for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
53                                         b = &alist->addrs[i].transport.sin6;
54                                         diff = ((u16 __force)a->sin6_port -
55                                                 (u16 __force)b->sin6_port);
56                                         if (diff == 0)
57                                                 diff = memcmp(&a->sin6_addr,
58                                                               &b->sin6_addr,
59                                                               sizeof(struct in6_addr));
60                                         if (diff == 0)
61                                                 goto found;
62                                 }
63                         }
64                 } else {
65                         const struct sockaddr_in *a = &srx->transport.sin, *b;
66                         hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
67                                 alist = rcu_dereference(server->addresses);
68                                 for (i = 0; i < alist->nr_ipv4; i++) {
69                                         b = &alist->addrs[i].transport.sin;
70                                         diff = ((u16 __force)a->sin_port -
71                                                 (u16 __force)b->sin_port);
72                                         if (diff == 0)
73                                                 diff = ((u32 __force)a->sin_addr.s_addr -
74                                                         (u32 __force)b->sin_addr.s_addr);
75                                         if (diff == 0)
76                                                 goto found;
77                                 }
78                         }
79                 }
80
81                 server = NULL;
82         found:
83                 if (server && !atomic_inc_not_zero(&server->usage))
84                         server = NULL;
85
86         } while (need_seqretry(&net->fs_addr_lock, seq));
87
88         done_seqretry(&net->fs_addr_lock, seq);
89
90         rcu_read_unlock();
91         return server;
92 }
93
94 /*
95  * Look up a server by its UUID
96  */
97 struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
98 {
99         struct afs_server *server = NULL;
100         struct rb_node *p;
101         int diff, seq = 0;
102
103         _enter("%pU", uuid);
104
105         do {
106                 /* Unfortunately, rbtree walking doesn't give reliable results
107                  * under just the RCU read lock, so we have to check for
108                  * changes.
109                  */
110                 if (server)
111                         afs_put_server(net, server, afs_server_trace_put_uuid_rsq);
112                 server = NULL;
113
114                 read_seqbegin_or_lock(&net->fs_lock, &seq);
115
116                 p = net->fs_servers.rb_node;
117                 while (p) {
118                         server = rb_entry(p, struct afs_server, uuid_rb);
119
120                         diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
121                         if (diff < 0) {
122                                 p = p->rb_left;
123                         } else if (diff > 0) {
124                                 p = p->rb_right;
125                         } else {
126                                 afs_get_server(server, afs_server_trace_get_by_uuid);
127                                 break;
128                         }
129
130                         server = NULL;
131                 }
132         } while (need_seqretry(&net->fs_lock, seq));
133
134         done_seqretry(&net->fs_lock, seq);
135
136         _leave(" = %p", server);
137         return server;
138 }
139
140 /*
141  * Install a server record in the namespace tree
142  */
143 static struct afs_server *afs_install_server(struct afs_net *net,
144                                              struct afs_server *candidate)
145 {
146         const struct afs_addr_list *alist;
147         struct afs_server *server;
148         struct rb_node **pp, *p;
149         int diff;
150
151         _enter("%p", candidate);
152
153         write_seqlock(&net->fs_lock);
154
155         /* Firstly install the server in the UUID lookup tree */
156         pp = &net->fs_servers.rb_node;
157         p = NULL;
158         while (*pp) {
159                 p = *pp;
160                 _debug("- consider %p", p);
161                 server = rb_entry(p, struct afs_server, uuid_rb);
162                 diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
163                 if (diff < 0)
164                         pp = &(*pp)->rb_left;
165                 else if (diff > 0)
166                         pp = &(*pp)->rb_right;
167                 else
168                         goto exists;
169         }
170
171         server = candidate;
172         rb_link_node(&server->uuid_rb, p, pp);
173         rb_insert_color(&server->uuid_rb, &net->fs_servers);
174         hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
175
176         write_seqlock(&net->fs_addr_lock);
177         alist = rcu_dereference_protected(server->addresses,
178                                           lockdep_is_held(&net->fs_addr_lock.lock));
179
180         /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
181          * it in the IPv4 and/or IPv6 reverse-map lists.
182          *
183          * TODO: For speed we want to use something other than a flat list
184          * here; even sorting the list in terms of lowest address would help a
185          * bit, but anything we might want to do gets messy and memory
186          * intensive.
187          */
188         if (alist->nr_ipv4 > 0)
189                 hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
190         if (alist->nr_addrs > alist->nr_ipv4)
191                 hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
192
193         write_sequnlock(&net->fs_addr_lock);
194
195 exists:
196         afs_get_server(server, afs_server_trace_get_install);
197         write_sequnlock(&net->fs_lock);
198         return server;
199 }
200
201 /*
202  * allocate a new server record
203  */
204 static struct afs_server *afs_alloc_server(struct afs_net *net,
205                                            const uuid_t *uuid,
206                                            struct afs_addr_list *alist)
207 {
208         struct afs_server *server;
209
210         _enter("");
211
212         server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
213         if (!server)
214                 goto enomem;
215
216         atomic_set(&server->usage, 1);
217         server->debug_id = atomic_inc_return(&afs_server_debug_id);
218         RCU_INIT_POINTER(server->addresses, alist);
219         server->addr_version = alist->version;
220         server->uuid = *uuid;
221         server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
222         rwlock_init(&server->fs_lock);
223         INIT_HLIST_HEAD(&server->cb_volumes);
224         rwlock_init(&server->cb_break_lock);
225         init_waitqueue_head(&server->probe_wq);
226         spin_lock_init(&server->probe_lock);
227
228         afs_inc_servers_outstanding(net);
229         trace_afs_server(server, 1, afs_server_trace_alloc);
230         _leave(" = %p", server);
231         return server;
232
233 enomem:
234         _leave(" = NULL [nomem]");
235         return NULL;
236 }
237
238 /*
239  * Look up an address record for a server
240  */
241 static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
242                                                  struct key *key, const uuid_t *uuid)
243 {
244         struct afs_vl_cursor vc;
245         struct afs_addr_list *alist = NULL;
246         int ret;
247
248         ret = -ERESTARTSYS;
249         if (afs_begin_vlserver_operation(&vc, cell, key)) {
250                 while (afs_select_vlserver(&vc)) {
251                         if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
252                                 alist = afs_yfsvl_get_endpoints(&vc, uuid);
253                         else
254                                 alist = afs_vl_get_addrs_u(&vc, uuid);
255                 }
256
257                 ret = afs_end_vlserver_operation(&vc);
258         }
259
260         return ret < 0 ? ERR_PTR(ret) : alist;
261 }
262
263 /*
264  * Get or create a fileserver record.
265  */
266 struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
267                                      const uuid_t *uuid)
268 {
269         struct afs_addr_list *alist;
270         struct afs_server *server, *candidate;
271
272         _enter("%p,%pU", cell->net, uuid);
273
274         server = afs_find_server_by_uuid(cell->net, uuid);
275         if (server)
276                 return server;
277
278         alist = afs_vl_lookup_addrs(cell, key, uuid);
279         if (IS_ERR(alist))
280                 return ERR_CAST(alist);
281
282         candidate = afs_alloc_server(cell->net, uuid, alist);
283         if (!candidate) {
284                 afs_put_addrlist(alist);
285                 return ERR_PTR(-ENOMEM);
286         }
287
288         server = afs_install_server(cell->net, candidate);
289         if (server != candidate) {
290                 afs_put_addrlist(alist);
291                 kfree(candidate);
292         }
293
294         _leave(" = %p{%d}", server, atomic_read(&server->usage));
295         return server;
296 }
297
298 /*
299  * Set the server timer to fire after a given delay, assuming it's not already
300  * set for an earlier time.
301  */
302 static void afs_set_server_timer(struct afs_net *net, time64_t delay)
303 {
304         if (net->live) {
305                 afs_inc_servers_outstanding(net);
306                 if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
307                         afs_dec_servers_outstanding(net);
308         }
309 }
310
311 /*
312  * Server management timer.  We have an increment on fs_outstanding that we
313  * need to pass along to the work item.
314  */
315 void afs_servers_timer(struct timer_list *timer)
316 {
317         struct afs_net *net = container_of(timer, struct afs_net, fs_timer);
318
319         _enter("");
320         if (!queue_work(afs_wq, &net->fs_manager))
321                 afs_dec_servers_outstanding(net);
322 }
323
324 /*
325  * Get a reference on a server object.
326  */
327 struct afs_server *afs_get_server(struct afs_server *server,
328                                   enum afs_server_trace reason)
329 {
330         unsigned int u = atomic_inc_return(&server->usage);
331
332         trace_afs_server(server, u, reason);
333         return server;
334 }
335
336 /*
337  * Release a reference on a server record.
338  */
339 void afs_put_server(struct afs_net *net, struct afs_server *server,
340                     enum afs_server_trace reason)
341 {
342         unsigned int usage;
343
344         if (!server)
345                 return;
346
347         server->put_time = ktime_get_real_seconds();
348
349         usage = atomic_dec_return(&server->usage);
350
351         trace_afs_server(server, usage, reason);
352
353         if (likely(usage > 0))
354                 return;
355
356         afs_set_server_timer(net, afs_server_gc_delay);
357 }
358
359 static void afs_server_rcu(struct rcu_head *rcu)
360 {
361         struct afs_server *server = container_of(rcu, struct afs_server, rcu);
362
363         trace_afs_server(server, atomic_read(&server->usage),
364                          afs_server_trace_free);
365         afs_put_addrlist(rcu_access_pointer(server->addresses));
366         kfree(server);
367 }
368
369 /*
370  * destroy a dead server
371  */
372 static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
373 {
374         struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
375         struct afs_addr_cursor ac = {
376                 .alist  = alist,
377                 .index  = alist->preferred,
378                 .error  = 0,
379         };
380
381         trace_afs_server(server, atomic_read(&server->usage),
382                          afs_server_trace_give_up_cb);
383
384         if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
385                 afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
386
387         wait_var_event(&server->probe_outstanding,
388                        atomic_read(&server->probe_outstanding) == 0);
389
390         trace_afs_server(server, atomic_read(&server->usage),
391                          afs_server_trace_destroy);
392         call_rcu(&server->rcu, afs_server_rcu);
393         afs_dec_servers_outstanding(net);
394 }
395
396 /*
397  * Garbage collect any expired servers.
398  */
399 static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
400 {
401         struct afs_server *server;
402         bool deleted;
403         int usage;
404
405         while ((server = gc_list)) {
406                 gc_list = server->gc_next;
407
408                 write_seqlock(&net->fs_lock);
409                 usage = 1;
410                 deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
411                 trace_afs_server(server, usage, afs_server_trace_gc);
412                 if (deleted) {
413                         rb_erase(&server->uuid_rb, &net->fs_servers);
414                         hlist_del_rcu(&server->proc_link);
415                 }
416                 write_sequnlock(&net->fs_lock);
417
418                 if (deleted) {
419                         write_seqlock(&net->fs_addr_lock);
420                         if (!hlist_unhashed(&server->addr4_link))
421                                 hlist_del_rcu(&server->addr4_link);
422                         if (!hlist_unhashed(&server->addr6_link))
423                                 hlist_del_rcu(&server->addr6_link);
424                         write_sequnlock(&net->fs_addr_lock);
425                         afs_destroy_server(net, server);
426                 }
427         }
428 }
429
430 /*
431  * Manage the records of servers known to be within a network namespace.  This
432  * includes garbage collecting unused servers.
433  *
434  * Note also that we were given an increment on net->servers_outstanding by
435  * whoever queued us that we need to deal with before returning.
436  */
437 void afs_manage_servers(struct work_struct *work)
438 {
439         struct afs_net *net = container_of(work, struct afs_net, fs_manager);
440         struct afs_server *gc_list = NULL;
441         struct rb_node *cursor;
442         time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
443         bool purging = !net->live;
444
445         _enter("");
446
447         /* Trawl the server list looking for servers that have expired from
448          * lack of use.
449          */
450         read_seqlock_excl(&net->fs_lock);
451
452         for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
453                 struct afs_server *server =
454                         rb_entry(cursor, struct afs_server, uuid_rb);
455                 int usage = atomic_read(&server->usage);
456
457                 _debug("manage %pU %u", &server->uuid, usage);
458
459                 ASSERTCMP(usage, >=, 1);
460                 ASSERTIFCMP(purging, usage, ==, 1);
461
462                 if (usage == 1) {
463                         time64_t expire_at = server->put_time;
464
465                         if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
466                             !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
467                                 expire_at += afs_server_gc_delay;
468                         if (purging || expire_at <= now) {
469                                 server->gc_next = gc_list;
470                                 gc_list = server;
471                         } else if (expire_at < next_manage) {
472                                 next_manage = expire_at;
473                         }
474                 }
475         }
476
477         read_sequnlock_excl(&net->fs_lock);
478
479         /* Update the timer on the way out.  We have to pass an increment on
480          * servers_outstanding in the namespace that we are in to the timer or
481          * the work scheduler.
482          */
483         if (!purging && next_manage < TIME64_MAX) {
484                 now = ktime_get_real_seconds();
485
486                 if (next_manage - now <= 0) {
487                         if (queue_work(afs_wq, &net->fs_manager))
488                                 afs_inc_servers_outstanding(net);
489                 } else {
490                         afs_set_server_timer(net, next_manage - now);
491                 }
492         }
493
494         afs_gc_servers(net, gc_list);
495
496         afs_dec_servers_outstanding(net);
497         _leave(" [%d]", atomic_read(&net->servers_outstanding));
498 }
499
500 static void afs_queue_server_manager(struct afs_net *net)
501 {
502         afs_inc_servers_outstanding(net);
503         if (!queue_work(afs_wq, &net->fs_manager))
504                 afs_dec_servers_outstanding(net);
505 }
506
507 /*
508  * Purge list of servers.
509  */
510 void afs_purge_servers(struct afs_net *net)
511 {
512         _enter("");
513
514         if (del_timer_sync(&net->fs_timer))
515                 atomic_dec(&net->servers_outstanding);
516
517         afs_queue_server_manager(net);
518
519         _debug("wait");
520         wait_var_event(&net->servers_outstanding,
521                        !atomic_read(&net->servers_outstanding));
522         _leave("");
523 }
524
525 /*
526  * Get an update for a server's address list.
527  */
528 static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
529 {
530         struct afs_addr_list *alist, *discard;
531
532         _enter("");
533
534         trace_afs_server(server, atomic_read(&server->usage), afs_server_trace_update);
535
536         alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
537                                     &server->uuid);
538         if (IS_ERR(alist)) {
539                 if ((PTR_ERR(alist) == -ERESTARTSYS ||
540                      PTR_ERR(alist) == -EINTR) &&
541                     !(fc->flags & AFS_FS_CURSOR_INTR) &&
542                     server->addresses) {
543                         _leave(" = t [intr]");
544                         return true;
545                 }
546                 fc->error = PTR_ERR(alist);
547                 _leave(" = f [%d]", fc->error);
548                 return false;
549         }
550
551         discard = alist;
552         if (server->addr_version != alist->version) {
553                 write_lock(&server->fs_lock);
554                 discard = rcu_dereference_protected(server->addresses,
555                                                     lockdep_is_held(&server->fs_lock));
556                 rcu_assign_pointer(server->addresses, alist);
557                 server->addr_version = alist->version;
558                 write_unlock(&server->fs_lock);
559         }
560
561         server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
562         afs_put_addrlist(discard);
563         _leave(" = t");
564         return true;
565 }
566
567 /*
568  * See if a server's address list needs updating.
569  */
570 bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
571 {
572         time64_t now = ktime_get_real_seconds();
573         long diff;
574         bool success;
575         int ret, retries = 0;
576
577         _enter("");
578
579         ASSERT(server);
580
581 retry:
582         diff = READ_ONCE(server->update_at) - now;
583         if (diff > 0) {
584                 _leave(" = t [not now %ld]", diff);
585                 return true;
586         }
587
588         if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
589                 success = afs_update_server_record(fc, server);
590                 clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
591                 wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
592                 _leave(" = %d", success);
593                 return success;
594         }
595
596         ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
597                           TASK_INTERRUPTIBLE);
598         if (ret == -ERESTARTSYS) {
599                 if (!(fc->flags & AFS_FS_CURSOR_INTR) && server->addresses) {
600                         _leave(" = t [intr]");
601                         return true;
602                 }
603                 fc->error = ret;
604                 _leave(" = f [intr]");
605                 return false;
606         }
607
608         retries++;
609         if (retries == 4) {
610                 _leave(" = f [stale]");
611                 ret = -ESTALE;
612                 return false;
613         }
614         goto retry;
615 }