Merge tag 'char-misc-4.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregk...
[linux-2.6-microblaze.git] / fs / afs / server.c
1 /* AFS server record management
2  *
3  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
4  * Written by David Howells (dhowells@redhat.com)
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11
12 #include <linux/sched.h>
13 #include <linux/slab.h>
14 #include "afs_fs.h"
15 #include "internal.h"
16
/* Seconds an unused server record is kept before it may be garbage collected */
static unsigned int afs_server_gc_delay = 10;
/* Seconds between rechecks of a server's address list against the VLDB */
static unsigned int afs_server_update_delay = 30;
19
20 static void afs_inc_servers_outstanding(struct afs_net *net)
21 {
22         atomic_inc(&net->servers_outstanding);
23 }
24
25 static void afs_dec_servers_outstanding(struct afs_net *net)
26 {
27         if (atomic_dec_and_test(&net->servers_outstanding))
28                 wake_up_var(&net->servers_outstanding);
29 }
30
31 /*
32  * Find a server by one of its addresses.
33  */
34 struct afs_server *afs_find_server(struct afs_net *net,
35                                    const struct sockaddr_rxrpc *srx)
36 {
37         const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
38         const struct afs_addr_list *alist;
39         struct afs_server *server = NULL;
40         unsigned int i;
41         bool ipv6 = true;
42         int seq = 0, diff;
43
44         if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 ||
45             srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 ||
46             srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff))
47                 ipv6 = false;
48
49         rcu_read_lock();
50
51         do {
52                 if (server)
53                         afs_put_server(net, server);
54                 server = NULL;
55                 read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
56
57                 if (ipv6) {
58                         hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
59                                 alist = rcu_dereference(server->addresses);
60                                 for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
61                                         b = &alist->addrs[i].transport.sin6;
62                                         diff = ((u16 __force)a->sin6_port -
63                                                 (u16 __force)b->sin6_port);
64                                         if (diff == 0)
65                                                 diff = memcmp(&a->sin6_addr,
66                                                               &b->sin6_addr,
67                                                               sizeof(struct in6_addr));
68                                         if (diff == 0)
69                                                 goto found;
70                                 }
71                         }
72                 } else {
73                         hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
74                                 alist = rcu_dereference(server->addresses);
75                                 for (i = 0; i < alist->nr_ipv4; i++) {
76                                         b = &alist->addrs[i].transport.sin6;
77                                         diff = ((u16 __force)a->sin6_port -
78                                                 (u16 __force)b->sin6_port);
79                                         if (diff == 0)
80                                                 diff = ((u32 __force)a->sin6_addr.s6_addr32[3] -
81                                                         (u32 __force)b->sin6_addr.s6_addr32[3]);
82                                         if (diff == 0)
83                                                 goto found;
84                                 }
85                         }
86                 }
87
88                 server = NULL;
89         found:
90                 if (server && !atomic_inc_not_zero(&server->usage))
91                         server = NULL;
92
93         } while (need_seqretry(&net->fs_addr_lock, seq));
94
95         done_seqretry(&net->fs_addr_lock, seq);
96
97         rcu_read_unlock();
98         return server;
99 }
100
101 /*
102  * Look up a server by its UUID
103  */
104 struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
105 {
106         struct afs_server *server = NULL;
107         struct rb_node *p;
108         int diff, seq = 0;
109
110         _enter("%pU", uuid);
111
112         do {
113                 /* Unfortunately, rbtree walking doesn't give reliable results
114                  * under just the RCU read lock, so we have to check for
115                  * changes.
116                  */
117                 if (server)
118                         afs_put_server(net, server);
119                 server = NULL;
120
121                 read_seqbegin_or_lock(&net->fs_lock, &seq);
122
123                 p = net->fs_servers.rb_node;
124                 while (p) {
125                         server = rb_entry(p, struct afs_server, uuid_rb);
126
127                         diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
128                         if (diff < 0) {
129                                 p = p->rb_left;
130                         } else if (diff > 0) {
131                                 p = p->rb_right;
132                         } else {
133                                 afs_get_server(server);
134                                 break;
135                         }
136
137                         server = NULL;
138                 }
139         } while (need_seqretry(&net->fs_lock, seq));
140
141         done_seqretry(&net->fs_lock, seq);
142
143         _leave(" = %p", server);
144         return server;
145 }
146
147 /*
148  * Install a server record in the namespace tree
149  */
150 static struct afs_server *afs_install_server(struct afs_net *net,
151                                              struct afs_server *candidate)
152 {
153         const struct afs_addr_list *alist;
154         struct afs_server *server;
155         struct rb_node **pp, *p;
156         int ret = -EEXIST, diff;
157
158         _enter("%p", candidate);
159
160         write_seqlock(&net->fs_lock);
161
162         /* Firstly install the server in the UUID lookup tree */
163         pp = &net->fs_servers.rb_node;
164         p = NULL;
165         while (*pp) {
166                 p = *pp;
167                 _debug("- consider %p", p);
168                 server = rb_entry(p, struct afs_server, uuid_rb);
169                 diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
170                 if (diff < 0)
171                         pp = &(*pp)->rb_left;
172                 else if (diff > 0)
173                         pp = &(*pp)->rb_right;
174                 else
175                         goto exists;
176         }
177
178         server = candidate;
179         rb_link_node(&server->uuid_rb, p, pp);
180         rb_insert_color(&server->uuid_rb, &net->fs_servers);
181         hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
182
183         write_seqlock(&net->fs_addr_lock);
184         alist = rcu_dereference_protected(server->addresses,
185                                           lockdep_is_held(&net->fs_addr_lock.lock));
186
187         /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
188          * it in the IPv4 and/or IPv6 reverse-map lists.
189          *
190          * TODO: For speed we want to use something other than a flat list
191          * here; even sorting the list in terms of lowest address would help a
192          * bit, but anything we might want to do gets messy and memory
193          * intensive.
194          */
195         if (alist->nr_ipv4 > 0)
196                 hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
197         if (alist->nr_addrs > alist->nr_ipv4)
198                 hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
199
200         write_sequnlock(&net->fs_addr_lock);
201         ret = 0;
202
203 exists:
204         afs_get_server(server);
205         write_sequnlock(&net->fs_lock);
206         return server;
207 }
208
209 /*
210  * allocate a new server record
211  */
212 static struct afs_server *afs_alloc_server(struct afs_net *net,
213                                            const uuid_t *uuid,
214                                            struct afs_addr_list *alist)
215 {
216         struct afs_server *server;
217
218         _enter("");
219
220         server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
221         if (!server)
222                 goto enomem;
223
224         atomic_set(&server->usage, 1);
225         RCU_INIT_POINTER(server->addresses, alist);
226         server->addr_version = alist->version;
227         server->uuid = *uuid;
228         server->flags = (1UL << AFS_SERVER_FL_NEW);
229         server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
230         rwlock_init(&server->fs_lock);
231         INIT_HLIST_HEAD(&server->cb_volumes);
232         rwlock_init(&server->cb_break_lock);
233
234         afs_inc_servers_outstanding(net);
235         _leave(" = %p", server);
236         return server;
237
238 enomem:
239         _leave(" = NULL [nomem]");
240         return NULL;
241 }
242
/*
 * Look up an address record for a server.
 *
 * Each address of the cell's VL cursor is tried in turn, using the YFS or the
 * AFS query depending on the address's bit in the cursor's alist->yfs mask.
 * The first success ends the search and its address list is returned.
 * Network/host unreachable and connection refused errors move on to the next
 * address; any other error stops the search.  On failure an ERR_PTR() is
 * returned carrying whatever afs_end_cursor() reports.
 */
static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
						 struct key *key, const uuid_t *uuid)
{
	struct afs_addr_cursor ac;
	struct afs_addr_list *alist;
	int ret;

	ret = afs_set_vl_cursor(&ac, cell);
	if (ret < 0)
		return ERR_PTR(ret);

	while (afs_iterate_addresses(&ac)) {
		if (test_bit(ac.index, &ac.alist->yfs))
			alist = afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid);
		else
			alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid);

		if (ac.error == 0) {
			afs_end_cursor(&ac);
			return alist;
		}

		switch (ac.error) {
		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
			/* This address is no good; try the next one */
			continue;
		case -ECONNABORTED:
			ac.error = afs_abort_to_error(ac.abort_code);
			break;
		case -ENOMEM:
		case -ENONET:
			break;
		default:
			ac.error = -EIO;
			break;
		}

		/* Any error that got here is fatal to the lookup */
		break;
	}

	return ERR_PTR(afs_end_cursor(&ac));
}
285
286 /*
287  * Get or create a fileserver record.
288  */
289 struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
290                                      const uuid_t *uuid)
291 {
292         struct afs_addr_list *alist;
293         struct afs_server *server, *candidate;
294
295         _enter("%p,%pU", cell->net, uuid);
296
297         server = afs_find_server_by_uuid(cell->net, uuid);
298         if (server)
299                 return server;
300
301         alist = afs_vl_lookup_addrs(cell, key, uuid);
302         if (IS_ERR(alist))
303                 return ERR_CAST(alist);
304
305         candidate = afs_alloc_server(cell->net, uuid, alist);
306         if (!candidate) {
307                 afs_put_addrlist(alist);
308                 return ERR_PTR(-ENOMEM);
309         }
310
311         server = afs_install_server(cell->net, candidate);
312         if (server != candidate) {
313                 afs_put_addrlist(alist);
314                 kfree(candidate);
315         }
316
317         _leave(" = %p{%d}", server, atomic_read(&server->usage));
318         return server;
319 }
320
321 /*
322  * Set the server timer to fire after a given delay, assuming it's not already
323  * set for an earlier time.
324  */
325 static void afs_set_server_timer(struct afs_net *net, time64_t delay)
326 {
327         if (net->live) {
328                 afs_inc_servers_outstanding(net);
329                 if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
330                         afs_dec_servers_outstanding(net);
331         }
332 }
333
334 /*
335  * Server management timer.  We have an increment on fs_outstanding that we
336  * need to pass along to the work item.
337  */
338 void afs_servers_timer(struct timer_list *timer)
339 {
340         struct afs_net *net = container_of(timer, struct afs_net, fs_timer);
341
342         _enter("");
343         if (!queue_work(afs_wq, &net->fs_manager))
344                 afs_dec_servers_outstanding(net);
345 }
346
347 /*
348  * Release a reference on a server record.
349  */
350 void afs_put_server(struct afs_net *net, struct afs_server *server)
351 {
352         unsigned int usage;
353
354         if (!server)
355                 return;
356
357         server->put_time = ktime_get_real_seconds();
358
359         usage = atomic_dec_return(&server->usage);
360
361         _enter("{%u}", usage);
362
363         if (likely(usage > 0))
364                 return;
365
366         afs_set_server_timer(net, afs_server_gc_delay);
367 }
368
369 static void afs_server_rcu(struct rcu_head *rcu)
370 {
371         struct afs_server *server = container_of(rcu, struct afs_server, rcu);
372
373         afs_put_addrlist(rcu_access_pointer(server->addresses));
374         kfree(server);
375 }
376
377 /*
378  * destroy a dead server
379  */
380 static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
381 {
382         struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
383         struct afs_addr_cursor ac = {
384                 .alist  = alist,
385                 .start  = alist->index,
386                 .index  = 0,
387                 .addr   = &alist->addrs[alist->index],
388                 .error  = 0,
389         };
390         _enter("%p", server);
391
392         if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
393                 afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
394
395         call_rcu(&server->rcu, afs_server_rcu);
396         afs_dec_servers_outstanding(net);
397 }
398
399 /*
400  * Garbage collect any expired servers.
401  */
402 static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
403 {
404         struct afs_server *server;
405         bool deleted;
406         int usage;
407
408         while ((server = gc_list)) {
409                 gc_list = server->gc_next;
410
411                 write_seqlock(&net->fs_lock);
412                 usage = 1;
413                 deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
414                 if (deleted) {
415                         rb_erase(&server->uuid_rb, &net->fs_servers);
416                         hlist_del_rcu(&server->proc_link);
417                 }
418                 write_sequnlock(&net->fs_lock);
419
420                 if (deleted) {
421                         write_seqlock(&net->fs_addr_lock);
422                         if (!hlist_unhashed(&server->addr4_link))
423                                 hlist_del_rcu(&server->addr4_link);
424                         if (!hlist_unhashed(&server->addr6_link))
425                                 hlist_del_rcu(&server->addr6_link);
426                         write_sequnlock(&net->fs_addr_lock);
427                         afs_destroy_server(net, server);
428                 }
429         }
430 }
431
432 /*
433  * Manage the records of servers known to be within a network namespace.  This
434  * includes garbage collecting unused servers.
435  *
436  * Note also that we were given an increment on net->servers_outstanding by
437  * whoever queued us that we need to deal with before returning.
438  */
439 void afs_manage_servers(struct work_struct *work)
440 {
441         struct afs_net *net = container_of(work, struct afs_net, fs_manager);
442         struct afs_server *gc_list = NULL;
443         struct rb_node *cursor;
444         time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
445         bool purging = !net->live;
446
447         _enter("");
448
449         /* Trawl the server list looking for servers that have expired from
450          * lack of use.
451          */
452         read_seqlock_excl(&net->fs_lock);
453
454         for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
455                 struct afs_server *server =
456                         rb_entry(cursor, struct afs_server, uuid_rb);
457                 int usage = atomic_read(&server->usage);
458
459                 _debug("manage %pU %u", &server->uuid, usage);
460
461                 ASSERTCMP(usage, >=, 1);
462                 ASSERTIFCMP(purging, usage, ==, 1);
463
464                 if (usage == 1) {
465                         time64_t expire_at = server->put_time;
466
467                         if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
468                             !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
469                                 expire_at += afs_server_gc_delay;
470                         if (purging || expire_at <= now) {
471                                 server->gc_next = gc_list;
472                                 gc_list = server;
473                         } else if (expire_at < next_manage) {
474                                 next_manage = expire_at;
475                         }
476                 }
477         }
478
479         read_sequnlock_excl(&net->fs_lock);
480
481         /* Update the timer on the way out.  We have to pass an increment on
482          * servers_outstanding in the namespace that we are in to the timer or
483          * the work scheduler.
484          */
485         if (!purging && next_manage < TIME64_MAX) {
486                 now = ktime_get_real_seconds();
487
488                 if (next_manage - now <= 0) {
489                         if (queue_work(afs_wq, &net->fs_manager))
490                                 afs_inc_servers_outstanding(net);
491                 } else {
492                         afs_set_server_timer(net, next_manage - now);
493                 }
494         }
495
496         afs_gc_servers(net, gc_list);
497
498         afs_dec_servers_outstanding(net);
499         _leave(" [%d]", atomic_read(&net->servers_outstanding));
500 }
501
502 static void afs_queue_server_manager(struct afs_net *net)
503 {
504         afs_inc_servers_outstanding(net);
505         if (!queue_work(afs_wq, &net->fs_manager))
506                 afs_dec_servers_outstanding(net);
507 }
508
509 /*
510  * Purge list of servers.
511  */
512 void afs_purge_servers(struct afs_net *net)
513 {
514         _enter("");
515
516         if (del_timer_sync(&net->fs_timer))
517                 atomic_dec(&net->servers_outstanding);
518
519         afs_queue_server_manager(net);
520
521         _debug("wait");
522         wait_var_event(&net->servers_outstanding,
523                        !atomic_read(&net->servers_outstanding));
524         _leave("");
525 }
526
/*
 * Probe a fileserver to find its capabilities.
 *
 * The address cursor in @fc is reset to start from the address list's current
 * index and each address is then tried in turn.  The first successful reply
 * marks the server as probed and returns true.  Unreachable, refused and
 * timed-out addresses are skipped; any other error ends the probe.  Returns
 * false if no address gave a successful reply.
 *
 * TODO: Try service upgrade.
 */
static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
{
	struct afs_addr_cursor *ac = &fc->ac;

	_enter("");

	ac->start = READ_ONCE(ac->alist->index);
	ac->index = ac->start;
	ac->addr = NULL;
	ac->error = 0;
	ac->begun = false;

	while (afs_iterate_addresses(ac)) {
		afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server,
					ac, fc->key);

		if (ac->error == 0) {
			afs_end_cursor(ac);
			set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags);
			return true;
		}

		switch (ac->error) {
		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
		case -ETIMEDOUT:
		case -ETIME:
			/* Try the next address */
			continue;
		case -ECONNABORTED:
			ac->error = afs_abort_to_error(ac->abort_code);
			break;
		case -ENOMEM:
		case -ENONET:
			break;
		default:
			ac->error = -EIO;
			break;
		}

		/* Any error that got here ends the probe */
		break;
	}

	afs_end_cursor(ac);
	return false;
}
572
573 /*
574  * If we haven't already, try probing the fileserver to get its capabilities.
575  * We try not to instigate parallel probes, but it's possible that the parallel
576  * probes will fail due to authentication failure when ours would succeed.
577  *
578  * TODO: Try sending an anonymous probe if an authenticated probe fails.
579  */
580 bool afs_probe_fileserver(struct afs_fs_cursor *fc)
581 {
582         bool success;
583         int ret, retries = 0;
584
585         _enter("");
586
587 retry:
588         if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) {
589                 _leave(" = t");
590                 return true;
591         }
592
593         if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) {
594                 success = afs_do_probe_fileserver(fc);
595                 clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags);
596                 wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING);
597                 _leave(" = t");
598                 return success;
599         }
600
601         _debug("wait");
602         ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING,
603                           TASK_INTERRUPTIBLE);
604         if (ret == -ERESTARTSYS) {
605                 fc->ac.error = ret;
606                 _leave(" = f [%d]", ret);
607                 return false;
608         }
609
610         retries++;
611         if (retries == 4) {
612                 fc->ac.error = -ESTALE;
613                 _leave(" = f [stale]");
614                 return false;
615         }
616         _debug("retry");
617         goto retry;
618 }
619
620 /*
621  * Get an update for a server's address list.
622  */
623 static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
624 {
625         struct afs_addr_list *alist, *discard;
626
627         _enter("");
628
629         alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
630                                     &server->uuid);
631         if (IS_ERR(alist)) {
632                 fc->ac.error = PTR_ERR(alist);
633                 _leave(" = f [%d]", fc->ac.error);
634                 return false;
635         }
636
637         discard = alist;
638         if (server->addr_version != alist->version) {
639                 write_lock(&server->fs_lock);
640                 discard = rcu_dereference_protected(server->addresses,
641                                                     lockdep_is_held(&server->fs_lock));
642                 rcu_assign_pointer(server->addresses, alist);
643                 server->addr_version = alist->version;
644                 write_unlock(&server->fs_lock);
645         }
646
647         server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
648         afs_put_addrlist(discard);
649         _leave(" = t");
650         return true;
651 }
652
653 /*
654  * See if a server's address list needs updating.
655  */
656 bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
657 {
658         time64_t now = ktime_get_real_seconds();
659         long diff;
660         bool success;
661         int ret, retries = 0;
662
663         _enter("");
664
665         ASSERT(server);
666
667 retry:
668         diff = READ_ONCE(server->update_at) - now;
669         if (diff > 0) {
670                 _leave(" = t [not now %ld]", diff);
671                 return true;
672         }
673
674         if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
675                 success = afs_update_server_record(fc, server);
676                 clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
677                 wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
678                 _leave(" = %d", success);
679                 return success;
680         }
681
682         ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
683                           TASK_INTERRUPTIBLE);
684         if (ret == -ERESTARTSYS) {
685                 fc->ac.error = ret;
686                 _leave(" = f [intr]");
687                 return false;
688         }
689
690         retries++;
691         if (retries == 4) {
692                 _leave(" = f [stale]");
693                 ret = -ESTALE;
694                 return false;
695         }
696         goto retry;
697 }