drivers/infiniband/core/cm.c
/*
 * Copyright (c) 2004-2007 Intel Corporation.  All rights reserved.
 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
 * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/random.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/workqueue.h>
#include <linux/kdev_t.h>
#include <linux/etherdevice.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include "cm_msgs.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");

static const char * const ibcm_rej_reason_strs[] = {
        [IB_CM_REJ_NO_QP]                       = "no QP",
        [IB_CM_REJ_NO_EEC]                      = "no EEC",
        [IB_CM_REJ_NO_RESOURCES]                = "no resources",
        [IB_CM_REJ_TIMEOUT]                     = "timeout",
        [IB_CM_REJ_UNSUPPORTED]                 = "unsupported",
        [IB_CM_REJ_INVALID_COMM_ID]             = "invalid comm ID",
        [IB_CM_REJ_INVALID_COMM_INSTANCE]       = "invalid comm instance",
        [IB_CM_REJ_INVALID_SERVICE_ID]          = "invalid service ID",
        [IB_CM_REJ_INVALID_TRANSPORT_TYPE]      = "invalid transport type",
        [IB_CM_REJ_STALE_CONN]                  = "stale conn",
        [IB_CM_REJ_RDC_NOT_EXIST]               = "RDC not exist",
        [IB_CM_REJ_INVALID_GID]                 = "invalid GID",
        [IB_CM_REJ_INVALID_LID]                 = "invalid LID",
        [IB_CM_REJ_INVALID_SL]                  = "invalid SL",
        [IB_CM_REJ_INVALID_TRAFFIC_CLASS]       = "invalid traffic class",
        [IB_CM_REJ_INVALID_HOP_LIMIT]           = "invalid hop limit",
        [IB_CM_REJ_INVALID_PACKET_RATE]         = "invalid packet rate",
        [IB_CM_REJ_INVALID_ALT_GID]             = "invalid alt GID",
        [IB_CM_REJ_INVALID_ALT_LID]             = "invalid alt LID",
        [IB_CM_REJ_INVALID_ALT_SL]              = "invalid alt SL",
        [IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS]   = "invalid alt traffic class",
        [IB_CM_REJ_INVALID_ALT_HOP_LIMIT]       = "invalid alt hop limit",
        [IB_CM_REJ_INVALID_ALT_PACKET_RATE]     = "invalid alt packet rate",
        [IB_CM_REJ_PORT_CM_REDIRECT]            = "port CM redirect",
        [IB_CM_REJ_PORT_REDIRECT]               = "port redirect",
        [IB_CM_REJ_INVALID_MTU]                 = "invalid MTU",
        [IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES] = "insufficient resp resources",
        [IB_CM_REJ_CONSUMER_DEFINED]            = "consumer defined",
        [IB_CM_REJ_INVALID_RNR_RETRY]           = "invalid RNR retry",
        [IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID]     = "duplicate local comm ID",
        [IB_CM_REJ_INVALID_CLASS_VERSION]       = "invalid class version",
        [IB_CM_REJ_INVALID_FLOW_LABEL]          = "invalid flow label",
        [IB_CM_REJ_INVALID_ALT_FLOW_LABEL]      = "invalid alt flow label",
};

const char *__attribute_const__ ibcm_reject_msg(int reason)
{
        size_t index = reason;

        if (index < ARRAY_SIZE(ibcm_rej_reason_strs) &&
            ibcm_rej_reason_strs[index])
                return ibcm_rej_reason_strs[index];
        else
                return "unrecognized reason";
}
EXPORT_SYMBOL(ibcm_reject_msg);
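
/*
 * Illustrative usage sketch (hypothetical caller, not part of this file):
 * consumers typically feed the REJ reason from a received event into
 * ibcm_reject_msg() for logging, e.g.
 *
 *	pr_debug("connection rejected: %s\n",
 *		 ibcm_reject_msg(event->param.rej_rcvd.reason));
 *
 * Out-of-range or unknown reason codes safely fall back to
 * "unrecognized reason".
 */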

static void cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device, void *client_data);

static struct ib_client cm_client = {
        .name   = "cm",
        .add    = cm_add_one,
        .remove = cm_remove_one
};

static struct ib_cm {
        spinlock_t lock;
        struct list_head device_list;
        rwlock_t device_lock;
        struct rb_root listen_service_table;
        u64 listen_service_id;
        /* struct rb_root peer_service_table; todo: fix peer to peer */
        struct rb_root remote_qp_table;
        struct rb_root remote_id_table;
        struct rb_root remote_sidr_table;
        struct idr local_id_table;
        __be32 random_id_operand;
        struct list_head timewait_list;
        struct workqueue_struct *wq;
        /* Sync on cm change port state */
        spinlock_t state_lock;
} cm;

/* Counter indexes ordered by attribute ID */
enum {
        CM_REQ_COUNTER,
        CM_MRA_COUNTER,
        CM_REJ_COUNTER,
        CM_REP_COUNTER,
        CM_RTU_COUNTER,
        CM_DREQ_COUNTER,
        CM_DREP_COUNTER,
        CM_SIDR_REQ_COUNTER,
        CM_SIDR_REP_COUNTER,
        CM_LAP_COUNTER,
        CM_APR_COUNTER,
        CM_ATTR_COUNT,
        CM_ATTR_ID_OFFSET = 0x0010,
};

enum {
        CM_XMIT,
        CM_XMIT_RETRIES,
        CM_RECV,
        CM_RECV_DUPLICATES,
        CM_COUNTER_GROUPS
};

static char const counter_group_names[CM_COUNTER_GROUPS]
                                     [sizeof("cm_rx_duplicates")] = {
        "cm_tx_msgs", "cm_tx_retries",
        "cm_rx_msgs", "cm_rx_duplicates"
};

struct cm_counter_group {
        struct kobject obj;
        atomic_long_t counter[CM_ATTR_COUNT];
};

struct cm_counter_attribute {
        struct attribute attr;
        int index;
};

#define CM_COUNTER_ATTR(_name, _index) \
struct cm_counter_attribute cm_##_name##_counter_attr = { \
        .attr = { .name = __stringify(_name), .mode = 0444 }, \
        .index = _index \
}

static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);

static struct attribute *cm_counter_default_attrs[] = {
        &cm_req_counter_attr.attr,
        &cm_mra_counter_attr.attr,
        &cm_rej_counter_attr.attr,
        &cm_rep_counter_attr.attr,
        &cm_rtu_counter_attr.attr,
        &cm_dreq_counter_attr.attr,
        &cm_drep_counter_attr.attr,
        &cm_sidr_req_counter_attr.attr,
        &cm_sidr_rep_counter_attr.attr,
        &cm_lap_counter_attr.attr,
        &cm_apr_counter_attr.attr,
        NULL
};

struct cm_port {
        struct cm_device *cm_dev;
        struct ib_mad_agent *mad_agent;
        struct kobject port_obj;
        u8 port_num;
        struct list_head cm_priv_prim_list;
        struct list_head cm_priv_altr_list;
        struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
};

struct cm_device {
        struct list_head list;
        struct ib_device *ib_device;
        struct device *device;
        u8 ack_delay;
        int going_down;
        struct cm_port *port[0];
};

struct cm_av {
        struct cm_port *port;
        union ib_gid dgid;
        struct rdma_ah_attr ah_attr;
        u16 pkey_index;
        u8 timeout;
};

struct cm_work {
        struct delayed_work work;
        struct list_head list;
        struct cm_port *port;
        struct ib_mad_recv_wc *mad_recv_wc;     /* Received MADs */
        __be32 local_id;                        /* Established / timewait */
        __be32 remote_id;
        struct ib_cm_event cm_event;
        struct sa_path_rec path[0];
};

struct cm_timewait_info {
        struct cm_work work;                    /* Must be first. */
        struct list_head list;
        struct rb_node remote_qp_node;
        struct rb_node remote_id_node;
        __be64 remote_ca_guid;
        __be32 remote_qpn;
        u8 inserted_remote_qp;
        u8 inserted_remote_id;
};

struct cm_id_private {
        struct ib_cm_id id;

        struct rb_node service_node;
        struct rb_node sidr_id_node;
        spinlock_t lock;        /* Do not acquire inside cm.lock */
        struct completion comp;
        atomic_t refcount;
        /* Number of clients sharing this ib_cm_id. Only valid for listeners.
         * Protected by the cm.lock spinlock. */
        int listen_sharecount;

        struct ib_mad_send_buf *msg;
        struct cm_timewait_info *timewait_info;
        /* todo: use alternate port on send failure */
        struct cm_av av;
        struct cm_av alt_av;

        void *private_data;
        __be64 tid;
        __be32 local_qpn;
        __be32 remote_qpn;
        enum ib_qp_type qp_type;
        __be32 sq_psn;
        __be32 rq_psn;
        int timeout_ms;
        enum ib_mtu path_mtu;
        __be16 pkey;
        u8 private_data_len;
        u8 max_cm_retries;
        u8 peer_to_peer;
        u8 responder_resources;
        u8 initiator_depth;
        u8 retry_count;
        u8 rnr_retry_count;
        u8 service_timeout;
        u8 target_ack_delay;

        struct list_head prim_list;
        struct list_head altr_list;
        /* Indicates that the send port mad is registered and av is set */
        int prim_send_port_not_ready;
        int altr_send_port_not_ready;

        struct list_head work_list;
        atomic_t work_count;
};

static void cm_work_handler(struct work_struct *work);

static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
{
        if (atomic_dec_and_test(&cm_id_priv->refcount))
                complete(&cm_id_priv->comp);
}

static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
                        struct ib_mad_send_buf **msg)
{
        struct ib_mad_agent *mad_agent;
        struct ib_mad_send_buf *m;
        struct ib_ah *ah;
        struct cm_av *av;
        unsigned long flags, flags2;
        int ret = 0;

        /* don't let the port be released until the agent is down */
        spin_lock_irqsave(&cm.state_lock, flags2);
        spin_lock_irqsave(&cm.lock, flags);
        if (!cm_id_priv->prim_send_port_not_ready)
                av = &cm_id_priv->av;
        else if (!cm_id_priv->altr_send_port_not_ready &&
                 (cm_id_priv->alt_av.port))
                av = &cm_id_priv->alt_av;
        else {
                pr_info("%s: not valid CM id\n", __func__);
                ret = -ENODEV;
                spin_unlock_irqrestore(&cm.lock, flags);
                goto out;
        }
        spin_unlock_irqrestore(&cm.lock, flags);
        /* Make sure the port hasn't released the mad agent yet */
        mad_agent = av->port->mad_agent;
        if (!mad_agent) {
                pr_info("%s: not a valid MAD agent\n", __func__);
                ret = -ENODEV;
                goto out;
        }
        ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr, 0);
        if (IS_ERR(ah)) {
                ret = PTR_ERR(ah);
                goto out;
        }

        m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
                               av->pkey_index,
                               0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
                               GFP_ATOMIC,
                               IB_MGMT_BASE_VERSION);
        if (IS_ERR(m)) {
                rdma_destroy_ah(ah);
                ret = PTR_ERR(m);
                goto out;
        }

        /* Timeout set by caller if response is expected. */
        m->ah = ah;
        m->retries = cm_id_priv->max_cm_retries;

        atomic_inc(&cm_id_priv->refcount);
        m->context[0] = cm_id_priv;
        *msg = m;

out:
        spin_unlock_irqrestore(&cm.state_lock, flags2);
        return ret;
}

static struct ib_mad_send_buf *cm_alloc_response_msg_no_ah(struct cm_port *port,
                                                           struct ib_mad_recv_wc *mad_recv_wc)
{
        return ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
                                  0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
                                  GFP_ATOMIC,
                                  IB_MGMT_BASE_VERSION);
}

static int cm_create_response_msg_ah(struct cm_port *port,
                                     struct ib_mad_recv_wc *mad_recv_wc,
                                     struct ib_mad_send_buf *msg)
{
        struct ib_ah *ah;

        ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
                                  mad_recv_wc->recv_buf.grh, port->port_num);
        if (IS_ERR(ah))
                return PTR_ERR(ah);

        msg->ah = ah;
        return 0;
}

static void cm_free_msg(struct ib_mad_send_buf *msg)
{
        if (msg->ah)
                rdma_destroy_ah(msg->ah);
        if (msg->context[0])
                cm_deref_id(msg->context[0]);
        ib_free_send_mad(msg);
}

static int cm_alloc_response_msg(struct cm_port *port,
                                 struct ib_mad_recv_wc *mad_recv_wc,
                                 struct ib_mad_send_buf **msg)
{
        struct ib_mad_send_buf *m;
        int ret;

        m = cm_alloc_response_msg_no_ah(port, mad_recv_wc);
        if (IS_ERR(m))
                return PTR_ERR(m);

        ret = cm_create_response_msg_ah(port, mad_recv_wc, m);
        if (ret) {
                cm_free_msg(m);
                return ret;
        }

        *msg = m;
        return 0;
}

static void *cm_copy_private_data(const void *private_data,
                                  u8 private_data_len)
{
        void *data;

        if (!private_data || !private_data_len)
                return NULL;

        data = kmemdup(private_data, private_data_len, GFP_KERNEL);
        if (!data)
                return ERR_PTR(-ENOMEM);

        return data;
}

static void cm_set_private_data(struct cm_id_private *cm_id_priv,
                                void *private_data, u8 private_data_len)
{
        if (cm_id_priv->private_data && cm_id_priv->private_data_len)
                kfree(cm_id_priv->private_data);

        cm_id_priv->private_data = private_data;
        cm_id_priv->private_data_len = private_data_len;
}

static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
                              struct ib_grh *grh, struct cm_av *av)
{
        struct rdma_ah_attr new_ah_attr;
        int ret;

        av->port = port;
        av->pkey_index = wc->pkey_index;

        /*
         * av->ah_attr might already be initialized from a past wc during an
         * incoming connect request, or while sending out a connect request,
         * so initialize a new ah_attr on the stack.  If initialization
         * fails, the old ah_attr is still used for sending any responses.
         * If initialization succeeds, the new ah_attr overwrites the old one.
         */
        ret = ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
                                      port->port_num, wc,
                                      grh, &new_ah_attr);
        if (ret)
                return ret;

        rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
        return 0;
}

static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
                                   struct ib_grh *grh, struct cm_av *av)
{
        av->port = port;
        av->pkey_index = wc->pkey_index;
        return ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
                                       port->port_num, wc,
                                       grh, &av->ah_attr);
}

static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
                                  struct cm_av *av,
                                  struct cm_port *port)
{
        unsigned long flags;
        int ret = 0;

        spin_lock_irqsave(&cm.lock, flags);

        if (&cm_id_priv->av == av)
                list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
        else if (&cm_id_priv->alt_av == av)
                list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
        else
                ret = -EINVAL;

        spin_unlock_irqrestore(&cm.lock, flags);
        return ret;
}

static struct cm_port *
get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
{
        struct cm_device *cm_dev;
        struct cm_port *port = NULL;
        unsigned long flags;

        if (attr) {
                read_lock_irqsave(&cm.device_lock, flags);
                list_for_each_entry(cm_dev, &cm.device_list, list) {
                        if (cm_dev->ib_device == attr->device) {
                                port = cm_dev->port[attr->port_num - 1];
                                break;
                        }
                }
                read_unlock_irqrestore(&cm.device_lock, flags);
        } else {
                /* The SGID attribute can be NULL in the following
                 * cases:
                 * (a) alternate path
                 * (b) IB link layer without GRH
                 * (c) LAP send messages
                 */
                read_lock_irqsave(&cm.device_lock, flags);
                list_for_each_entry(cm_dev, &cm.device_list, list) {
                        attr = rdma_find_gid(cm_dev->ib_device,
                                             &path->sgid,
                                             sa_conv_pathrec_to_gid_type(path),
                                             NULL);
                        if (!IS_ERR(attr)) {
                                port = cm_dev->port[attr->port_num - 1];
                                break;
                        }
                }
                read_unlock_irqrestore(&cm.device_lock, flags);
                if (port)
                        rdma_put_gid_attr(attr);
        }
        return port;
}

static int cm_init_av_by_path(struct sa_path_rec *path,
                              const struct ib_gid_attr *sgid_attr,
                              struct cm_av *av,
                              struct cm_id_private *cm_id_priv)
{
        struct rdma_ah_attr new_ah_attr;
        struct cm_device *cm_dev;
        struct cm_port *port;
        int ret;

        port = get_cm_port_from_path(path, sgid_attr);
        if (!port)
                return -EINVAL;
        cm_dev = port->cm_dev;

        ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
                                  be16_to_cpu(path->pkey), &av->pkey_index);
        if (ret)
                return ret;

        av->port = port;

        /*
         * av->ah_attr might already be initialized from a wc or during
         * request processing, in which case it may hold a reference to
         * sgid_attr, so initialize a new ah_attr on the stack.
         * If initialization fails, the old ah_attr is still used for
         * sending any responses.  If initialization succeeds, the new
         * ah_attr overwrites the old one, so the right ah_attr can be
         * used to return an error response.
         */
        ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
                                        &new_ah_attr, sgid_attr);
        if (ret)
                return ret;

        av->timeout = path->packet_life_time + 1;

        ret = add_cm_id_to_port_list(cm_id_priv, av, port);
        if (ret) {
                rdma_destroy_ah_attr(&new_ah_attr);
                return ret;
        }
        rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
        return 0;
}

static int cm_alloc_id(struct cm_id_private *cm_id_priv)
{
        unsigned long flags;
        int id;

        idr_preload(GFP_KERNEL);
        spin_lock_irqsave(&cm.lock, flags);

        id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT);

        spin_unlock_irqrestore(&cm.lock, flags);
        idr_preload_end();

        cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
        return id < 0 ? id : 0;
}
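
/*
 * A note on the local ID obfuscation above (illustrative, hypothetical
 * values): the IDR hands out small sequential integers, so the visible
 * local_id is XORed with a boot-time random operand to make IDs harder
 * to guess on the wire.  The XOR is self-inverse, so lookup simply
 * repeats it:
 *
 *	idr id          = 0x00000007
 *	random operand  = 0x5a5a5a5a
 *	local_id        = 0x00000007 ^ 0x5a5a5a5a = 0x5a5a5a5d
 *	idr key on find = 0x5a5a5a5d ^ 0x5a5a5a5a = 0x00000007
 */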

static void cm_free_id(__be32 local_id)
{
        spin_lock_irq(&cm.lock);
        idr_remove(&cm.local_id_table,
                   (__force int) (local_id ^ cm.random_id_operand));
        spin_unlock_irq(&cm.lock);
}

static struct cm_id_private *cm_get_id(__be32 local_id, __be32 remote_id)
{
        struct cm_id_private *cm_id_priv;

        cm_id_priv = idr_find(&cm.local_id_table,
                              (__force int) (local_id ^ cm.random_id_operand));
        if (cm_id_priv) {
                if (cm_id_priv->id.remote_id == remote_id)
                        atomic_inc(&cm_id_priv->refcount);
                else
                        cm_id_priv = NULL;
        }

        return cm_id_priv;
}

static struct cm_id_private *cm_acquire_id(__be32 local_id, __be32 remote_id)
{
        struct cm_id_private *cm_id_priv;

        spin_lock_irq(&cm.lock);
        cm_id_priv = cm_get_id(local_id, remote_id);
        spin_unlock_irq(&cm.lock);

        return cm_id_priv;
}

/*
 * Trivial helpers to strip endian annotation and compare; the
 * endianness doesn't actually matter since we just need a stable
 * order for the RB tree.
 */
static int be32_lt(__be32 a, __be32 b)
{
        return (__force u32) a < (__force u32) b;
}

static int be32_gt(__be32 a, __be32 b)
{
        return (__force u32) a > (__force u32) b;
}

static int be64_lt(__be64 a, __be64 b)
{
        return (__force u64) a < (__force u64) b;
}

static int be64_gt(__be64 a, __be64 b)
{
        return (__force u64) a > (__force u64) b;
}
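
/*
 * Illustrative note: the __force casts above compare raw bit patterns,
 * so e.g. cpu_to_be32(1) compares as 0x01000000 on a little-endian
 * host.  That ordering differs from numeric host order, but every node
 * in a given tree is compared the same way, which is all rb-tree
 * insertion and lookup require.
 */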

static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv)
{
        struct rb_node **link = &cm.listen_service_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_id_private *cur_cm_id_priv;
        __be64 service_id = cm_id_priv->id.service_id;
        __be64 service_mask = cm_id_priv->id.service_mask;

        while (*link) {
                parent = *link;
                cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
                                          service_node);
                if ((cur_cm_id_priv->id.service_mask & service_id) ==
                    (service_mask & cur_cm_id_priv->id.service_id) &&
                    (cm_id_priv->id.device == cur_cm_id_priv->id.device))
                        return cur_cm_id_priv;

                if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
                        link = &(*link)->rb_left;
                else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
                        link = &(*link)->rb_right;
                else if (be64_lt(service_id, cur_cm_id_priv->id.service_id))
                        link = &(*link)->rb_left;
                else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
                        link = &(*link)->rb_right;
                else
                        link = &(*link)->rb_right;
        }
        rb_link_node(&cm_id_priv->service_node, parent, link);
        rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
        return NULL;
}

static struct cm_id_private *cm_find_listen(struct ib_device *device,
                                            __be64 service_id)
{
        struct rb_node *node = cm.listen_service_table.rb_node;
        struct cm_id_private *cm_id_priv;

        while (node) {
                cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
                if ((cm_id_priv->id.service_mask & service_id) ==
                     cm_id_priv->id.service_id &&
                    (cm_id_priv->id.device == device))
                        return cm_id_priv;

                if (device < cm_id_priv->id.device)
                        node = node->rb_left;
                else if (device > cm_id_priv->id.device)
                        node = node->rb_right;
                else if (be64_lt(service_id, cm_id_priv->id.service_id))
                        node = node->rb_left;
                else if (be64_gt(service_id, cm_id_priv->id.service_id))
                        node = node->rb_right;
                else
                        node = node->rb_right;
        }
        return NULL;
}

static struct cm_timewait_info *cm_insert_remote_id(struct cm_timewait_info
                                                    *timewait_info)
{
        struct rb_node **link = &cm.remote_id_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_timewait_info *cur_timewait_info;
        __be64 remote_ca_guid = timewait_info->remote_ca_guid;
        __be32 remote_id = timewait_info->work.remote_id;

        while (*link) {
                parent = *link;
                cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
                                             remote_id_node);
                if (be32_lt(remote_id, cur_timewait_info->work.remote_id))
                        link = &(*link)->rb_left;
                else if (be32_gt(remote_id, cur_timewait_info->work.remote_id))
                        link = &(*link)->rb_right;
                else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_left;
                else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_right;
                else
                        return cur_timewait_info;
        }
        timewait_info->inserted_remote_id = 1;
        rb_link_node(&timewait_info->remote_id_node, parent, link);
        rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table);
        return NULL;
}

static struct cm_timewait_info *cm_find_remote_id(__be64 remote_ca_guid,
                                                  __be32 remote_id)
{
        struct rb_node *node = cm.remote_id_table.rb_node;
        struct cm_timewait_info *timewait_info;

        while (node) {
                timewait_info = rb_entry(node, struct cm_timewait_info,
                                         remote_id_node);
                if (be32_lt(remote_id, timewait_info->work.remote_id))
                        node = node->rb_left;
                else if (be32_gt(remote_id, timewait_info->work.remote_id))
                        node = node->rb_right;
                else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid))
                        node = node->rb_left;
                else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
                        node = node->rb_right;
                else
                        return timewait_info;
        }
        return NULL;
}

static struct cm_timewait_info *cm_insert_remote_qpn(struct cm_timewait_info
                                                     *timewait_info)
{
        struct rb_node **link = &cm.remote_qp_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_timewait_info *cur_timewait_info;
        __be64 remote_ca_guid = timewait_info->remote_ca_guid;
        __be32 remote_qpn = timewait_info->remote_qpn;

        while (*link) {
                parent = *link;
                cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
                                             remote_qp_node);
                if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn))
                        link = &(*link)->rb_left;
                else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn))
                        link = &(*link)->rb_right;
                else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_left;
                else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
                        link = &(*link)->rb_right;
                else
                        return cur_timewait_info;
        }
        timewait_info->inserted_remote_qp = 1;
        rb_link_node(&timewait_info->remote_qp_node, parent, link);
        rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table);
        return NULL;
}

static struct cm_id_private *cm_insert_remote_sidr(struct cm_id_private
                                                   *cm_id_priv)
{
        struct rb_node **link = &cm.remote_sidr_table.rb_node;
        struct rb_node *parent = NULL;
        struct cm_id_private *cur_cm_id_priv;
        union ib_gid *port_gid = &cm_id_priv->av.dgid;
        __be32 remote_id = cm_id_priv->id.remote_id;

        while (*link) {
                parent = *link;
                cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
                                          sidr_id_node);
                if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id))
                        link = &(*link)->rb_left;
                else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
                        link = &(*link)->rb_right;
                else {
                        int cmp;

                        cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
                                     sizeof *port_gid);
                        if (cmp < 0)
                                link = &(*link)->rb_left;
                        else if (cmp > 0)
                                link = &(*link)->rb_right;
                        else
                                return cur_cm_id_priv;
                }
        }
        rb_link_node(&cm_id_priv->sidr_id_node, parent, link);
        rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
        return NULL;
}

static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
                               enum ib_cm_sidr_status status)
{
        struct ib_cm_sidr_rep_param param;

        memset(&param, 0, sizeof param);
        param.status = status;
        ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
}

struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
                                 ib_cm_handler cm_handler,
                                 void *context)
{
        struct cm_id_private *cm_id_priv;
        int ret;

        cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
        if (!cm_id_priv)
                return ERR_PTR(-ENOMEM);

        cm_id_priv->id.state = IB_CM_IDLE;
        cm_id_priv->id.device = device;
        cm_id_priv->id.cm_handler = cm_handler;
        cm_id_priv->id.context = context;
        cm_id_priv->id.remote_cm_qpn = 1;
        ret = cm_alloc_id(cm_id_priv);
        if (ret)
                goto error;

        spin_lock_init(&cm_id_priv->lock);
        init_completion(&cm_id_priv->comp);
        INIT_LIST_HEAD(&cm_id_priv->work_list);
        INIT_LIST_HEAD(&cm_id_priv->prim_list);
        INIT_LIST_HEAD(&cm_id_priv->altr_list);
        atomic_set(&cm_id_priv->work_count, -1);
        atomic_set(&cm_id_priv->refcount, 1);
        return &cm_id_priv->id;

error:
        kfree(cm_id_priv);
        return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_create_cm_id);
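
/*
 * Minimal usage sketch (illustrative only; "my_cm_handler" and the
 * error-handling policy are assumptions, not part of this file):
 *
 *	static int my_cm_handler(struct ib_cm_id *id,
 *				 struct ib_cm_event *event)
 *	{
 *		return 0;	(returning non-zero tells the CM to
 *				 destroy the id on the caller's behalf)
 *	}
 *
 *	struct ib_cm_id *id = ib_create_cm_id(device, my_cm_handler, NULL);
 *	if (IS_ERR(id))
 *		return PTR_ERR(id);
 *	...
 *	ib_destroy_cm_id(id);
 */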

static struct cm_work *cm_dequeue_work(struct cm_id_private *cm_id_priv)
{
        struct cm_work *work;

        if (list_empty(&cm_id_priv->work_list))
                return NULL;

        work = list_entry(cm_id_priv->work_list.next, struct cm_work, list);
        list_del(&work->list);
        return work;
}

static void cm_free_work(struct cm_work *work)
{
        if (work->mad_recv_wc)
                ib_free_recv_mad(work->mad_recv_wc);
        kfree(work);
}

static inline int cm_convert_to_ms(int iba_time)
{
        /* approximate conversion to ms from 4.096us x 2^iba_time */
        return 1 << max(iba_time - 8, 0);
}
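
/*
 * Worked example for the approximation above: IBA encodes times as
 * 4.096us * 2^t.  For t = 20 the exact value is ~4295 ms, while
 * 1 << (20 - 8) = 4096 ms -- close enough for timeout purposes -- and
 * the max() clamps tiny encodings (t <= 8) to a 1 ms floor.
 */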

/*
 * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time
 * Because of how ack_timeout is stored, adding one doubles the timeout.
 * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
 * increment it (round up) only if the other is within 50%.
 */
static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
{
        int ack_timeout = packet_life_time + 1;

        if (ack_timeout >= ca_ack_delay)
                ack_timeout += (ca_ack_delay >= (ack_timeout - 1));
        else
                ack_timeout = ca_ack_delay +
                              (ack_timeout >= (ca_ack_delay - 1));

        return min(31, ack_timeout);
}
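
/*
 * Worked example for cm_ack_timeout(): with ca_ack_delay = 15 and
 * packet_life_time = 14, ack_timeout starts at 15; since 15 >= 15 and
 * the other term is within one encoding step (15 >= 14), it rounds up
 * to 16.  The result is clamped to 31, the largest value the 5-bit
 * ack timeout field can carry.
 */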

static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
{
        if (timewait_info->inserted_remote_id) {
                rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
                timewait_info->inserted_remote_id = 0;
        }

        if (timewait_info->inserted_remote_qp) {
                rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table);
                timewait_info->inserted_remote_qp = 0;
        }
}

static struct cm_timewait_info *cm_create_timewait_info(__be32 local_id)
{
        struct cm_timewait_info *timewait_info;

        timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL);
        if (!timewait_info)
                return ERR_PTR(-ENOMEM);

        timewait_info->work.local_id = local_id;
        INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler);
        timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
        return timewait_info;
}

static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
{
        int wait_time;
        unsigned long flags;
        struct cm_device *cm_dev;

        cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client);
        if (!cm_dev)
                return;

        spin_lock_irqsave(&cm.lock, flags);
        cm_cleanup_timewait(cm_id_priv->timewait_info);
        list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
        spin_unlock_irqrestore(&cm.lock, flags);

        /*
         * The cm_id could be destroyed by the user before we exit timewait.
         * To protect against this, we search for the cm_id after exiting
         * timewait before notifying the user that we've exited timewait.
         */
        cm_id_priv->id.state = IB_CM_TIMEWAIT;
        wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);

        /* Check if the device started its remove_one */
        spin_lock_irqsave(&cm.lock, flags);
        if (!cm_dev->going_down)
                queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
                                   msecs_to_jiffies(wait_time));
        spin_unlock_irqrestore(&cm.lock, flags);

        cm_id_priv->timewait_info = NULL;
}
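
/*
 * Background note (general IB CM behavior, not specific to this file):
 * timewait keeps the connection's remote QPN and comm ID reserved in
 * the rb-trees after disconnect, so delayed packets or retried CM
 * messages from the old connection cannot be confused with a new
 * connection reusing the same identifiers.  The wait is scaled from
 * the path's packet lifetime via cm_convert_to_ms() above.
 */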

static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
{
        unsigned long flags;

        cm_id_priv->id.state = IB_CM_IDLE;
        if (cm_id_priv->timewait_info) {
                spin_lock_irqsave(&cm.lock, flags);
                cm_cleanup_timewait(cm_id_priv->timewait_info);
                spin_unlock_irqrestore(&cm.lock, flags);
                kfree(cm_id_priv->timewait_info);
                cm_id_priv->timewait_info = NULL;
        }
}

static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
{
        struct cm_id_private *cm_id_priv;
        struct cm_work *work;

        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
retest:
        spin_lock_irq(&cm_id_priv->lock);
        switch (cm_id->state) {
        case IB_CM_LISTEN:
                spin_unlock_irq(&cm_id_priv->lock);

                spin_lock_irq(&cm.lock);
                if (--cm_id_priv->listen_sharecount > 0) {
                        /* The id is still shared. */
                        cm_deref_id(cm_id_priv);
                        spin_unlock_irq(&cm.lock);
                        return;
                }
                rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
                spin_unlock_irq(&cm.lock);
                break;
        case IB_CM_SIDR_REQ_SENT:
                cm_id->state = IB_CM_IDLE;
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                spin_unlock_irq(&cm_id_priv->lock);
                break;
        case IB_CM_SIDR_REQ_RCVD:
                spin_unlock_irq(&cm_id_priv->lock);
                cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
                spin_lock_irq(&cm.lock);
                if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
                        rb_erase(&cm_id_priv->sidr_id_node,
                                 &cm.remote_sidr_table);
                spin_unlock_irq(&cm.lock);
                break;
        case IB_CM_REQ_SENT:
        case IB_CM_MRA_REQ_RCVD:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
                               &cm_id_priv->id.device->node_guid,
                               sizeof cm_id_priv->id.device->node_guid,
                               NULL, 0);
                break;
        case IB_CM_REQ_RCVD:
                if (err == -ENOMEM) {
                        /* Do not reject to allow future retries. */
                        cm_reset_to_idle(cm_id_priv);
                        spin_unlock_irq(&cm_id_priv->lock);
                } else {
                        spin_unlock_irq(&cm_id_priv->lock);
                        ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
                                       NULL, 0, NULL, 0);
                }
                break;
        case IB_CM_REP_SENT:
        case IB_CM_MRA_REP_RCVD:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                /* Fall through */
        case IB_CM_MRA_REQ_SENT:
        case IB_CM_REP_RCVD:
        case IB_CM_MRA_REP_SENT:
                spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
                               NULL, 0, NULL, 0);
                break;
        case IB_CM_ESTABLISHED:
                spin_unlock_irq(&cm_id_priv->lock);
                if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
                        break;
                ib_send_cm_dreq(cm_id, NULL, 0);
                goto retest;
        case IB_CM_DREQ_SENT:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                cm_enter_timewait(cm_id_priv);
                spin_unlock_irq(&cm_id_priv->lock);
                break;
        case IB_CM_DREQ_RCVD:
                spin_unlock_irq(&cm_id_priv->lock);
                ib_send_cm_drep(cm_id, NULL, 0);
                break;
        default:
                spin_unlock_irq(&cm_id_priv->lock);
                break;
        }

        spin_lock_irq(&cm.lock);
        if (!list_empty(&cm_id_priv->altr_list) &&
            (!cm_id_priv->altr_send_port_not_ready))
                list_del(&cm_id_priv->altr_list);
        if (!list_empty(&cm_id_priv->prim_list) &&
            (!cm_id_priv->prim_send_port_not_ready))
                list_del(&cm_id_priv->prim_list);
        spin_unlock_irq(&cm.lock);

        cm_free_id(cm_id->local_id);
        cm_deref_id(cm_id_priv);
        wait_for_completion(&cm_id_priv->comp);
        while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
                cm_free_work(work);

        rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr);
        rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr);
        kfree(cm_id_priv->private_data);
        kfree(cm_id_priv);
}

void ib_destroy_cm_id(struct ib_cm_id *cm_id)
{
        cm_destroy_id(cm_id, 0);
}
EXPORT_SYMBOL(ib_destroy_cm_id);

/**
 * __ib_cm_listen - Initiates listening on the specified service ID for
 *   connection and service ID resolution requests.
 * @cm_id: Connection identifier associated with the listen request.
 * @service_id: Service identifier matched against incoming connection
 *   and service ID resolution requests.  The service ID should be
 *   specified in network byte order.  If set to IB_CM_ASSIGN_SERVICE_ID,
 *   the CM will assign a service ID to the caller.
 * @service_mask: Mask applied to the service ID used to listen across a
 *   range of service IDs.  If set to 0, the service ID is matched
 *   exactly.  This parameter is ignored if %service_id is set to
 *   IB_CM_ASSIGN_SERVICE_ID.
 */
static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
                          __be64 service_mask)
{
        struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
        int ret = 0;

        service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
        service_id &= service_mask;
        if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
            (service_id != IB_CM_ASSIGN_SERVICE_ID))
                return -EINVAL;

        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
        if (cm_id->state != IB_CM_IDLE)
                return -EINVAL;

        cm_id->state = IB_CM_LISTEN;
        ++cm_id_priv->listen_sharecount;

        if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
                cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
                cm_id->service_mask = ~cpu_to_be64(0);
        } else {
                cm_id->service_id = service_id;
                cm_id->service_mask = service_mask;
        }
        cur_cm_id_priv = cm_insert_listen(cm_id_priv);

        if (cur_cm_id_priv) {
                cm_id->state = IB_CM_IDLE;
                --cm_id_priv->listen_sharecount;
                ret = -EBUSY;
        }
        return ret;
}

int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&cm.lock, flags);
        ret = __ib_cm_listen(cm_id, service_id, service_mask);
        spin_unlock_irqrestore(&cm.lock, flags);

        return ret;
}
EXPORT_SYMBOL(ib_cm_listen);
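
/*
 * Illustrative listen sketch (hypothetical service ID, not part of
 * this file): after creating an id, a server typically does
 *
 *	err = ib_cm_listen(cm_id, cpu_to_be64(0x1234ULL), 0);
 *
 * A service_mask of 0 requests an exact service ID match, per the
 * __ib_cm_listen() documentation above.
 */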

/**
 * ib_cm_insert_listen - Create a new listening ib_cm_id and listen on
 *   the given service ID.
 *
 * If there's an existing ID listening on that same device and service ID,
 * return it.
 *
 * @device: Device associated with the cm_id.  All related communication will
 *   be associated with the specified device.
 * @cm_handler: Callback invoked to notify the user of CM events.
 * @service_id: Service identifier matched against incoming connection
 *   and service ID resolution requests.  The service ID should be
 *   specified in network byte order.  If set to IB_CM_ASSIGN_SERVICE_ID,
 *   the CM will assign a service ID to the caller.
 *
 * Callers should call ib_destroy_cm_id when done with the listener ID.
 */
struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
                                     ib_cm_handler cm_handler,
                                     __be64 service_id)
{
        struct cm_id_private *cm_id_priv;
        struct ib_cm_id *cm_id;
        unsigned long flags;
        int err = 0;

        /* Create an ID in advance, since the creation may sleep */
        cm_id = ib_create_cm_id(device, cm_handler, NULL);
        if (IS_ERR(cm_id))
                return cm_id;

        spin_lock_irqsave(&cm.lock, flags);

        if (service_id == IB_CM_ASSIGN_SERVICE_ID)
                goto new_id;

        /* Find an existing ID */
        cm_id_priv = cm_find_listen(device, service_id);
        if (cm_id_priv) {
                if (cm_id->cm_handler != cm_handler || cm_id->context) {
                        /* Sharing an ib_cm_id with different handlers is not
                         * supported */
                        spin_unlock_irqrestore(&cm.lock, flags);
                        ib_destroy_cm_id(cm_id);
                        return ERR_PTR(-EINVAL);
                }
                atomic_inc(&cm_id_priv->refcount);
                ++cm_id_priv->listen_sharecount;
                spin_unlock_irqrestore(&cm.lock, flags);

                ib_destroy_cm_id(cm_id);
                cm_id = &cm_id_priv->id;
                return cm_id;
        }

new_id:
        /* Use newly created ID */
        err = __ib_cm_listen(cm_id, service_id, 0);

        spin_unlock_irqrestore(&cm.lock, flags);

        if (err) {
                ib_destroy_cm_id(cm_id);
                return ERR_PTR(err);
        }
        return cm_id;
}
EXPORT_SYMBOL(ib_cm_insert_listen);

static __be64 cm_form_tid(struct cm_id_private *cm_id_priv)
{
        u64 hi_tid, low_tid;

        hi_tid   = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
        low_tid  = (u64)cm_id_priv->id.local_id;
        return cpu_to_be64(hi_tid | low_tid);
}
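
/*
 * TID layout produced above (illustrative, hypothetical values): the
 * upper 32 bits carry the MAD agent's hi_tid and the lower 32 bits the
 * local_id, so replies can be demultiplexed back to both the sending
 * agent and the originating connection, e.g. hi_tid 0x00000002 and
 * local_id 0x5a5a5a5d combine into TID 0x000000025a5a5a5d.
 */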
1267
1268 static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
1269                               __be16 attr_id, __be64 tid)
1270 {
1271         hdr->base_version  = IB_MGMT_BASE_VERSION;
1272         hdr->mgmt_class    = IB_MGMT_CLASS_CM;
1273         hdr->class_version = IB_CM_CLASS_VERSION;
1274         hdr->method        = IB_MGMT_METHOD_SEND;
1275         hdr->attr_id       = attr_id;
1276         hdr->tid           = tid;
1277 }
1278
1279 static void cm_format_req(struct cm_req_msg *req_msg,
1280                           struct cm_id_private *cm_id_priv,
1281                           struct ib_cm_req_param *param)
1282 {
1283         struct sa_path_rec *pri_path = param->primary_path;
1284         struct sa_path_rec *alt_path = param->alternate_path;
1285         bool pri_ext = false;
1286
1287         if (pri_path->rec_type == SA_PATH_REC_TYPE_OPA)
1288                 pri_ext = opa_is_extended_lid(pri_path->opa.dlid,
1289                                               pri_path->opa.slid);
1290
1291         cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
1292                           cm_form_tid(cm_id_priv));
1293
1294         req_msg->local_comm_id = cm_id_priv->id.local_id;
1295         req_msg->service_id = param->service_id;
1296         req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
1297         cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
1298         cm_req_set_init_depth(req_msg, param->initiator_depth);
1299         cm_req_set_remote_resp_timeout(req_msg,
1300                                        param->remote_cm_response_timeout);
1301         cm_req_set_qp_type(req_msg, param->qp_type);
1302         cm_req_set_flow_ctrl(req_msg, param->flow_control);
1303         cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
1304         cm_req_set_local_resp_timeout(req_msg,
1305                                       param->local_cm_response_timeout);
1306         req_msg->pkey = param->primary_path->pkey;
1307         cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
1308         cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);
1309
1310         if (param->qp_type != IB_QPT_XRC_INI) {
1311                 cm_req_set_resp_res(req_msg, param->responder_resources);
1312                 cm_req_set_retry_count(req_msg, param->retry_count);
1313                 cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
1314                 cm_req_set_srq(req_msg, param->srq);
1315         }
1316
1317         req_msg->primary_local_gid = pri_path->sgid;
1318         req_msg->primary_remote_gid = pri_path->dgid;
1319         if (pri_ext) {
1320                 req_msg->primary_local_gid.global.interface_id
1321                         = OPA_MAKE_ID(be32_to_cpu(pri_path->opa.slid));
1322                 req_msg->primary_remote_gid.global.interface_id
1323                         = OPA_MAKE_ID(be32_to_cpu(pri_path->opa.dlid));
1324         }
1325         if (pri_path->hop_limit <= 1) {
1326                 req_msg->primary_local_lid = pri_ext ? 0 :
1327                         htons(ntohl(sa_path_get_slid(pri_path)));
1328                 req_msg->primary_remote_lid = pri_ext ? 0 :
1329                         htons(ntohl(sa_path_get_dlid(pri_path)));
1330         } else {
1331                 /* Work-around until there's a way to obtain remote LID info */
1332                 req_msg->primary_local_lid = IB_LID_PERMISSIVE;
1333                 req_msg->primary_remote_lid = IB_LID_PERMISSIVE;
1334         }
1335         cm_req_set_primary_flow_label(req_msg, pri_path->flow_label);
1336         cm_req_set_primary_packet_rate(req_msg, pri_path->rate);
1337         req_msg->primary_traffic_class = pri_path->traffic_class;
1338         req_msg->primary_hop_limit = pri_path->hop_limit;
1339         cm_req_set_primary_sl(req_msg, pri_path->sl);
1340         cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1));
1341         cm_req_set_primary_local_ack_timeout(req_msg,
1342                 cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
1343                                pri_path->packet_life_time));
1344
1345         if (alt_path) {
1346                 bool alt_ext = false;
1347
1348                 if (alt_path->rec_type == SA_PATH_REC_TYPE_OPA)
1349                         alt_ext = opa_is_extended_lid(alt_path->opa.dlid,
1350                                                       alt_path->opa.slid);
1351
1352                 req_msg->alt_local_gid = alt_path->sgid;
1353                 req_msg->alt_remote_gid = alt_path->dgid;
1354                 if (alt_ext) {
1355                         req_msg->alt_local_gid.global.interface_id
1356                                 = OPA_MAKE_ID(be32_to_cpu(alt_path->opa.slid));
1357                         req_msg->alt_remote_gid.global.interface_id
1358                                 = OPA_MAKE_ID(be32_to_cpu(alt_path->opa.dlid));
1359                 }
1360                 if (alt_path->hop_limit <= 1) {
1361                         req_msg->alt_local_lid = alt_ext ? 0 :
1362                                 htons(ntohl(sa_path_get_slid(alt_path)));
1363                         req_msg->alt_remote_lid = alt_ext ? 0 :
1364                                 htons(ntohl(sa_path_get_dlid(alt_path)));
1365                 } else {
1366                         req_msg->alt_local_lid = IB_LID_PERMISSIVE;
1367                         req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
1368                 }
1369                 cm_req_set_alt_flow_label(req_msg,
1370                                           alt_path->flow_label);
1371                 cm_req_set_alt_packet_rate(req_msg, alt_path->rate);
1372                 req_msg->alt_traffic_class = alt_path->traffic_class;
1373                 req_msg->alt_hop_limit = alt_path->hop_limit;
1374                 cm_req_set_alt_sl(req_msg, alt_path->sl);
1375                 cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1));
1376                 cm_req_set_alt_local_ack_timeout(req_msg,
1377                         cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
1378                                        alt_path->packet_life_time));
1379         }
1380
1381         if (param->private_data && param->private_data_len)
1382                 memcpy(req_msg->private_data, param->private_data,
1383                        param->private_data_len);
1384 }
1385
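/*
 * Sanity-check the caller-supplied REQ parameters: peer-to-peer mode is
 * not implemented, a primary path is mandatory, only RC, UC and XRC
 * initiator QPs can be connected, private data must fit in the REQ, and
 * any alternate path must share the primary path's pkey and MTU.
 */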
1386 static int cm_validate_req_param(struct ib_cm_req_param *param)
1387 {
1388         /* peer-to-peer not supported */
1389         if (param->peer_to_peer)
1390                 return -EINVAL;
1391
1392         if (!param->primary_path)
1393                 return -EINVAL;
1394
1395         if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC &&
1396             param->qp_type != IB_QPT_XRC_INI)
1397                 return -EINVAL;
1398
1399         if (param->private_data &&
1400             param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE)
1401                 return -EINVAL;
1402
1403         if (param->alternate_path &&
1404             (param->alternate_path->pkey != param->primary_path->pkey ||
1405              param->alternate_path->mtu != param->primary_path->mtu))
1406                 return -EINVAL;
1407
1408         return 0;
1409 }
1410
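/*
 * ib_send_cm_req - send a connection request (REQ) to the remote node.
 *
 * The cm_id must be in the IB_CM_IDLE state; on success it transitions
 * to IB_CM_REQ_SENT and the REQ is retransmitted by the MAD layer until
 * a reply arrives or max_cm_retries is exhausted.
 *
 * Illustrative caller sketch (names assumed, error handling elided):
 *
 *	struct ib_cm_req_param param = {};
 *
 *	param.primary_path   = &path_rec;
 *	param.service_id     = service_id;
 *	param.qp_num         = qp->qp_num;
 *	param.qp_type        = qp->qp_type;
 *	param.starting_psn   = psn;
 *	param.max_cm_retries = 15;
 *	ret = ib_send_cm_req(cm_id, &param);
 */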
1411 int ib_send_cm_req(struct ib_cm_id *cm_id,
1412                    struct ib_cm_req_param *param)
1413 {
1414         struct cm_id_private *cm_id_priv;
1415         struct cm_req_msg *req_msg;
1416         unsigned long flags;
1417         int ret;
1418
1419         ret = cm_validate_req_param(param);
1420         if (ret)
1421                 return ret;
1422
1423         /* Verify that we're not in timewait. */
1424         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1425         spin_lock_irqsave(&cm_id_priv->lock, flags);
1426         if (cm_id->state != IB_CM_IDLE) {
1427                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1428                 ret = -EINVAL;
1429                 goto out;
1430         }
1431         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1432
1433         cm_id_priv->timewait_info =
1434                 cm_create_timewait_info(cm_id_priv->id.local_id);
1435         if (IS_ERR(cm_id_priv->timewait_info)) {
1436                 ret = PTR_ERR(cm_id_priv->timewait_info);
1437                 goto out;
1438         }
1439
1440         ret = cm_init_av_by_path(param->primary_path,
1441                                  param->ppath_sgid_attr, &cm_id_priv->av,
1442                                  cm_id_priv);
1443         if (ret)
1444                 goto error1;
1445         if (param->alternate_path) {
1446                 ret = cm_init_av_by_path(param->alternate_path, NULL,
1447                                          &cm_id_priv->alt_av, cm_id_priv);
1448                 if (ret)
1449                         goto error1;
1450         }
1451         cm_id->service_id = param->service_id;
1452         cm_id->service_mask = ~cpu_to_be64(0);
1453         cm_id_priv->timeout_ms = cm_convert_to_ms(
1454                                     param->primary_path->packet_life_time) * 2 +
1455                                  cm_convert_to_ms(
1456                                     param->remote_cm_response_timeout);
1457         cm_id_priv->max_cm_retries = param->max_cm_retries;
1458         cm_id_priv->initiator_depth = param->initiator_depth;
1459         cm_id_priv->responder_resources = param->responder_resources;
1460         cm_id_priv->retry_count = param->retry_count;
1461         cm_id_priv->path_mtu = param->primary_path->mtu;
1462         cm_id_priv->pkey = param->primary_path->pkey;
1463         cm_id_priv->qp_type = param->qp_type;
1464
1465         ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
1466         if (ret)
1467                 goto error1;
1468
1469         req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
1470         cm_format_req(req_msg, cm_id_priv, param);
1471         cm_id_priv->tid = req_msg->hdr.tid;
1472         cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
1473         cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
1474
1475         cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
1476         cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);
1477
1478         spin_lock_irqsave(&cm_id_priv->lock, flags);
1479         ret = ib_post_send_mad(cm_id_priv->msg, NULL);
1480         if (ret) {
1481                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1482                 goto error2;
1483         }
1484         BUG_ON(cm_id->state != IB_CM_IDLE);
1485         cm_id->state = IB_CM_REQ_SENT;
1486         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1487         return 0;
1488
1489 error2: cm_free_msg(cm_id_priv->msg);
1490 error1: kfree(cm_id_priv->timewait_info);
1491 out:    return ret;
1492 }
1493 EXPORT_SYMBOL(ib_send_cm_req);
1494
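/*
 * Build and post a REJ directly from a received MAD when no local cm_id
 * exists to own the exchange (e.g. stale or unmatchable REQs/REPs).  The
 * comm IDs and TID are mirrored from the received message.
 */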
1495 static int cm_issue_rej(struct cm_port *port,
1496                         struct ib_mad_recv_wc *mad_recv_wc,
1497                         enum ib_cm_rej_reason reason,
1498                         enum cm_msg_response msg_rejected,
1499                         void *ari, u8 ari_length)
1500 {
1501         struct ib_mad_send_buf *msg = NULL;
1502         struct cm_rej_msg *rej_msg, *rcv_msg;
1503         int ret;
1504
1505         ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
1506         if (ret)
1507                 return ret;
1508
1509         /* We just need common CM header information.  Cast to any message. */
1510         rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad;
1511         rej_msg = (struct cm_rej_msg *) msg->mad;
1512
1513         cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
1514         rej_msg->remote_comm_id = rcv_msg->local_comm_id;
1515         rej_msg->local_comm_id = rcv_msg->remote_comm_id;
1516         cm_rej_set_msg_rejected(rej_msg, msg_rejected);
1517         rej_msg->reason = cpu_to_be16(reason);
1518
1519         if (ari && ari_length) {
1520                 cm_rej_set_reject_info_len(rej_msg, ari_length);
1521                 memcpy(rej_msg->ari, ari, ari_length);
1522         }
1523
1524         ret = ib_post_send_mad(msg, NULL);
1525         if (ret)
1526                 cm_free_msg(msg);
1527
1528         return ret;
1529 }
1530
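/*
 * Tie-breaker used when both sides of a connection race: the peer with
 * the numerically larger CA GUID (or, on a GUID tie, the larger QPN) is
 * considered the active side.
 */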
1531 static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
1532                                     __be32 local_qpn, __be32 remote_qpn)
1533 {
1534         return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) ||
1535                 ((local_ca_guid == remote_ca_guid) &&
1536                  (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn))));
1537 }
1538
1539 static bool cm_req_has_alt_path(struct cm_req_msg *req_msg)
1540 {
1541         return ((req_msg->alt_local_lid) ||
1542                 (ib_is_opa_gid(&req_msg->alt_local_gid)));
1543 }
1544
1545 static void cm_path_set_rec_type(struct ib_device *ib_device, u8 port_num,
1546                                  struct sa_path_rec *path, union ib_gid *gid)
1547 {
1548         if (ib_is_opa_gid(gid) && rdma_cap_opa_ah(ib_device, port_num))
1549                 path->rec_type = SA_PATH_REC_TYPE_OPA;
1550         else
1551                 path->rec_type = SA_PATH_REC_TYPE_IB;
1552 }
1553
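/*
 * Note the deliberate swap below: the REQ describes the path from the
 * sender's point of view, so its "local" LID/GID becomes our DLID and
 * its "remote" LID/GID becomes our SLID when building the receiver's
 * path records.
 */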
1554 static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
1555                                         struct sa_path_rec *primary_path,
1556                                         struct sa_path_rec *alt_path)
1557 {
1558         u32 lid;
1559
1560         if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) {
1561                 sa_path_set_dlid(primary_path,
1562                                  ntohs(req_msg->primary_local_lid));
1563                 sa_path_set_slid(primary_path,
1564                                  ntohs(req_msg->primary_remote_lid));
1565         } else {
1566                 lid = opa_get_lid_from_gid(&req_msg->primary_local_gid);
1567                 sa_path_set_dlid(primary_path, lid);
1568
1569                 lid = opa_get_lid_from_gid(&req_msg->primary_remote_gid);
1570                 sa_path_set_slid(primary_path, lid);
1571         }
1572
1573         if (!cm_req_has_alt_path(req_msg))
1574                 return;
1575
1576         if (alt_path->rec_type != SA_PATH_REC_TYPE_OPA) {
1577                 sa_path_set_dlid(alt_path, ntohs(req_msg->alt_local_lid));
1578                 sa_path_set_slid(alt_path, ntohs(req_msg->alt_remote_lid));
1579         } else {
1580                 lid = opa_get_lid_from_gid(&req_msg->alt_local_gid);
1581                 sa_path_set_dlid(alt_path, lid);
1582
1583                 lid = opa_get_lid_from_gid(&req_msg->alt_remote_gid);
1584                 sa_path_set_slid(alt_path, lid);
1585         }
1586 }
1587
1588 static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
1589                                      struct sa_path_rec *primary_path,
1590                                      struct sa_path_rec *alt_path)
1591 {
1592         primary_path->dgid = req_msg->primary_local_gid;
1593         primary_path->sgid = req_msg->primary_remote_gid;
1594         primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
1595         primary_path->hop_limit = req_msg->primary_hop_limit;
1596         primary_path->traffic_class = req_msg->primary_traffic_class;
1597         primary_path->reversible = 1;
1598         primary_path->pkey = req_msg->pkey;
1599         primary_path->sl = cm_req_get_primary_sl(req_msg);
1600         primary_path->mtu_selector = IB_SA_EQ;
1601         primary_path->mtu = cm_req_get_path_mtu(req_msg);
1602         primary_path->rate_selector = IB_SA_EQ;
1603         primary_path->rate = cm_req_get_primary_packet_rate(req_msg);
1604         primary_path->packet_life_time_selector = IB_SA_EQ;
1605         primary_path->packet_life_time =
1606                 cm_req_get_primary_local_ack_timeout(req_msg);
1607         primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
1608         primary_path->service_id = req_msg->service_id;
1609         if (sa_path_is_roce(primary_path))
1610                 primary_path->roce.route_resolved = false;
1611
1612         if (cm_req_has_alt_path(req_msg)) {
1613                 alt_path->dgid = req_msg->alt_local_gid;
1614                 alt_path->sgid = req_msg->alt_remote_gid;
1615                 alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
1616                 alt_path->hop_limit = req_msg->alt_hop_limit;
1617                 alt_path->traffic_class = req_msg->alt_traffic_class;
1618                 alt_path->reversible = 1;
1619                 alt_path->pkey = req_msg->pkey;
1620                 alt_path->sl = cm_req_get_alt_sl(req_msg);
1621                 alt_path->mtu_selector = IB_SA_EQ;
1622                 alt_path->mtu = cm_req_get_path_mtu(req_msg);
1623                 alt_path->rate_selector = IB_SA_EQ;
1624                 alt_path->rate = cm_req_get_alt_packet_rate(req_msg);
1625                 alt_path->packet_life_time_selector = IB_SA_EQ;
1626                 alt_path->packet_life_time =
1627                         cm_req_get_alt_local_ack_timeout(req_msg);
1628                 alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
1629                 alt_path->service_id = req_msg->service_id;
1630
1631                 if (sa_path_is_roce(alt_path))
1632                         alt_path->roce.route_resolved = false;
1633         }
1634         cm_format_path_lid_from_req(req_msg, primary_path, alt_path);
1635 }
1636
1637 static u16 cm_get_bth_pkey(struct cm_work *work)
1638 {
1639         struct ib_device *ib_dev = work->port->cm_dev->ib_device;
1640         u8 port_num = work->port->port_num;
1641         u16 pkey_index = work->mad_recv_wc->wc->pkey_index;
1642         u16 pkey;
1643         int ret;
1644
1645         ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey);
1646         if (ret) {
1647                 dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %d, pkey index %d): %d\n",
1648                                      port_num, pkey_index, ret);
1649                 return 0;
1650         }
1651
1652         return pkey;
1653 }
1654
1655 /**
1656  * cm_opa_to_ib_sgid - Convert OPA SGID to IB SGID
1657  * @work: Work completion
1658  * @path: Path record
1659  *
1660  * ULPs (such as IPoIB) do not understand OPA GIDs and will
1661  * reject them as the local_gid will not match the sgid. Therefore,
1662  * change the pathrec's SGID to an IB SGID.
1663  */
1664 static void cm_opa_to_ib_sgid(struct cm_work *work,
1665                               struct sa_path_rec *path)
1666 {
1667         struct ib_device *dev = work->port->cm_dev->ib_device;
1668         u8 port_num = work->port->port_num;
1669
1670         if (rdma_cap_opa_ah(dev, port_num) &&
1671             (ib_is_opa_gid(&path->sgid))) {
1672                 union ib_gid sgid;
1673
1674                 if (rdma_query_gid(dev, port_num, 0, &sgid)) {
1675                         dev_warn(&dev->dev,
1676                                  "Error updating sgid in CM request\n");
1677                         return;
1678                 }
1679
1680                 path->sgid = sgid;
1681         }
1682 }
1683
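/*
 * Translate a received REQ into the ib_cm_req_event_param reported to
 * the listener.  The resource fields and response timeouts are swapped
 * relative to the wire format, since the REQ states them from the
 * sender's perspective.
 */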
1684 static void cm_format_req_event(struct cm_work *work,
1685                                 struct cm_id_private *cm_id_priv,
1686                                 struct ib_cm_id *listen_id)
1687 {
1688         struct cm_req_msg *req_msg;
1689         struct ib_cm_req_event_param *param;
1690
1691         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1692         param = &work->cm_event.param.req_rcvd;
1693         param->listen_id = listen_id;
1694         param->bth_pkey = cm_get_bth_pkey(work);
1695         param->port = cm_id_priv->av.port->port_num;
1696         param->primary_path = &work->path[0];
1697         cm_opa_to_ib_sgid(work, param->primary_path);
1698         if (cm_req_has_alt_path(req_msg)) {
1699                 param->alternate_path = &work->path[1];
1700                 cm_opa_to_ib_sgid(work, param->alternate_path);
1701         } else {
1702                 param->alternate_path = NULL;
1703         }
1704         param->remote_ca_guid = req_msg->local_ca_guid;
1705         param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
1706         param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
1707         param->qp_type = cm_req_get_qp_type(req_msg);
1708         param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg));
1709         param->responder_resources = cm_req_get_init_depth(req_msg);
1710         param->initiator_depth = cm_req_get_resp_res(req_msg);
1711         param->local_cm_response_timeout =
1712                                         cm_req_get_remote_resp_timeout(req_msg);
1713         param->flow_control = cm_req_get_flow_ctrl(req_msg);
1714         param->remote_cm_response_timeout =
1715                                         cm_req_get_local_resp_timeout(req_msg);
1716         param->retry_count = cm_req_get_retry_count(req_msg);
1717         param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
1718         param->srq = cm_req_get_srq(req_msg);
1719         param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
1720         work->cm_event.private_data = &req_msg->private_data;
1721 }
1722
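/*
 * Deliver the current event to the user's cm_handler, then drain any
 * events that were queued while this work item was outstanding.  A
 * non-zero handler return destroys the cm_id.
 */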
1723 static void cm_process_work(struct cm_id_private *cm_id_priv,
1724                             struct cm_work *work)
1725 {
1726         int ret;
1727
1728         /* We will typically only have the current event to report. */
1729         ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
1730         cm_free_work(work);
1731
1732         while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
1733                 spin_lock_irq(&cm_id_priv->lock);
1734                 work = cm_dequeue_work(cm_id_priv);
1735                 spin_unlock_irq(&cm_id_priv->lock);
1736                 if (!work)
1737                         return;
1738
1739                 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
1740                                                 &work->cm_event);
1741                 cm_free_work(work);
1742         }
1743         cm_deref_id(cm_id_priv);
1744         if (ret)
1745                 cm_destroy_id(&cm_id_priv->id, ret);
1746 }
1747
1748 static void cm_format_mra(struct cm_mra_msg *mra_msg,
1749                           struct cm_id_private *cm_id_priv,
1750                           enum cm_msg_response msg_mraed, u8 service_timeout,
1751                           const void *private_data, u8 private_data_len)
1752 {
1753         cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
1754         cm_mra_set_msg_mraed(mra_msg, msg_mraed);
1755         mra_msg->local_comm_id = cm_id_priv->id.local_id;
1756         mra_msg->remote_comm_id = cm_id_priv->id.remote_id;
1757         cm_mra_set_service_timeout(mra_msg, service_timeout);
1758
1759         if (private_data && private_data_len)
1760                 memcpy(mra_msg->private_data, private_data, private_data_len);
1761 }
1762
1763 static void cm_format_rej(struct cm_rej_msg *rej_msg,
1764                           struct cm_id_private *cm_id_priv,
1765                           enum ib_cm_rej_reason reason,
1766                           void *ari,
1767                           u8 ari_length,
1768                           const void *private_data,
1769                           u8 private_data_len)
1770 {
1771         cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
1772         rej_msg->remote_comm_id = cm_id_priv->id.remote_id;
1773
1774         switch (cm_id_priv->id.state) {
1775         case IB_CM_REQ_RCVD:
1776                 rej_msg->local_comm_id = 0;
1777                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1778                 break;
1779         case IB_CM_MRA_REQ_SENT:
1780                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1781                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1782                 break;
1783         case IB_CM_REP_RCVD:
1784         case IB_CM_MRA_REP_SENT:
1785                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1786                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP);
1787                 break;
1788         default:
1789                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1790                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER);
1791                 break;
1792         }
1793
1794         rej_msg->reason = cpu_to_be16(reason);
1795         if (ari && ari_length) {
1796                 cm_rej_set_reject_info_len(rej_msg, ari_length);
1797                 memcpy(rej_msg->ari, ari, ari_length);
1798         }
1799
1800         if (private_data && private_data_len)
1801                 memcpy(rej_msg->private_data, private_data, private_data_len);
1802 }
1803
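/*
 * A retransmitted REQ is answered from the current state: resend the
 * MRA if one was already sent, reject with "stale conn" if we are in
 * timewait, and otherwise drop it silently.
 */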
1804 static void cm_dup_req_handler(struct cm_work *work,
1805                                struct cm_id_private *cm_id_priv)
1806 {
1807         struct ib_mad_send_buf *msg = NULL;
1808         int ret;
1809
1810         atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1811                         counter[CM_REQ_COUNTER]);
1812
1813         /* Quick state check to discard duplicate REQs. */
1814         if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
1815                 return;
1816
1817         ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1818         if (ret)
1819                 return;
1820
1821         spin_lock_irq(&cm_id_priv->lock);
1822         switch (cm_id_priv->id.state) {
1823         case IB_CM_MRA_REQ_SENT:
1824                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1825                               CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
1826                               cm_id_priv->private_data,
1827                               cm_id_priv->private_data_len);
1828                 break;
1829         case IB_CM_TIMEWAIT:
1830                 cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
1831                               IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
1832                 break;
1833         default:
1834                 goto unlock;
1835         }
1836         spin_unlock_irq(&cm_id_priv->lock);
1837
1838         ret = ib_post_send_mad(msg, NULL);
1839         if (ret)
1840                 goto free;
1841         return;
1842
1843 unlock: spin_unlock_irq(&cm_id_priv->lock);
1844 free:   cm_free_msg(msg);
1845 }
1846
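/*
 * Match an incoming REQ against local state: a remote comm ID already
 * present in the timewait tree means a duplicate REQ, a remote QPN
 * already in use means a stale connection (which is rejected and the
 * old connection disconnected), and otherwise the REQ is matched to a
 * listener by service ID.
 */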
1847 static struct cm_id_private *cm_match_req(struct cm_work *work,
1848                                           struct cm_id_private *cm_id_priv)
1849 {
1850         struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
1851         struct cm_timewait_info *timewait_info;
1852         struct cm_req_msg *req_msg;
1853         struct ib_cm_id *cm_id;
1854
1855         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1856
1857         /* Check for possible duplicate REQ. */
1858         spin_lock_irq(&cm.lock);
1859         timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
1860         if (timewait_info) {
1861                 cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
1862                                            timewait_info->work.remote_id);
1863                 spin_unlock_irq(&cm.lock);
1864                 if (cur_cm_id_priv) {
1865                         cm_dup_req_handler(work, cur_cm_id_priv);
1866                         cm_deref_id(cur_cm_id_priv);
1867                 }
1868                 return NULL;
1869         }
1870
1871         /* Check for stale connections. */
1872         timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
1873         if (timewait_info) {
1874                 cm_cleanup_timewait(cm_id_priv->timewait_info);
1875                 cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
1876                                            timewait_info->work.remote_id);
1877
1878                 spin_unlock_irq(&cm.lock);
1879                 cm_issue_rej(work->port, work->mad_recv_wc,
1880                              IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
1881                              NULL, 0);
1882                 if (cur_cm_id_priv) {
1883                         cm_id = &cur_cm_id_priv->id;
1884                         ib_send_cm_dreq(cm_id, NULL, 0);
1885                         cm_deref_id(cur_cm_id_priv);
1886                 }
1887                 return NULL;
1888         }
1889
1890         /* Find matching listen request. */
1891         listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
1892                                            req_msg->service_id);
1893         if (!listen_cm_id_priv) {
1894                 cm_cleanup_timewait(cm_id_priv->timewait_info);
1895                 spin_unlock_irq(&cm.lock);
1896                 cm_issue_rej(work->port, work->mad_recv_wc,
1897                              IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
1898                              NULL, 0);
1899                 goto out;
1900         }
1901         atomic_inc(&listen_cm_id_priv->refcount);
1902         atomic_inc(&cm_id_priv->refcount);
1903         cm_id_priv->id.state = IB_CM_REQ_RCVD;
1904         atomic_inc(&cm_id_priv->work_count);
1905         spin_unlock_irq(&cm.lock);
1906 out:
1907         return listen_cm_id_priv;
1908 }
1909
1910 /*
1911  * Work-around for inter-subnet connections.  If the LIDs are permissive,
1912  * we need to override the LID/SL data in the REQ with the LID information
1913  * in the work completion.
1914  */
1915 static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
1916 {
1917         if (!cm_req_get_primary_subnet_local(req_msg)) {
1918                 if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) {
1919                         req_msg->primary_local_lid = ib_lid_be16(wc->slid);
1920                         cm_req_set_primary_sl(req_msg, wc->sl);
1921                 }
1922
1923                 if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE)
1924                         req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1925         }
1926
1927         if (!cm_req_get_alt_subnet_local(req_msg)) {
1928                 if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) {
1929                         req_msg->alt_local_lid = ib_lid_be16(wc->slid);
1930                         cm_req_set_alt_sl(req_msg, wc->sl);
1931                 }
1932
1933                 if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE)
1934                         req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1935         }
1936 }
1937
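/*
 * Passive-side entry point for a new REQ: create a cm_id for the
 * connection, resolve the response address from the work completion,
 * match the REQ to a listener, rebuild the primary/alternate path
 * records and finally report IB_CM_REQ_RECEIVED to the listener's
 * handler via cm_process_work().
 */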
1938 static int cm_req_handler(struct cm_work *work)
1939 {
1940         struct ib_cm_id *cm_id;
1941         struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
1942         struct cm_req_msg *req_msg;
1943         const struct ib_global_route *grh;
1944         const struct ib_gid_attr *gid_attr;
1945         int ret;
1946
1947         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1948
1949         cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
1950         if (IS_ERR(cm_id))
1951                 return PTR_ERR(cm_id);
1952
1953         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1954         cm_id_priv->id.remote_id = req_msg->local_comm_id;
1955         ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
1956                                       work->mad_recv_wc->recv_buf.grh,
1957                                       &cm_id_priv->av);
1958         if (ret)
1959                 goto destroy;
1960         cm_id_priv->timewait_info =
1961                 cm_create_timewait_info(cm_id_priv->id.local_id);
1962         if (IS_ERR(cm_id_priv->timewait_info)) {
1963                 ret = PTR_ERR(cm_id_priv->timewait_info);
1964                 goto destroy;
1965         }
1966         cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
1967         cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
1968         cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg);
1969
1970         listen_cm_id_priv = cm_match_req(work, cm_id_priv);
1971         if (!listen_cm_id_priv) {
1972                 pr_debug("%s: local_id %d, no listen_cm_id_priv\n", __func__,
1973                          be32_to_cpu(cm_id->local_id));
1974                 ret = -EINVAL;
1975                 goto free_timeinfo;
1976         }
1977
1978         cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
1979         cm_id_priv->id.context = listen_cm_id_priv->id.context;
1980         cm_id_priv->id.service_id = req_msg->service_id;
1981         cm_id_priv->id.service_mask = ~cpu_to_be64(0);
1982
1983         cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
1984
1985         memset(&work->path[0], 0, sizeof(work->path[0]));
1986         if (cm_req_has_alt_path(req_msg))
1987                 memset(&work->path[1], 0, sizeof(work->path[1]));
1988         grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr);
1989         gid_attr = grh->sgid_attr;
1990
1991         if (gid_attr && gid_attr->ndev) {
1992                 work->path[0].rec_type =
1993                         sa_conv_gid_to_pathrec_type(gid_attr->gid_type);
1994         } else {
1995                 /* If no GID attribute or ndev is null, it is not RoCE. */
1996                 cm_path_set_rec_type(work->port->cm_dev->ib_device,
1997                                      work->port->port_num,
1998                                      &work->path[0],
1999                                      &req_msg->primary_local_gid);
2000         }
2001         if (cm_req_has_alt_path(req_msg))
2002                 work->path[1].rec_type = work->path[0].rec_type;
2003         cm_format_paths_from_req(req_msg, &work->path[0],
2004                                  &work->path[1]);
2005         if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
2006                 sa_path_set_dmac(&work->path[0],
2007                                  cm_id_priv->av.ah_attr.roce.dmac);
2008         work->path[0].hop_limit = grh->hop_limit;
2009         ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av,
2010                                  cm_id_priv);
2011         if (ret) {
2012                 int err;
2013
2014                 err = rdma_query_gid(work->port->cm_dev->ib_device,
2015                                      work->port->port_num, 0,
2016                                      &work->path[0].sgid);
2017                 if (err)
2018                         ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
2019                                        NULL, 0, NULL, 0);
2020                 else
2021                         ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
2022                                        &work->path[0].sgid,
2023                                        sizeof(work->path[0].sgid),
2024                                        NULL, 0);
2025                 goto rejected;
2026         }
2027         if (cm_req_has_alt_path(req_msg)) {
2028                 ret = cm_init_av_by_path(&work->path[1], NULL,
2029                                          &cm_id_priv->alt_av, cm_id_priv);
2030                 if (ret) {
2031                         ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
2032                                        &work->path[0].sgid,
2033                                        sizeof(work->path[0].sgid), NULL, 0);
2034                         goto rejected;
2035                 }
2036         }
2037         cm_id_priv->tid = req_msg->hdr.tid;
2038         cm_id_priv->timeout_ms = cm_convert_to_ms(
2039                                         cm_req_get_local_resp_timeout(req_msg));
2040         cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
2041         cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
2042         cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
2043         cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
2044         cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
2045         cm_id_priv->pkey = req_msg->pkey;
2046         cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
2047         cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
2048         cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
2049         cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
2050
2051         cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
2052         cm_process_work(cm_id_priv, work);
2053         cm_deref_id(listen_cm_id_priv);
2054         return 0;
2055
2056 rejected:
2057         atomic_dec(&cm_id_priv->refcount);
2058         cm_deref_id(listen_cm_id_priv);
2059 free_timeinfo:
2060         kfree(cm_id_priv->timewait_info);
2061 destroy:
2062         ib_destroy_cm_id(cm_id);
2063         return ret;
2064 }
2065
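/*
 * XRC target QPs carry their number in the EECN field and always use an
 * SRQ; all other QP types use the local QPN field directly.
 */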
2066 static void cm_format_rep(struct cm_rep_msg *rep_msg,
2067                           struct cm_id_private *cm_id_priv,
2068                           struct ib_cm_rep_param *param)
2069 {
2070         cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
2071         rep_msg->local_comm_id = cm_id_priv->id.local_id;
2072         rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
2073         cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
2074         rep_msg->resp_resources = param->responder_resources;
2075         cm_rep_set_target_ack_delay(rep_msg,
2076                                     cm_id_priv->av.port->cm_dev->ack_delay);
2077         cm_rep_set_failover(rep_msg, param->failover_accepted);
2078         cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
2079         rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
2080
2081         if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
2082                 rep_msg->initiator_depth = param->initiator_depth;
2083                 cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
2084                 cm_rep_set_srq(rep_msg, param->srq);
2085                 cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
2086         } else {
2087                 cm_rep_set_srq(rep_msg, 1);
2088                 cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num));
2089         }
2090
2091         if (param->private_data && param->private_data_len)
2092                 memcpy(rep_msg->private_data, param->private_data,
2093                        param->private_data_len);
2094 }
2095
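/*
 * ib_send_cm_rep - send a reply (REP) accepting a received REQ.
 *
 * Valid only in the IB_CM_REQ_RCVD or IB_CM_MRA_REQ_SENT states; on
 * success the cm_id moves to IB_CM_REP_SENT and waits for the
 * initiator's RTU.
 */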
2096 int ib_send_cm_rep(struct ib_cm_id *cm_id,
2097                    struct ib_cm_rep_param *param)
2098 {
2099         struct cm_id_private *cm_id_priv;
2100         struct ib_mad_send_buf *msg;
2101         struct cm_rep_msg *rep_msg;
2102         unsigned long flags;
2103         int ret;
2104
2105         if (param->private_data &&
2106             param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE)
2107                 return -EINVAL;
2108
2109         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2110         spin_lock_irqsave(&cm_id_priv->lock, flags);
2111         if (cm_id->state != IB_CM_REQ_RCVD &&
2112             cm_id->state != IB_CM_MRA_REQ_SENT) {
2113                 pr_debug("%s: local_comm_id %d, cm_id->state: %d\n", __func__,
2114                          be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
2115                 ret = -EINVAL;
2116                 goto out;
2117         }
2118
2119         ret = cm_alloc_msg(cm_id_priv, &msg);
2120         if (ret)
2121                 goto out;
2122
2123         rep_msg = (struct cm_rep_msg *) msg->mad;
2124         cm_format_rep(rep_msg, cm_id_priv, param);
2125         msg->timeout_ms = cm_id_priv->timeout_ms;
2126         msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
2127
2128         ret = ib_post_send_mad(msg, NULL);
2129         if (ret) {
2130                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2131                 cm_free_msg(msg);
2132                 return ret;
2133         }
2134
2135         cm_id->state = IB_CM_REP_SENT;
2136         cm_id_priv->msg = msg;
2137         cm_id_priv->initiator_depth = param->initiator_depth;
2138         cm_id_priv->responder_resources = param->responder_resources;
2139         cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
2140         cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
2141
2142 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2143         return ret;
2144 }
2145 EXPORT_SYMBOL(ib_send_cm_rep);
2146
2147 static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
2148                           struct cm_id_private *cm_id_priv,
2149                           const void *private_data,
2150                           u8 private_data_len)
2151 {
2152         cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
2153         rtu_msg->local_comm_id = cm_id_priv->id.local_id;
2154         rtu_msg->remote_comm_id = cm_id_priv->id.remote_id;
2155
2156         if (private_data && private_data_len)
2157                 memcpy(rtu_msg->private_data, private_data, private_data_len);
2158 }
2159
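/*
 * ib_send_cm_rtu - send a ready-to-use (RTU) message, completing the
 * three-way handshake on the active side.  Valid in IB_CM_REP_RCVD or
 * IB_CM_MRA_REP_SENT; on success the connection is IB_CM_ESTABLISHED.
 */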
2160 int ib_send_cm_rtu(struct ib_cm_id *cm_id,
2161                    const void *private_data,
2162                    u8 private_data_len)
2163 {
2164         struct cm_id_private *cm_id_priv;
2165         struct ib_mad_send_buf *msg;
2166         unsigned long flags;
2167         void *data;
2168         int ret;
2169
2170         if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE)
2171                 return -EINVAL;
2172
2173         data = cm_copy_private_data(private_data, private_data_len);
2174         if (IS_ERR(data))
2175                 return PTR_ERR(data);
2176
2177         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2178         spin_lock_irqsave(&cm_id_priv->lock, flags);
2179         if (cm_id->state != IB_CM_REP_RCVD &&
2180             cm_id->state != IB_CM_MRA_REP_SENT) {
2181                 pr_debug("%s: local_id %d, cm_id->state %d\n", __func__,
2182                          be32_to_cpu(cm_id->local_id), cm_id->state);
2183                 ret = -EINVAL;
2184                 goto error;
2185         }
2186
2187         ret = cm_alloc_msg(cm_id_priv, &msg);
2188         if (ret)
2189                 goto error;
2190
2191         cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
2192                       private_data, private_data_len);
2193
2194         ret = ib_post_send_mad(msg, NULL);
2195         if (ret) {
2196                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2197                 cm_free_msg(msg);
2198                 kfree(data);
2199                 return ret;
2200         }
2201
2202         cm_id->state = IB_CM_ESTABLISHED;
2203         cm_set_private_data(cm_id_priv, data, private_data_len);
2204         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2205         return 0;
2206
2207 error:  spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2208         kfree(data);
2209         return ret;
2210 }
2211 EXPORT_SYMBOL(ib_send_cm_rtu);
2212
2213 static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
2214 {
2215         struct cm_rep_msg *rep_msg;
2216         struct ib_cm_rep_event_param *param;
2217
2218         rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
2219         param = &work->cm_event.param.rep_rcvd;
2220         param->remote_ca_guid = rep_msg->local_ca_guid;
2221         param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
2222         param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));
2223         param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
2224         param->responder_resources = rep_msg->initiator_depth;
2225         param->initiator_depth = rep_msg->resp_resources;
2226         param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
2227         param->failover_accepted = cm_rep_get_failover(rep_msg);
2228         param->flow_control = cm_rep_get_flow_ctrl(rep_msg);
2229         param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
2230         param->srq = cm_rep_get_srq(rep_msg);
2231         work->cm_event.private_data = &rep_msg->private_data;
2232 }
2233
2234 static void cm_dup_rep_handler(struct cm_work *work)
2235 {
2236         struct cm_id_private *cm_id_priv;
2237         struct cm_rep_msg *rep_msg;
2238         struct ib_mad_send_buf *msg = NULL;
2239         int ret;
2240
2241         rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
2242         cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id,
2243                                    rep_msg->local_comm_id);
2244         if (!cm_id_priv)
2245                 return;
2246
2247         atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2248                         counter[CM_REP_COUNTER]);
2249         ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
2250         if (ret)
2251                 goto deref;
2252
2253         spin_lock_irq(&cm_id_priv->lock);
2254         if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
2255                 cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
2256                               cm_id_priv->private_data,
2257                               cm_id_priv->private_data_len);
2258         else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
2259                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2260                               CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
2261                               cm_id_priv->private_data,
2262                               cm_id_priv->private_data_len);
2263         else
2264                 goto unlock;
2265         spin_unlock_irq(&cm_id_priv->lock);
2266
2267         ret = ib_post_send_mad(msg, NULL);
2268         if (ret)
2269                 goto free;
2270         goto deref;
2271
2272 unlock: spin_unlock_irq(&cm_id_priv->lock);
2273 free:   cm_free_msg(msg);
2274 deref:  cm_deref_id(cm_id_priv);
2275 }
2276
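/*
 * Active-side handling of a received REP: validate our state, guard
 * against duplicate REPs and stale connections via the timewait trees,
 * then record the remote QPN/PSN and ack timeouts before handing
 * IB_CM_REP_RECEIVED to the user.
 */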
2277 static int cm_rep_handler(struct cm_work *work)
2278 {
2279         struct cm_id_private *cm_id_priv;
2280         struct cm_rep_msg *rep_msg;
2281         int ret;
2282         struct cm_id_private *cur_cm_id_priv;
2283         struct ib_cm_id *cm_id;
2284         struct cm_timewait_info *timewait_info;
2285
2286         rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
2287         cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
2288         if (!cm_id_priv) {
2289                 cm_dup_rep_handler(work);
2290                 pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__,
2291                          be32_to_cpu(rep_msg->remote_comm_id));
2292                 return -EINVAL;
2293         }
2294
2295         cm_format_rep_event(work, cm_id_priv->qp_type);
2296
2297         spin_lock_irq(&cm_id_priv->lock);
2298         switch (cm_id_priv->id.state) {
2299         case IB_CM_REQ_SENT:
2300         case IB_CM_MRA_REQ_RCVD:
2301                 break;
2302         default:
2303                 spin_unlock_irq(&cm_id_priv->lock);
2304                 ret = -EINVAL;
2305                 pr_debug("%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
2306                          __func__, cm_id_priv->id.state,
2307                          be32_to_cpu(rep_msg->local_comm_id),
2308                          be32_to_cpu(rep_msg->remote_comm_id));
2309                 goto error;
2310         }
2311
2312         cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
2313         cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
2314         cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
2315
2316         spin_lock(&cm.lock);
2317         /* Check for duplicate REP. */
2318         if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
2319                 spin_unlock(&cm.lock);
2320                 spin_unlock_irq(&cm_id_priv->lock);
2321                 ret = -EINVAL;
2322                 pr_debug("%s: Failed to insert remote id %d\n", __func__,
2323                          be32_to_cpu(rep_msg->remote_comm_id));
2324                 goto error;
2325         }
2326         /* Check for a stale connection. */
2327         timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
2328         if (timewait_info) {
2329                 rb_erase(&cm_id_priv->timewait_info->remote_id_node,
2330                          &cm.remote_id_table);
2331                 cm_id_priv->timewait_info->inserted_remote_id = 0;
2332                 cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
2333                                            timewait_info->work.remote_id);
2334
2335                 spin_unlock(&cm.lock);
2336                 spin_unlock_irq(&cm_id_priv->lock);
2337                 cm_issue_rej(work->port, work->mad_recv_wc,
2338                              IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
2339                              NULL, 0);
2340                 ret = -EINVAL;
2341                 pr_debug("%s: Stale connection. local_comm_id %d, remote_comm_id %d\n",
2342                          __func__, be32_to_cpu(rep_msg->local_comm_id),
2343                          be32_to_cpu(rep_msg->remote_comm_id));
2344
2345                 if (cur_cm_id_priv) {
2346                         cm_id = &cur_cm_id_priv->id;
2347                         ib_send_cm_dreq(cm_id, NULL, 0);
2348                         cm_deref_id(cur_cm_id_priv);
2349                 }
2350
2351                 goto error;
2352         }
2353         spin_unlock(&cm.lock);
2354
2355         cm_id_priv->id.state = IB_CM_REP_RCVD;
2356         cm_id_priv->id.remote_id = rep_msg->local_comm_id;
2357         cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
2358         cm_id_priv->initiator_depth = rep_msg->resp_resources;
2359         cm_id_priv->responder_resources = rep_msg->initiator_depth;
2360         cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
2361         cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
2362         cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
2363         cm_id_priv->av.timeout =
2364                         cm_ack_timeout(cm_id_priv->target_ack_delay,
2365                                        cm_id_priv->av.timeout - 1);
2366         cm_id_priv->alt_av.timeout =
2367                         cm_ack_timeout(cm_id_priv->target_ack_delay,
2368                                        cm_id_priv->alt_av.timeout - 1);
2369
2370         /* todo: handle peer_to_peer */
2371
2372         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2373         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2374         if (!ret)
2375                 list_add_tail(&work->list, &cm_id_priv->work_list);
2376         spin_unlock_irq(&cm_id_priv->lock);
2377
2378         if (ret)
2379                 cm_process_work(cm_id_priv, work);
2380         else
2381                 cm_deref_id(cm_id_priv);
2382         return 0;
2383
2384 error:
2385         cm_deref_id(cm_id_priv);
2386         return ret;
2387 }
2388
2389 static int cm_establish_handler(struct cm_work *work)
2390 {
2391         struct cm_id_private *cm_id_priv;
2392         int ret;
2393
2394         /* See comment in cm_establish about lookup. */
2395         cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
2396         if (!cm_id_priv)
2397                 return -EINVAL;
2398
2399         spin_lock_irq(&cm_id_priv->lock);
2400         if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
2401                 spin_unlock_irq(&cm_id_priv->lock);
2402                 goto out;
2403         }
2404
2405         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2406         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2407         if (!ret)
2408                 list_add_tail(&work->list, &cm_id_priv->work_list);
2409         spin_unlock_irq(&cm_id_priv->lock);
2410
2411         if (ret)
2412                 cm_process_work(cm_id_priv, work);
2413         else
2414                 cm_deref_id(cm_id_priv);
2415         return 0;
2416 out:
2417         cm_deref_id(cm_id_priv);
2418         return -EINVAL;
2419 }
2420
2421 static int cm_rtu_handler(struct cm_work *work)
2422 {
2423         struct cm_id_private *cm_id_priv;
2424         struct cm_rtu_msg *rtu_msg;
2425         int ret;
2426
2427         rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
2428         cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id,
2429                                    rtu_msg->local_comm_id);
2430         if (!cm_id_priv)
2431                 return -EINVAL;
2432
2433         work->cm_event.private_data = &rtu_msg->private_data;
2434
2435         spin_lock_irq(&cm_id_priv->lock);
2436         if (cm_id_priv->id.state != IB_CM_REP_SENT &&
2437             cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
2438                 spin_unlock_irq(&cm_id_priv->lock);
2439                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2440                                 counter[CM_RTU_COUNTER]);
2441                 goto out;
2442         }
2443         cm_id_priv->id.state = IB_CM_ESTABLISHED;
2444
2445         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2446         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2447         if (!ret)
2448                 list_add_tail(&work->list, &cm_id_priv->work_list);
2449         spin_unlock_irq(&cm_id_priv->lock);
2450
2451         if (ret)
2452                 cm_process_work(cm_id_priv, work);
2453         else
2454                 cm_deref_id(cm_id_priv);
2455         return 0;
2456 out:
2457         cm_deref_id(cm_id_priv);
2458         return -EINVAL;
2459 }
2460
2461 static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
2462                           struct cm_id_private *cm_id_priv,
2463                           const void *private_data,
2464                           u8 private_data_len)
2465 {
2466         cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
2467                           cm_form_tid(cm_id_priv));
2468         dreq_msg->local_comm_id = cm_id_priv->id.local_id;
2469         dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
2470         cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
2471
2472         if (private_data && private_data_len)
2473                 memcpy(dreq_msg->private_data, private_data, private_data_len);
2474 }
2475
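/*
 * ib_send_cm_dreq - send a disconnection request (DREQ) for an
 * established connection.  If the DREQ cannot be allocated or posted
 * the cm_id is moved into timewait directly, since the connection is
 * being torn down regardless.
 */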
2476 int ib_send_cm_dreq(struct ib_cm_id *cm_id,
2477                     const void *private_data,
2478                     u8 private_data_len)
2479 {
2480         struct cm_id_private *cm_id_priv;
2481         struct ib_mad_send_buf *msg;
2482         unsigned long flags;
2483         int ret;
2484
2485         if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
2486                 return -EINVAL;
2487
2488         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2489         spin_lock_irqsave(&cm_id_priv->lock, flags);
2490         if (cm_id->state != IB_CM_ESTABLISHED) {
2491                 pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
2492                          be32_to_cpu(cm_id->local_id), cm_id->state);
2493                 ret = -EINVAL;
2494                 goto out;
2495         }
2496
2497         if (cm_id->lap_state == IB_CM_LAP_SENT ||
2498             cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
2499                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2500
2501         ret = cm_alloc_msg(cm_id_priv, &msg);
2502         if (ret) {
2503                 cm_enter_timewait(cm_id_priv);
2504                 goto out;
2505         }
2506
2507         cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
2508                        private_data, private_data_len);
2509         msg->timeout_ms = cm_id_priv->timeout_ms;
2510         msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
2511
2512         ret = ib_post_send_mad(msg, NULL);
2513         if (ret) {
2514                 cm_enter_timewait(cm_id_priv);
2515                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2516                 cm_free_msg(msg);
2517                 return ret;
2518         }
2519
2520         cm_id->state = IB_CM_DREQ_SENT;
2521         cm_id_priv->msg = msg;
2522 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2523         return ret;
2524 }
2525 EXPORT_SYMBOL(ib_send_cm_dreq);
2526
2527 static void cm_format_drep(struct cm_drep_msg *drep_msg,
2528                           struct cm_id_private *cm_id_priv,
2529                           const void *private_data,
2530                           u8 private_data_len)
2531 {
2532         cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
2533         drep_msg->local_comm_id = cm_id_priv->id.local_id;
2534         drep_msg->remote_comm_id = cm_id_priv->id.remote_id;
2535
2536         if (private_data && private_data_len)
2537                 memcpy(drep_msg->private_data, private_data, private_data_len);
2538 }
2539
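/*
 * ib_send_cm_drep - send a disconnection reply (DREP) in response to a
 * received DREQ.  The cm_id enters timewait whether or not the DREP can
 * actually be sent.
 */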
2540 int ib_send_cm_drep(struct ib_cm_id *cm_id,
2541                     const void *private_data,
2542                     u8 private_data_len)
2543 {
2544         struct cm_id_private *cm_id_priv;
2545         struct ib_mad_send_buf *msg;
2546         unsigned long flags;
2547         void *data;
2548         int ret;
2549
2550         if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
2551                 return -EINVAL;
2552
2553         data = cm_copy_private_data(private_data, private_data_len);
2554         if (IS_ERR(data))
2555                 return PTR_ERR(data);
2556
2557         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2558         spin_lock_irqsave(&cm_id_priv->lock, flags);
2559         if (cm_id->state != IB_CM_DREQ_RCVD) {
2560                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2561                 kfree(data);
2562                 pr_debug("%s: local_id %d, cm_id->state(%d) != IB_CM_DREQ_RCVD\n",
2563                          __func__, be32_to_cpu(cm_id->local_id), cm_id->state);
2564                 return -EINVAL;
2565         }
2566
2567         cm_set_private_data(cm_id_priv, data, private_data_len);
2568         cm_enter_timewait(cm_id_priv);
2569
2570         ret = cm_alloc_msg(cm_id_priv, &msg);
2571         if (ret)
2572                 goto out;
2573
2574         cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2575                        private_data, private_data_len);
2576
2577         ret = ib_post_send_mad(msg, NULL);
2578         if (ret) {
2579                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2580                 cm_free_msg(msg);
2581                 return ret;
2582         }
2583
2584 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2585         return ret;
2586 }
2587 EXPORT_SYMBOL(ib_send_cm_drep);
2588
2589 static int cm_issue_drep(struct cm_port *port,
2590                          struct ib_mad_recv_wc *mad_recv_wc)
2591 {
2592         struct ib_mad_send_buf *msg = NULL;
2593         struct cm_dreq_msg *dreq_msg;
2594         struct cm_drep_msg *drep_msg;
2595         int ret;
2596
2597         ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
2598         if (ret)
2599                 return ret;
2600
2601         dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad;
2602         drep_msg = (struct cm_drep_msg *) msg->mad;
2603
2604         cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
2605         drep_msg->remote_comm_id = dreq_msg->local_comm_id;
2606         drep_msg->local_comm_id = dreq_msg->remote_comm_id;
2607
2608         ret = ib_post_send_mad(msg, NULL);
2609         if (ret)
2610                 cm_free_msg(msg);
2611
2612         return ret;
2613 }
2614
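/*
 * Handle a received DREQ.  If no matching cm_id exists the DREQ is
 * still answered with a bare DREP (cm_issue_drep) so the remote side
 * can finish disconnecting; duplicates received in timewait also get a
 * fresh DREP.
 */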
2615 static int cm_dreq_handler(struct cm_work *work)
2616 {
2617         struct cm_id_private *cm_id_priv;
2618         struct cm_dreq_msg *dreq_msg;
2619         struct ib_mad_send_buf *msg = NULL;
2620         int ret;
2621
2622         dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
2623         cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
2624                                    dreq_msg->local_comm_id);
2625         if (!cm_id_priv) {
2626                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2627                                 counter[CM_DREQ_COUNTER]);
2628                 cm_issue_drep(work->port, work->mad_recv_wc);
2629                 pr_debug("%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n",
2630                          __func__, be32_to_cpu(dreq_msg->local_comm_id),
2631                          be32_to_cpu(dreq_msg->remote_comm_id));
2632                 return -EINVAL;
2633         }
2634
2635         work->cm_event.private_data = &dreq_msg->private_data;
2636
2637         spin_lock_irq(&cm_id_priv->lock);
2638         if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
2639                 goto unlock;
2640
2641         switch (cm_id_priv->id.state) {
2642         case IB_CM_REP_SENT:
2643         case IB_CM_DREQ_SENT:
2644                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2645                 break;
2646         case IB_CM_ESTABLISHED:
2647                 if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
2648                     cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2649                         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2650                 break;
2651         case IB_CM_MRA_REP_RCVD:
2652                 break;
2653         case IB_CM_TIMEWAIT:
2654                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2655                                 counter[CM_DREQ_COUNTER]);
2656                 msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
2657                 if (IS_ERR(msg))
2658                         goto unlock;
2659
2660                 cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2661                                cm_id_priv->private_data,
2662                                cm_id_priv->private_data_len);
2663                 spin_unlock_irq(&cm_id_priv->lock);
2664
2665                 if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
2666                     ib_post_send_mad(msg, NULL))
2667                         cm_free_msg(msg);
2668                 goto deref;
2669         case IB_CM_DREQ_RCVD:
2670                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2671                                 counter[CM_DREQ_COUNTER]);
2672                 goto unlock;
2673         default:
2674                 pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
2675                          __func__, be32_to_cpu(cm_id_priv->id.local_id),
2676                          cm_id_priv->id.state);
2677                 goto unlock;
2678         }
2679         cm_id_priv->id.state = IB_CM_DREQ_RCVD;
2680         cm_id_priv->tid = dreq_msg->hdr.tid;
2681         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2682         if (!ret)
2683                 list_add_tail(&work->list, &cm_id_priv->work_list);
2684         spin_unlock_irq(&cm_id_priv->lock);
2685
2686         if (ret)
2687                 cm_process_work(cm_id_priv, work);
2688         else
2689                 cm_deref_id(cm_id_priv);
2690         return 0;
2691
2692 unlock: spin_unlock_irq(&cm_id_priv->lock);
2693 deref:  cm_deref_id(cm_id_priv);
2694         return -EINVAL;
2695 }
2696
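/*
 * Handle a received DREP: cancel the outstanding DREQ and move the
 * connection into timewait.  A DREP for a cm_id that is not in a DREQ
 * sent/received state is dropped as unmatched.
 */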
2697 static int cm_drep_handler(struct cm_work *work)
2698 {
2699         struct cm_id_private *cm_id_priv;
2700         struct cm_drep_msg *drep_msg;
2701         int ret;
2702
2703         drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
2704         cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id,
2705                                    drep_msg->local_comm_id);
2706         if (!cm_id_priv)
2707                 return -EINVAL;
2708
2709         work->cm_event.private_data = &drep_msg->private_data;
2710
2711         spin_lock_irq(&cm_id_priv->lock);
2712         if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
2713             cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
2714                 spin_unlock_irq(&cm_id_priv->lock);
2715                 goto out;
2716         }
2717         cm_enter_timewait(cm_id_priv);
2718
2719         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2720         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2721         if (!ret)
2722                 list_add_tail(&work->list, &cm_id_priv->work_list);
2723         spin_unlock_irq(&cm_id_priv->lock);
2724
2725         if (ret)
2726                 cm_process_work(cm_id_priv, work);
2727         else
2728                 cm_deref_id(cm_id_priv);
2729         return 0;
2730 out:
2731         cm_deref_id(cm_id_priv);
2732         return -EINVAL;
2733 }
2734
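/*
 * Send a REJ for a connection in progress.  Rejecting before the local
 * REP has been sent resets the cm_id to idle; rejecting after it
 * (IB_CM_REP_SENT/IB_CM_MRA_REP_RCVD) enters timewait instead.
 */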
2735 int ib_send_cm_rej(struct ib_cm_id *cm_id,
2736                    enum ib_cm_rej_reason reason,
2737                    void *ari,
2738                    u8 ari_length,
2739                    const void *private_data,
2740                    u8 private_data_len)
2741 {
2742         struct cm_id_private *cm_id_priv;
2743         struct ib_mad_send_buf *msg;
2744         unsigned long flags;
2745         int ret;
2746
2747         if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
2748             (ari && ari_length > IB_CM_REJ_ARI_LENGTH))
2749                 return -EINVAL;
2750
2751         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2752
2753         spin_lock_irqsave(&cm_id_priv->lock, flags);
2754         switch (cm_id->state) {
2755         case IB_CM_REQ_SENT:
2756         case IB_CM_MRA_REQ_RCVD:
2757         case IB_CM_REQ_RCVD:
2758         case IB_CM_MRA_REQ_SENT:
2759         case IB_CM_REP_RCVD:
2760         case IB_CM_MRA_REP_SENT:
2761                 ret = cm_alloc_msg(cm_id_priv, &msg);
2762                 if (!ret)
2763                         cm_format_rej((struct cm_rej_msg *) msg->mad,
2764                                       cm_id_priv, reason, ari, ari_length,
2765                                       private_data, private_data_len);
2766
2767                 cm_reset_to_idle(cm_id_priv);
2768                 break;
2769         case IB_CM_REP_SENT:
2770         case IB_CM_MRA_REP_RCVD:
2771                 ret = cm_alloc_msg(cm_id_priv, &msg);
2772                 if (!ret)
2773                         cm_format_rej((struct cm_rej_msg *) msg->mad,
2774                                       cm_id_priv, reason, ari, ari_length,
2775                                       private_data, private_data_len);
2776
2777                 cm_enter_timewait(cm_id_priv);
2778                 break;
2779         default:
2780                 pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
2781                          be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
2782                 ret = -EINVAL;
2783                 goto out;
2784         }
2785
2786         if (ret)
2787                 goto out;
2788
2789         ret = ib_post_send_mad(msg, NULL);
2790         if (ret)
2791                 cm_free_msg(msg);
2792
2793 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2794         return ret;
2795 }
2796 EXPORT_SYMBOL(ib_send_cm_rej);
2797
2798 static void cm_format_rej_event(struct cm_work *work)
2799 {
2800         struct cm_rej_msg *rej_msg;
2801         struct ib_cm_rej_event_param *param;
2802
2803         rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2804         param = &work->cm_event.param.rej_rcvd;
2805         param->ari = rej_msg->ari;
2806         param->ari_length = cm_rej_get_reject_info_len(rej_msg);
2807         param->reason = __be16_to_cpu(rej_msg->reason);
2808         work->cm_event.private_data = &rej_msg->private_data;
2809 }
2810
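/*
 * Look up the cm_id targeted by a REJ.  For IB_CM_REJ_TIMEOUT the
 * first eight bytes of the ARI hold the remote CA GUID used to match a
 * timewait entry; a REJ of a REQ is matched on the local comm ID alone
 * since no remote ID has been exchanged yet.
 */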
2811 static struct cm_id_private *cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
2812 {
2813         struct cm_timewait_info *timewait_info;
2814         struct cm_id_private *cm_id_priv;
2815         __be32 remote_id;
2816
2817         remote_id = rej_msg->local_comm_id;
2818
2819         if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
2820                 spin_lock_irq(&cm.lock);
2821                 timewait_info = cm_find_remote_id(*((__be64 *) rej_msg->ari),
2822                                                   remote_id);
2823                 if (!timewait_info) {
2824                         spin_unlock_irq(&cm.lock);
2825                         return NULL;
2826                 }
2827                 cm_id_priv = idr_find(&cm.local_id_table, (__force int)
2828                                       (timewait_info->work.local_id ^
2829                                        cm.random_id_operand));
2830                 if (cm_id_priv) {
2831                         if (cm_id_priv->id.remote_id == remote_id)
2832                                 atomic_inc(&cm_id_priv->refcount);
2833                         else
2834                                 cm_id_priv = NULL;
2835                 }
2836                 spin_unlock_irq(&cm.lock);
2837         } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
2838                 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
2839         else
2840                 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id);
2841
2842         return cm_id_priv;
2843 }
2844
2845 static int cm_rej_handler(struct cm_work *work)
2846 {
2847         struct cm_id_private *cm_id_priv;
2848         struct cm_rej_msg *rej_msg;
2849         int ret;
2850
2851         rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2852         cm_id_priv = cm_acquire_rejected_id(rej_msg);
2853         if (!cm_id_priv)
2854                 return -EINVAL;
2855
2856         cm_format_rej_event(work);
2857
2858         spin_lock_irq(&cm_id_priv->lock);
2859         switch (cm_id_priv->id.state) {
2860         case IB_CM_REQ_SENT:
2861         case IB_CM_MRA_REQ_RCVD:
2862         case IB_CM_REP_SENT:
2863         case IB_CM_MRA_REP_RCVD:
2864                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2865                 /* fall through */
2866         case IB_CM_REQ_RCVD:
2867         case IB_CM_MRA_REQ_SENT:
2868                 if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN)
2869                         cm_enter_timewait(cm_id_priv);
2870                 else
2871                         cm_reset_to_idle(cm_id_priv);
2872                 break;
2873         case IB_CM_DREQ_SENT:
2874                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2875                 /* fall through */
2876         case IB_CM_REP_RCVD:
2877         case IB_CM_MRA_REP_SENT:
2878                 cm_enter_timewait(cm_id_priv);
2879                 break;
2880         case IB_CM_ESTABLISHED:
2881                 if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
2882                     cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
2883                         if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
2884                                 ib_cancel_mad(cm_id_priv->av.port->mad_agent,
2885                                               cm_id_priv->msg);
2886                         cm_enter_timewait(cm_id_priv);
2887                         break;
2888                 }
2889                 /* fall through */
2890         default:
2891                 spin_unlock_irq(&cm_id_priv->lock);
2892                 pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
2893                          __func__, be32_to_cpu(cm_id_priv->id.local_id),
2894                          cm_id_priv->id.state);
2895                 ret = -EINVAL;
2896                 goto out;
2897         }
2898
2899         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2900         if (!ret)
2901                 list_add_tail(&work->list, &cm_id_priv->work_list);
2902         spin_unlock_irq(&cm_id_priv->lock);
2903
2904         if (ret)
2905                 cm_process_work(cm_id_priv, work);
2906         else
2907                 cm_deref_id(cm_id_priv);
2908         return 0;
2909 out:
2910         cm_deref_id(cm_id_priv);
2911         return -EINVAL;
2912 }
2913
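/*
 * Send an MRA in response to a received REQ, REP, or LAP.  When
 * service_timeout carries IB_CM_MRA_FLAG_DELAY, no MAD is posted;
 * the MRA is only recorded so it can be sent if a duplicate of the
 * message arrives later.
 */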
2914 int ib_send_cm_mra(struct ib_cm_id *cm_id,
2915                    u8 service_timeout,
2916                    const void *private_data,
2917                    u8 private_data_len)
2918 {
2919         struct cm_id_private *cm_id_priv;
2920         struct ib_mad_send_buf *msg;
2921         enum ib_cm_state cm_state;
2922         enum ib_cm_lap_state lap_state;
2923         enum cm_msg_response msg_response;
2924         void *data;
2925         unsigned long flags;
2926         int ret;
2927
2928         if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
2929                 return -EINVAL;
2930
2931         data = cm_copy_private_data(private_data, private_data_len);
2932         if (IS_ERR(data))
2933                 return PTR_ERR(data);
2934
2935         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2936
2937         spin_lock_irqsave(&cm_id_priv->lock, flags);
2938         switch (cm_id_priv->id.state) {
2939         case IB_CM_REQ_RCVD:
2940                 cm_state = IB_CM_MRA_REQ_SENT;
2941                 lap_state = cm_id->lap_state;
2942                 msg_response = CM_MSG_RESPONSE_REQ;
2943                 break;
2944         case IB_CM_REP_RCVD:
2945                 cm_state = IB_CM_MRA_REP_SENT;
2946                 lap_state = cm_id->lap_state;
2947                 msg_response = CM_MSG_RESPONSE_REP;
2948                 break;
2949         case IB_CM_ESTABLISHED:
2950                 if (cm_id->lap_state == IB_CM_LAP_RCVD) {
2951                         cm_state = cm_id->state;
2952                         lap_state = IB_CM_MRA_LAP_SENT;
2953                         msg_response = CM_MSG_RESPONSE_OTHER;
2954                         break;
2955                 }
2956                 /* fall through */
2957         default:
2958                 pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
2959                          __func__, be32_to_cpu(cm_id_priv->id.local_id),
2960                          cm_id_priv->id.state);
2961                 ret = -EINVAL;
2962                 goto error1;
2963         }
2964
2965         if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
2966                 ret = cm_alloc_msg(cm_id_priv, &msg);
2967                 if (ret)
2968                         goto error1;
2969
2970                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2971                               msg_response, service_timeout,
2972                               private_data, private_data_len);
2973                 ret = ib_post_send_mad(msg, NULL);
2974                 if (ret)
2975                         goto error2;
2976         }
2977
2978         cm_id->state = cm_state;
2979         cm_id->lap_state = lap_state;
2980         cm_id_priv->service_timeout = service_timeout;
2981         cm_set_private_data(cm_id_priv, data, private_data_len);
2982         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2983         return 0;
2984
2985 error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2986         kfree(data);
2987         return ret;
2988
2989 error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2990         kfree(data);
2991         cm_free_msg(msg);
2992         return ret;
2993 }
2994 EXPORT_SYMBOL(ib_send_cm_mra);
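/*
 * A minimal usage sketch (hypothetical consumer handler, not part of
 * this file): a CM event handler that needs more time to decide on a
 * REQ can acknowledge it with an MRA before returning.
 *
 *	static int my_cm_handler(struct ib_cm_id *cm_id,
 *				 struct ib_cm_event *event)
 *	{
 *		if (event->event == IB_CM_REQ_RECEIVED)
 *			ib_send_cm_mra(cm_id, 20, NULL, 0);
 *		return 0;
 *	}
 */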
2995
2996 static struct cm_id_private *cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
2997 {
2998         switch (cm_mra_get_msg_mraed(mra_msg)) {
2999         case CM_MSG_RESPONSE_REQ:
3000                 return cm_acquire_id(mra_msg->remote_comm_id, 0);
3001         case CM_MSG_RESPONSE_REP:
3002         case CM_MSG_RESPONSE_OTHER:
3003                 return cm_acquire_id(mra_msg->remote_comm_id,
3004                                      mra_msg->local_comm_id);
3005         default:
3006                 return NULL;
3007         }
3008 }
3009
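/*
 * Handle a received MRA by extending the timeout of the outstanding
 * REQ, REP, or LAP MAD via ib_modify_mad().  The new timeout is the
 * peer's advertised service timeout plus the local ack timeout, both
 * converted to milliseconds.
 */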
3010 static int cm_mra_handler(struct cm_work *work)
3011 {
3012         struct cm_id_private *cm_id_priv;
3013         struct cm_mra_msg *mra_msg;
3014         int timeout, ret;
3015
3016         mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
3017         cm_id_priv = cm_acquire_mraed_id(mra_msg);
3018         if (!cm_id_priv)
3019                 return -EINVAL;
3020
3021         work->cm_event.private_data = &mra_msg->private_data;
3022         work->cm_event.param.mra_rcvd.service_timeout =
3023                                         cm_mra_get_service_timeout(mra_msg);
3024         timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
3025                   cm_convert_to_ms(cm_id_priv->av.timeout);
3026
3027         spin_lock_irq(&cm_id_priv->lock);
3028         switch (cm_id_priv->id.state) {
3029         case IB_CM_REQ_SENT:
3030                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
3031                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
3032                                   cm_id_priv->msg, timeout))
3033                         goto out;
3034                 cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
3035                 break;
3036         case IB_CM_REP_SENT:
3037                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
3038                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
3039                                   cm_id_priv->msg, timeout))
3040                         goto out;
3041                 cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
3042                 break;
3043         case IB_CM_ESTABLISHED:
3044                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
3045                     cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
3046                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
3047                                   cm_id_priv->msg, timeout)) {
3048                         if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
3049                                 atomic_long_inc(&work->port->
3050                                                 counter_group[CM_RECV_DUPLICATES].
3051                                                 counter[CM_MRA_COUNTER]);
3052                         goto out;
3053                 }
3054                 cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
3055                 break;
3056         case IB_CM_MRA_REQ_RCVD:
3057         case IB_CM_MRA_REP_RCVD:
3058                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3059                                 counter[CM_MRA_COUNTER]);
3060                 /* fall through */
3061         default:
3062                 pr_debug("%s local_id %d, cm_id_priv->id.state: %d\n",
3063                          __func__, be32_to_cpu(cm_id_priv->id.local_id),
3064                          cm_id_priv->id.state);
3065                 goto out;
3066         }
3067
3068         cm_id_priv->msg->context[1] = (void *) (unsigned long)
3069                                       cm_id_priv->id.state;
3070         ret = atomic_inc_and_test(&cm_id_priv->work_count);
3071         if (!ret)
3072                 list_add_tail(&work->list, &cm_id_priv->work_list);
3073         spin_unlock_irq(&cm_id_priv->lock);
3074
3075         if (ret)
3076                 cm_process_work(cm_id_priv, work);
3077         else
3078                 cm_deref_id(cm_id_priv);
3079         return 0;
3080 out:
3081         spin_unlock_irq(&cm_id_priv->lock);
3082         cm_deref_id(cm_id_priv);
3083         return -EINVAL;
3084 }
3085
3086 static void cm_format_lap(struct cm_lap_msg *lap_msg,
3087                           struct cm_id_private *cm_id_priv,
3088                           struct sa_path_rec *alternate_path,
3089                           const void *private_data,
3090                           u8 private_data_len)
3091 {
3092         bool alt_ext = false;
3093
3094         if (alternate_path->rec_type == SA_PATH_REC_TYPE_OPA)
3095                 alt_ext = opa_is_extended_lid(alternate_path->opa.dlid,
3096                                               alternate_path->opa.slid);
3097         cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
3098                           cm_form_tid(cm_id_priv));
3099         lap_msg->local_comm_id = cm_id_priv->id.local_id;
3100         lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
3101         cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
3102         /* todo: need remote CM response timeout */
3103         cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
3104         lap_msg->alt_local_lid =
3105                 htons(ntohl(sa_path_get_slid(alternate_path)));
3106         lap_msg->alt_remote_lid =
3107                 htons(ntohl(sa_path_get_dlid(alternate_path)));
3108         lap_msg->alt_local_gid = alternate_path->sgid;
3109         lap_msg->alt_remote_gid = alternate_path->dgid;
3110         if (alt_ext) {
3111                 lap_msg->alt_local_gid.global.interface_id
3112                         = OPA_MAKE_ID(be32_to_cpu(alternate_path->opa.slid));
3113                 lap_msg->alt_remote_gid.global.interface_id
3114                         = OPA_MAKE_ID(be32_to_cpu(alternate_path->opa.dlid));
3115         }
3116         cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
3117         cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class);
3118         lap_msg->alt_hop_limit = alternate_path->hop_limit;
3119         cm_lap_set_packet_rate(lap_msg, alternate_path->rate);
3120         cm_lap_set_sl(lap_msg, alternate_path->sl);
3121         cm_lap_set_subnet_local(lap_msg, 1); /* local only... */
3122         cm_lap_set_local_ack_timeout(lap_msg,
3123                 cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
3124                                alternate_path->packet_life_time));
3125
3126         if (private_data && private_data_len)
3127                 memcpy(lap_msg->private_data, private_data, private_data_len);
3128 }
3129
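/*
 * Send a LAP to load an alternate path.  Only valid on an established
 * connection with no alternate path transition already in progress
 * (lap_state IB_CM_LAP_UNINIT or IB_CM_LAP_IDLE).
 */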
3130 int ib_send_cm_lap(struct ib_cm_id *cm_id,
3131                    struct sa_path_rec *alternate_path,
3132                    const void *private_data,
3133                    u8 private_data_len)
3134 {
3135         struct cm_id_private *cm_id_priv;
3136         struct ib_mad_send_buf *msg;
3137         unsigned long flags;
3138         int ret;
3139
3140         if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE)
3141                 return -EINVAL;
3142
3143         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3144         spin_lock_irqsave(&cm_id_priv->lock, flags);
3145         if (cm_id->state != IB_CM_ESTABLISHED ||
3146             (cm_id->lap_state != IB_CM_LAP_UNINIT &&
3147              cm_id->lap_state != IB_CM_LAP_IDLE)) {
3148                 ret = -EINVAL;
3149                 goto out;
3150         }
3151
3152         ret = cm_init_av_by_path(alternate_path, NULL, &cm_id_priv->alt_av,
3153                                  cm_id_priv);
3154         if (ret)
3155                 goto out;
3156         cm_id_priv->alt_av.timeout =
3157                         cm_ack_timeout(cm_id_priv->target_ack_delay,
3158                                        cm_id_priv->alt_av.timeout - 1);
3159
3160         ret = cm_alloc_msg(cm_id_priv, &msg);
3161         if (ret)
3162                 goto out;
3163
3164         cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
3165                       alternate_path, private_data, private_data_len);
3166         msg->timeout_ms = cm_id_priv->timeout_ms;
3167         msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
3168
3169         ret = ib_post_send_mad(msg, NULL);
3170         if (ret) {
3171                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3172                 cm_free_msg(msg);
3173                 return ret;
3174         }
3175
3176         cm_id->lap_state = IB_CM_LAP_SENT;
3177         cm_id_priv->msg = msg;
3178
3179 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3180         return ret;
3181 }
3182 EXPORT_SYMBOL(ib_send_cm_lap);
3183
3184 static void cm_format_path_lid_from_lap(struct cm_lap_msg *lap_msg,
3185                                         struct sa_path_rec *path)
3186 {
3187         u32 lid;
3188
3189         if (path->rec_type != SA_PATH_REC_TYPE_OPA) {
3190                 sa_path_set_dlid(path, ntohs(lap_msg->alt_local_lid));
3191                 sa_path_set_slid(path, ntohs(lap_msg->alt_remote_lid));
3192         } else {
3193                 lid = opa_get_lid_from_gid(&lap_msg->alt_local_gid);
3194                 sa_path_set_dlid(path, lid);
3195
3196                 lid = opa_get_lid_from_gid(&lap_msg->alt_remote_gid);
3197                 sa_path_set_slid(path, lid);
3198         }
3199 }
3200
3201 static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
3202                                     struct sa_path_rec *path,
3203                                     struct cm_lap_msg *lap_msg)
3204 {
3205         path->dgid = lap_msg->alt_local_gid;
3206         path->sgid = lap_msg->alt_remote_gid;
3207         path->flow_label = cm_lap_get_flow_label(lap_msg);
3208         path->hop_limit = lap_msg->alt_hop_limit;
3209         path->traffic_class = cm_lap_get_traffic_class(lap_msg);
3210         path->reversible = 1;
3211         path->pkey = cm_id_priv->pkey;
3212         path->sl = cm_lap_get_sl(lap_msg);
3213         path->mtu_selector = IB_SA_EQ;
3214         path->mtu = cm_id_priv->path_mtu;
3215         path->rate_selector = IB_SA_EQ;
3216         path->rate = cm_lap_get_packet_rate(lap_msg);
3217         path->packet_life_time_selector = IB_SA_EQ;
3218         path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg);
3219         path->packet_life_time -= (path->packet_life_time > 0);
3220         cm_format_path_lid_from_lap(lap_msg, path);
3221 }
3222
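/*
 * Handle a received LAP: build the alternate path record from the
 * message, re-send the pending MRA for a duplicate LAP, refresh the
 * reply address vector from the incoming MAD, and initialize the
 * alternate AV before reporting the event.
 */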
3223 static int cm_lap_handler(struct cm_work *work)
3224 {
3225         struct cm_id_private *cm_id_priv;
3226         struct cm_lap_msg *lap_msg;
3227         struct ib_cm_lap_event_param *param;
3228         struct ib_mad_send_buf *msg = NULL;
3229         int ret;
3230
3231         /* Alternate path messages are currently not supported for the
3232          * RoCE link layer.
3233          */
3234         if (rdma_protocol_roce(work->port->cm_dev->ib_device,
3235                                work->port->port_num))
3236                 return -EINVAL;
3237
3238         /* todo: verify LAP request and send reject APR if invalid. */
3239         lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
3240         cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
3241                                    lap_msg->local_comm_id);
3242         if (!cm_id_priv)
3243                 return -EINVAL;
3244
3245         param = &work->cm_event.param.lap_rcvd;
3246         memset(&work->path[0], 0, sizeof(work->path[0]));
3247         cm_path_set_rec_type(work->port->cm_dev->ib_device,
3248                              work->port->port_num,
3249                              &work->path[0],
3250                              &lap_msg->alt_local_gid);
3251         param->alternate_path = &work->path[0];
3252         cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
3253         work->cm_event.private_data = &lap_msg->private_data;
3254
3255         spin_lock_irq(&cm_id_priv->lock);
3256         if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
3257                 goto unlock;
3258
3259         switch (cm_id_priv->id.lap_state) {
3260         case IB_CM_LAP_UNINIT:
3261         case IB_CM_LAP_IDLE:
3262                 break;
3263         case IB_CM_MRA_LAP_SENT:
3264                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3265                                 counter[CM_LAP_COUNTER]);
3266                 msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
3267                 if (IS_ERR(msg))
3268                         goto unlock;
3269
3270                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
3271                               CM_MSG_RESPONSE_OTHER,
3272                               cm_id_priv->service_timeout,
3273                               cm_id_priv->private_data,
3274                               cm_id_priv->private_data_len);
3275                 spin_unlock_irq(&cm_id_priv->lock);
3276
3277                 if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
3278                     ib_post_send_mad(msg, NULL))
3279                         cm_free_msg(msg);
3280                 goto deref;
3281         case IB_CM_LAP_RCVD:
3282                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3283                                 counter[CM_LAP_COUNTER]);
3284                 goto unlock;
3285         default:
3286                 goto unlock;
3287         }
3288
3289         ret = cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
3290                                  work->mad_recv_wc->recv_buf.grh,
3291                                  &cm_id_priv->av);
3292         if (ret)
3293                 goto unlock;
3294
3295         ret = cm_init_av_by_path(param->alternate_path, NULL,
3296                                  &cm_id_priv->alt_av, cm_id_priv);
3297         if (ret)
3298                 goto unlock;
3299
3300         cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
3301         cm_id_priv->tid = lap_msg->hdr.tid;
3302         ret = atomic_inc_and_test(&cm_id_priv->work_count);
3303         if (!ret)
3304                 list_add_tail(&work->list, &cm_id_priv->work_list);
3305         spin_unlock_irq(&cm_id_priv->lock);
3306
3307         if (ret)
3308                 cm_process_work(cm_id_priv, work);
3309         else
3310                 cm_deref_id(cm_id_priv);
3311         return 0;
3312
3313 unlock: spin_unlock_irq(&cm_id_priv->lock);
3314 deref:  cm_deref_id(cm_id_priv);
3315         return -EINVAL;
3316 }
3317
3318 static void cm_format_apr(struct cm_apr_msg *apr_msg,
3319                           struct cm_id_private *cm_id_priv,
3320                           enum ib_cm_apr_status status,
3321                           void *info,
3322                           u8 info_length,
3323                           const void *private_data,
3324                           u8 private_data_len)
3325 {
3326         cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid);
3327         apr_msg->local_comm_id = cm_id_priv->id.local_id;
3328         apr_msg->remote_comm_id = cm_id_priv->id.remote_id;
3329         apr_msg->ap_status = (u8) status;
3330
3331         if (info && info_length) {
3332                 apr_msg->info_length = info_length;
3333                 memcpy(apr_msg->info, info, info_length);
3334         }
3335
3336         if (private_data && private_data_len)
3337                 memcpy(apr_msg->private_data, private_data, private_data_len);
3338 }
3339
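/*
 * Send an APR accepting or rejecting a received LAP.  Valid only while
 * a LAP is outstanding (lap_state IB_CM_LAP_RCVD or
 * IB_CM_MRA_LAP_SENT); on success lap_state returns to idle.
 */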
3340 int ib_send_cm_apr(struct ib_cm_id *cm_id,
3341                    enum ib_cm_apr_status status,
3342                    void *info,
3343                    u8 info_length,
3344                    const void *private_data,
3345                    u8 private_data_len)
3346 {
3347         struct cm_id_private *cm_id_priv;
3348         struct ib_mad_send_buf *msg;
3349         unsigned long flags;
3350         int ret;
3351
3352         if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) ||
3353             (info && info_length > IB_CM_APR_INFO_LENGTH))
3354                 return -EINVAL;
3355
3356         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3357         spin_lock_irqsave(&cm_id_priv->lock, flags);
3358         if (cm_id->state != IB_CM_ESTABLISHED ||
3359             (cm_id->lap_state != IB_CM_LAP_RCVD &&
3360              cm_id->lap_state != IB_CM_MRA_LAP_SENT)) {
3361                 ret = -EINVAL;
3362                 goto out;
3363         }
3364
3365         ret = cm_alloc_msg(cm_id_priv, &msg);
3366         if (ret)
3367                 goto out;
3368
3369         cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
3370                       info, info_length, private_data, private_data_len);
3371         ret = ib_post_send_mad(msg, NULL);
3372         if (ret) {
3373                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3374                 cm_free_msg(msg);
3375                 return ret;
3376         }
3377
3378         cm_id->lap_state = IB_CM_LAP_IDLE;
3379 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3380         return ret;
3381 }
3382 EXPORT_SYMBOL(ib_send_cm_apr);
3383
3384 static int cm_apr_handler(struct cm_work *work)
3385 {
3386         struct cm_id_private *cm_id_priv;
3387         struct cm_apr_msg *apr_msg;
3388         int ret;
3389
3390         /* Alternate path messages are currently not supported for the
3391          * RoCE link layer.
3392          */
3393         if (rdma_protocol_roce(work->port->cm_dev->ib_device,
3394                                work->port->port_num))
3395                 return -EINVAL;
3396
3397         apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
3398         cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
3399                                    apr_msg->local_comm_id);
3400         if (!cm_id_priv)
3401                 return -EINVAL; /* Unmatched reply. */
3402
3403         work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status;
3404         work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info;
3405         work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
3406         work->cm_event.private_data = &apr_msg->private_data;
3407
3408         spin_lock_irq(&cm_id_priv->lock);
3409         if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
3410             (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
3411              cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
3412                 spin_unlock_irq(&cm_id_priv->lock);
3413                 goto out;
3414         }
3415         cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
3416         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
3417         cm_id_priv->msg = NULL;
3418
3419         ret = atomic_inc_and_test(&cm_id_priv->work_count);
3420         if (!ret)
3421                 list_add_tail(&work->list, &cm_id_priv->work_list);
3422         spin_unlock_irq(&cm_id_priv->lock);
3423
3424         if (ret)
3425                 cm_process_work(cm_id_priv, work);
3426         else
3427                 cm_deref_id(cm_id_priv);
3428         return 0;
3429 out:
3430         cm_deref_id(cm_id_priv);
3431         return -EINVAL;
3432 }
3433
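/*
 * Run when the timewait period of a disconnected cm_id expires: remove
 * the timewait entry and move the cm_id from IB_CM_TIMEWAIT to
 * IB_CM_IDLE, delivering an IB_CM_TIMEWAIT_EXIT event.
 */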
3434 static int cm_timewait_handler(struct cm_work *work)
3435 {
3436         struct cm_timewait_info *timewait_info;
3437         struct cm_id_private *cm_id_priv;
3438         int ret;
3439
3440         timewait_info = (struct cm_timewait_info *)work;
3441         spin_lock_irq(&cm.lock);
3442         list_del(&timewait_info->list);
3443         spin_unlock_irq(&cm.lock);
3444
3445         cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
3446                                    timewait_info->work.remote_id);
3447         if (!cm_id_priv)
3448                 return -EINVAL;
3449
3450         spin_lock_irq(&cm_id_priv->lock);
3451         if (cm_id_priv->id.state != IB_CM_TIMEWAIT ||
3452             cm_id_priv->remote_qpn != timewait_info->remote_qpn) {
3453                 spin_unlock_irq(&cm_id_priv->lock);
3454                 goto out;
3455         }
3456         cm_id_priv->id.state = IB_CM_IDLE;
3457         ret = atomic_inc_and_test(&cm_id_priv->work_count);
3458         if (!ret)
3459                 list_add_tail(&work->list, &cm_id_priv->work_list);
3460         spin_unlock_irq(&cm_id_priv->lock);
3461
3462         if (ret)
3463                 cm_process_work(cm_id_priv, work);
3464         else
3465                 cm_deref_id(cm_id_priv);
3466         return 0;
3467 out:
3468         cm_deref_id(cm_id_priv);
3469         return -EINVAL;
3470 }
3471
3472 static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
3473                                struct cm_id_private *cm_id_priv,
3474                                struct ib_cm_sidr_req_param *param)
3475 {
3476         cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
3477                           cm_form_tid(cm_id_priv));
3478         sidr_req_msg->request_id = cm_id_priv->id.local_id;
3479         sidr_req_msg->pkey = param->path->pkey;
3480         sidr_req_msg->service_id = param->service_id;
3481
3482         if (param->private_data && param->private_data_len)
3483                 memcpy(sidr_req_msg->private_data, param->private_data,
3484                        param->private_data_len);
3485 }
3486
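/*
 * Send a SIDR REQ to resolve a service ID to a QPN.  The request is
 * only posted while the cm_id is idle; on success the cm_id moves to
 * IB_CM_SIDR_REQ_SENT.
 */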
3487 int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
3488                         struct ib_cm_sidr_req_param *param)
3489 {
3490         struct cm_id_private *cm_id_priv;
3491         struct ib_mad_send_buf *msg;
3492         unsigned long flags;
3493         int ret;
3494
3495         if (!param->path || (param->private_data &&
3496              param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE))
3497                 return -EINVAL;
3498
3499         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3500         ret = cm_init_av_by_path(param->path, param->sgid_attr,
3501                                  &cm_id_priv->av,
3502                                  cm_id_priv);
3503         if (ret)
3504                 goto out;
3505
3506         cm_id->service_id = param->service_id;
3507         cm_id->service_mask = ~cpu_to_be64(0);
3508         cm_id_priv->timeout_ms = param->timeout_ms;
3509         cm_id_priv->max_cm_retries = param->max_cm_retries;
3510         ret = cm_alloc_msg(cm_id_priv, &msg);
3511         if (ret)
3512                 goto out;
3513
3514         cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
3515                            param);
3516         msg->timeout_ms = cm_id_priv->timeout_ms;
3517         msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
3518
3519         spin_lock_irqsave(&cm_id_priv->lock, flags);
3520         if (cm_id->state == IB_CM_IDLE)
3521                 ret = ib_post_send_mad(msg, NULL);
3522         else
3523                 ret = -EINVAL;
3524
3525         if (ret) {
3526                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3527                 cm_free_msg(msg);
3528                 goto out;
3529         }
3530         cm_id->state = IB_CM_SIDR_REQ_SENT;
3531         cm_id_priv->msg = msg;
3532         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3533 out:
3534         return ret;
3535 }
3536 EXPORT_SYMBOL(ib_send_cm_sidr_req);
3537
3538 static void cm_format_sidr_req_event(struct cm_work *work,
3539                                      const struct cm_id_private *rx_cm_id,
3540                                      struct ib_cm_id *listen_id)
3541 {
3542         struct cm_sidr_req_msg *sidr_req_msg;
3543         struct ib_cm_sidr_req_event_param *param;
3544
3545         sidr_req_msg = (struct cm_sidr_req_msg *)
3546                                 work->mad_recv_wc->recv_buf.mad;
3547         param = &work->cm_event.param.sidr_req_rcvd;
3548         param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
3549         param->listen_id = listen_id;
3550         param->service_id = sidr_req_msg->service_id;
3551         param->bth_pkey = cm_get_bth_pkey(work);
3552         param->port = work->port->port_num;
3553         param->sgid_attr = rx_cm_id->av.ah_attr.grh.sgid_attr;
3554         work->cm_event.private_data = &sidr_req_msg->private_data;
3555 }
3556
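/*
 * Handle a received SIDR REQ: create a new cm_id for the request, drop
 * duplicates already present in the remote SIDR table, and hand the
 * event to the cm_id listening on the requested service ID, rejecting
 * with IB_SIDR_UNSUPPORTED when no listener matches.
 */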
3557 static int cm_sidr_req_handler(struct cm_work *work)
3558 {
3559         struct ib_cm_id *cm_id;
3560         struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
3561         struct cm_sidr_req_msg *sidr_req_msg;
3562         struct ib_wc *wc;
3563         int ret;
3564
3565         cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
3566         if (IS_ERR(cm_id))
3567                 return PTR_ERR(cm_id);
3568         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3569
3570         /* Record SGID/SLID and request ID for lookup. */
3571         sidr_req_msg = (struct cm_sidr_req_msg *)
3572                                 work->mad_recv_wc->recv_buf.mad;
3573         wc = work->mad_recv_wc->wc;
3574         cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
3575         cm_id_priv->av.dgid.global.interface_id = 0;
3576         ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
3577                                       work->mad_recv_wc->recv_buf.grh,
3578                                       &cm_id_priv->av);
3579         if (ret)
3580                 goto out;
3581
3582         cm_id_priv->id.remote_id = sidr_req_msg->request_id;
3583         cm_id_priv->tid = sidr_req_msg->hdr.tid;
3584         atomic_inc(&cm_id_priv->work_count);
3585
3586         spin_lock_irq(&cm.lock);
3587         cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
3588         if (cur_cm_id_priv) {
3589                 spin_unlock_irq(&cm.lock);
3590                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
3591                                 counter[CM_SIDR_REQ_COUNTER]);
3592                 goto out; /* Duplicate message. */
3593         }
3594         cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
3595         cur_cm_id_priv = cm_find_listen(cm_id->device,
3596                                         sidr_req_msg->service_id);
3597         if (!cur_cm_id_priv) {
3598                 spin_unlock_irq(&cm.lock);
3599                 cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
3600                 goto out; /* No match. */
3601         }
3602         atomic_inc(&cur_cm_id_priv->refcount);
3603         atomic_inc(&cm_id_priv->refcount);
3604         spin_unlock_irq(&cm.lock);
3605
3606         cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
3607         cm_id_priv->id.context = cur_cm_id_priv->id.context;
3608         cm_id_priv->id.service_id = sidr_req_msg->service_id;
3609         cm_id_priv->id.service_mask = ~cpu_to_be64(0);
3610
3611         cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id);
3612         cm_process_work(cm_id_priv, work);
3613         cm_deref_id(cur_cm_id_priv);
3614         return 0;
3615 out:
3616         ib_destroy_cm_id(&cm_id_priv->id);
3617         return -EINVAL;
3618 }
3619
3620 static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
3621                                struct cm_id_private *cm_id_priv,
3622                                struct ib_cm_sidr_rep_param *param)
3623 {
3624         cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
3625                           cm_id_priv->tid);
3626         sidr_rep_msg->request_id = cm_id_priv->id.remote_id;
3627         sidr_rep_msg->status = param->status;
3628         cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num));
3629         sidr_rep_msg->service_id = cm_id_priv->id.service_id;
3630         sidr_rep_msg->qkey = cpu_to_be32(param->qkey);
3631
3632         if (param->info && param->info_length)
3633                 memcpy(sidr_rep_msg->info, param->info, param->info_length);
3634
3635         if (param->private_data && param->private_data_len)
3636                 memcpy(sidr_rep_msg->private_data, param->private_data,
3637                        param->private_data_len);
3638 }
3639
3640 int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
3641                         struct ib_cm_sidr_rep_param *param)
3642 {
3643         struct cm_id_private *cm_id_priv;
3644         struct ib_mad_send_buf *msg;
3645         unsigned long flags;
3646         int ret;
3647
3648         if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
3649             (param->private_data &&
3650              param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
3651                 return -EINVAL;
3652
3653         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3654         spin_lock_irqsave(&cm_id_priv->lock, flags);
3655         if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
3656                 ret = -EINVAL;
3657                 goto error;
3658         }
3659
3660         ret = cm_alloc_msg(cm_id_priv, &msg);
3661         if (ret)
3662                 goto error;
3663
3664         cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
3665                            param);
3666         ret = ib_post_send_mad(msg, NULL);
3667         if (ret) {
3668                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3669                 cm_free_msg(msg);
3670                 return ret;
3671         }
3672         cm_id->state = IB_CM_IDLE;
3673         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3674
3675         spin_lock_irqsave(&cm.lock, flags);
3676         if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) {
3677                 rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
3678                 RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
3679         }
3680         spin_unlock_irqrestore(&cm.lock, flags);
3681         return 0;
3682
3683 error:  spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3684         return ret;
3685 }
3686 EXPORT_SYMBOL(ib_send_cm_sidr_rep);
3687
3688 static void cm_format_sidr_rep_event(struct cm_work *work,
3689                                      const struct cm_id_private *cm_id_priv)
3690 {
3691         struct cm_sidr_rep_msg *sidr_rep_msg;
3692         struct ib_cm_sidr_rep_event_param *param;
3693
3694         sidr_rep_msg = (struct cm_sidr_rep_msg *)
3695                                 work->mad_recv_wc->recv_buf.mad;
3696         param = &work->cm_event.param.sidr_rep_rcvd;
3697         param->status = sidr_rep_msg->status;
3698         param->qkey = be32_to_cpu(sidr_rep_msg->qkey);
3699         param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
3700         param->info = &sidr_rep_msg->info;
3701         param->info_len = sidr_rep_msg->info_length;
3702         param->sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
3703         work->cm_event.private_data = &sidr_rep_msg->private_data;
3704 }
3705
3706 static int cm_sidr_rep_handler(struct cm_work *work)
3707 {
3708         struct cm_sidr_rep_msg *sidr_rep_msg;
3709         struct cm_id_private *cm_id_priv;
3710
3711         sidr_rep_msg = (struct cm_sidr_rep_msg *)
3712                                 work->mad_recv_wc->recv_buf.mad;
3713         cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0);
3714         if (!cm_id_priv)
3715                 return -EINVAL; /* Unmatched reply. */
3716
3717         spin_lock_irq(&cm_id_priv->lock);
3718         if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
3719                 spin_unlock_irq(&cm_id_priv->lock);
3720                 goto out;
3721         }
3722         cm_id_priv->id.state = IB_CM_IDLE;
3723         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
3724         spin_unlock_irq(&cm_id_priv->lock);
3725
3726         cm_format_sidr_rep_event(work, cm_id_priv);
3727         cm_process_work(cm_id_priv, work);
3728         return 0;
3729 out:
3730         cm_deref_id(cm_id_priv);
3731         return -EINVAL;
3732 }
3733
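/*
 * Convert a failed send of a message that was awaiting a response into
 * the matching *_ERROR event (REQ, REP, DREQ, or SIDR REQ).  Stale
 * sends, or sends that expected no response, are simply discarded.
 */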
3734 static void cm_process_send_error(struct ib_mad_send_buf *msg,
3735                                   enum ib_wc_status wc_status)
3736 {
3737         struct cm_id_private *cm_id_priv;
3738         struct ib_cm_event cm_event;
3739         enum ib_cm_state state;
3740         int ret;
3741
3742         memset(&cm_event, 0, sizeof cm_event);
3743         cm_id_priv = msg->context[0];
3744
3745         /* Discard old sends or ones without a response. */
3746         spin_lock_irq(&cm_id_priv->lock);
3747         state = (enum ib_cm_state) (unsigned long) msg->context[1];
3748         if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
3749                 goto discard;
3750
3751         pr_debug_ratelimited("CM: failed sending MAD in state %d. (%s)\n",
3752                              state, ib_wc_status_msg(wc_status));
3753         switch (state) {
3754         case IB_CM_REQ_SENT:
3755         case IB_CM_MRA_REQ_RCVD:
3756                 cm_reset_to_idle(cm_id_priv);
3757                 cm_event.event = IB_CM_REQ_ERROR;
3758                 break;
3759         case IB_CM_REP_SENT:
3760         case IB_CM_MRA_REP_RCVD:
3761                 cm_reset_to_idle(cm_id_priv);
3762                 cm_event.event = IB_CM_REP_ERROR;
3763                 break;
3764         case IB_CM_DREQ_SENT:
3765                 cm_enter_timewait(cm_id_priv);
3766                 cm_event.event = IB_CM_DREQ_ERROR;
3767                 break;
3768         case IB_CM_SIDR_REQ_SENT:
3769                 cm_id_priv->id.state = IB_CM_IDLE;
3770                 cm_event.event = IB_CM_SIDR_REQ_ERROR;
3771                 break;
3772         default:
3773                 goto discard;
3774         }
3775         spin_unlock_irq(&cm_id_priv->lock);
3776         cm_event.param.send_status = wc_status;
3777
3778         /* No other events can occur on the cm_id at this point. */
3779         ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
3780         cm_free_msg(msg);
3781         if (ret)
3782                 ib_destroy_cm_id(&cm_id_priv->id);
3783         return;
3784 discard:
3785         spin_unlock_irq(&cm_id_priv->lock);
3786         cm_free_msg(msg);
3787 }
3788
3789 static void cm_send_handler(struct ib_mad_agent *mad_agent,
3790                             struct ib_mad_send_wc *mad_send_wc)
3791 {
3792         struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
3793         struct cm_port *port;
3794         u16 attr_index;
3795
3796         port = mad_agent->context;
3797         attr_index = be16_to_cpu(((struct ib_mad_hdr *)
3798                                   msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
3799
3800         /*
3801          * If the send was in response to a received message (context[0] is not
3802          * set to a cm_id), and is not a REJ, then it is a send that was
3803          * manually retried.
3804          */
3805         if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
3806                 msg->retries = 1;
3807
3808         atomic_long_add(1 + msg->retries,
3809                         &port->counter_group[CM_XMIT].counter[attr_index]);
3810         if (msg->retries)
3811                 atomic_long_add(msg->retries,
3812                                 &port->counter_group[CM_XMIT_RETRIES].
3813                                 counter[attr_index]);
3814
3815         switch (mad_send_wc->status) {
3816         case IB_WC_SUCCESS:
3817         case IB_WC_WR_FLUSH_ERR:
3818                 cm_free_msg(msg);
3819                 break;
3820         default:
3821                 if (msg->context[0] && msg->context[1])
3822                         cm_process_send_error(msg, mad_send_wc->status);
3823                 else
3824                         cm_free_msg(msg);
3825                 break;
3826         }
3827 }
3828
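/*
 * Workqueue entry point: dispatch a queued cm_work item to the handler
 * for its event type.  A handler that fails returns nonzero without
 * consuming the work item, so it is freed here.
 */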
3829 static void cm_work_handler(struct work_struct *_work)
3830 {
3831         struct cm_work *work = container_of(_work, struct cm_work, work.work);
3832         int ret;
3833
3834         switch (work->cm_event.event) {
3835         case IB_CM_REQ_RECEIVED:
3836                 ret = cm_req_handler(work);
3837                 break;
3838         case IB_CM_MRA_RECEIVED:
3839                 ret = cm_mra_handler(work);
3840                 break;
3841         case IB_CM_REJ_RECEIVED:
3842                 ret = cm_rej_handler(work);
3843                 break;
3844         case IB_CM_REP_RECEIVED:
3845                 ret = cm_rep_handler(work);
3846                 break;
3847         case IB_CM_RTU_RECEIVED:
3848                 ret = cm_rtu_handler(work);
3849                 break;
3850         case IB_CM_USER_ESTABLISHED:
3851                 ret = cm_establish_handler(work);
3852                 break;
3853         case IB_CM_DREQ_RECEIVED:
3854                 ret = cm_dreq_handler(work);
3855                 break;
3856         case IB_CM_DREP_RECEIVED:
3857                 ret = cm_drep_handler(work);
3858                 break;
3859         case IB_CM_SIDR_REQ_RECEIVED:
3860                 ret = cm_sidr_req_handler(work);
3861                 break;
3862         case IB_CM_SIDR_REP_RECEIVED:
3863                 ret = cm_sidr_rep_handler(work);
3864                 break;
3865         case IB_CM_LAP_RECEIVED:
3866                 ret = cm_lap_handler(work);
3867                 break;
3868         case IB_CM_APR_RECEIVED:
3869                 ret = cm_apr_handler(work);
3870                 break;
3871         case IB_CM_TIMEWAIT_EXIT:
3872                 ret = cm_timewait_handler(work);
3873                 break;
3874         default:
3875                 pr_debug("cm_event.event: 0x%x\n", work->cm_event.event);
3876                 ret = -EINVAL;
3877                 break;
3878         }
3879         if (ret)
3880                 cm_free_work(work);
3881 }
3882
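/*
 * Transition a cm_id that has sent a REP into the established state on
 * behalf of the consumer (IB_EVENT_COMM_EST), then queue delivery of
 * the IB_CM_USER_ESTABLISHED event; -EISCONN reports an id that is
 * already established.
 */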
3883 static int cm_establish(struct ib_cm_id *cm_id)
3884 {
3885         struct cm_id_private *cm_id_priv;
3886         struct cm_work *work;
3887         unsigned long flags;
3888         int ret = 0;
3889         struct cm_device *cm_dev;
3890
3891         cm_dev = ib_get_client_data(cm_id->device, &cm_client);
3892         if (!cm_dev)
3893                 return -ENODEV;
3894
3895         work = kmalloc(sizeof *work, GFP_ATOMIC);
3896         if (!work)
3897                 return -ENOMEM;
3898
3899         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3900         spin_lock_irqsave(&cm_id_priv->lock, flags);
3901         switch (cm_id->state) {
3903         case IB_CM_REP_SENT:
3904         case IB_CM_MRA_REP_RCVD:
3905                 cm_id->state = IB_CM_ESTABLISHED;
3906                 break;
3907         case IB_CM_ESTABLISHED:
3908                 ret = -EISCONN;
3909                 break;
3910         default:
3911                 pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
3912                          be32_to_cpu(cm_id->local_id), cm_id->state);
3913                 ret = -EINVAL;
3914                 break;
3915         }
3916         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3917
3918         if (ret) {
3919                 kfree(work);
3920                 goto out;
3921         }
3922
3923         /*
3924          * The CM worker thread may try to destroy the cm_id before it
3925          * can execute this work item.  To prevent potential deadlock,
3926          * we need to find the cm_id once we're in the context of the
3927          * worker thread, rather than holding a reference on it.
3928          */
3929         INIT_DELAYED_WORK(&work->work, cm_work_handler);
3930         work->local_id = cm_id->local_id;
3931         work->remote_id = cm_id->remote_id;
3932         work->mad_recv_wc = NULL;
3933         work->cm_event.event = IB_CM_USER_ESTABLISHED;
3934
3935         /* Check if the device started its remove_one */
3936         spin_lock_irqsave(&cm.lock, flags);
3937         if (!cm_dev->going_down) {
3938                 queue_delayed_work(cm.wq, &work->work, 0);
3939         } else {
3940                 kfree(work);
3941                 ret = -ENODEV;
3942         }
3943         spin_unlock_irqrestore(&cm.lock, flags);
3944
3945 out:
3946         return ret;
3947 }
3948
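/*
 * Fail over to the alternate path (IB_EVENT_PATH_MIG) by swapping the
 * primary and alternate address vectors.  Only valid on an established
 * connection with lap_state IB_CM_LAP_UNINIT or IB_CM_LAP_IDLE.
 */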
3949 static int cm_migrate(struct ib_cm_id *cm_id)
3950 {
3951         struct cm_id_private *cm_id_priv;
3952         struct cm_av tmp_av;
3953         unsigned long flags;
3954         int tmp_send_port_not_ready;
3955         int ret = 0;
3956
3957         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3958         spin_lock_irqsave(&cm_id_priv->lock, flags);
3959         if (cm_id->state == IB_CM_ESTABLISHED &&
3960             (cm_id->lap_state == IB_CM_LAP_UNINIT ||
3961              cm_id->lap_state == IB_CM_LAP_IDLE)) {
3962                 cm_id->lap_state = IB_CM_LAP_IDLE;
3963                 /* Swap address vector */
3964                 tmp_av = cm_id_priv->av;
3965                 cm_id_priv->av = cm_id_priv->alt_av;
3966                 cm_id_priv->alt_av = tmp_av;
3967                 /* Swap port send ready state */
3968                 tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready;
3969                 cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready;
3970                 cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready;
3971         } else
3972                 ret = -EINVAL;
3973         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3974
3975         return ret;
3976 }
3977
3978 int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
3979 {
3980         int ret;
3981
3982         switch (event) {
3983         case IB_EVENT_COMM_EST:
3984                 ret = cm_establish(cm_id);
3985                 break;
3986         case IB_EVENT_PATH_MIG:
3987                 ret = cm_migrate(cm_id);
3988                 break;
3989         default:
3990                 ret = -EINVAL;
3991         }
3992         return ret;
3993 }
3994 EXPORT_SYMBOL(ib_cm_notify);
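/*
 * A minimal usage sketch (hypothetical QP event handler, not part of
 * this file): consumers forward communication-established and path
 * migration QP events so that CM state tracks the hardware.
 *
 *	static void my_qp_event_handler(struct ib_event *event, void *ctx)
 *	{
 *		struct ib_cm_id *cm_id = ctx;
 *
 *		if (event->event == IB_EVENT_COMM_EST ||
 *		    event->event == IB_EVENT_PATH_MIG)
 *			ib_cm_notify(cm_id, event->event);
 *	}
 */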
3995
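/*
 * MAD receive path: map the incoming attribute ID to a CM event, bump
 * the receive counter, and queue a cm_work item sized for the number
 * of path records the message may carry (one or two for a REQ
 * depending on whether it names an alternate path, one for a LAP,
 * none otherwise).
 */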
static void cm_recv_handler(struct ib_mad_agent *mad_agent,
                            struct ib_mad_send_buf *send_buf,
                            struct ib_mad_recv_wc *mad_recv_wc)
{
        struct cm_port *port = mad_agent->context;
        struct cm_work *work;
        enum ib_cm_event_type event;
        bool alt_path = false;
        u16 attr_id;
        int paths = 0;
        int going_down = 0;

        switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
        case CM_REQ_ATTR_ID:
                alt_path = cm_req_has_alt_path((struct cm_req_msg *)
                                                mad_recv_wc->recv_buf.mad);
                paths = 1 + (alt_path != 0);
                event = IB_CM_REQ_RECEIVED;
                break;
        case CM_MRA_ATTR_ID:
                event = IB_CM_MRA_RECEIVED;
                break;
        case CM_REJ_ATTR_ID:
                event = IB_CM_REJ_RECEIVED;
                break;
        case CM_REP_ATTR_ID:
                event = IB_CM_REP_RECEIVED;
                break;
        case CM_RTU_ATTR_ID:
                event = IB_CM_RTU_RECEIVED;
                break;
        case CM_DREQ_ATTR_ID:
                event = IB_CM_DREQ_RECEIVED;
                break;
        case CM_DREP_ATTR_ID:
                event = IB_CM_DREP_RECEIVED;
                break;
        case CM_SIDR_REQ_ATTR_ID:
                event = IB_CM_SIDR_REQ_RECEIVED;
                break;
        case CM_SIDR_REP_ATTR_ID:
                event = IB_CM_SIDR_REP_RECEIVED;
                break;
        case CM_LAP_ATTR_ID:
                paths = 1;
                event = IB_CM_LAP_RECEIVED;
                break;
        case CM_APR_ATTR_ID:
                event = IB_CM_APR_RECEIVED;
                break;
        default:
                ib_free_recv_mad(mad_recv_wc);
                return;
        }

        attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
        atomic_long_inc(&port->counter_group[CM_RECV].
                        counter[attr_id - CM_ATTR_ID_OFFSET]);

        work = kmalloc(sizeof(*work) + sizeof(struct sa_path_rec) * paths,
                       GFP_KERNEL);
        if (!work) {
                ib_free_recv_mad(mad_recv_wc);
                return;
        }

        INIT_DELAYED_WORK(&work->work, cm_work_handler);
        work->cm_event.event = event;
        work->mad_recv_wc = mad_recv_wc;
        work->port = port;

        /* Check if the device started its remove_one */
        spin_lock_irq(&cm.lock);
        if (!port->cm_dev->going_down)
                queue_delayed_work(cm.wq, &work->work, 0);
        else
                going_down = 1;
        spin_unlock_irq(&cm.lock);

        if (going_down) {
                kfree(work);
                ib_free_recv_mad(mad_recv_wc);
        }
}

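/*
 * The three cm_init_qp_*_attr() helpers below fill in the QP attributes a
 * consumer needs for the INIT, RTR and RTS transitions of the connection's
 * QP.  Each validates the current cm_id state under the cm_id lock and
 * returns -EINVAL if the transition is not legal in that state.
 */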
static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
                                struct ib_qp_attr *qp_attr,
                                int *qp_attr_mask)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        switch (cm_id_priv->id.state) {
        case IB_CM_REQ_SENT:
        case IB_CM_MRA_REQ_RCVD:
        case IB_CM_REQ_RCVD:
        case IB_CM_MRA_REQ_SENT:
        case IB_CM_REP_RCVD:
        case IB_CM_MRA_REP_SENT:
        case IB_CM_REP_SENT:
        case IB_CM_MRA_REP_RCVD:
        case IB_CM_ESTABLISHED:
                *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
                                IB_QP_PKEY_INDEX | IB_QP_PORT;
                qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
                if (cm_id_priv->responder_resources)
                        qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
                                                    IB_ACCESS_REMOTE_ATOMIC;
                qp_attr->pkey_index = cm_id_priv->av.pkey_index;
                qp_attr->port_num = cm_id_priv->av.port->port_num;
                ret = 0;
                break;
        default:
                pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
                         __func__, be32_to_cpu(cm_id_priv->id.local_id),
                         cm_id_priv->id.state);
                ret = -EINVAL;
                break;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return ret;
}

static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
                               struct ib_qp_attr *qp_attr,
                               int *qp_attr_mask)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        switch (cm_id_priv->id.state) {
        case IB_CM_REQ_RCVD:
        case IB_CM_MRA_REQ_SENT:
        case IB_CM_REP_RCVD:
        case IB_CM_MRA_REP_SENT:
        case IB_CM_REP_SENT:
        case IB_CM_MRA_REP_RCVD:
        case IB_CM_ESTABLISHED:
                *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
                                IB_QP_DEST_QPN | IB_QP_RQ_PSN;
                qp_attr->ah_attr = cm_id_priv->av.ah_attr;
                qp_attr->path_mtu = cm_id_priv->path_mtu;
                qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
                qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
                if (cm_id_priv->qp_type == IB_QPT_RC ||
                    cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
                        *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
                                         IB_QP_MIN_RNR_TIMER;
                        qp_attr->max_dest_rd_atomic =
                                        cm_id_priv->responder_resources;
                        qp_attr->min_rnr_timer = 0;
                }
                if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
                        *qp_attr_mask |= IB_QP_ALT_PATH;
                        qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
                        qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
                        qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
                        qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
                }
                ret = 0;
                break;
        default:
                pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
                         __func__, be32_to_cpu(cm_id_priv->id.local_id),
                         cm_id_priv->id.state);
                ret = -EINVAL;
                break;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return ret;
}

static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
                               struct ib_qp_attr *qp_attr,
                               int *qp_attr_mask)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        switch (cm_id_priv->id.state) {
        /* Allow transition to RTS before sending REP */
        case IB_CM_REQ_RCVD:
        case IB_CM_MRA_REQ_SENT:

        case IB_CM_REP_RCVD:
        case IB_CM_MRA_REP_SENT:
        case IB_CM_REP_SENT:
        case IB_CM_MRA_REP_RCVD:
        case IB_CM_ESTABLISHED:
                if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
                        *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
                        qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
                        switch (cm_id_priv->qp_type) {
                        case IB_QPT_RC:
                        case IB_QPT_XRC_INI:
                                *qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
                                                 IB_QP_MAX_QP_RD_ATOMIC;
                                qp_attr->retry_cnt = cm_id_priv->retry_count;
                                qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
                                qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
                                /* fall through */
                        case IB_QPT_XRC_TGT:
                                *qp_attr_mask |= IB_QP_TIMEOUT;
                                qp_attr->timeout = cm_id_priv->av.timeout;
                                break;
                        default:
                                break;
                        }
                        if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
                                *qp_attr_mask |= IB_QP_PATH_MIG_STATE;
                                qp_attr->path_mig_state = IB_MIG_REARM;
                        }
                } else {
                        *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
                        qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
                        qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
                        qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
                        qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
                        qp_attr->path_mig_state = IB_MIG_REARM;
                }
                ret = 0;
                break;
        default:
                pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
                         __func__, be32_to_cpu(cm_id_priv->id.local_id),
                         cm_id_priv->id.state);
                ret = -EINVAL;
                break;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return ret;
}

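/*
 * A typical caller sets qp_attr->qp_state to the desired transition, lets
 * the CM fill in the remaining attributes, and then applies the result.
 * A minimal sketch (assuming "qp" is the connection's QP):
 *
 *      struct ib_qp_attr qp_attr;
 *      int qp_attr_mask, ret;
 *
 *      qp_attr.qp_state = IB_QPS_RTR;
 *      ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
 *      if (!ret)
 *              ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
 */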
int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
                       struct ib_qp_attr *qp_attr,
                       int *qp_attr_mask)
{
        struct cm_id_private *cm_id_priv;
        int ret;

        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
        switch (qp_attr->qp_state) {
        case IB_QPS_INIT:
                ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask);
                break;
        case IB_QPS_RTR:
                ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask);
                break;
        case IB_QPS_RTS:
                ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
                break;
        default:
                ret = -EINVAL;
                break;
        }
        return ret;
}
EXPORT_SYMBOL(ib_cm_init_qp_attr);

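/*
 * sysfs plumbing for the per-port counters accumulated above: each counter
 * group becomes a directory of read-only attribute files (the sysfs_ops
 * below only implement .show) under the port's kobject, which in turn
 * lives under the infiniband_cm class device for the HCA.
 */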
static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
                               char *buf)
{
        struct cm_counter_group *group;
        struct cm_counter_attribute *cm_attr;

        group = container_of(obj, struct cm_counter_group, obj);
        cm_attr = container_of(attr, struct cm_counter_attribute, attr);

        return sprintf(buf, "%ld\n",
                       atomic_long_read(&group->counter[cm_attr->index]));
}

static const struct sysfs_ops cm_counter_ops = {
        .show = cm_show_counter
};

static struct kobj_type cm_counter_obj_type = {
        .sysfs_ops = &cm_counter_ops,
        .default_attrs = cm_counter_default_attrs
};

static void cm_release_port_obj(struct kobject *obj)
{
        struct cm_port *cm_port;

        cm_port = container_of(obj, struct cm_port, port_obj);
        kfree(cm_port);
}

static struct kobj_type cm_port_obj_type = {
        .release = cm_release_port_obj
};

static char *cm_devnode(struct device *dev, umode_t *mode)
{
        if (mode)
                *mode = 0666;
        return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
}

struct class cm_class = {
        .owner   = THIS_MODULE,
        .name    = "infiniband_cm",
        .devnode = cm_devnode,
};
EXPORT_SYMBOL(cm_class);

static int cm_create_port_fs(struct cm_port *port)
{
        int i, ret;

        ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type,
                                   &port->cm_dev->device->kobj,
                                   "%d", port->port_num);
        if (ret) {
                kfree(port);
                return ret;
        }

        for (i = 0; i < CM_COUNTER_GROUPS; i++) {
                ret = kobject_init_and_add(&port->counter_group[i].obj,
                                           &cm_counter_obj_type,
                                           &port->port_obj,
                                           "%s", counter_group_names[i]);
                if (ret)
                        goto error;
        }

        return 0;

error:
        while (i--)
                kobject_put(&port->counter_group[i].obj);
        kobject_put(&port->port_obj);
        return ret;
}

static void cm_remove_port_fs(struct cm_port *port)
{
        int i;

        for (i = 0; i < CM_COUNTER_GROUPS; i++)
                kobject_put(&port->counter_group[i].obj);

        kobject_put(&port->port_obj);
}

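/*
 * ib_client add callback: allocate a cm_device with one cm_port slot per
 * physical port, create the class device and sysfs counters, register a
 * GSI MAD agent for the CM management class on every CM-capable port, and
 * advertise IB_PORT_CM_SUP in each port's capability mask.  On failure the
 * ports initialized so far are torn back down in reverse order.
 */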
static void cm_add_one(struct ib_device *ib_device)
{
        struct cm_device *cm_dev;
        struct cm_port *port;
        struct ib_mad_reg_req reg_req = {
                .mgmt_class = IB_MGMT_CLASS_CM,
                .mgmt_class_version = IB_CM_CLASS_VERSION,
        };
        struct ib_port_modify port_modify = {
                .set_port_cap_mask = IB_PORT_CM_SUP
        };
        unsigned long flags;
        int ret;
        int count = 0;
        u8 i;

        cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt),
                         GFP_KERNEL);
        if (!cm_dev)
                return;

        cm_dev->ib_device = ib_device;
        cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
        cm_dev->going_down = 0;
        cm_dev->device = device_create(&cm_class, &ib_device->dev,
                                       MKDEV(0, 0), NULL,
                                       "%s", dev_name(&ib_device->dev));
        if (IS_ERR(cm_dev->device)) {
                kfree(cm_dev);
                return;
        }

        set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
        for (i = 1; i <= ib_device->phys_port_cnt; i++) {
                if (!rdma_cap_ib_cm(ib_device, i))
                        continue;

                port = kzalloc(sizeof *port, GFP_KERNEL);
                if (!port)
                        goto error1;

                cm_dev->port[i-1] = port;
                port->cm_dev = cm_dev;
                port->port_num = i;

                INIT_LIST_HEAD(&port->cm_priv_prim_list);
                INIT_LIST_HEAD(&port->cm_priv_altr_list);

                ret = cm_create_port_fs(port);
                if (ret)
                        goto error1;

                port->mad_agent = ib_register_mad_agent(ib_device, i,
                                                        IB_QPT_GSI,
                                                        &reg_req,
                                                        0,
                                                        cm_send_handler,
                                                        cm_recv_handler,
                                                        port,
                                                        0);
                if (IS_ERR(port->mad_agent))
                        goto error2;

                ret = ib_modify_port(ib_device, i, 0, &port_modify);
                if (ret)
                        goto error3;

                count++;
        }

        if (!count)
                goto free;

        ib_set_client_data(ib_device, &cm_client, cm_dev);

        write_lock_irqsave(&cm.device_lock, flags);
        list_add_tail(&cm_dev->list, &cm.device_list);
        write_unlock_irqrestore(&cm.device_lock, flags);
        return;

error3:
        ib_unregister_mad_agent(port->mad_agent);
error2:
        cm_remove_port_fs(port);
error1:
        port_modify.set_port_cap_mask = 0;
        port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
        while (--i) {
                if (!rdma_cap_ib_cm(ib_device, i))
                        continue;

                port = cm_dev->port[i-1];
                ib_modify_port(ib_device, port->port_num, 0, &port_modify);
                ib_unregister_mad_agent(port->mad_agent);
                cm_remove_port_fs(port);
        }
free:
        device_unregister(cm_dev->device);
        kfree(cm_dev);
}

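/*
 * ib_client remove callback, the counterpart of cm_add_one(): unlink the
 * device, mark it going_down so the handlers above stop queueing work, and
 * then unwind each port (clear IB_PORT_CM_SUP, flush cm.wq, unregister the
 * MAD agent, remove the sysfs entries) before freeing the cm_device.
 */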
static void cm_remove_one(struct ib_device *ib_device, void *client_data)
{
        struct cm_device *cm_dev = client_data;
        struct cm_port *port;
        struct cm_id_private *cm_id_priv;
        struct ib_mad_agent *cur_mad_agent;
        struct ib_port_modify port_modify = {
                .clr_port_cap_mask = IB_PORT_CM_SUP
        };
        unsigned long flags;
        int i;

        if (!cm_dev)
                return;

        write_lock_irqsave(&cm.device_lock, flags);
        list_del(&cm_dev->list);
        write_unlock_irqrestore(&cm.device_lock, flags);

        spin_lock_irq(&cm.lock);
        cm_dev->going_down = 1;
        spin_unlock_irq(&cm.lock);

        for (i = 1; i <= ib_device->phys_port_cnt; i++) {
                if (!rdma_cap_ib_cm(ib_device, i))
                        continue;

                port = cm_dev->port[i-1];
                ib_modify_port(ib_device, port->port_num, 0, &port_modify);
                /* Mark this port as no longer ready to send for any cm_id */
                spin_lock_irq(&cm.lock);
                list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list)
                        cm_id_priv->altr_send_port_not_ready = 1;
                list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list)
                        cm_id_priv->prim_send_port_not_ready = 1;
                spin_unlock_irq(&cm.lock);
                /*
                 * Flush the workqueue after setting going_down.  This
                 * guarantees that the recv handler will not queue any new
                 * work, so the MAD agent can then be unregistered safely.
                 */
                flush_workqueue(cm.wq);
                spin_lock_irq(&cm.state_lock);
                cur_mad_agent = port->mad_agent;
                port->mad_agent = NULL;
                spin_unlock_irq(&cm.state_lock);
                ib_unregister_mad_agent(cur_mad_agent);
                cm_remove_port_fs(port);
        }

        device_unregister(cm_dev->device);
        kfree(cm_dev);
}

static int __init ib_cm_init(void)
{
        int ret;

        memset(&cm, 0, sizeof cm);
        INIT_LIST_HEAD(&cm.device_list);
        rwlock_init(&cm.device_lock);
        spin_lock_init(&cm.lock);
        spin_lock_init(&cm.state_lock);
        cm.listen_service_table = RB_ROOT;
        cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
        cm.remote_id_table = RB_ROOT;
        cm.remote_qp_table = RB_ROOT;
        cm.remote_sidr_table = RB_ROOT;
        idr_init(&cm.local_id_table);
        get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
        INIT_LIST_HEAD(&cm.timewait_list);

        ret = class_register(&cm_class);
        if (ret)
                goto error1;

        cm.wq = alloc_workqueue("ib_cm", 0, 1);
        if (!cm.wq) {
                ret = -ENOMEM;
                goto error2;
        }

        ret = ib_register_client(&cm_client);
        if (ret)
                goto error3;

        return 0;
error3:
        destroy_workqueue(cm.wq);
error2:
        class_unregister(&cm_class);
error1:
        idr_destroy(&cm.local_id_table);
        return ret;
}

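/*
 * Module unload: stop the pending timewait timers first, then unregister
 * the client and drain/destroy the workqueue; only after that is it safe
 * to walk timewait_list once more and free whatever entries remain.
 */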
static void __exit ib_cm_cleanup(void)
{
        struct cm_timewait_info *timewait_info, *tmp;

        spin_lock_irq(&cm.lock);
        list_for_each_entry(timewait_info, &cm.timewait_list, list)
                cancel_delayed_work(&timewait_info->work.work);
        spin_unlock_irq(&cm.lock);

        ib_unregister_client(&cm_client);
        destroy_workqueue(cm.wq);

        list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) {
                list_del(&timewait_info->list);
                kfree(timewait_info);
        }

        class_unregister(&cm_class);
        idr_destroy(&cm.local_id_table);
}

module_init(ib_cm_init);
module_exit(ib_cm_cleanup);