Statistics
| Branch: | Revision:

iof-bird-daemon / proto / bgp / packets.c @ 852b7062

History | View | Annotate | Download (32 KB)

1
/*
2
 *        BIRD -- BGP Packet Processing
3
 *
4
 *        (c) 2000 Martin Mares <mj@ucw.cz>
5
 *
6
 *        Can be freely distributed and used under the terms of the GNU GPL.
7
 */
8

    
9
#undef LOCAL_DEBUG
10

    
11
#include "nest/bird.h"
12
#include "nest/iface.h"
13
#include "nest/protocol.h"
14
#include "nest/route.h"
15
#include "nest/attrs.h"
16
#include "nest/mrtdump.h"
17
#include "conf/conf.h"
18
#include "lib/unaligned.h"
19
#include "lib/socket.h"
20

    
21
#include "nest/cli.h"
22

    
23
#include "bgp.h"
24

    
25
static struct rate_limit rl_rcv_update,  rl_snd_update;
26

    
27
/*
28
 * MRT Dump format is not semantically specified.
29
 * We will use these values in appropriate fields:
30
 *
31
 * Local AS, Remote AS - configured AS numbers for given BGP instance.
32
 * Local IP, Remote IP - IP addresses of the TCP connection (0 if no connection)
33
 *
34
 * We dump two kinds of MRT messages: STATE_CHANGE (for BGP state
35
 * changes) and MESSAGE (for received BGP messages).
36
 *
37
 * STATE_CHANGE uses always AS4 variant, but MESSAGE uses AS4 variant
38
 * only when AS4 session is established and even in that case MESSAGE
39
 * does not use AS4 variant for initial OPEN message. This strange
40
 * behavior is here for compatibility with Quagga and Bgpdump.
41
 */
42

    
43
/*
 * Write the BGP4MP peer header used by MRT dumps to @buf.
 *
 * Layout: remote AS, local AS, interface index, address family, remote
 * address, local address.  With @as4 set the AS numbers occupy four bytes
 * each; otherwise two bytes each, with AS_TRANS substituted for any value
 * above 0xFFFF.  The interface index is 0 when the protocol has no
 * neighbor entry and the addresses are IPA_NONE when there is no socket.
 *
 * Returns the position just behind the written header.
 */
static byte *
mrt_put_bgp4_hdr(byte *buf, struct bgp_conn *conn, int as4)
{
  struct bgp_proto *p = conn->bgp;

  if (!as4)
    {
      unsigned remote = (p->remote_as > 0xFFFF) ? AS_TRANS : p->remote_as;
      unsigned local = (p->local_as > 0xFFFF) ? AS_TRANS : p->local_as;

      put_u16(buf, remote);
      put_u16(buf + 2, local);
      buf += 4;
    }
  else
    {
      put_u32(buf, p->remote_as);
      put_u32(buf + 4, p->local_as);
      buf += 8;
    }

  put_u16(buf, p->neigh ? p->neigh->iface->index : 0);
  put_u16(buf + 2, BGP_AF);
  buf += 4;

  /* Remote address first, then the local one */
  buf = ipa_put_addr(buf, conn->sk ? conn->sk->daddr : IPA_NONE);
  return ipa_put_addr(buf, conn->sk ? conn->sk->saddr : IPA_NONE);
}
69

    
70
static void
71
mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
72
{
73
  byte buf[BGP_MAX_PACKET_LENGTH + 128];
74
  byte *bp = buf + MRTDUMP_HDR_LENGTH;
75
  int as4 = conn->bgp->as4_session;
76

    
77
  bp = mrt_put_bgp4_hdr(bp, conn, as4);
78
  memcpy(bp, pkt, len);
79
  bp += len;
80
  mrt_dump_message(&conn->bgp->p, BGP4MP, as4 ? BGP4MP_MESSAGE_AS4 : BGP4MP_MESSAGE,
81
                   buf, bp-buf);
82
}
83

    
84
static inline u16
85
convert_state(unsigned state)
86
{
87
  /* Convert state from our BS_* values to values used in MRTDump */
88
  return (state == BS_CLOSE) ? 1 : state + 1;
89
}
90

    
91
void
92
mrt_dump_bgp_state_change(struct bgp_conn *conn, unsigned old, unsigned new)
93
{
94
  byte buf[128];
95
  byte *bp = buf + MRTDUMP_HDR_LENGTH;
96

    
97
  bp = mrt_put_bgp4_hdr(bp, conn, 1);
98
  put_u16(bp+0, convert_state(old));
99
  put_u16(bp+2, convert_state(new));
100
  bp += 4;
101
  mrt_dump_message(&conn->bgp->p, BGP4MP, BGP4MP_STATE_CHANGE_AS4, buf, bp-buf);
102
}
103

    
104
/*
 * Build the body of a NOTIFICATION message at @buf from the error code,
 * subcode and data stored in @conn.  Returns the end of the written body.
 */
static byte *
bgp_create_notification(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;	/* referenced by BGP_TRACE */
  byte *data = buf + 2;

  BGP_TRACE(D_PACKETS, "Sending NOTIFICATION(code=%d.%d)", conn->notify_code, conn->notify_subcode);

  buf[0] = conn->notify_code;
  buf[1] = conn->notify_subcode;
  memcpy(data, conn->notify_data, conn->notify_size);

  return data + conn->notify_size;
}
115

    
116
#ifdef IPV6
117
static byte *
118
bgp_put_cap_ipv6(struct bgp_conn *conn UNUSED, byte *buf)
119
{
120
  *buf++ = 1;                /* Capability 1: Multiprotocol extensions */
121
  *buf++ = 4;                /* Capability data length */
122
  *buf++ = 0;                /* We support AF IPv6 */
123
  *buf++ = BGP_AF_IPV6;
124
  *buf++ = 0;                /* RFU */
125
  *buf++ = 1;                /* and SAFI 1 */
126
  return buf;
127
}
128

    
129
#else
130

    
131
static byte *
132
bgp_put_cap_ipv4(struct bgp_conn *conn UNUSED, byte *buf)
133
{
134
  *buf++ = 1;                /* Capability 1: Multiprotocol extensions */
135
  *buf++ = 4;                /* Capability data length */
136
  *buf++ = 0;                /* We support AF IPv4 */
137
  *buf++ = BGP_AF_IPV4;
138
  *buf++ = 0;                /* RFU */
139
  *buf++ = 1;                /* and SAFI 1 */
140
  return buf;
141
}
142
#endif
143

    
144
static byte *
145
bgp_put_cap_rr(struct bgp_conn *conn UNUSED, byte *buf)
146
{
147
  *buf++ = 2;                /* Capability 2: Support for route refresh */
148
  *buf++ = 0;                /* Capability data length */
149
  return buf;
150
}
151

    
152
/*
 * Append the 4-octet AS number capability (code 65) carrying our local
 * AS number at @buf.  Returns the position behind the six written bytes.
 */
static byte *
bgp_put_cap_as4(struct bgp_conn *conn, byte *buf)
{
  buf[0] = 65;			/* Capability 65: Support for 4-octet AS number */
  buf[1] = 4;			/* Capability data length */
  put_u32(buf + 2, conn->bgp->local_as);
  return buf + 6;
}
160

    
161
/*
 * Build the body of an OPEN message at @buf: version, 16-bit AS, hold
 * time, router ID and, unless the connection was started in
 * BSS_CONNECT_NOCAP mode, one optional parameter holding the list of
 * capabilities we announce.  Returns the end of the written body.
 */
static byte *
bgp_create_open(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  byte *caps_start = buf + 12;	/* 1 B params length + 2 B parameter header are filled in later */
  byte *cap = caps_start;
  int cap_len;

  BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
	    BGP_VERSION, p->local_as, p->cf->hold_time, p->local_id);

  buf[0] = BGP_VERSION;
  /* NOTE(review): 0xFFFF itself is sent as AS_TRANS here, unlike in
     mrt_put_bgp4_hdr() which uses <= 0xFFFF -- confirm this is intended */
  put_u16(buf+1, (p->local_as < 0xFFFF) ? p->local_as : AS_TRANS);
  put_u16(buf+3, p->cf->hold_time);
  put_u32(buf+5, p->local_id);

  if (conn->start_state == BSS_CONNECT_NOCAP)
    {
      BGP_TRACE(D_PACKETS, "Skipping capabilities");
      goto no_params;
    }

#ifdef IPV6
  cap = bgp_put_cap_ipv6(conn, cap);
#else
  if (p->cf->advertise_ipv4)
    cap = bgp_put_cap_ipv4(conn, cap);
#endif

  if (p->cf->enable_refresh)
    cap = bgp_put_cap_rr(conn, cap);

  if (conn->want_as4_support)
    cap = bgp_put_cap_as4(conn, cap);

  cap_len = cap - caps_start;
  if (cap_len <= 0)
    goto no_params;

  buf[9]  = cap_len + 2;	/* Optional params len */
  buf[10] = 2;			/* Option: Capability list */
  buf[11] = cap_len;		/* Option length */
  return cap;

 no_params:
  buf[9] = 0;			/* No optional parameters */
  return buf + 10;
}
214

    
215
static unsigned int
216
bgp_encode_prefixes(struct bgp_proto *p, byte *w, struct bgp_bucket *buck, unsigned int remains)
217
{
218
  byte *start = w;
219
  ip_addr a;
220
  int bytes;
221

    
222
  while (!EMPTY_LIST(buck->prefixes) && remains >= (1+sizeof(ip_addr)))
223
    {
224
      struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
225
      DBG("\tDequeued route %I/%d\n", px->n.prefix, px->n.pxlen);
226
      *w++ = px->n.pxlen;
227
      bytes = (px->n.pxlen + 7) / 8;
228
      a = px->n.prefix;
229
      ipa_hton(a);
230
      memcpy(w, &a, bytes);
231
      w += bytes;
232
      remains -= bytes + 1;
233
      rem_node(&px->bucket_node);
234
      fib_delete(&p->prefix_fib, px);
235
    }
236
  return w - start;
237
}
238

    
239
static void
240
bgp_flush_prefixes(struct bgp_proto *p, struct bgp_bucket *buck)
241
{
242
  while (!EMPTY_LIST(buck->prefixes))
243
    {
244
      struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
245
      log(L_ERR "%s: - route %I/%d skipped", p->p.name, px->n.prefix, px->n.pxlen);
246
      rem_node(&px->bucket_node);
247
      fib_delete(&p->prefix_fib, px);
248
    }
249
}
250

    
251
#ifndef IPV6                /* IPv4 version */
252

    
253
/*
 * Build the body of an IPv4 UPDATE message at @buf.
 *
 * The body consists of the withdrawn routes (length + prefixes taken from
 * the withdraw bucket), the path attributes (length + attributes of the
 * first sendable bucket in the bucket queue) and the NLRI of that bucket.
 * Empty buckets found on the way are freed; buckets whose attributes do
 * not fit are dropped together with their prefixes.
 *
 * Returns the end of the written body, or NULL when there was neither
 * anything to withdraw nor anything to announce.
 */
static byte *
bgp_create_update(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  struct bgp_bucket *buck;
  int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4;
  byte *w;
  int wd_size = 0;
  int r_size = 0;
  int a_size = 0;
  int attrs_written = 0;	/* Set once the attribute length field is in place */

  w = buf+2;
  if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
    {
      DBG("Withdrawn routes:\n");
      wd_size = bgp_encode_prefixes(p, w, buck, remains);
      w += wd_size;
      remains -= wd_size;
    }
  put_u16(buf, wd_size);

  /* Announce routes only when enough room is left for attributes and NLRI */
  if (remains >= 3072)
    {
      while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
	{
	  if (EMPTY_LIST(buck->prefixes))
	    {
	      DBG("Deleting empty bucket %p\n", buck);
	      rem_node(&buck->send_node);
	      bgp_free_bucket(p, buck);
	      continue;
	    }

	  DBG("Processing bucket %p\n", buck);
	  a_size = bgp_encode_attrs(p, w+2, buck->eattrs, 2048);

	  if (a_size < 0)
	    {
	      log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name);
	      bgp_flush_prefixes(p, buck);
	      rem_node(&buck->send_node);
	      bgp_free_bucket(p, buck);
	      continue;
	    }

	  put_u16(w, a_size);
	  w += a_size + 2;
	  attrs_written = 1;
	  r_size = bgp_encode_prefixes(p, w, buck, remains - a_size);
	  w += r_size;
	  break;
	}
    }

  /*
   * The attribute length field is mandatory even when no attributes are
   * sent.  An explicit flag is used instead of testing a_size, because
   * a_size stays negative when the last bucket tried was skipped, which
   * would otherwise leave the field out of a withdraw-only message.
   */
  if (!attrs_written)
    {
      put_u16(w, 0);
      w += 2;
    }

  if (wd_size || r_size)
    {
      BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
      return w;
    }
  else
    return NULL;
}
318

    
319
#else                /* IPv6 version */
320

    
321
/*
 * Build the body of an IPv6 UPDATE message at @buf.
 *
 * The IPv4 withdrawn-routes field is always empty; withdrawals travel in
 * an MP_UNREACH_NLRI attribute and announcements in an MP_REACH_NLRI
 * attribute appended after the regular attributes of the first sendable
 * bucket.  Empty buckets are freed; buckets that cannot be sent are
 * dropped together with their prefixes.
 *
 * Returns the end of the written body, or NULL when no attribute was
 * produced at all.
 */
static byte *
bgp_create_update(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  struct bgp_bucket *buck;
  int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4;
  int size;
  byte *w = buf + 4;		/* Skip both 16-bit length fields */
  byte *tmp;
  ea_list *ea;

  put_u16(buf, 0);		/* Withdrawn routes length */

  buck = p->withdraw_bucket;
  if (buck && !EMPTY_LIST(buck->prefixes))
    {
      DBG("Withdrawn routes:\n");
      tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8);
      tmp[0] = 0;
      tmp[1] = BGP_AF_IPV6;
      tmp[2] = 1;
      ea->attrs[0].u.ptr->length = 3 + bgp_encode_prefixes(p, tmp + 3, buck, remains-11);
      size = bgp_encode_attrs(p, w, ea, remains);
      ASSERT(size >= 0);
      w += size;
      remains -= size;
    }

  if (remains >= 3072)
    {
      for (;;)
	{
	  int rem_stored, second, has_ll;
	  byte *w_stored, *tstart;
	  ip_addr *ipp, ip, ip_ll;
	  eattr *nh;
	  neighbor *n;

	  buck = (struct bgp_bucket *) HEAD(p->bucket_queue);
	  if (!buck->send_node.next)
	    break;

	  if (EMPTY_LIST(buck->prefixes))
	    {
	      DBG("Deleting empty bucket %p\n", buck);
	      rem_node(&buck->send_node);
	      bgp_free_bucket(p, buck);
	      continue;
	    }

	  DBG("Processing bucket %p\n", buck);

	  /* Remember the position so a dropped bucket can be rolled back */
	  rem_stored = remains;
	  w_stored = w;

	  size = bgp_encode_attrs(p, w, buck->eattrs, 2048);
	  if (size < 0)
	    {
	      log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name);
	      bgp_flush_prefixes(p, buck);
	      rem_node(&buck->send_node);
	      bgp_free_bucket(p, buck);
	      continue;
	    }
	  w += size;
	  remains -= size;

	  /* The NEXT_HOP eattr holds two addresses here, unless a filter
	     modified it */
	  nh = ea_find(buck->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
	  ASSERT(nh);
	  second = (nh->u.ptr->length == NEXT_HOP_LENGTH);
	  ipp = (ip_addr *) nh->u.ptr->data;
	  ip = ipp[0];
	  ip_ll = IPA_NONE;

	  if (ipa_equal(ip, p->source_addr))
	    ip_ll = p->local_link;
	  else
	    {
	      /*
	       * A 'third party' next hop on the same interface as the peer
	       * should be accompanied by a link-local next hop.  The one we
	       * received (second half of the BA_NEXT_HOP eattr) is used.
	       * When none was received (e.g. a static route), the
	       * missing_lladdr option decides: announce ourselves as the
	       * next hop, drop the routes, or send the global address
	       * alone.  The last variant is contrary to RFC 2545 and not
	       * accepted by Quagga.
	       */
	      n = neigh_find(&p->p, &ip, 0);
	      if (n && p->neigh && n->iface == p->neigh->iface)
		{
		  if (second && ipa_nonzero(ipp[1]))
		    ip_ll = ipp[1];
		  else if (p->cf->missing_lladdr == MLL_SELF)
		    {
		      ip = p->source_addr;
		      ip_ll = p->local_link;
		    }
		  else if (p->cf->missing_lladdr == MLL_DROP)
		    {
		      log(L_ERR "%s: Missing link-local next hop address, skipping corresponding routes", p->p.name);
		      w = w_stored;
		      remains = rem_stored;
		      bgp_flush_prefixes(p, buck);
		      rem_node(&buck->send_node);
		      bgp_free_bucket(p, buck);
		      continue;
		    }
		  /* MLL_IGNORE: keep the global address only */
		}
	    }

	  tstart = tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8);
	  *tmp++ = 0;
	  *tmp++ = BGP_AF_IPV6;
	  *tmp++ = 1;

	  /* Next hop: global address, optionally followed by link-local */
	  has_ll = ipa_nonzero(ip_ll);
	  *tmp++ = has_ll ? 32 : 16;
	  ipa_hton(ip);
	  memcpy(tmp, &ip, 16);
	  tmp += 16;
	  if (has_ll)
	    {
	      ipa_hton(ip_ll);
	      memcpy(tmp, &ip_ll, 16);
	      tmp += 16;
	    }

	  *tmp++ = 0;			/* No SNPA information */
	  tmp += bgp_encode_prefixes(p, tmp, buck, remains - (8+3+32+1));
	  ea->attrs[0].u.ptr->length = tmp - tstart;
	  size = bgp_encode_attrs(p, w, ea, remains);
	  ASSERT(size >= 0);
	  w += size;
	  break;
	}
    }

  size = w - (buf+4);
  put_u16(buf+2, size);		/* Total path attribute length */
  lp_flush(bgp_linpool);

  if (!size)
    return NULL;

  BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
  return w;
}
475

    
476
#endif
477

    
478
/*
 * Build the body of a ROUTE-REFRESH message at @buf, requesting a
 * refresh for address family BGP_AF, SAFI 1.  Returns the end of the
 * four written bytes.
 */
static byte *
bgp_create_route_refresh(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;	/* referenced by BGP_TRACE */

  BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH");

  buf[0] = 0;			/* AFI, high byte */
  buf[1] = BGP_AF;		/* AFI, low byte */
  buf[2] = 0;			/* RFU */
  buf[3] = 1;			/* SAFI 1 */
  return buf + 4;
}
490

    
491
static void
492
bgp_create_header(byte *buf, unsigned int len, unsigned int type)
493
{
494
  memset(buf, 0xff, 16);                /* Marker */
495
  put_u16(buf+16, len);
496
  buf[18] = type;
497
}
498

    
499
/**
500
 * bgp_fire_tx - transmit packets
501
 * @conn: connection
502
 *
503
 * Whenever the transmit buffers of the underlying TCP connection
504
 * are free and we have any packets queued for sending, the socket functions
505
 * call bgp_fire_tx() which takes care of selecting the highest priority packet
506
 * queued (Notification > Keepalive > Open > Update), assembling its header
507
 * and body and sending it to the connection.
508
 */
509
static int
510
bgp_fire_tx(struct bgp_conn *conn)
511
{
512
  struct bgp_proto *p = conn->bgp;
513
  unsigned int s = conn->packets_to_send;
514
  sock *sk = conn->sk;
515
  byte *buf, *pkt, *end;
516
  int type;
517

    
518
  if (!sk)
519
    {
520
      conn->packets_to_send = 0;
521
      return 0;
522
    }
523
  buf = sk->tbuf;
524
  pkt = buf + BGP_HEADER_LENGTH;
525

    
526
  if (s & (1 << PKT_SCHEDULE_CLOSE))
527
    {
528
      /* We can finally close connection and enter idle state */
529
      bgp_conn_enter_idle_state(conn);
530
      return 0;
531
    }
532
  if (s & (1 << PKT_NOTIFICATION))
533
    {
534
      s = 1 << PKT_SCHEDULE_CLOSE;
535
      type = PKT_NOTIFICATION;
536
      end = bgp_create_notification(conn, pkt);
537
    }
538
  else if (s & (1 << PKT_KEEPALIVE))
539
    {
540
      s &= ~(1 << PKT_KEEPALIVE);
541
      type = PKT_KEEPALIVE;
542
      end = pkt;                        /* Keepalives carry no data */
543
      BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
544
      bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
545
    }
546
  else if (s & (1 << PKT_OPEN))
547
    {
548
      s &= ~(1 << PKT_OPEN);
549
      type = PKT_OPEN;
550
      end = bgp_create_open(conn, pkt);
551
    }
552
  else if (s & (1 << PKT_ROUTE_REFRESH))
553
    {
554
      s &= ~(1 << PKT_ROUTE_REFRESH);
555
      type = PKT_ROUTE_REFRESH;
556
      end = bgp_create_route_refresh(conn, pkt);
557
    }
558
  else if (s & (1 << PKT_UPDATE))
559
    {
560
      end = bgp_create_update(conn, pkt);
561
      type = PKT_UPDATE;
562
      if (!end)
563
        {
564
          conn->packets_to_send = 0;
565
          return 0;
566
        }
567
    }
568
  else
569
    return 0;
570
  conn->packets_to_send = s;
571
  bgp_create_header(buf, end - buf, type);
572
  return sk_send(sk, end - buf);
573
}
574

    
575
/**
576
 * bgp_schedule_packet - schedule a packet for transmission
577
 * @conn: connection
578
 * @type: packet type
579
 *
580
 * Schedule a packet of type @type to be sent as soon as possible.
581
 */
582
void
583
bgp_schedule_packet(struct bgp_conn *conn, int type)
584
{
585
  DBG("BGP: Scheduling packet type %d\n", type);
586
  conn->packets_to_send |= 1 << type;
587
  if (conn->sk && conn->sk->tpos == conn->sk->tbuf)
588
    ev_schedule(conn->tx_ev);
589
}
590

    
591
/*
 * Event hook: keep sending scheduled packets on the connection passed as
 * @vconn for as long as bgp_fire_tx() returns a positive value.
 */
void
bgp_kick_tx(void *vconn)
{
  struct bgp_conn *conn = vconn;

  DBG("BGP: kicking TX\n");
  for (;;)
    {
      if (bgp_fire_tx(conn) <= 0)
	break;
    }
}
600

    
601
void
602
bgp_tx(sock *sk)
603
{
604
  struct bgp_conn *conn = sk->data;
605

    
606
  DBG("BGP: TX hook\n");
607
  while (bgp_fire_tx(conn) > 0)
608
    ;
609
}
610

    
611
/* Capability negotiation as per RFC 2842 */
612

    
613
void
614
bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
615
{
616
  // struct bgp_proto *p = conn->bgp;
617
  int cl;
618

    
619
  while (len > 0)
620
    {
621
      if (len < 2 || len < 2 + opt[1])
622
        goto err;
623
      
624
      cl = opt[1];
625

    
626
      switch (opt[0])
627
        {
628
        case 2:        /* Route refresh capability, RFC 2918 */
629
          if (cl != 0)
630
            goto err;
631
          conn->peer_refresh_support = 1;
632
          break;
633

    
634
        case 65: /* AS4 capability, RFC 4893 */ 
635
          if (cl != 4)
636
            goto err;
637
          conn->peer_as4_support = 1;
638
          if (conn->want_as4_support)
639
            conn->advertised_as = get_u32(opt + 2);
640
          break;
641

    
642
          /* We can safely ignore all other capabilities */
643
        }
644
      len -= 2 + cl;
645
      opt += 2 + cl;
646
    }
647
  return;
648

    
649
    err:
650
  bgp_error(conn, 2, 0, NULL, 0);
651
  return;
652
}
653

    
654
static int
655
bgp_parse_options(struct bgp_conn *conn, byte *opt, int len)
656
{
657
  struct bgp_proto *p = conn->bgp;
658
  int ol;
659

    
660
  while (len > 0)
661
    {
662
      if (len < 2 || len < 2 + opt[1])
663
        { bgp_error(conn, 2, 0, NULL, 0); return 0; }
664
#ifdef LOCAL_DEBUG
665
      {
666
        int i;
667
        DBG("\tOption %02x:", opt[0]);
668
        for(i=0; i<opt[1]; i++)
669
          DBG(" %02x", opt[2+i]);
670
        DBG("\n");
671
      }
672
#endif
673

    
674
      ol = opt[1];
675
      switch (opt[0])
676
        {
677
        case 2:
678
          if (conn->start_state == BSS_CONNECT_NOCAP)
679
            BGP_TRACE(D_PACKETS, "Ignoring received capabilities");
680
          else
681
            bgp_parse_capabilities(conn, opt + 2, ol);
682
          break;
683

    
684
        default:
685
          /*
686
           *  BGP specs don't tell us to send which option
687
           *  we didn't recognize, but it's common practice
688
           *  to do so. Also, capability negotiation with
689
           *  Cisco routers doesn't work without that.
690
           */
691
          bgp_error(conn, 2, 4, opt, ol);
692
          return 0;
693
        }
694
      len -= 2 + ol;
695
      opt += 2 + ol;
696
    }
697
  return 0;
698
}
699

    
700
static void
701
bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
702
{
703
  struct bgp_conn *other;
704
  struct bgp_proto *p = conn->bgp;
705
  unsigned hold;
706
  u16 base_as;
707
  u32 id;
708

    
709
  /* Check state */
710
  if (conn->state != BS_OPENSENT)
711
    { bgp_error(conn, 5, 0, NULL, 0); return; }
712

    
713
  /* Check message contents */
714
  if (len < 29 || len != 29 + pkt[28])
715
    { bgp_error(conn, 1, 2, pkt+16, 2); return; }
716
  if (pkt[19] != BGP_VERSION)
717
    { bgp_error(conn, 2, 1, pkt+19, 1); return; } /* RFC 1771 says 16 bits, draft-09 tells to use 8 */
718
  conn->advertised_as = base_as = get_u16(pkt+20);
719
  hold = get_u16(pkt+22);
720
  id = get_u32(pkt+24);
721
  BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id);
722

    
723
  if (bgp_parse_options(conn, pkt+29, pkt[28]))
724
    return;
725

    
726
  if (hold > 0 && hold < 3)
727
    { bgp_error(conn, 2, 6, pkt+22, 2); return; }
728

    
729
  if (!id || id == 0xffffffff || id == p->local_id)
730
    { bgp_error(conn, 2, 3, pkt+24, -4); return; }
731

    
732
  if ((conn->advertised_as != base_as) && (base_as != AS_TRANS))
733
    log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);
734

    
735
  if (conn->advertised_as != p->remote_as)
736
    {
737
      if (conn->peer_as4_support)
738
        {
739
          u32 val = htonl(conn->advertised_as);
740
          bgp_error(conn, 2, 2, (byte *) &val, 4);
741
        }
742
      else
743
        bgp_error(conn, 2, 2, pkt+20, 2);
744

    
745
      return;
746
    }
747

    
748
  /* Check the other connection */
749
  other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
750
  switch (other->state)
751
    {
752
    case BS_IDLE:
753
    case BS_CONNECT:
754
    case BS_ACTIVE:
755
    case BS_OPENSENT:
756
    case BS_CLOSE:
757
      break;
758
    case BS_OPENCONFIRM:
759
      if ((p->local_id < id) == (conn == &p->incoming_conn))
760
        {
761
          /* Should close the other connection */
762
          BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
763
          bgp_error(other, 6, 7, NULL, 0);
764
          break;
765
        }
766
      /* Fall thru */
767
    case BS_ESTABLISHED:
768
      /* Should close this connection */
769
      BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
770
      bgp_error(conn, 6, 7, NULL, 0);
771
      return;
772
    default:
773
      bug("bgp_rx_open: Unknown state");
774
    }
775

    
776
  /* Update our local variables */
777
  conn->hold_time = MIN(hold, p->cf->hold_time);
778
  conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
779
  p->remote_id = id;
780
  p->as4_session = conn->want_as4_support && conn->peer_as4_support;
781

    
782
  DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, p->as4_session);
783

    
784
  bgp_schedule_packet(conn, PKT_KEEPALIVE);
785
  bgp_start_timer(conn->hold_timer, conn->hold_time);
786
  bgp_conn_enter_openconfirm_state(conn);
787
}
788

    
789
/*
 * DECODE_PREFIX - read one length-prefixed address prefix
 *
 * Consumes a prefix from the byte pointer pp, decreasing the remaining
 * length ll accordingly.  The result is left in the caller's variables
 * 'prefix' (host order, masked to the prefix length) and 'pxlen'.  On a
 * prefix longer than an address or on a truncated buffer it sets the
 * caller's 'err' (10 resp. 1) and jumps to the caller's label 'bad'.
 */
#define DECODE_PREFIX(pp, ll) do {				\
  int px_bits = *pp++;						\
  int px_bytes;							\
  ll--;								\
  if (px_bits > BITS_PER_IP_ADDRESS) { err=10; goto bad; }	\
  px_bytes = (px_bits+7) / 8;					\
  if (ll < px_bytes) { err=1; goto bad; }			\
  memcpy(&prefix, pp, px_bytes);				\
  pp += px_bytes;						\
  ll -= px_bytes;						\
  ipa_ntoh(prefix);						\
  prefix = ipa_and(prefix, ipa_mkmask(px_bits));		\
  pxlen = px_bits;						\
} while (0)
803

    
804
static inline int
805
bgp_set_next_hop(struct bgp_proto *p, rta *a)
806
{
807
  struct eattr *nh = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
808
  ip_addr nexthop = *(ip_addr *) nh->u.ptr->data;
809

    
810
  if (p->cf->gw_mode == GW_DIRECT)
811
    {
812
      neighbor *ng = neigh_find(&p->p, &nexthop, 0) ? : p->neigh;
813
      if (ng->scope == SCOPE_HOST)
814
        return 0;
815

    
816
      a->dest = RTD_ROUTER;
817
      a->gw = ng->addr;
818
      a->iface = ng->iface;
819
      a->hostentry = NULL;
820
    }
821
  else /* GW_RECURSIVE */
822
    rta_set_recursive_next_hop(p->p.table, a, p->igp_table, &nexthop);
823

    
824
  return 1;
825
}
826

    
827
#ifndef IPV6                /* IPv4 version */
828

    
829
static void
830
bgp_do_rx_update(struct bgp_conn *conn,
831
                 byte *withdrawn, int withdrawn_len,
832
                 byte *nlri, int nlri_len,
833
                 byte *attrs, int attr_len)
834
{
835
  struct bgp_proto *p = conn->bgp;
836
  rta *a0;
837
  rta *a = NULL;
838
  ip_addr prefix;
839
  net *n;
840
  int err = 0, pxlen;
841

    
842
  /* Withdraw routes */
843
  while (withdrawn_len)
844
    {
845
      DECODE_PREFIX(withdrawn, withdrawn_len);
846
      DBG("Withdraw %I/%d\n", prefix, pxlen);
847
      if (n = net_find(p->p.table, prefix, pxlen))
848
        rte_update(p->p.table, n, &p->p, &p->p, NULL);
849
    }
850

    
851
  if (!attr_len && !nlri_len)                /* shortcut */
852
    return;
853

    
854
  a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, nlri_len);
855
  if (a0 && nlri_len && bgp_set_next_hop(p, a0))
856
    {
857
      a = rta_lookup(a0);
858
      while (nlri_len)
859
        {
860
          rte *e;
861
          DECODE_PREFIX(nlri, nlri_len);
862
          DBG("Add %I/%d\n", prefix, pxlen);
863
          e = rte_get_temp(rta_clone(a));
864
          n = net_get(p->p.table, prefix, pxlen);
865
          e->net = n;
866
          e->pflags = 0;
867
          rte_update(p->p.table, n, &p->p, &p->p, e);
868
          if (bgp_apply_limits(p) < 0)
869
            goto bad2;
870
        }
871
      rta_free(a);
872
    }
873

    
874
  return;
875

    
876
 bad:
877
  bgp_error(conn, 3, err, NULL, 0);
878
 bad2:
879
  if (a)
880
    rta_free(a);
881
  return;
882
}
883

    
884
#else                        /* IPv6 version */
885

    
886
/*
 * DO_NLRI - statement prefix for processing one multiprotocol NLRI attribute
 *
 * Loads the attribute body recorded in p->name_start / p->name_len into
 * the caller's variables (start, x, len, len0), strips the 3-byte
 * AFI/SAFI header into af and sub, and jumps to the caller's label 'bad'
 * when a non-empty body is shorter than that header.  The macro ends
 * with an 'if', so the statement following it runs only for AF IPv6.
 */
#define DO_NLRI(name)					\
  x = p->name##_start;					\
  start = x;						\
  len0 = p->name##_len;					\
  len = len0;						\
  if (!len)						\
    af = 0;						\
  else							\
    {							\
      if (len < 3) goto bad;				\
      af = get_u16(x);					\
      sub = x[2];					\
      x += 3;						\
      len -= 3;						\
      DBG("\tNLRI AF=%d sub=%d len=%d\n", af, sub, len);\
    }							\
  if (af == BGP_AF_IPV6)
901

    
902
static void
903
bgp_do_rx_update(struct bgp_conn *conn,
904
                 byte *withdrawn, int withdrawn_len,
905
                 byte *nlri, int nlri_len,
906
                 byte *attrs, int attr_len)
907
{
908
  struct bgp_proto *p = conn->bgp;
909
  byte *start, *x;
910
  int len, len0;
911
  unsigned af, sub;
912
  rta *a0;
913
  rta *a = NULL;
914
  ip_addr prefix;
915
  net *n;
916
  int err = 0, pxlen;
917

    
918
  p->mp_reach_len = 0;
919
  p->mp_unreach_len = 0;
920
  a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, 0);
921
  if (!a0)
922
    return;
923

    
924
  DO_NLRI(mp_unreach)
925
    {
926
      while (len)
927
        {
928
          DECODE_PREFIX(x, len);
929
          DBG("Withdraw %I/%d\n", prefix, pxlen);
930
          if (n = net_find(p->p.table, prefix, pxlen))
931
            rte_update(p->p.table, n, &p->p, &p->p, NULL);
932
        }
933
    }
934

    
935
  DO_NLRI(mp_reach)
936
    {
937
      /* Create fake NEXT_HOP attribute */
938
      if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2)
939
        goto bad;
940

    
941
      ip_addr *nh = (ip_addr *) bgp_attach_attr_wa(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
942
      memcpy(nh, x+1, 16);
943
      ipa_ntoh(nh[0]);
944

    
945
      /* We store received link local address in the other part of BA_NEXT_HOP eattr. */
946
      if (*x == 32)
947
        {
948
          memcpy(nh+1, x+17, 16);
949
          ipa_ntoh(nh[1]);
950
        }
951
      else
952
        nh[1] = IPA_NONE;
953

    
954
      /* Also ignore one reserved byte */
955
      len -= *x + 2;
956
      x += *x + 2;
957

    
958
      if (bgp_get_nexthop(p, a0))
959
        {
960
          a = rta_lookup(a0);
961
          while (len)
962
            {
963
              rte *e;
964
              DECODE_PREFIX(x, len);
965
              DBG("Add %I/%d\n", prefix, pxlen);
966
              e = rte_get_temp(rta_clone(a));
967
              n = net_get(p->p.table, prefix, pxlen);
968
              e->net = n;
969
              e->pflags = 0;
970
              rte_update(p->p.table, n, &p->p, &p->p, e);
971
              if (bgp_apply_limits(p) < 0)
972
                goto bad2;
973
            }
974
          rta_free(a);
975
        }
976
    }
977

    
978
  return;
979

    
980
 bad:
981
  bgp_error(conn, 3, 9, start, len0);
982
 bad2:
983
  if (a)
984
    rta_free(a);
985
  return;
986
}
987

    
988
#endif
989

    
990
static void
991
bgp_rx_update(struct bgp_conn *conn, byte *pkt, int len)
992
{
993
  struct bgp_proto *p = conn->bgp;
994
  byte *withdrawn, *attrs, *nlri;
995
  int withdrawn_len, attr_len, nlri_len;
996

    
997
  BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE");
998

    
999
  /* Workaround for some BGP implementations that skip initial KEEPALIVE */
1000
  if (conn->state == BS_OPENCONFIRM)
1001
    bgp_conn_enter_established_state(conn);
1002

    
1003
  if (conn->state != BS_ESTABLISHED)
1004
    { bgp_error(conn, 5, 0, NULL, 0); return; }
1005
  bgp_start_timer(conn->hold_timer, conn->hold_time);
1006

    
1007
  /* Find parts of the packet and check sizes */
1008
  if (len < 23)
1009
    {
1010
      bgp_error(conn, 1, 2, pkt+16, 2);
1011
      return;
1012
    }
1013
  withdrawn = pkt + 21;
1014
  withdrawn_len = get_u16(pkt + 19);
1015
  if (withdrawn_len + 23 > len)
1016
    goto malformed;
1017
  attrs = withdrawn + withdrawn_len + 2;
1018
  attr_len = get_u16(attrs - 2);
1019
  if (withdrawn_len + attr_len + 23 > len)
1020
    goto malformed;
1021
  nlri = attrs + attr_len;
1022
  nlri_len = len - withdrawn_len - attr_len - 23;
1023
  if (!attr_len && nlri_len)
1024
    goto malformed;
1025
  DBG("Sizes: withdrawn=%d, attrs=%d, NLRI=%d\n", withdrawn_len, attr_len, nlri_len);
1026

    
1027
  lp_flush(bgp_linpool);
1028

    
1029
  bgp_do_rx_update(conn, withdrawn, withdrawn_len, nlri, nlri_len, attrs, attr_len);
1030
  return;
1031

    
1032
malformed:
1033
  bgp_error(conn, 3, 1, NULL, 0);
1034
}
1035

    
1036
static struct {
1037
  byte major, minor;
1038
  byte *msg;
1039
} bgp_msg_table[] = {
1040
  { 1, 0, "Invalid message header" },
1041
  { 1, 1, "Connection not synchronized" },
1042
  { 1, 2, "Bad message length" },
1043
  { 1, 3, "Bad message type" },
1044
  { 2, 0, "Invalid OPEN message" },
1045
  { 2, 1, "Unsupported version number" },
1046
  { 2, 2, "Bad peer AS" },
1047
  { 2, 3, "Bad BGP identifier" },
1048
  { 2, 4, "Unsupported optional parameter" },
1049
  { 2, 5, "Authentication failure" },
1050
  { 2, 6, "Unacceptable hold time" },
1051
  { 2, 7, "Required capability missing" }, /* [RFC3392] */
1052
  { 2, 8, "No supported AFI/SAFI" }, /* This error msg is nonstandard */
1053
  { 3, 0, "Invalid UPDATE message" },
1054
  { 3, 1, "Malformed attribute list" },
1055
  { 3, 2, "Unrecognized well-known attribute" },
1056
  { 3, 3, "Missing mandatory attribute" },
1057
  { 3, 4, "Invalid attribute flags" },
1058
  { 3, 5, "Invalid attribute length" },
1059
  { 3, 6, "Invalid ORIGIN attribute" },
1060
  { 3, 7, "AS routing loop" },                /* Deprecated */
1061
  { 3, 8, "Invalid NEXT_HOP attribute" },
1062
  { 3, 9, "Optional attribute error" },
1063
  { 3, 10, "Invalid network field" },
1064
  { 3, 11, "Malformed AS_PATH" },
1065
  { 4, 0, "Hold timer expired" },
1066
  { 5, 0, "Finite state machine error" },
1067
  { 6, 0, "Cease" }, /* Subcodes are according to [RFC4486] */
1068
  { 6, 1, "Maximum number of prefixes reached" },
1069
  { 6, 2, "Administrative shutdown" },
1070
  { 6, 3, "Peer de-configured" },
1071
  { 6, 4, "Administrative reset" },
1072
  { 6, 5, "Connection rejected" },
1073
  { 6, 6, "Other configuration change" },
1074
  { 6, 7, "Connection collision resolution" },
1075
  { 6, 8, "Out of Resources" }
1076
};
1077

    
1078
/**
1079
 * bgp_error_dsc - return BGP error description
1080
 * @code: BGP error code
1081
 * @subcode: BGP error subcode
1082
 *
1083
 * bgp_error_dsc() returns error description for BGP errors
1084
 * which might be static string or given temporary buffer.
1085
 */
1086
const char *
1087
bgp_error_dsc(unsigned code, unsigned subcode)
1088
{
1089
  static char buff[32];
1090
  unsigned i;
1091
  for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++)
1092
    if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode)
1093
      {
1094
        return bgp_msg_table[i].msg;
1095
      }
1096

    
1097
  bsprintf(buff, "Unknown error %d.%d", code, subcode);
1098
  return buff;
1099
}
1100

    
1101
void
1102
bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len)
1103
{
1104
  const byte *name;
1105
  byte *t, argbuf[36];
1106
  unsigned i;
1107

    
1108
  /* Don't report Cease messages generated by myself */
1109
  if (code == 6 && class == BE_BGP_TX)
1110
    return;
1111

    
1112
  name = bgp_error_dsc(code, subcode);
1113
  t = argbuf;
1114
  if (len)
1115
    {
1116
      *t++ = ':';
1117
      *t++ = ' ';
1118

    
1119
      if ((code == 2) && (subcode == 2) && ((len == 2) || (len == 4)))
1120
        {
1121
          /* Bad peer AS - we would like to print the AS */
1122
          t += bsprintf(t, "%d", (len == 2) ? get_u16(data) : get_u32(data));
1123
          goto done;
1124
        }
1125
      if (len > 16)
1126
        len = 16;
1127
      for (i=0; i<len; i++)
1128
        t += bsprintf(t, "%02x", data[i]);
1129
    }
1130
 done:
1131
  *t = 0;
1132
  log(L_REMOTE "%s: %s: %s%s", p->p.name, msg, name, argbuf);
1133
}
1134

    
1135
static void
1136
bgp_rx_notification(struct bgp_conn *conn, byte *pkt, int len)
1137
{
1138
  struct bgp_proto *p = conn->bgp;
1139
  if (len < 21)
1140
    {
1141
      bgp_error(conn, 1, 2, pkt+16, 2);
1142
      return;
1143
    }
1144

    
1145
  unsigned code = pkt[19];
1146
  unsigned subcode = pkt[20];
1147
  int err = (code != 6);
1148

    
1149
  bgp_log_error(p, BE_BGP_RX, "Received", code, subcode, pkt+21, len-21);
1150
  bgp_store_error(p, conn, BE_BGP_RX, (code << 16) | subcode);
1151

    
1152
#ifndef IPV6
1153
  if ((code == 2) && ((subcode == 4) || (subcode == 7))
1154
      /* Error related to capability:
1155
       * 4 - Peer does not support capabilities at all.
1156
       * 7 - Peer request some capability. Strange unless it is IPv6 only peer.
1157
       */
1158
      && (p->cf->capabilities == 2)
1159
      /* Capabilities are not explicitly enabled or disabled, therefore heuristic is used */
1160
      && (conn->start_state == BSS_CONNECT)
1161
      /* Failed connection attempt have used capabilities */
1162
      && (p->cf->remote_as <= 0xFFFF))
1163
      /* Not possible with disabled capabilities */
1164
    {
1165
      /* We try connect without capabilities */
1166
      log(L_WARN "%s: Capability related error received, retry with capabilities disabled", p->p.name);
1167
      p->start_state = BSS_CONNECT_NOCAP;
1168
      err = 0;
1169
    }
1170
#endif
1171

    
1172
  bgp_conn_enter_close_state(conn);
1173
  bgp_schedule_packet(conn, PKT_SCHEDULE_CLOSE);
1174

    
1175
  if (err) 
1176
    {
1177
      bgp_update_startup_delay(p);
1178
      bgp_stop(p, 0);
1179
    }
1180
}
1181

    
1182
static void
1183
bgp_rx_keepalive(struct bgp_conn *conn)
1184
{
1185
  struct bgp_proto *p = conn->bgp;
1186

    
1187
  BGP_TRACE(D_PACKETS, "Got KEEPALIVE");
1188
  bgp_start_timer(conn->hold_timer, conn->hold_time);
1189
  switch (conn->state)
1190
    {
1191
    case BS_OPENCONFIRM:
1192
      bgp_conn_enter_established_state(conn);
1193
      break;
1194
    case BS_ESTABLISHED:
1195
      break;
1196
    default:
1197
      bgp_error(conn, 5, 0, NULL, 0);
1198
    }
1199
}
1200

    
1201
static void
1202
bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, int len)
1203
{
1204
  struct bgp_proto *p = conn->bgp;
1205

    
1206
  BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH");
1207

    
1208
  if (conn->state != BS_ESTABLISHED)
1209
    { bgp_error(conn, 5, 0, NULL, 0); return; }
1210

    
1211
  if (!p->cf->enable_refresh)
1212
    { bgp_error(conn, 1, 3, pkt+18, 1); return; }
1213

    
1214
  if (len != (BGP_HEADER_LENGTH + 4))
1215
    { bgp_error(conn, 1, 2, pkt+16, 2); return; }
1216

    
1217
  /* FIXME - we ignore AFI/SAFI values, as we support
1218
     just one value and even an error code for an invalid
1219
     request is not defined */
1220

    
1221
  proto_request_feeding(&p->p);
1222
}
1223

    
1224

    
1225
/**
1226
 * bgp_rx_packet - handle a received packet
1227
 * @conn: BGP connection
1228
 * @pkt: start of the packet
1229
 * @len: packet size
1230
 *
1231
 * bgp_rx_packet() takes a newly received packet and calls the corresponding
1232
 * packet handler according to the packet type.
1233
 */
1234
static void
1235
bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
1236
{
1237
  byte type = pkt[18];
1238

    
1239
  DBG("BGP: Got packet %02x (%d bytes)\n", type, len);
1240

    
1241
  if (conn->bgp->p.mrtdump & MD_MESSAGES)
1242
    mrt_dump_bgp_packet(conn, pkt, len);
1243

    
1244
  switch (type)
1245
    {
1246
    case PKT_OPEN:                return bgp_rx_open(conn, pkt, len);
1247
    case PKT_UPDATE:                return bgp_rx_update(conn, pkt, len);
1248
    case PKT_NOTIFICATION:      return bgp_rx_notification(conn, pkt, len);
1249
    case PKT_KEEPALIVE:                return bgp_rx_keepalive(conn);
1250
    case PKT_ROUTE_REFRESH:        return bgp_rx_route_refresh(conn, pkt, len);
1251
    default:                        bgp_error(conn, 1, 3, pkt+18, 1);
1252
    }
1253
}
1254

    
1255
/**
1256
 * bgp_rx - handle received data
1257
 * @sk: socket
1258
 * @size: amount of data received
1259
 *
1260
 * bgp_rx() is called by the socket layer whenever new data arrive from
1261
 * the underlying TCP connection. It assembles the data fragments to packets,
1262
 * checks their headers and framing and passes complete packets to
1263
 * bgp_rx_packet().
1264
 */
1265
int
1266
bgp_rx(sock *sk, int size)
1267
{
1268
  struct bgp_conn *conn = sk->data;
1269
  byte *pkt_start = sk->rbuf;
1270
  byte *end = pkt_start + size;
1271
  unsigned i, len;
1272

    
1273
  DBG("BGP: RX hook: Got %d bytes\n", size);
1274
  while (end >= pkt_start + BGP_HEADER_LENGTH)
1275
    {
1276
      if ((conn->state == BS_CLOSE) || (conn->sk != sk))
1277
        return 0;
1278
      for(i=0; i<16; i++)
1279
        if (pkt_start[i] != 0xff)
1280
          {
1281
            bgp_error(conn, 1, 1, NULL, 0);
1282
            break;
1283
          }
1284
      len = get_u16(pkt_start+16);
1285
      if (len < BGP_HEADER_LENGTH || len > BGP_MAX_PACKET_LENGTH)
1286
        {
1287
          bgp_error(conn, 1, 2, pkt_start+16, 2);
1288
          break;
1289
        }
1290
      if (end < pkt_start + len)
1291
        break;
1292
      bgp_rx_packet(conn, pkt_start, len);
1293
      pkt_start += len;
1294
    }
1295
  if (pkt_start != sk->rbuf)
1296
    {
1297
      memmove(sk->rbuf, pkt_start, end - pkt_start);
1298
      sk->rpos = sk->rbuf + (end - pkt_start);
1299
    }
1300
  return 0;
1301
}