Statistics
| Branch: | Revision:

iof-bird-daemon / sysdep / unix / io.c @ c8cafc8e

History | View | Annotate | Download (46.3 KB)

1
/*
2
 *        BIRD Internet Routing Daemon -- Unix I/O
3
 *
4
 *        (c) 1998--2004 Martin Mares <mj@ucw.cz>
5
 *      (c) 2004       Ondrej Filip <feela@network.cz>
6
 *
7
 *        Can be freely distributed and used under the terms of the GNU GPL.
8
 */
9

    
10
/* Unfortunately, some glibc versions hide parts of RFC 3542 API
11
   if _GNU_SOURCE is not defined. */
12
#ifndef _GNU_SOURCE
13
#define _GNU_SOURCE
14
#endif
15

    
16
#include <stdio.h>
17
#include <stdlib.h>
18
#include <time.h>
19
#include <sys/time.h>
20
#include <sys/types.h>
21
#include <sys/socket.h>
22
#include <sys/uio.h>
23
#include <sys/un.h>
24
#include <poll.h>
25
#include <unistd.h>
26
#include <fcntl.h>
27
#include <errno.h>
28
#include <net/if.h>
29
#include <netinet/in.h>
30
#include <netinet/tcp.h>
31
#include <netinet/udp.h>
32
#include <netinet/icmp6.h>
33

    
34
#include "nest/bird.h"
35
#include "lib/lists.h"
36
#include "lib/resource.h"
37
#include "lib/timer.h"
38
#include "lib/socket.h"
39
#include "lib/event.h"
40
#include "lib/string.h"
41
#include "nest/iface.h"
42

    
43
#include "lib/unix.h"
44
#include "lib/sysio.h"
45

    
46
/* Maximum number of calls of tx handler for one socket in one
47
 * poll iteration. Should be small enough to not monopolize CPU by
48
 * one protocol instance.
49
 */
50
#define MAX_STEPS 4
51

    
52
/* Maximum number of calls of rx handler for all sockets in one poll
53
   iteration. RX callbacks are often much more costly so we limit
54
   this to get small latencies */
55
#define MAX_RX_STEPS 4
56

    
57
/*
58
 *        Tracked Files
59
 */
60

    
61
struct rfile {
62
  resource r;
63
  FILE *f;
64
};
65

    
66
static void
67
rf_free(resource *r)
68
{
69
  struct rfile *a = (struct rfile *) r;
70

    
71
  fclose(a->f);
72
}
73

    
74
static void
75
rf_dump(resource *r)
76
{
77
  struct rfile *a = (struct rfile *) r;
78

    
79
  debug("(FILE *%p)\n", a->f);
80
}
81

    
82
static struct resclass rf_class = {
83
  "FILE",
84
  sizeof(struct rfile),
85
  rf_free,
86
  rf_dump,
87
  NULL,
88
  NULL
89
};
90

    
91
void *
92
tracked_fopen(pool *p, char *name, char *mode)
93
{
94
  FILE *f = fopen(name, mode);
95

    
96
  if (f)
97
    {
98
      struct rfile *r = ralloc(p, &rf_class);
99
      r->f = f;
100
    }
101
  return f;
102
}
103

    
104
/**
105
 * DOC: Timers
106
 *
107
 * Timers are resources which represent a wish of a module to call
108
 * a function at the specified time. The platform dependent code
109
 * doesn't guarantee exact timing, only that a timer function
110
 * won't be called before the requested time.
111
 *
112
 * In BIRD, time is represented by values of the &bird_clock_t type
113
 * which are integral numbers interpreted as a relative number of seconds since
114
 * some fixed time point in past. The current time can be read
115
 * from variable @now with reasonable accuracy and is monotonic. There is also
116
 * a current 'absolute' time in variable @now_real reported by OS.
117
 *
118
 * Each timer is described by a &timer structure containing a pointer
119
 * to the handler function (@hook), data private to this function (@data),
120
 * time the function should be called at (@expires, 0 for inactive timers),
121
 * for the other fields see |timer.h|.
122
 */
123

    
124
#define NEAR_TIMER_LIMIT 4
125

    
126
/* Timers started with a delay of at most NEAR_TIMER_LIMIT seconds are kept in
   near_timers, ordered by expiry (see tm_insert_near()); all other timers are
   appended to far_timers in no particular order (see tm_start()). */
static list near_timers, far_timers;
127
/* Lower bound on the expiry time of the timers in far_timers: lowered by
   tm_start(), recomputed by tm_shot() when it is reached. */
static bird_clock_t first_far_timer = TIME_INFINITY;
128

    
129
/* now must be different from 0, because 0 is a special value in timer->expires */
130
bird_clock_t now = 1, now_real, boot_time;
131

    
132
static void
133
update_times_plain(void)
134
{
135
  bird_clock_t new_time = time(NULL);
136
  int delta = new_time - now_real;
137

    
138
  if ((delta >= 0) && (delta < 60))
139
    now += delta;
140
  else if (now_real != 0)
141
   log(L_WARN "Time jump, delta %d s", delta);
142

    
143
  now_real = new_time;
144
}
145

    
146
static void
147
update_times_gettime(void)
148
{
149
  struct timespec ts;
150
  int rv;
151

    
152
  rv = clock_gettime(CLOCK_MONOTONIC, &ts);
153
  if (rv != 0)
154
    die("clock_gettime: %m");
155

    
156
  if (ts.tv_sec != now) {
157
    if (ts.tv_sec < now)
158
      log(L_ERR "Monotonic timer is broken");
159

    
160
    now = ts.tv_sec;
161
    now_real = time(NULL);
162
  }
163
}
164

    
165
/* Set by init_times(): nonzero when clock_gettime(CLOCK_MONOTONIC) succeeded;
   update_times() uses it to choose the clock source. */
static int clock_monotonic_available;
166

    
167
static inline void
168
update_times(void)
169
{
170
  if (clock_monotonic_available)
171
    update_times_gettime();
172
  else
173
    update_times_plain();
174
}
175

    
176
static inline void
177
init_times(void)
178
{
179
 struct timespec ts;
180
 clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
181
 if (!clock_monotonic_available)
182
   log(L_WARN "Monotonic timer is missing");
183
}
184

    
185

    
186
static void
187
tm_free(resource *r)
188
{
189
  timer *t = (timer *) r;
190

    
191
  tm_stop(t);
192
}
193

    
194
static void
195
tm_dump(resource *r)
196
{
197
  timer *t = (timer *) r;
198

    
199
  debug("(code %p, data %p, ", t->hook, t->data);
200
  if (t->randomize)
201
    debug("rand %d, ", t->randomize);
202
  if (t->recurrent)
203
    debug("recur %d, ", t->recurrent);
204
  if (t->expires)
205
    debug("expires in %d sec)\n", t->expires - now);
206
  else
207
    debug("inactive)\n");
208
}
209

    
210
static struct resclass tm_class = {
211
  "Timer",
212
  sizeof(timer),
213
  tm_free,
214
  tm_dump,
215
  NULL,
216
  NULL
217
};
218

    
219
/**
220
 * tm_new - create a timer
221
 * @p: pool
222
 *
223
 * This function creates a new timer resource and returns
224
 * a pointer to it. To use the timer, you need to fill in
225
 * the structure fields and call tm_start() to start timing.
226
 */
227
timer *
228
tm_new(pool *p)
229
{
230
  timer *t = ralloc(p, &tm_class);
231
  return t;
232
}
233

    
234
static inline void
235
tm_insert_near(timer *t)
236
{
237
  node *n = HEAD(near_timers);
238

    
239
  while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
240
    n = n->next;
241
  insert_node(&t->n, n->prev);
242
}
243

    
244
/**
245
 * tm_start - start a timer
246
 * @t: timer
247
 * @after: number of seconds the timer should be run after
248
 *
249
 * This function schedules the hook function of the timer to
250
 * be called after @after seconds. If the timer has been already
251
 * started, its @expires time is replaced by the new value.
252
 *
253
 * If you have set the @randomize field of @t, the timeout
254
 * will be increased by a random number of seconds chosen
255
 * uniformly from range 0 .. @randomize.
256
 *
257
 * You can call tm_start() from the handler function of the timer
258
 * to request another run of the timer. Also, you can set the @recurrent
259
 * field to have the timer re-added automatically with the same timeout.
260
 */
261
void
262
tm_start(timer *t, unsigned after)
263
{
264
  bird_clock_t when;
265

    
266
  if (t->randomize)
267
    after += random() % (t->randomize + 1);
268
  when = now + after;
269
  if (t->expires == when)
270
    return;
271
  if (t->expires)
272
    rem_node(&t->n);
273
  t->expires = when;
274
  if (after <= NEAR_TIMER_LIMIT)
275
    tm_insert_near(t);
276
  else
277
    {
278
      if (!first_far_timer || first_far_timer > when)
279
        first_far_timer = when;
280
      add_tail(&far_timers, &t->n);
281
    }
282
}
283

    
284
/**
285
 * tm_stop - stop a timer
286
 * @t: timer
287
 *
288
 * This function stops a timer. If the timer is already stopped,
289
 * nothing happens.
290
 */
291
void
292
tm_stop(timer *t)
293
{
294
  if (t->expires)
295
    {
296
      rem_node(&t->n);
297
      t->expires = 0;
298
    }
299
}
300

    
301
static void
302
tm_dump_them(char *name, list *l)
303
{
304
  node *n;
305
  timer *t;
306

    
307
  debug("%s timers:\n", name);
308
  WALK_LIST(n, *l)
309
    {
310
      t = SKIP_BACK(timer, n, n);
311
      debug("%p ", t);
312
      tm_dump(&t->r);
313
    }
314
  debug("\n");
315
}
316

    
317
void
318
tm_dump_all(void)
319
{
320
  tm_dump_them("Near", &near_timers);
321
  tm_dump_them("Far", &far_timers);
322
}
323

    
324
static inline time_t
325
tm_first_shot(void)
326
{
327
  time_t x = first_far_timer;
328

    
329
  if (!EMPTY_LIST(near_timers))
330
    {
331
      timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
332
      if (t->expires < x)
333
        x = t->expires;
334
    }
335
  return x;
336
}
337

    
338
void io_log_event(void *hook, void *data);
339

    
340
static void
341
tm_shot(void)
342
{
343
  timer *t;
344
  node *n, *m;
345

    
346
  if (first_far_timer <= now)
347
    {
348
      bird_clock_t limit = now + NEAR_TIMER_LIMIT;
349
      first_far_timer = TIME_INFINITY;
350
      n = HEAD(far_timers);
351
      while (m = n->next)
352
        {
353
          t = SKIP_BACK(timer, n, n);
354
          if (t->expires <= limit)
355
            {
356
              rem_node(n);
357
              tm_insert_near(t);
358
            }
359
          else if (t->expires < first_far_timer)
360
            first_far_timer = t->expires;
361
          n = m;
362
        }
363
    }
364
  while ((n = HEAD(near_timers)) -> next)
365
    {
366
      int delay;
367
      t = SKIP_BACK(timer, n, n);
368
      if (t->expires > now)
369
        break;
370
      rem_node(n);
371
      delay = t->expires - now;
372
      t->expires = 0;
373
      if (t->recurrent)
374
        {
375
          int i = t->recurrent - delay;
376
          if (i < 0)
377
            i = 0;
378
          tm_start(t, i);
379
        }
380
      io_log_event(t->hook, t->data);
381
      t->hook(t);
382
    }
383
}
384

    
385
/**
386
 * tm_parse_datetime - parse a date and time
387
 * @x: datetime string
388
 *
389
 * tm_parse_datetime() takes a textual representation of
390
 * a date and time (dd-mm-yyyy hh:mm:ss)
391
 * and converts it to the corresponding value of type &bird_clock_t.
392
 */
393
bird_clock_t
394
tm_parse_datetime(char *x)
395
{
396
  struct tm tm;
397
  int n;
398
  time_t t;
399

    
400
  if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n])
401
    return tm_parse_date(x);
402
  tm.tm_mon--;
403
  tm.tm_year -= 1900;
404
  t = mktime(&tm);
405
  if (t == (time_t) -1)
406
    return 0;
407
  return t;
408
}
409
/**
410
 * tm_parse_date - parse a date
411
 * @x: date string
412
 *
413
 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
414
 * and converts it to the corresponding value of type &bird_clock_t.
415
 */
416
bird_clock_t
417
tm_parse_date(char *x)
418
{
419
  struct tm tm;
420
  int n;
421
  time_t t;
422

    
423
  if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
424
    return 0;
425
  tm.tm_mon--;
426
  tm.tm_year -= 1900;
427
  tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
428
  t = mktime(&tm);
429
  if (t == (time_t) -1)
430
    return 0;
431
  return t;
432
}
433

    
434
static void
435
tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta)
436
{
437
  static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
438
                                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
439

    
440
  if (delta < 20*3600)
441
    bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
442
  else if (delta < 360*86400)
443
    bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
444
  else
445
    bsprintf(x, "%d", tm->tm_year+1900);
446
}
447

    
448
#include "conf/conf.h"
449

    
450
/**
451
 * tm_format_datetime - convert date and time to textual representation
452
 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
453
 * @fmt_spec: specification of resulting textual representation of the time
454
 * @t: time
455
 *
456
 * This function formats the given relative time value @t to a textual
457
 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
458
 */
459
void
460
tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t)
461
{
462
  const char *fmt_used;
463
  struct tm *tm;
464
  bird_clock_t delta = now - t;
465
  t = now_real - delta;
466
  tm = localtime(&t);
467

    
468
  if (fmt_spec->fmt1 == NULL)
469
    return tm_format_reltime(x, tm, delta);
470

    
471
  if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit))
472
    fmt_used = fmt_spec->fmt1;
473
  else
474
    fmt_used = fmt_spec->fmt2;
475

    
476
  int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm);
477
  if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE))
478
    strcpy(x, "<too-long>");
479
}
480

    
481

    
482
/**
483
 * DOC: Sockets
484
 *
485
 * Socket resources represent network connections. Their data structure (&socket)
486
 * contains a lot of fields defining the exact type of the socket, the local and
487
 * remote addresses and ports, pointers to socket buffers and finally pointers to
488
 * hook functions to be called when new data have arrived to the receive buffer
489
 * (@rx_hook), when the contents of the transmit buffer have been transmitted
490
 * (@tx_hook) and when an error or connection close occurs (@err_hook).
491
 *
492
 * Freeing of sockets from inside socket hooks is perfectly safe.
493
 */
494

    
495
#ifndef SOL_IP
496
#define SOL_IP IPPROTO_IP
497
#endif
498

    
499
#ifndef SOL_IPV6
500
#define SOL_IPV6 IPPROTO_IPV6
501
#endif
502

    
503
#ifndef SOL_ICMPV6
504
#define SOL_ICMPV6 IPPROTO_ICMPV6
505
#endif
506

    
507

    
508
/*
509
 *        Sockaddr helper functions
510
 */
511

    
512
/* Size of the socket address structure for @af; every family other than AF_INET is treated as IPv6. */
static inline int UNUSED sockaddr_length(int af)
{
  if (af == AF_INET)
    return sizeof(struct sockaddr_in);

  return sizeof(struct sockaddr_in6);
}
514

    
515
static inline void
516
sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, uint port)
517
{
518
  memset(sa, 0, sizeof(struct sockaddr_in));
519
#ifdef HAVE_SIN_LEN
520
  sa->sin_len = sizeof(struct sockaddr_in);
521
#endif
522
  sa->sin_family = AF_INET;
523
  sa->sin_port = htons(port);
524
  sa->sin_addr = ipa_to_in4(a);
525
}
526

    
527
static inline void
528
sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port)
529
{
530
  memset(sa, 0, sizeof(struct sockaddr_in6));
531
#ifdef SIN6_LEN
532
  sa->sin6_len = sizeof(struct sockaddr_in6);
533
#endif
534
  sa->sin6_family = AF_INET6;
535
  sa->sin6_port = htons(port);
536
  sa->sin6_flowinfo = 0;
537
  sa->sin6_addr = ipa_to_in6(a);
538

    
539
  if (ifa && ipa_is_link_local(a))
540
    sa->sin6_scope_id = ifa->index;
541
}
542

    
543
void
544
sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port)
545
{
546
  if (af == AF_INET)
547
    sockaddr_fill4((struct sockaddr_in *) sa, a, port);
548
  else if (af == AF_INET6)
549
    sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port);
550
  else
551
    bug("Unknown AF");
552
}
553

    
554
static inline void
555
sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, uint *port)
556
{
557
  *port = ntohs(sa->sin_port);
558
  *a = ipa_from_in4(sa->sin_addr);
559
}
560

    
561
static inline void
562
sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port)
563
{
564
  *port = ntohs(sa->sin6_port);
565
  *a = ipa_from_in6(sa->sin6_addr);
566

    
567
  if (ifa && ipa_is_link_local(*a))
568
    *ifa = if_find_by_index(sa->sin6_scope_id);
569
}
570

    
571
int
572
sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port)
573
{
574
  if (sa->sa.sa_family != af)
575
    goto fail;
576

    
577
  if (af == AF_INET)
578
    sockaddr_read4((struct sockaddr_in *) sa, a, port);
579
  else if (af == AF_INET6)
580
    sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port);
581
  else
582
    goto fail;
583

    
584
  return 0;
585

    
586
 fail:
587
  *a = IPA_NONE;
588
  *port = 0;
589
  return -1;
590
}
591

    
592

    
593
/*
594
 *        IPv6 multicast syscalls
595
 */
596

    
597
/* Fortunately standardized in RFC 3493 */
598

    
599
/* Initializer for struct ipv6_mreq: group @maddr on the interface @ifa
   (pointer to struct iface). The @ifa argument is parenthesized so that any
   pointer-valued expression can be passed safely. */
#define INIT_MREQ6(maddr,ifa) \
  { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = (ifa)->index }
601

    
602
static inline int
603
sk_setup_multicast6(sock *s)
604
{
605
  int index = s->iface->index;
606
  int ttl = s->ttl;
607
  int n = 0;
608

    
609
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
610
    ERR("IPV6_MULTICAST_IF");
611

    
612
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0)
613
    ERR("IPV6_MULTICAST_HOPS");
614

    
615
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0)
616
    ERR("IPV6_MULTICAST_LOOP");
617

    
618
  return 0;
619
}
620

    
621
static inline int
622
sk_join_group6(sock *s, ip_addr maddr)
623
{
624
  struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
625

    
626
  if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0)
627
    ERR("IPV6_JOIN_GROUP");
628

    
629
  return 0;
630
}
631

    
632
static inline int
633
sk_leave_group6(sock *s, ip_addr maddr)
634
{
635
  struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
636

    
637
  if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0)
638
    ERR("IPV6_LEAVE_GROUP");
639

    
640
  return 0;
641
}
642

    
643

    
644
/*
645
 *        IPv6 packet control messages
646
 */
647

    
648
/* Also standardized, in RFC 3542 */
649

    
650
/*
651
 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
652
 * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
653
 * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
654
 * RFC and we use IPV6_PKTINFO.
655
 */
656
#ifndef IPV6_RECVPKTINFO
657
#define IPV6_RECVPKTINFO IPV6_PKTINFO
658
#endif
659
/*
660
 * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
661
 */
662
#ifndef IPV6_RECVHOPLIMIT
663
#define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
664
#endif
665

    
666

    
667
#define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
668
#define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
669

    
670
static inline int
671
sk_request_cmsg6_pktinfo(sock *s)
672
{
673
  int y = 1;
674

    
675
  if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0)
676
    ERR("IPV6_RECVPKTINFO");
677

    
678
  return 0;
679
}
680

    
681
static inline int
682
sk_request_cmsg6_ttl(sock *s)
683
{
684
  int y = 1;
685

    
686
  if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0)
687
    ERR("IPV6_RECVHOPLIMIT");
688

    
689
  return 0;
690
}
691

    
692
static inline void
693
sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm)
694
{
695
  if (cm->cmsg_type == IPV6_PKTINFO)
696
  {
697
    struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm);
698
    s->laddr = ipa_from_in6(pi->ipi6_addr);
699
    s->lifindex = pi->ipi6_ifindex;
700
  }
701
}
702

    
703
static inline void
704
sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm)
705
{
706
  if (cm->cmsg_type == IPV6_HOPLIMIT)
707
    s->rcv_ttl = * (int *) CMSG_DATA(cm);
708
}
709

    
710
static inline void
711
sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
712
{
713
  struct cmsghdr *cm;
714
  struct in6_pktinfo *pi;
715
  int controllen = 0;
716

    
717
  msg->msg_control = cbuf;
718
  msg->msg_controllen = cbuflen;
719

    
720
  cm = CMSG_FIRSTHDR(msg);
721
  cm->cmsg_level = SOL_IPV6;
722
  cm->cmsg_type = IPV6_PKTINFO;
723
  cm->cmsg_len = CMSG_LEN(sizeof(*pi));
724
  controllen += CMSG_SPACE(sizeof(*pi));
725

    
726
  pi = (struct in6_pktinfo *) CMSG_DATA(cm);
727
  pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
728
  pi->ipi6_addr = ipa_to_in6(s->saddr);
729

    
730
  msg->msg_controllen = controllen;
731
}
732

    
733

    
734
/*
735
 *        Miscellaneous socket syscalls
736
 */
737

    
738
static inline int
739
sk_set_ttl4(sock *s, int ttl)
740
{
741
  if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0)
742
    ERR("IP_TTL");
743

    
744
  return 0;
745
}
746

    
747
static inline int
748
sk_set_ttl6(sock *s, int ttl)
749
{
750
  if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0)
751
    ERR("IPV6_UNICAST_HOPS");
752

    
753
  return 0;
754
}
755

    
756
static inline int
757
sk_set_tos4(sock *s, int tos)
758
{
759
  if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0)
760
    ERR("IP_TOS");
761

    
762
  return 0;
763
}
764

    
765
static inline int
766
sk_set_tos6(sock *s, int tos)
767
{
768
  if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0)
769
    ERR("IPV6_TCLASS");
770

    
771
  return 0;
772
}
773

    
774
static inline int
775
sk_set_high_port(sock *s UNUSED)
776
{
777
  /* Port range setting is optional, ignore it if not supported */
778

    
779
#ifdef IP_PORTRANGE
780
  if (sk_is_ipv4(s))
781
  {
782
    int range = IP_PORTRANGE_HIGH;
783
    if (setsockopt(s->fd, SOL_IP, IP_PORTRANGE, &range, sizeof(range)) < 0)
784
      ERR("IP_PORTRANGE");
785
  }
786
#endif
787

    
788
#ifdef IPV6_PORTRANGE
789
  if (sk_is_ipv6(s))
790
  {
791
    int range = IPV6_PORTRANGE_HIGH;
792
    if (setsockopt(s->fd, SOL_IPV6, IPV6_PORTRANGE, &range, sizeof(range)) < 0)
793
      ERR("IPV6_PORTRANGE");
794
  }
795
#endif
796

    
797
  return 0;
798
}
799

    
800
/*
 * Step over the IPv4 header at the start of @pkt (*@len bytes long).
 * Returns a pointer to the payload and reduces *@len by the header length.
 * Returns NULL, leaving *@len unchanged, when the packet is shorter than
 * 20 bytes, the version nibble is not 4, or the header length field is
 * below 20 bytes or beyond the packet.
 */
static inline byte *
sk_skip_ip_header(byte *pkt, int *len)
{
  int avail = *len;
  int hlen;

  /* Length is checked before the first byte is looked at */
  if (avail < 20)
    return NULL;

  if ((pkt[0] & 0xf0) != 0x40)
    return NULL;

  /* Low nibble holds the header length in 32-bit words */
  hlen = (pkt[0] & 0x0f) * 4;
  if ((hlen < 20) || (hlen > avail))
    return NULL;

  *len = avail - hlen;
  return pkt + hlen;
}
813

    
814
byte *
815
sk_rx_buffer(sock *s, int *len)
816
{
817
  if (sk_is_ipv4(s) && (s->type == SK_IP))
818
    return sk_skip_ip_header(s->rbuf, len);
819
  else
820
    return s->rbuf;
821
}
822

    
823

    
824
/*
825
 *        Public socket functions
826
 */
827

    
828
/**
829
 * sk_setup_multicast - enable multicast for given socket
830
 * @s: socket
831
 *
832
 * Prepare transmission of multicast packets for given datagram socket.
833
 * The socket must have defined @iface.
834
 *
835
 * Result: 0 for success, -1 for an error.
836
 */
837

    
838
int
839
sk_setup_multicast(sock *s)
840
{
841
  ASSERT(s->iface);
842

    
843
  if (sk_is_ipv4(s))
844
    return sk_setup_multicast4(s);
845
  else
846
    return sk_setup_multicast6(s);
847
}
848

    
849
/**
850
 * sk_join_group - join multicast group for given socket
851
 * @s: socket
852
 * @maddr: multicast address
853
 *
854
 * Join multicast group for given datagram socket and associated interface.
855
 * The socket must have defined @iface.
856
 *
857
 * Result: 0 for success, -1 for an error.
858
 */
859

    
860
int
861
sk_join_group(sock *s, ip_addr maddr)
862
{
863
  if (sk_is_ipv4(s))
864
    return sk_join_group4(s, maddr);
865
  else
866
    return sk_join_group6(s, maddr);
867
}
868

    
869
/**
870
 * sk_leave_group - leave multicast group for given socket
871
 * @s: socket
872
 * @maddr: multicast address
873
 *
874
 * Leave multicast group for given datagram socket and associated interface.
875
 * The socket must have defined @iface.
876
 *
877
 * Result: 0 for success, -1 for an error.
878
 */
879

    
880
int
881
sk_leave_group(sock *s, ip_addr maddr)
882
{
883
  if (sk_is_ipv4(s))
884
    return sk_leave_group4(s, maddr);
885
  else
886
    return sk_leave_group6(s, maddr);
887
}
888

    
889
/**
890
 * sk_setup_broadcast - enable broadcast for given socket
891
 * @s: socket
892
 *
893
 * Allow reception and transmission of broadcast packets for given datagram
894
 * socket. The socket must have defined @iface. For transmission, packets should
895
 * be sent to @brd address of @iface.
896
 *
897
 * Result: 0 for success, -1 for an error.
898
 */
899

    
900
int
901
sk_setup_broadcast(sock *s)
902
{
903
  int y = 1;
904

    
905
  if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0)
906
    ERR("SO_BROADCAST");
907

    
908
  return 0;
909
}
910

    
911
/**
912
 * sk_set_ttl - set transmit TTL for given socket
913
 * @s: socket
914
 * @ttl: TTL value
915
 *
916
 * Set TTL for already opened connections when TTL was not set before. Useful
917
 * for accepted connections when different ones should have different TTL.
918
 *
919
 * Result: 0 for success, -1 for an error.
920
 */
921

    
922
int
923
sk_set_ttl(sock *s, int ttl)
924
{
925
  s->ttl = ttl;
926

    
927
  if (sk_is_ipv4(s))
928
    return sk_set_ttl4(s, ttl);
929
  else
930
    return sk_set_ttl6(s, ttl);
931
}
932

    
933
/**
934
 * sk_set_min_ttl - set minimal accepted TTL for given socket
935
 * @s: socket
936
 * @ttl: TTL value
937
 *
938
 * Set minimal accepted TTL for given socket. Can be used for TTL security
939
 * implementations.
940
 *
941
 * Result: 0 for success, -1 for an error.
942
 */
943

    
944
int
945
sk_set_min_ttl(sock *s, int ttl)
946
{
947
  if (sk_is_ipv4(s))
948
    return sk_set_min_ttl4(s, ttl);
949
  else
950
    return sk_set_min_ttl6(s, ttl);
951
}
952

    
953
#if 0
954
/**
955
 * sk_set_md5_auth - add / remove MD5 security association for given socket
956
 * @s: socket
957
 * @local: IP address of local side
958
 * @remote: IP address of remote side
959
 * @ifa: Interface for link-local IP address
960
 * @passwd: Password used for MD5 authentication
961
 * @setkey: Update also system SA/SP database
962
 *
963
 * In TCP MD5 handling code in kernel, there is a set of security associations
964
 * used for choosing password and other authentication parameters according to
965
 * the local and remote address. This function is useful for listening socket,
966
 * for active sockets it may be enough to set s->password field.
967
 *
968
 * When called with passwd != NULL, the new pair is added,
969
 * When called with passwd == NULL, the existing pair is removed.
970
 *
971
 * Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are
972
 * stored in global SA/SP database (but the behavior also must be enabled on
973
 * per-socket basis). In case of multiple sockets to the same neighbor, the
974
 * socket-specific state must be configured for each socket while global state
975
 * just once per src-dst pair. The @setkey argument controls whether the global
976
 * state (SA/SP database) is also updated.
977
 *
978
 * Result: 0 for success, -1 for an error.
979
 */
980

981
int
982
sk_set_md5_auth(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, char *passwd, int setkey)
983
{ DUMMY; }
984
#endif
985

    
986
/**
987
 * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
988
 * @s: socket
989
 * @offset: offset
990
 *
991
 * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
992
 * kernel will automatically fill it for outgoing packets and check it for
993
 * incoming packets. Should not be used on ICMPv6 sockets, where the position is
994
 * known to the kernel.
995
 *
996
 * Result: 0 for success, -1 for an error.
997
 */
998

    
999
int
1000
sk_set_ipv6_checksum(sock *s, int offset)
1001
{
1002
  if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
1003
    ERR("IPV6_CHECKSUM");
1004

    
1005
  return 0;
1006
}
1007

    
1008
int
1009
sk_set_icmp6_filter(sock *s, int p1, int p2)
1010
{
1011
  /* a bit of lame interface, but it is here only for Radv */
1012
  struct icmp6_filter f;
1013

    
1014
  ICMP6_FILTER_SETBLOCKALL(&f);
1015
  ICMP6_FILTER_SETPASS(p1, &f);
1016
  ICMP6_FILTER_SETPASS(p2, &f);
1017

    
1018
  if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
1019
    ERR("ICMP6_FILTER");
1020

    
1021
  return 0;
1022
}
1023

    
1024
void
1025
sk_log_error(sock *s, const char *p)
1026
{
1027
  log(L_ERR "%s: Socket error: %s%#m", p, s->err);
1028
}
1029

    
1030

    
1031
/*
1032
 *        Actual struct birdsock code
1033
 */
1034

    
1035
/* NOTE(review): the code filling sock_list lies outside this chunk --
   presumably the list sockets are linked into via their node @n; confirm. */
static list sock_list;
1036
/* Socket cursors: when the socket one of them points to is freed, sk_free()
   moves that cursor on to the following socket (or NULL at the list end). */
static struct birdsock *current_sock;
1037
static struct birdsock *stored_sock;
1038

    
1039
/* Return the socket following @s in its list, or NULL when @s is the last one. */
static inline sock *
sk_next(sock *s)
{
  node *succ = s->n.next;

  /* A successor without a next pointer is the list tail sentinel */
  return succ->next ? SKIP_BACK(sock, n, succ) : NULL;
}
1047

    
1048
static void
1049
sk_alloc_bufs(sock *s)
1050
{
1051
  if (!s->rbuf && s->rbsize)
1052
    s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
1053
  s->rpos = s->rbuf;
1054
  if (!s->tbuf && s->tbsize)
1055
    s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
1056
  s->tpos = s->ttx = s->tbuf;
1057
}
1058

    
1059
static void
1060
sk_free_bufs(sock *s)
1061
{
1062
  if (s->rbuf_alloc)
1063
  {
1064
    xfree(s->rbuf_alloc);
1065
    s->rbuf = s->rbuf_alloc = NULL;
1066
  }
1067
  if (s->tbuf_alloc)
1068
  {
1069
    xfree(s->tbuf_alloc);
1070
    s->tbuf = s->tbuf_alloc = NULL;
1071
  }
1072
}
1073

    
1074
static void
1075
sk_free(resource *r)
1076
{
1077
  sock *s = (sock *) r;
1078

    
1079
  sk_free_bufs(s);
1080
  if (s->fd >= 0)
1081
  {
1082
    close(s->fd);
1083

    
1084
    /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
1085
    if (s->flags & SKF_THREAD)
1086
      return;
1087

    
1088
    if (s == current_sock)
1089
      current_sock = sk_next(s);
1090
    if (s == stored_sock)
1091
      stored_sock = sk_next(s);
1092
    rem_node(&s->n);
1093
  }
1094
}
1095

    
1096
void
1097
sk_set_rbsize(sock *s, uint val)
1098
{
1099
  ASSERT(s->rbuf_alloc == s->rbuf);
1100

    
1101
  if (s->rbsize == val)
1102
    return;
1103

    
1104
  s->rbsize = val;
1105
  xfree(s->rbuf_alloc);
1106
  s->rbuf_alloc = xmalloc(val);
1107
  s->rpos = s->rbuf = s->rbuf_alloc;
1108
}
1109

    
1110
void
1111
sk_set_tbsize(sock *s, uint val)
1112
{
1113
  ASSERT(s->tbuf_alloc == s->tbuf);
1114

    
1115
  if (s->tbsize == val)
1116
    return;
1117

    
1118
  byte *old_tbuf = s->tbuf;
1119

    
1120
  s->tbsize = val;
1121
  s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val);
1122
  s->tpos = s->tbuf + (s->tpos - old_tbuf);
1123
  s->ttx  = s->tbuf + (s->ttx  - old_tbuf);
1124
}
1125

    
1126
void
1127
sk_set_tbuf(sock *s, void *tbuf)
1128
{
1129
  s->tbuf = tbuf ?: s->tbuf_alloc;
1130
  s->ttx = s->tpos = s->tbuf;
1131
}
1132

    
1133
void
1134
sk_reallocate(sock *s)
1135
{
1136
  sk_free_bufs(s);
1137
  sk_alloc_bufs(s);
1138
}
1139

    
1140
static void
1141
sk_dump(resource *r)
1142
{
1143
  sock *s = (sock *) r;
1144
  static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "DEL!" };
1145

    
1146
  debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
1147
        sk_type_names[s->type],
1148
        s->data,
1149
        s->saddr,
1150
        s->sport,
1151
        s->daddr,
1152
        s->dport,
1153
        s->tos,
1154
        s->ttl,
1155
        s->iface ? s->iface->name : "none");
1156
}
1157

    
1158
static struct resclass sk_class = {
1159
  "Socket",
1160
  sizeof(sock),
1161
  sk_free,
1162
  sk_dump,
1163
  NULL,
1164
  NULL
1165
};
1166

    
1167
/**
1168
 * sk_new - create a socket
1169
 * @p: pool
1170
 *
1171
 * This function creates a new socket resource. If you want to use it,
1172
 * you need to fill in all the required fields of the structure and
1173
 * call sk_open() to do the actual opening of the socket.
1174
 *
1175
 * The real function name is sock_new(), sk_new() is a macro wrapper
1176
 * to avoid collision with OpenSSL.
1177
 */
1178
sock *
1179
sock_new(pool *p)
1180
{
1181
  sock *s = ralloc(p, &sk_class);
1182
  s->pool = p;
1183
  // s->saddr = s->daddr = IPA_NONE;
1184
  s->tos = s->priority = s->ttl = -1;
1185
  s->fd = -1;
1186
  return s;
1187
}
1188

    
1189
static int
1190
sk_setup(sock *s)
1191
{
1192
  int y = 1;
1193
  int fd = s->fd;
1194

    
1195
  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
1196
    ERR("O_NONBLOCK");
1197

    
1198
  if (!s->af)
1199
    return 0;
1200

    
1201
  if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
1202
    s->flags |= SKF_PKTINFO;
1203

    
1204
#ifdef CONFIG_USE_HDRINCL
1205
  if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO))
1206
  {
1207
    s->flags &= ~SKF_PKTINFO;
1208
    s->flags |= SKF_HDRINCL;
1209
    if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0)
1210
      ERR("IP_HDRINCL");
1211
  }
1212
#endif
1213

    
1214
  if (s->iface)
1215
  {
1216
#ifdef SO_BINDTODEVICE
1217
    struct ifreq ifr = {};
1218
    strcpy(ifr.ifr_name, s->iface->name);
1219
    if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
1220
      ERR("SO_BINDTODEVICE");
1221
#endif
1222

    
1223
#ifdef CONFIG_UNIX_DONTROUTE
1224
    if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0)
1225
      ERR("SO_DONTROUTE");
1226
#endif
1227
  }
1228

    
1229
  if (s->priority >= 0)
1230
    if (sk_set_priority(s, s->priority) < 0)
1231
      return -1;
1232

    
1233
  if (sk_is_ipv4(s))
1234
  {
1235
    if (s->flags & SKF_LADDR_RX)
1236
      if (sk_request_cmsg4_pktinfo(s) < 0)
1237
        return -1;
1238

    
1239
    if (s->flags & SKF_TTL_RX)
1240
      if (sk_request_cmsg4_ttl(s) < 0)
1241
        return -1;
1242

    
1243
    if ((s->type == SK_UDP) || (s->type == SK_IP))
1244
      if (sk_disable_mtu_disc4(s) < 0)
1245
        return -1;
1246

    
1247
    if (s->ttl >= 0)
1248
      if (sk_set_ttl4(s, s->ttl) < 0)
1249
        return -1;
1250

    
1251
    if (s->tos >= 0)
1252
      if (sk_set_tos4(s, s->tos) < 0)
1253
        return -1;
1254
  }
1255

    
1256
  if (sk_is_ipv6(s))
1257
  {
1258
    if (s->flags & SKF_V6ONLY)
1259
      if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0)
1260
        ERR("IPV6_V6ONLY");
1261

    
1262
    if (s->flags & SKF_LADDR_RX)
1263
      if (sk_request_cmsg6_pktinfo(s) < 0)
1264
        return -1;
1265

    
1266
    if (s->flags & SKF_TTL_RX)
1267
      if (sk_request_cmsg6_ttl(s) < 0)
1268
        return -1;
1269

    
1270
    if ((s->type == SK_UDP) || (s->type == SK_IP))
1271
      if (sk_disable_mtu_disc6(s) < 0)
1272
        return -1;
1273

    
1274
    if (s->ttl >= 0)
1275
      if (sk_set_ttl6(s, s->ttl) < 0)
1276
        return -1;
1277

    
1278
    if (s->tos >= 0)
1279
      if (sk_set_tos6(s, s->tos) < 0)
1280
        return -1;
1281
  }
1282

    
1283
  return 0;
1284
}
1285

    
1286
static void
1287
sk_insert(sock *s)
1288
{
1289
  add_tail(&sock_list, &s->n);
1290
}
1291

    
1292
static void
1293
sk_tcp_connected(sock *s)
1294
{
1295
  sockaddr sa;
1296
  int sa_len = sizeof(sa);
1297

    
1298
  if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) ||
1299
      (sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0))
1300
    log(L_WARN "SOCK: Cannot get local IP address for TCP>");
1301

    
1302
  s->type = SK_TCP;
1303
  sk_alloc_bufs(s);
1304
  s->tx_hook(s);
1305
}
1306

    
1307
static int
1308
sk_passive_connected(sock *s, int type)
1309
{
1310
  sockaddr loc_sa, rem_sa;
1311
  int loc_sa_len = sizeof(loc_sa);
1312
  int rem_sa_len = sizeof(rem_sa);
1313

    
1314
  int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len);
1315
  if (fd < 0)
1316
  {
1317
    if ((errno != EINTR) && (errno != EAGAIN))
1318
      s->err_hook(s, errno);
1319
    return 0;
1320
  }
1321

    
1322
  sock *t = sk_new(s->pool);
1323
  t->type = type;
1324
  t->fd = fd;
1325
  t->af = s->af;
1326
  t->ttl = s->ttl;
1327
  t->tos = s->tos;
1328
  t->rbsize = s->rbsize;
1329
  t->tbsize = s->tbsize;
1330

    
1331
  if (type == SK_TCP)
1332
  {
1333
    if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) ||
1334
        (sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0))
1335
      log(L_WARN "SOCK: Cannot get local IP address for TCP<");
1336

    
1337
    if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0)
1338
      log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
1339
  }
1340

    
1341
  if (sk_setup(t) < 0)
1342
  {
1343
    /* FIXME: Call err_hook instead ? */
1344
    log(L_ERR "SOCK: Incoming connection: %s%#m", t->err);
1345

    
1346
    /* FIXME: handle it better in rfree() */
1347
    close(t->fd);
1348
    t->fd = -1;
1349
    rfree(t);
1350
    return 1;
1351
  }
1352

    
1353
  sk_insert(t);
1354
  sk_alloc_bufs(t);
1355
  s->rx_hook(t, 0);
1356
  return 1;
1357
}
1358

    
1359
/**
1360
 * sk_open - open a socket
1361
 * @s: socket
1362
 *
1363
 * This function takes a socket resource created by sk_new() and
1364
 * initialized by the user and binds a corresponding network connection
1365
 * to it.
1366
 *
1367
 * Result: 0 for success, -1 for an error.
1368
 */
1369
int
1370
sk_open(sock *s)
1371
{
1372
  int af = BIRD_AF;
1373
  int fd = -1;
1374
  int do_bind = 0;
1375
  int bind_port = 0;
1376
  ip_addr bind_addr = IPA_NONE;
1377
  sockaddr sa;
1378

    
1379
  switch (s->type)
1380
  {
1381
  case SK_TCP_ACTIVE:
1382
    s->ttx = "";                        /* Force s->ttx != s->tpos */
1383
    /* Fall thru */
1384
  case SK_TCP_PASSIVE:
1385
    fd = socket(af, SOCK_STREAM, IPPROTO_TCP);
1386
    bind_port = s->sport;
1387
    bind_addr = s->saddr;
1388
    do_bind = bind_port || ipa_nonzero(bind_addr);
1389
    break;
1390

    
1391
  case SK_UDP:
1392
    fd = socket(af, SOCK_DGRAM, IPPROTO_UDP);
1393
    bind_port = s->sport;
1394
    bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1395
    do_bind = 1;
1396
    break;
1397

    
1398
  case SK_IP:
1399
    fd = socket(af, SOCK_RAW, s->dport);
1400
    bind_port = 0;
1401
    bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1402
    do_bind = ipa_nonzero(bind_addr);
1403
    break;
1404

    
1405
  case SK_MAGIC:
1406
    af = 0;
1407
    fd = s->fd;
1408
    break;
1409

    
1410
  default:
1411
    bug("sk_open() called for invalid sock type %d", s->type);
1412
  }
1413

    
1414
  if (fd < 0)
1415
    ERR("socket");
1416

    
1417
  s->af = af;
1418
  s->fd = fd;
1419

    
1420
  if (sk_setup(s) < 0)
1421
    goto err;
1422

    
1423
  if (do_bind)
1424
  {
1425
    if (bind_port)
1426
    {
1427
      int y = 1;
1428

    
1429
      if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0)
1430
        ERR2("SO_REUSEADDR");
1431

    
1432
#ifdef CONFIG_NO_IFACE_BIND
1433
      /* Workaround missing ability to bind to an iface */
1434
      if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr))
1435
      {
1436
        if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0)
1437
          ERR2("SO_REUSEPORT");
1438
      }
1439
#endif
1440
    }
1441
    else
1442
      if (s->flags & SKF_HIGH_PORT)
1443
        if (sk_set_high_port(s) < 0)
1444
          log(L_WARN "Socket error: %s%#m", s->err);
1445

    
1446
    sockaddr_fill(&sa, af, bind_addr, s->iface, bind_port);
1447
    if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
1448
      ERR2("bind");
1449
  }
1450

    
1451
  if (s->password)
1452
    if (sk_set_md5_auth(s, s->saddr, s->daddr, s->iface, s->password, 0) < 0)
1453
      goto err;
1454

    
1455
  switch (s->type)
1456
  {
1457
  case SK_TCP_ACTIVE:
1458
    sockaddr_fill(&sa, af, s->daddr, s->iface, s->dport);
1459
    if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0)
1460
      sk_tcp_connected(s);
1461
    else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
1462
             errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
1463
      ERR2("connect");
1464
    break;
1465

    
1466
  case SK_TCP_PASSIVE:
1467
    if (listen(fd, 8) < 0)
1468
      ERR2("listen");
1469
    break;
1470

    
1471
  case SK_MAGIC:
1472
    break;
1473

    
1474
  default:
1475
    sk_alloc_bufs(s);
1476
  }
1477

    
1478
  if (!(s->flags & SKF_THREAD))
1479
    sk_insert(s);
1480
  return 0;
1481

    
1482
err:
1483
  close(fd);
1484
  s->fd = -1;
1485
  return -1;
1486
}
1487

    
1488
int
1489
sk_open_unix(sock *s, char *name)
1490
{
1491
  struct sockaddr_un sa;
1492
  int fd;
1493

    
1494
  /* We are sloppy during error (leak fd and not set s->err), but we die anyway */
1495

    
1496
  fd = socket(AF_UNIX, SOCK_STREAM, 0);
1497
  if (fd < 0)
1498
    return -1;
1499

    
1500
  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
1501
    return -1;
1502

    
1503
  /* Path length checked in test_old_bird() */
1504
  sa.sun_family = AF_UNIX;
1505
  strcpy(sa.sun_path, name);
1506

    
1507
  if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
1508
    return -1;
1509

    
1510
  if (listen(fd, 8) < 0)
1511
    return -1;
1512

    
1513
  s->fd = fd;
1514
  sk_insert(s);
1515
  return 0;
1516
}
1517

    
1518

    
1519
/*
 * Sizes of on-stack control message buffers: large enough for either
 * address family. Reception may carry both pktinfo and TTL, transmission
 * only pktinfo.
 */
#define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \
                          CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)
#define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO)
1522

    
1523
static void
1524
sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
1525
{
1526
  if (sk_is_ipv4(s))
1527
    sk_prepare_cmsgs4(s, msg, cbuf, cbuflen);
1528
  else
1529
    sk_prepare_cmsgs6(s, msg, cbuf, cbuflen);
1530
}
1531

    
1532
static void
1533
sk_process_cmsgs(sock *s, struct msghdr *msg)
1534
{
1535
  struct cmsghdr *cm;
1536

    
1537
  s->laddr = IPA_NONE;
1538
  s->lifindex = 0;
1539
  s->rcv_ttl = -1;
1540

    
1541
  for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
1542
  {
1543
    if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s))
1544
    {
1545
      sk_process_cmsg4_pktinfo(s, cm);
1546
      sk_process_cmsg4_ttl(s, cm);
1547
    }
1548

    
1549
    if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s))
1550
    {
1551
      sk_process_cmsg6_pktinfo(s, cm);
1552
      sk_process_cmsg6_ttl(s, cm);
1553
    }
1554
  }
1555
}
1556

    
1557

    
1558
static inline int
1559
sk_sendmsg(sock *s)
1560
{
1561
  struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
1562
  byte cmsg_buf[CMSG_TX_SPACE];
1563
  sockaddr dst;
1564

    
1565
  sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport);
1566

    
1567
  struct msghdr msg = {
1568
    .msg_name = &dst.sa,
1569
    .msg_namelen = SA_LEN(dst),
1570
    .msg_iov = &iov,
1571
    .msg_iovlen = 1
1572
  };
1573

    
1574
#ifdef CONFIG_USE_HDRINCL
1575
  byte hdr[20];
1576
  struct iovec iov2[2] = { {hdr, 20}, iov };
1577

    
1578
  if (s->flags & SKF_HDRINCL)
1579
  {
1580
    sk_prepare_ip_header(s, hdr, iov.iov_len);
1581
    msg.msg_iov = iov2;
1582
    msg.msg_iovlen = 2;
1583
  }
1584
#endif
1585

    
1586
  if (s->flags & SKF_PKTINFO)
1587
    sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
1588

    
1589
  return sendmsg(s->fd, &msg, 0);
1590
}
1591

    
1592
static inline int
1593
sk_recvmsg(sock *s)
1594
{
1595
  struct iovec iov = {s->rbuf, s->rbsize};
1596
  byte cmsg_buf[CMSG_RX_SPACE];
1597
  sockaddr src;
1598

    
1599
  struct msghdr msg = {
1600
    .msg_name = &src.sa,
1601
    .msg_namelen = sizeof(src), // XXXX ??
1602
    .msg_iov = &iov,
1603
    .msg_iovlen = 1,
1604
    .msg_control = cmsg_buf,
1605
    .msg_controllen = sizeof(cmsg_buf),
1606
    .msg_flags = 0
1607
  };
1608

    
1609
  int rv = recvmsg(s->fd, &msg, 0);
1610
  if (rv < 0)
1611
    return rv;
1612

    
1613
  //ifdef IPV4
1614
  //  if (cf_type == SK_IP)
1615
  //    rv = ipv4_skip_header(pbuf, rv);
1616
  //endif
1617

    
1618
  sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport);
1619
  sk_process_cmsgs(s, &msg);
1620

    
1621
  if (msg.msg_flags & MSG_TRUNC)
1622
    s->flags |= SKF_TRUNCATED;
1623
  else
1624
    s->flags &= ~SKF_TRUNCATED;
1625

    
1626
  return rv;
1627
}
1628

    
1629

    
1630
/* Mark the transmit buffer of @s as empty: both positions point to its start */
static inline void
reset_tx_buffer(sock *s)
{
  s->tpos = s->tbuf;
  s->ttx = s->tbuf;
}
1631

    
1632
static int
1633
sk_maybe_write(sock *s)
1634
{
1635
  int e;
1636

    
1637
  switch (s->type)
1638
  {
1639
  case SK_TCP:
1640
  case SK_MAGIC:
1641
  case SK_UNIX:
1642
    while (s->ttx != s->tpos)
1643
    {
1644
      e = write(s->fd, s->ttx, s->tpos - s->ttx);
1645

    
1646
      if (e < 0)
1647
      {
1648
        if (errno != EINTR && errno != EAGAIN)
1649
        {
1650
          reset_tx_buffer(s);
1651
          /* EPIPE is just a connection close notification during TX */
1652
          s->err_hook(s, (errno != EPIPE) ? errno : 0);
1653
          return -1;
1654
        }
1655
        return 0;
1656
      }
1657
      s->ttx += e;
1658
    }
1659
    reset_tx_buffer(s);
1660
    return 1;
1661

    
1662
  case SK_UDP:
1663
  case SK_IP:
1664
    {
1665
      if (s->tbuf == s->tpos)
1666
        return 1;
1667

    
1668
      e = sk_sendmsg(s);
1669

    
1670
      if (e < 0)
1671
      {
1672
        if (errno != EINTR && errno != EAGAIN)
1673
        {
1674
          reset_tx_buffer(s);
1675
          s->err_hook(s, errno);
1676
          return -1;
1677
        }
1678

    
1679
        if (!s->tx_hook)
1680
          reset_tx_buffer(s);
1681
        return 0;
1682
      }
1683
      reset_tx_buffer(s);
1684
      return 1;
1685
    }
1686
  default:
1687
    bug("sk_maybe_write: unknown socket type %d", s->type);
1688
  }
1689
}
1690

    
1691
int
1692
sk_rx_ready(sock *s)
1693
{
1694
  int rv;
1695
  struct pollfd pfd = { .fd = s->fd };
1696
  pfd.events |= POLLIN;
1697

    
1698
 redo:
1699
  rv = poll(&pfd, 1, 0);
1700

    
1701
  if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
1702
    goto redo;
1703

    
1704
  return rv;
1705
}
1706

    
1707
/**
1708
 * sk_send - send data to a socket
1709
 * @s: socket
1710
 * @len: number of bytes to send
1711
 *
1712
 * This function sends @len bytes of data prepared in the
1713
 * transmit buffer of the socket @s to the network connection.
1714
 * If the packet can be sent immediately, it does so and returns
1715
 * 1, else it queues the packet for later processing, returns 0
1716
 * and calls the @tx_hook of the socket when the tranmission
1717
 * takes place.
1718
 */
1719
int
1720
sk_send(sock *s, unsigned len)
1721
{
1722
  s->ttx = s->tbuf;
1723
  s->tpos = s->tbuf + len;
1724
  return sk_maybe_write(s);
1725
}
1726

    
1727
/**
1728
 * sk_send_to - send data to a specific destination
1729
 * @s: socket
1730
 * @len: number of bytes to send
1731
 * @addr: IP address to send the packet to
1732
 * @port: port to send the packet to
1733
 *
1734
 * This is a sk_send() replacement for connection-less packet sockets
1735
 * which allows destination of the packet to be chosen dynamically.
1736
 * Raw IP sockets should use 0 for @port.
1737
 */
1738
int
1739
sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
1740
{
1741
  s->daddr = addr;
1742
  if (port)
1743
    s->dport = port;
1744

    
1745
  s->ttx = s->tbuf;
1746
  s->tpos = s->tbuf + len;
1747
  return sk_maybe_write(s);
1748
}
1749

    
1750
/*
1751
int
1752
sk_send_full(sock *s, unsigned len, struct iface *ifa,
1753
             ip_addr saddr, ip_addr daddr, unsigned dport)
1754
{
1755
  s->iface = ifa;
1756
  s->saddr = saddr;
1757
  s->daddr = daddr;
1758
  s->dport = dport;
1759
  s->ttx = s->tbuf;
1760
  s->tpos = s->tbuf + len;
1761
  return sk_maybe_write(s);
1762
}
1763
*/
1764

    
1765
 /* sk_read() and sk_write() are called from BFD's event loop */
1766

    
1767
int
1768
sk_read(sock *s, int revents)
1769
{
1770
  switch (s->type)
1771
  {
1772
  case SK_TCP_PASSIVE:
1773
    return sk_passive_connected(s, SK_TCP);
1774

    
1775
  case SK_UNIX_PASSIVE:
1776
    return sk_passive_connected(s, SK_UNIX);
1777

    
1778
  case SK_TCP:
1779
  case SK_UNIX:
1780
    {
1781
      int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
1782

    
1783
      if (c < 0)
1784
      {
1785
        if (errno != EINTR && errno != EAGAIN)
1786
          s->err_hook(s, errno);
1787
        else if (errno == EAGAIN && !(revents & POLLIN))
1788
        {
1789
          log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents);
1790
          s->err_hook(s, 0);
1791
        }
1792
      }
1793
      else if (!c)
1794
        s->err_hook(s, 0);
1795
      else
1796
      {
1797
        s->rpos += c;
1798
        if (s->rx_hook(s, s->rpos - s->rbuf))
1799
        {
1800
          /* We need to be careful since the socket could have been deleted by the hook */
1801
          if (current_sock == s)
1802
            s->rpos = s->rbuf;
1803
        }
1804
        return 1;
1805
      }
1806
      return 0;
1807
    }
1808

    
1809
  case SK_MAGIC:
1810
    return s->rx_hook(s, 0);
1811

    
1812
  default:
1813
    {
1814
      int e = sk_recvmsg(s);
1815

    
1816
      if (e < 0)
1817
      {
1818
        if (errno != EINTR && errno != EAGAIN)
1819
          s->err_hook(s, errno);
1820
        return 0;
1821
      }
1822

    
1823
      s->rpos = s->rbuf + e;
1824
      s->rx_hook(s, e);
1825
      return 1;
1826
    }
1827
  }
1828
}
1829

    
1830
int
1831
sk_write(sock *s)
1832
{
1833
  switch (s->type)
1834
  {
1835
  case SK_TCP_ACTIVE:
1836
    {
1837
      sockaddr sa;
1838
      sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
1839

    
1840
      if (connect(s->fd, &sa.sa, SA_LEN(sa)) >= 0 || errno == EISCONN)
1841
        sk_tcp_connected(s);
1842
      else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
1843
        s->err_hook(s, errno);
1844
      return 0;
1845
    }
1846

    
1847
  default:
1848
    if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
1849
    {
1850
      if (s->tx_hook)
1851
        s->tx_hook(s);
1852
      return 1;
1853
    }
1854
    return 0;
1855
  }
1856
}
1857

    
1858
void
1859
sk_err(sock *s, int revents)
1860
{
1861
  int se = 0, sse = sizeof(se);
1862
  if ((s->type != SK_MAGIC) && (revents & POLLERR))
1863
    if (getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &se, &sse) < 0)
1864
    {
1865
      log(L_ERR "IO: Socket error: SO_ERROR: %m");
1866
      se = 0;
1867
    }
1868

    
1869
  s->err_hook(s, se);
1870
}
1871

    
1872
void
1873
sk_dump_all(void)
1874
{
1875
  node *n;
1876
  sock *s;
1877

    
1878
  debug("Open sockets:\n");
1879
  WALK_LIST(n, sock_list)
1880
  {
1881
    s = SKIP_BACK(sock, n, n);
1882
    debug("%p ", s);
1883
    sk_dump(&s->r);
1884
  }
1885
  debug("\n");
1886
}
1887

    
1888

    
1889
/*
 *        Internal event log and watchdog
 */
1892

    
1893
#define EVENT_LOG_LENGTH 32
1894

    
1895
struct event_log_entry
1896
{
1897
  void *hook;
1898
  void *data;
1899
  btime timestamp;
1900
  btime duration;
1901
};
1902

    
1903
static struct event_log_entry event_log[EVENT_LOG_LENGTH];
1904
static struct event_log_entry *event_open;
1905
static int event_log_pos, event_log_num, watchdog_active;
1906
static btime last_time;
1907
static btime loop_time;
1908

    
1909
static void
1910
io_update_time(void)
1911
{
1912
  struct timespec ts;
1913
  int rv;
1914

    
1915
  if (!clock_monotonic_available)
1916
    return;
1917

    
1918
  /*
1919
   * This is third time-tracking procedure (after update_times() above and
1920
   * times_update() in BFD), dedicated to internal event log and latency
1921
   * tracking. Hopefully, we consolidate these sometimes.
1922
   */
1923

    
1924
  rv = clock_gettime(CLOCK_MONOTONIC, &ts);
1925
  if (rv < 0)
1926
    die("clock_gettime: %m");
1927

    
1928
  last_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000);
1929

    
1930
  if (event_open)
1931
  {
1932
    event_open->duration = last_time - event_open->timestamp;
1933

    
1934
    if (event_open->duration > config->latency_limit)
1935
      log(L_WARN "Event 0x%p 0x%p took %d ms",
1936
          event_open->hook, event_open->data, (int) (event_open->duration TO_MS));
1937

    
1938
    event_open = NULL;
1939
  }
1940
}
1941

    
1942
/**
1943
 * io_log_event - mark approaching event into event log
1944
 * @hook: event hook address
1945
 * @data: event data address
1946
 *
1947
 * Store info (hook, data, timestamp) about the following internal event into
1948
 * a circular event log (@event_log). When latency tracking is enabled, the log
1949
 * entry is kept open (in @event_open) so the duration can be filled later.
1950
 */
1951
void
1952
io_log_event(void *hook, void *data)
1953
{
1954
  if (config->latency_debug)
1955
    io_update_time();
1956

    
1957
  struct event_log_entry *en = event_log + event_log_pos;
1958

    
1959
  en->hook = hook;
1960
  en->data = data;
1961
  en->timestamp = last_time;
1962
  en->duration = 0;
1963

    
1964
  event_log_num++;
1965
  event_log_pos++;
1966
  event_log_pos %= EVENT_LOG_LENGTH;
1967

    
1968
  event_open = config->latency_debug ? en : NULL;
1969
}
1970

    
1971
static inline void
1972
io_close_event(void)
1973
{
1974
  if (event_open)
1975
    io_update_time();
1976
}
1977

    
1978
void
1979
io_log_dump(void)
1980
{
1981
  int i;
1982

    
1983
  log(L_DEBUG "Event log:");
1984
  for (i = 0; i < EVENT_LOG_LENGTH; i++)
1985
  {
1986
    struct event_log_entry *en = event_log + (event_log_pos + i) % EVENT_LOG_LENGTH;
1987
    if (en->hook)
1988
      log(L_DEBUG "  Event 0x%p 0x%p at %8d for %d ms", en->hook, en->data,
1989
          (int) ((last_time - en->timestamp) TO_MS), (int) (en->duration TO_MS));
1990
  }
1991
}
1992

    
1993
void
1994
watchdog_sigalrm(int sig UNUSED)
1995
{
1996
  /* Update last_time and duration, but skip latency check */
1997
  config->latency_limit = 0xffffffff;
1998
  io_update_time();
1999

    
2000
  /* We want core dump */
2001
  abort();
2002
}
2003

    
2004
static inline void
2005
watchdog_start1(void)
2006
{
2007
  io_update_time();
2008

    
2009
  loop_time = last_time;
2010
}
2011

    
2012
static inline void
2013
watchdog_start(void)
2014
{
2015
  io_update_time();
2016

    
2017
  loop_time = last_time;
2018
  event_log_num = 0;
2019

    
2020
  if (config->watchdog_timeout)
2021
  {
2022
    alarm(config->watchdog_timeout);
2023
    watchdog_active = 1;
2024
  }
2025
}
2026

    
2027
static inline void
2028
watchdog_stop(void)
2029
{
2030
  io_update_time();
2031

    
2032
  if (watchdog_active)
2033
  {
2034
    alarm(0);
2035
    watchdog_active = 0;
2036
  }
2037

    
2038
  btime duration = last_time - loop_time;
2039
  if (duration > config->watchdog_warning)
2040
    log(L_WARN "I/O loop cycle took %d ms for %d events",
2041
        (int) (duration TO_MS), event_log_num);
2042
}
2043

    
2044

    
2045
/*
 *        Main I/O Loop
 */
2048

    
2049
/*
 * Requests for asynchronous actions. io_loop() tests them before entering
 * poll(), runs the matching async_*() handler and clears the flag.
 */
volatile int async_config_flag;                /* Asynchronous reconfiguration/dump scheduled */
volatile int async_dump_flag;
volatile int async_shutdown_flag;
2052

    
2053
void
2054
io_init(void)
2055
{
2056
  init_list(&near_timers);
2057
  init_list(&far_timers);
2058
  init_list(&sock_list);
2059
  init_list(&global_event_list);
2060
  krt_io_init();
2061
  init_times();
2062
  update_times();
2063
  boot_time = now;
2064
  srandom((int) now_real);
2065
}
2066

    
2067
static int short_loops = 0;
2068
#define SHORT_LOOP_MAX 10
2069

    
2070
void
2071
io_loop(void)
2072
{
2073
  int poll_tout;
2074
  time_t tout;
2075
  int nfds, events, pout;
2076
  sock *s;
2077
  node *n;
2078
  int fdmax = 256;
2079
  struct pollfd *pfd = xmalloc(fdmax * sizeof(struct pollfd));
2080

    
2081
  watchdog_start1();
2082
  for(;;)
2083
    {
2084
      events = ev_run_list(&global_event_list);
2085
    timers:
2086
      update_times();
2087
      tout = tm_first_shot();
2088
      if (tout <= now)
2089
        {
2090
          tm_shot();
2091
          goto timers;
2092
        }
2093
      poll_tout = (events ? 0 : MIN(tout - now, 3)) * 1000; /* Time in milliseconds */
2094

    
2095
      io_close_event();
2096

    
2097
      nfds = 0;
2098
      WALK_LIST(n, sock_list)
2099
        {
2100
          pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */
2101
          s = SKIP_BACK(sock, n, n);
2102
          if (s->rx_hook)
2103
            {
2104
              pfd[nfds].fd = s->fd;
2105
              pfd[nfds].events |= POLLIN;
2106
            }
2107
          if (s->tx_hook && s->ttx != s->tpos)
2108
            {
2109
              pfd[nfds].fd = s->fd;
2110
              pfd[nfds].events |= POLLOUT;
2111
            }
2112
          if (pfd[nfds].fd != -1)
2113
            {
2114
              s->index = nfds;
2115
              nfds++;
2116
            }
2117
          else
2118
            s->index = -1;
2119

    
2120
          if (nfds >= fdmax)
2121
            {
2122
              fdmax *= 2;
2123
              pfd = xrealloc(pfd, fdmax * sizeof(struct pollfd));
2124
            }
2125
        }
2126

    
2127
      /*
2128
       * Yes, this is racy. But even if the signal comes before this test
2129
       * and entering poll(), it gets caught on the next timer tick.
2130
       */
2131

    
2132
      if (async_config_flag)
2133
        {
2134
          io_log_event(async_config, NULL);
2135
          async_config();
2136
          async_config_flag = 0;
2137
          continue;
2138
        }
2139
      if (async_dump_flag)
2140
        {
2141
          io_log_event(async_dump, NULL);
2142
          async_dump();
2143
          async_dump_flag = 0;
2144
          continue;
2145
        }
2146
      if (async_shutdown_flag)
2147
        {
2148
          io_log_event(async_shutdown, NULL);
2149
          async_shutdown();
2150
          async_shutdown_flag = 0;
2151
          continue;
2152
        }
2153

    
2154
      /* And finally enter poll() to find active sockets */
2155
      watchdog_stop();
2156
      pout = poll(pfd, nfds, poll_tout);
2157
      watchdog_start();
2158

    
2159
      if (pout < 0)
2160
        {
2161
          if (errno == EINTR || errno == EAGAIN)
2162
            continue;
2163
          die("poll: %m");
2164
        }
2165
      if (pout)
2166
        {
2167
          /* guaranteed to be non-empty */
2168
          current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
2169

    
2170
          while (current_sock)
2171
            {
2172
              sock *s = current_sock;
2173
              if (s->index == -1)
2174
                {
2175
                  current_sock = sk_next(s);
2176
                  goto next;
2177
                }
2178

    
2179
              int e;
2180
              int steps;
2181

    
2182
              steps = MAX_STEPS;
2183
              if (s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
2184
                do
2185
                  {
2186
                    steps--;
2187
                    io_log_event(s->rx_hook, s->data);
2188
                    e = sk_read(s, pfd[s->index].revents);
2189
                    if (s != current_sock)
2190
                      goto next;
2191
                  }
2192
                while (e && s->rx_hook && steps);
2193

    
2194
              steps = MAX_STEPS;
2195
              if (pfd[s->index].revents & POLLOUT)
2196
                do
2197
                  {
2198
                    steps--;
2199
                    io_log_event(s->tx_hook, s->data);
2200
                    e = sk_write(s);
2201
                    if (s != current_sock)
2202
                      goto next;
2203
                  }
2204
                while (e && steps);
2205

    
2206
              current_sock = sk_next(s);
2207
            next: ;
2208
            }
2209

    
2210
          short_loops++;
2211
          if (events && (short_loops < SHORT_LOOP_MAX))
2212
            continue;
2213
          short_loops = 0;
2214

    
2215
          int count = 0;
2216
          current_sock = stored_sock;
2217
          if (current_sock == NULL)
2218
            current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
2219

    
2220
          while (current_sock && count < MAX_RX_STEPS)
2221
            {
2222
              sock *s = current_sock;
2223
              if (s->index == -1)
2224
                {
2225
                  current_sock = sk_next(s);
2226
                  goto next2;
2227
                }
2228

    
2229
              if (!s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
2230
                {
2231
                  count++;
2232
                  io_log_event(s->rx_hook, s->data);
2233
                  sk_read(s, pfd[s->index].revents);
2234
                  if (s != current_sock)
2235
                    goto next2;
2236
                }
2237

    
2238
              if (pfd[s->index].revents & (POLLHUP | POLLERR))
2239
                {
2240
                  sk_err(s, pfd[s->index].revents);
2241
                  goto next2;
2242
                }
2243

    
2244
              current_sock = sk_next(s);
2245
            next2: ;
2246
            }
2247

    
2248

    
2249
          stored_sock = current_sock;
2250
        }
2251
    }
2252
}
2253

    
2254
/*
 * test_old_bird - refuse to start when another instance is already running
 * @path: path of the control socket
 *
 * Tries to connect to the UNIX-domain socket at @path and dies when the
 * connection succeeds. Also dies when the socket cannot be created or
 * when @path does not fit into sun_path.
 */
void
test_old_bird(char *path)
{
  struct sockaddr_un sa;
  int fd = socket(AF_UNIX, SOCK_STREAM, 0);

  if (fd < 0)
    die("Cannot create socket: %m");

  /* The terminating zero has to fit as well */
  if (strlen(path) >= sizeof(sa.sun_path))
    die("Socket path too long");

  bzero(&sa, sizeof(sa));
  sa.sun_family = AF_UNIX;
  strcpy(sa.sun_path, path);

  /* Somebody answering on the socket means a daemon is already there */
  if (connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) == 0)
    die("I found another BIRD running.");

  close(fd);
}