Statistics
| Branch: | Revision:

iof-bird-daemon / sysdep / unix / io.c @ 8e433d6a

History | View | Annotate | Download (45.9 KB)

1
/*
2
 *        BIRD Internet Routing Daemon -- Unix I/O
3
 *
4
 *        (c) 1998--2004 Martin Mares <mj@ucw.cz>
5
 *      (c) 2004       Ondrej Filip <feela@network.cz>
6
 *
7
 *        Can be freely distributed and used under the terms of the GNU GPL.
8
 */
9

    
10
/* Unfortunately, some glibc versions hide parts of RFC 3542 API
11
   if _GNU_SOURCE is not defined. */
12
#define _GNU_SOURCE 1
13

    
14
#include <stdio.h>
15
#include <stdlib.h>
16
#include <time.h>
17
#include <sys/time.h>
18
#include <sys/types.h>
19
#include <sys/socket.h>
20
#include <sys/uio.h>
21
#include <sys/un.h>
22
#include <poll.h>
23
#include <unistd.h>
24
#include <fcntl.h>
25
#include <errno.h>
26
#include <net/if.h>
27
#include <netinet/in.h>
28
#include <netinet/tcp.h>
29
#include <netinet/udp.h>
30
#include <netinet/icmp6.h>
31

    
32
#include "nest/bird.h"
33
#include "lib/lists.h"
34
#include "lib/resource.h"
35
#include "lib/timer.h"
36
#include "lib/socket.h"
37
#include "lib/event.h"
38
#include "lib/string.h"
39
#include "nest/iface.h"
40

    
41
#include "lib/unix.h"
42
#include "lib/sysio.h"
43

    
44
/* Maximum number of calls of tx handler for one socket in one
45
 * poll iteration. Should be small enough to not monopolize CPU by
46
 * one protocol instance.
47
 */
48
#define MAX_STEPS 4
49

    
50
/* Maximum number of calls of rx handler for all sockets in one poll
51
   iteration. RX callbacks are often much more costly so we limit
52
   this to get small latencies */
53
#define MAX_RX_STEPS 4
54

    
55
/*
56
 *        Tracked Files
57
 */
58

    
59
struct rfile {
60
  resource r;
61
  FILE *f;
62
};
63

    
64
static void
65
rf_free(resource *r)
66
{
67
  struct rfile *a = (struct rfile *) r;
68

    
69
  fclose(a->f);
70
}
71

    
72
static void
73
rf_dump(resource *r)
74
{
75
  struct rfile *a = (struct rfile *) r;
76

    
77
  debug("(FILE *%p)\n", a->f);
78
}
79

    
80
static struct resclass rf_class = {
81
  "FILE",
82
  sizeof(struct rfile),
83
  rf_free,
84
  rf_dump,
85
  NULL,
86
  NULL
87
};
88

    
89
void *
90
tracked_fopen(pool *p, char *name, char *mode)
91
{
92
  FILE *f = fopen(name, mode);
93

    
94
  if (f)
95
    {
96
      struct rfile *r = ralloc(p, &rf_class);
97
      r->f = f;
98
    }
99
  return f;
100
}
101

    
102
/**
103
 * DOC: Timers
104
 *
105
 * Timers are resources which represent a wish of a module to call
106
 * a function at the specified time. The platform dependent code
107
 * doesn't guarantee exact timing, only that a timer function
108
 * won't be called before the requested time.
109
 *
110
 * In BIRD, time is represented by values of the &bird_clock_t type
111
 * which are integral numbers interpreted as a relative number of seconds since
112
 * some fixed time point in past. The current time can be read
113
 * from variable @now with reasonable accuracy and is monotonic. There is also
114
 * a current 'absolute' time in variable @now_real reported by OS.
115
 *
116
 * Each timer is described by a &timer structure containing a pointer
117
 * to the handler function (@hook), data private to this function (@data),
118
 * time the function should be called at (@expires, 0 for inactive timers),
119
 * for the other fields see |timer.h|.
120
 */
121

    
122
#define NEAR_TIMER_LIMIT 4
123

    
124
static list near_timers, far_timers;
125
static bird_clock_t first_far_timer = TIME_INFINITY;
126

    
127
/* now must be different from 0, because 0 is a special value in timer->expires */
128
bird_clock_t now = 1, now_real, boot_time;
129

    
130
static void
131
update_times_plain(void)
132
{
133
  bird_clock_t new_time = time(NULL);
134
  int delta = new_time - now_real;
135

    
136
  if ((delta >= 0) && (delta < 60))
137
    now += delta;
138
  else if (now_real != 0)
139
   log(L_WARN "Time jump, delta %d s", delta);
140

    
141
  now_real = new_time;
142
}
143

    
144
static void
145
update_times_gettime(void)
146
{
147
  struct timespec ts;
148
  int rv;
149

    
150
  rv = clock_gettime(CLOCK_MONOTONIC, &ts);
151
  if (rv != 0)
152
    die("clock_gettime: %m");
153

    
154
  if (ts.tv_sec != now) {
155
    if (ts.tv_sec < now)
156
      log(L_ERR "Monotonic timer is broken");
157

    
158
    now = ts.tv_sec;
159
    now_real = time(NULL);
160
  }
161
}
162

    
163
/* Nonzero when init_times() found a working CLOCK_MONOTONIC */
static int clock_monotonic_available;

/* Refresh 'now'/'now_real' using whichever clock source is available. */
static inline void
update_times(void)
{
  if (!clock_monotonic_available)
    {
      update_times_plain();
      return;
    }

  update_times_gettime();
}
173

    
174
static inline void
175
init_times(void)
176
{
177
 struct timespec ts;
178
 clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
179
 if (!clock_monotonic_available)
180
   log(L_WARN "Monotonic timer is missing");
181
}
182

    
183

    
184
static void
185
tm_free(resource *r)
186
{
187
  timer *t = (timer *) r;
188

    
189
  tm_stop(t);
190
}
191

    
192
static void
193
tm_dump(resource *r)
194
{
195
  timer *t = (timer *) r;
196

    
197
  debug("(code %p, data %p, ", t->hook, t->data);
198
  if (t->randomize)
199
    debug("rand %d, ", t->randomize);
200
  if (t->recurrent)
201
    debug("recur %d, ", t->recurrent);
202
  if (t->expires)
203
    debug("expires in %d sec)\n", t->expires - now);
204
  else
205
    debug("inactive)\n");
206
}
207

    
208
static struct resclass tm_class = {
209
  "Timer",
210
  sizeof(timer),
211
  tm_free,
212
  tm_dump,
213
  NULL,
214
  NULL
215
};
216

    
217
/**
218
 * tm_new - create a timer
219
 * @p: pool
220
 *
221
 * This function creates a new timer resource and returns
222
 * a pointer to it. To use the timer, you need to fill in
223
 * the structure fields and call tm_start() to start timing.
224
 */
225
timer *
226
tm_new(pool *p)
227
{
228
  timer *t = ralloc(p, &tm_class);
229
  return t;
230
}
231

    
232
static inline void
233
tm_insert_near(timer *t)
234
{
235
  node *n = HEAD(near_timers);
236

    
237
  while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
238
    n = n->next;
239
  insert_node(&t->n, n->prev);
240
}
241

    
242
/**
243
 * tm_start - start a timer
244
 * @t: timer
245
 * @after: number of seconds the timer should be run after
246
 *
247
 * This function schedules the hook function of the timer to
248
 * be called after @after seconds. If the timer has been already
249
 * started, its @expires time is replaced by the new value.
250
 *
251
 * If you have set the @randomize field of @t, the timeout
252
 * will be increased by a random number of seconds chosen
253
 * uniformly from range 0 .. @randomize.
254
 *
255
 * You can call tm_start() from the handler function of the timer
256
 * to request another run of the timer. Also, you can set the @recurrent
257
 * field to have the timer re-added automatically with the same timeout.
258
 */
259
void
260
tm_start(timer *t, unsigned after)
261
{
262
  bird_clock_t when;
263

    
264
  if (t->randomize)
265
    after += random() % (t->randomize + 1);
266
  when = now + after;
267
  if (t->expires == when)
268
    return;
269
  if (t->expires)
270
    rem_node(&t->n);
271
  t->expires = when;
272
  if (after <= NEAR_TIMER_LIMIT)
273
    tm_insert_near(t);
274
  else
275
    {
276
      if (!first_far_timer || first_far_timer > when)
277
        first_far_timer = when;
278
      add_tail(&far_timers, &t->n);
279
    }
280
}
281

    
282
/**
283
 * tm_stop - stop a timer
284
 * @t: timer
285
 *
286
 * This function stops a timer. If the timer is already stopped,
287
 * nothing happens.
288
 */
289
void
290
tm_stop(timer *t)
291
{
292
  if (t->expires)
293
    {
294
      rem_node(&t->n);
295
      t->expires = 0;
296
    }
297
}
298

    
299
static void
300
tm_dump_them(char *name, list *l)
301
{
302
  node *n;
303
  timer *t;
304

    
305
  debug("%s timers:\n", name);
306
  WALK_LIST(n, *l)
307
    {
308
      t = SKIP_BACK(timer, n, n);
309
      debug("%p ", t);
310
      tm_dump(&t->r);
311
    }
312
  debug("\n");
313
}
314

    
315
void
316
tm_dump_all(void)
317
{
318
  tm_dump_them("Near", &near_timers);
319
  tm_dump_them("Far", &far_timers);
320
}
321

    
322
static inline time_t
323
tm_first_shot(void)
324
{
325
  time_t x = first_far_timer;
326

    
327
  if (!EMPTY_LIST(near_timers))
328
    {
329
      timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
330
      if (t->expires < x)
331
        x = t->expires;
332
    }
333
  return x;
334
}
335

    
336
void io_log_event(void *hook, void *data);
337

    
338
static void
339
tm_shot(void)
340
{
341
  timer *t;
342
  node *n, *m;
343

    
344
  if (first_far_timer <= now)
345
    {
346
      bird_clock_t limit = now + NEAR_TIMER_LIMIT;
347
      first_far_timer = TIME_INFINITY;
348
      n = HEAD(far_timers);
349
      while (m = n->next)
350
        {
351
          t = SKIP_BACK(timer, n, n);
352
          if (t->expires <= limit)
353
            {
354
              rem_node(n);
355
              tm_insert_near(t);
356
            }
357
          else if (t->expires < first_far_timer)
358
            first_far_timer = t->expires;
359
          n = m;
360
        }
361
    }
362
  while ((n = HEAD(near_timers)) -> next)
363
    {
364
      int delay;
365
      t = SKIP_BACK(timer, n, n);
366
      if (t->expires > now)
367
        break;
368
      rem_node(n);
369
      delay = t->expires - now;
370
      t->expires = 0;
371
      if (t->recurrent)
372
        {
373
          int i = t->recurrent - delay;
374
          if (i < 0)
375
            i = 0;
376
          tm_start(t, i);
377
        }
378
      io_log_event(t->hook, t->data);
379
      t->hook(t);
380
    }
381
}
382

    
383
/**
384
 * tm_parse_datetime - parse a date and time
385
 * @x: datetime string
386
 *
387
 * tm_parse_datetime() takes a textual representation of
388
 * a date and time (dd-mm-yyyy hh:mm:ss)
389
 * and converts it to the corresponding value of type &bird_clock_t.
390
 */
391
bird_clock_t
392
tm_parse_datetime(char *x)
393
{
394
  struct tm tm;
395
  int n;
396
  time_t t;
397

    
398
  if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n])
399
    return tm_parse_date(x);
400
  tm.tm_mon--;
401
  tm.tm_year -= 1900;
402
  t = mktime(&tm);
403
  if (t == (time_t) -1)
404
    return 0;
405
  return t;
406
}
407
/**
408
 * tm_parse_date - parse a date
409
 * @x: date string
410
 *
411
 * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
412
 * and converts it to the corresponding value of type &bird_clock_t.
413
 */
414
bird_clock_t
415
tm_parse_date(char *x)
416
{
417
  struct tm tm;
418
  int n;
419
  time_t t;
420

    
421
  if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
422
    return 0;
423
  tm.tm_mon--;
424
  tm.tm_year -= 1900;
425
  tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
426
  t = mktime(&tm);
427
  if (t == (time_t) -1)
428
    return 0;
429
  return t;
430
}
431

    
432
static void
433
tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta)
434
{
435
  static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
436
                                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
437

    
438
  if (delta < 20*3600)
439
    bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
440
  else if (delta < 360*86400)
441
    bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
442
  else
443
    bsprintf(x, "%d", tm->tm_year+1900);
444
}
445

    
446
#include "conf/conf.h"
447

    
448
/**
449
 * tm_format_datetime - convert date and time to textual representation
450
 * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
451
 * @fmt_spec: specification of resulting textual representation of the time
452
 * @t: time
453
 *
454
 * This function formats the given relative time value @t to a textual
455
 * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
456
 */
457
void
458
tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t)
459
{
460
  const char *fmt_used;
461
  struct tm *tm;
462
  bird_clock_t delta = now - t;
463
  t = now_real - delta;
464
  tm = localtime(&t);
465

    
466
  if (fmt_spec->fmt1 == NULL)
467
    return tm_format_reltime(x, tm, delta);
468

    
469
  if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit))
470
    fmt_used = fmt_spec->fmt1;
471
  else
472
    fmt_used = fmt_spec->fmt2;
473

    
474
  int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm);
475
  if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE))
476
    strcpy(x, "<too-long>");
477
}
478

    
479

    
480
/**
481
 * DOC: Sockets
482
 *
483
 * Socket resources represent network connections. Their data structure (&socket)
484
 * contains a lot of fields defining the exact type of the socket, the local and
485
 * remote addresses and ports, pointers to socket buffers and finally pointers to
486
 * hook functions to be called when new data have arrived to the receive buffer
487
 * (@rx_hook), when the contents of the transmit buffer have been transmitted
488
 * (@tx_hook) and when an error or connection close occurs (@err_hook).
489
 *
490
 * Freeing of sockets from inside socket hooks is perfectly safe.
491
 */
492

    
493
#ifndef SOL_IP
494
#define SOL_IP IPPROTO_IP
495
#endif
496

    
497
#ifndef SOL_IPV6
498
#define SOL_IPV6 IPPROTO_IPV6
499
#endif
500

    
501
#ifndef SOL_ICMPV6
502
#define SOL_ICMPV6 IPPROTO_ICMPV6
503
#endif
504

    
505

    
506
/*
507
 *        Sockaddr helper functions
508
 */
509

    
510
/* Size of the sockaddr structure for @af: sockaddr_in for AF_INET, sockaddr_in6 for anything else. */
static inline int
sockaddr_length(int af)
{
  if (af == AF_INET)
    return sizeof(struct sockaddr_in);

  return sizeof(struct sockaddr_in6);
}
512

    
513
static inline void
514
sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, struct iface *ifa, uint port)
515
{
516
  memset(sa, 0, sizeof(struct sockaddr_in));
517
#ifdef HAVE_SIN_LEN
518
  sa->sin_len = sizeof(struct sockaddr_in);
519
#endif
520
  sa->sin_family = AF_INET;
521
  sa->sin_port = htons(port);
522
  sa->sin_addr = ipa_to_in4(a);
523
}
524

    
525
static inline void
526
sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port)
527
{
528
  memset(sa, 0, sizeof(struct sockaddr_in6));
529
#ifdef SIN6_LEN
530
  sa->sin6_len = sizeof(struct sockaddr_in6);
531
#endif
532
  sa->sin6_family = AF_INET6;
533
  sa->sin6_port = htons(port);
534
  sa->sin6_flowinfo = 0;
535
  sa->sin6_addr = ipa_to_in6(a);
536

    
537
  if (ifa && ipa_is_link_local(a))
538
    sa->sin6_scope_id = ifa->index;
539
}
540

    
541
void
542
sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port)
543
{
544
  if (af == AF_INET)
545
    sockaddr_fill4((struct sockaddr_in *) sa, a, ifa, port);
546
  else if (af == AF_INET6)
547
    sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port);
548
  else
549
    bug("Unknown AF");
550
}
551

    
552
static inline void
553
sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, struct iface **ifa, uint *port)
554
{
555
  *port = ntohs(sa->sin_port);
556
  *a = ipa_from_in4(sa->sin_addr);
557
}
558

    
559
static inline void
560
sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port)
561
{
562
  *port = ntohs(sa->sin6_port);
563
  *a = ipa_from_in6(sa->sin6_addr);
564

    
565
  if (ifa && ipa_is_link_local(*a))
566
    *ifa = if_find_by_index(sa->sin6_scope_id);
567
}
568

    
569
int
570
sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port)
571
{
572
  if (sa->sa.sa_family != af)
573
    goto fail;
574

    
575
  if (af == AF_INET)
576
    sockaddr_read4((struct sockaddr_in *) sa, a, ifa, port);
577
  else if (af == AF_INET6)
578
    sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port);
579
  else
580
    goto fail;
581

    
582
  return 0;
583

    
584
 fail:
585
  *a = IPA_NONE;
586
  *port = 0;
587
  return -1;
588
}
589

    
590

    
591
/*
592
 *        IPv6 multicast syscalls
593
 */
594

    
595
/* Fortunately standardized in RFC 3493 */
596

    
597
/*
 * Initializer for struct ipv6_mreq: multicast group @maddr on interface @ifa.
 * @ifa is parenthesized so that any pointer expression can be passed safely.
 */
#define INIT_MREQ6(maddr,ifa) \
  { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = (ifa)->index }
599

    
600
static inline int
601
sk_setup_multicast6(sock *s)
602
{
603
  int index = s->iface->index;
604
  int ttl = s->ttl;
605
  int n = 0;
606

    
607
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
608
    ERR("IPV6_MULTICAST_IF");
609

    
610
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0)
611
    ERR("IPV6_MULTICAST_HOPS");
612

    
613
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0)
614
    ERR("IPV6_MULTICAST_LOOP");
615

    
616
  return 0;
617
}
618

    
619
static inline int
620
sk_join_group6(sock *s, ip_addr maddr)
621
{
622
  struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
623

    
624
  if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0)
625
    ERR("IPV6_JOIN_GROUP");
626

    
627
  return 0;
628
}
629

    
630
static inline int
631
sk_leave_group6(sock *s, ip_addr maddr)
632
{
633
  struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
634

    
635
  if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0)
636
    ERR("IPV6_LEAVE_GROUP");
637

    
638
  return 0;
639
}
640

    
641

    
642
/*
643
 *        IPv6 packet control messages
644
 */
645

    
646
/* Also standardized, in RFC 3542 */
647

    
648
/*
649
 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
650
 * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
651
 * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
652
 * RFC and we use IPV6_PKTINFO.
653
 */
654
#ifndef IPV6_RECVPKTINFO
655
#define IPV6_RECVPKTINFO IPV6_PKTINFO
656
#endif
657
/*
658
 * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
659
 */
660
#ifndef IPV6_RECVHOPLIMIT
661
#define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
662
#endif
663

    
664

    
665
#define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
666
#define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
667

    
668
static inline int
669
sk_request_cmsg6_pktinfo(sock *s)
670
{
671
  int y = 1;
672

    
673
  if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0)
674
    ERR("IPV6_RECVPKTINFO");
675

    
676
  return 0;
677
}
678

    
679
static inline int
680
sk_request_cmsg6_ttl(sock *s)
681
{
682
  int y = 1;
683

    
684
  if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0)
685
    ERR("IPV6_RECVHOPLIMIT");
686

    
687
  return 0;
688
}
689

    
690
static inline void
691
sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm)
692
{
693
  if (cm->cmsg_type == IPV6_PKTINFO)
694
  {
695
    struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm);
696
    s->laddr = ipa_from_in6(pi->ipi6_addr);
697
    s->lifindex = pi->ipi6_ifindex;
698
  }
699
}
700

    
701
static inline void
702
sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm)
703
{
704
  if (cm->cmsg_type == IPV6_HOPLIMIT)
705
    s->rcv_ttl = * (int *) CMSG_DATA(cm);
706
}
707

    
708
static inline void
709
sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
710
{
711
  struct cmsghdr *cm;
712
  struct in6_pktinfo *pi;
713
  int controllen = 0;
714

    
715
  msg->msg_control = cbuf;
716
  msg->msg_controllen = cbuflen;
717

    
718
  cm = CMSG_FIRSTHDR(msg);
719
  cm->cmsg_level = SOL_IPV6;
720
  cm->cmsg_type = IPV6_PKTINFO;
721
  cm->cmsg_len = CMSG_LEN(sizeof(*pi));
722
  controllen += CMSG_SPACE(sizeof(*pi));
723

    
724
  pi = (struct in6_pktinfo *) CMSG_DATA(cm);
725
  pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
726
  pi->ipi6_addr = ipa_to_in6(s->saddr);
727

    
728
  msg->msg_controllen = controllen;
729
}
730

    
731

    
732
/*
733
 *        Miscellaneous socket syscalls
734
 */
735

    
736
static inline int
737
sk_set_ttl4(sock *s, int ttl)
738
{
739
  if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0)
740
    ERR("IP_TTL");
741

    
742
  return 0;
743
}
744

    
745
static inline int
746
sk_set_ttl6(sock *s, int ttl)
747
{
748
  if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0)
749
    ERR("IPV6_UNICAST_HOPS");
750

    
751
  return 0;
752
}
753

    
754
static inline int
755
sk_set_tos4(sock *s, int tos)
756
{
757
  if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0)
758
    ERR("IP_TOS");
759

    
760
  return 0;
761
}
762

    
763
static inline int
764
sk_set_tos6(sock *s, int tos)
765
{
766
  if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0)
767
    ERR("IPV6_TCLASS");
768

    
769
  return 0;
770
}
771

    
772
static inline int
773
sk_set_high_port(sock *s)
774
{
775
  /* Port range setting is optional, ignore it if not supported */
776

    
777
#ifdef IP_PORTRANGE
778
  if (sk_is_ipv4(s))
779
  {
780
    int range = IP_PORTRANGE_HIGH;
781
    if (setsockopt(s->fd, SOL_IP, IP_PORTRANGE, &range, sizeof(range)) < 0)
782
      ERR("IP_PORTRANGE");
783
  }
784
#endif
785

    
786
#ifdef IPV6_PORTRANGE
787
  if (sk_is_ipv6(s))
788
  {
789
    int range = IPV6_PORTRANGE_HIGH;
790
    if (setsockopt(s->fd, SOL_IPV6, IPV6_PORTRANGE, &range, sizeof(range)) < 0)
791
      ERR("IPV6_PORTRANGE");
792
  }
793
#endif
794

    
795
  return 0;
796
}
797

    
798
/*
 * Validate the IPv4 header at @pkt and step over it. Returns a pointer to
 * the payload and reduces *len by the header length, or returns NULL
 * (leaving *len alone) when the packet is shorter than 20 bytes, is not
 * version 4, or its header length field is below 20 or beyond *len.
 */
static inline byte *
sk_skip_ip_header(byte *pkt, int *len)
{
  int hlen;

  if (*len < 20)
    return NULL;

  /* High nibble of the first byte is the IP version */
  if ((pkt[0] & 0xf0) != 0x40)
    return NULL;

  /* Low nibble is the header length in 32-bit words */
  hlen = (pkt[0] & 0x0f) * 4;
  if ((hlen < 20) || (hlen > *len))
    return NULL;

  *len -= hlen;
  return pkt + hlen;
}
811

    
812
byte *
813
sk_rx_buffer(sock *s, int *len)
814
{
815
  if (sk_is_ipv4(s) && (s->type == SK_IP))
816
    return sk_skip_ip_header(s->rbuf, len);
817
  else
818
    return s->rbuf;
819
}
820

    
821

    
822
/*
823
 *        Public socket functions
824
 */
825

    
826
/**
827
 * sk_setup_multicast - enable multicast for given socket
828
 * @s: socket
829
 *
830
 * Prepare transmission of multicast packets for given datagram socket.
831
 * The socket must have defined @iface.
832
 *
833
 * Result: 0 for success, -1 for an error.
834
 */
835

    
836
int
837
sk_setup_multicast(sock *s)
838
{
839
  ASSERT(s->iface);
840

    
841
  if (sk_is_ipv4(s))
842
    return sk_setup_multicast4(s);
843
  else
844
    return sk_setup_multicast6(s);
845
}
846

    
847
/**
848
 * sk_join_group - join multicast group for given socket
849
 * @s: socket
850
 * @maddr: multicast address
851
 *
852
 * Join multicast group for given datagram socket and associated interface.
853
 * The socket must have defined @iface.
854
 *
855
 * Result: 0 for success, -1 for an error.
856
 */
857

    
858
int
859
sk_join_group(sock *s, ip_addr maddr)
860
{
861
  if (sk_is_ipv4(s))
862
    return sk_join_group4(s, maddr);
863
  else
864
    return sk_join_group6(s, maddr);
865
}
866

    
867
/**
868
 * sk_leave_group - leave multicast group for given socket
869
 * @s: socket
870
 * @maddr: multicast address
871
 *
872
 * Leave multicast group for given datagram socket and associated interface.
873
 * The socket must have defined @iface.
874
 *
875
 * Result: 0 for success, -1 for an error.
876
 */
877

    
878
int
879
sk_leave_group(sock *s, ip_addr maddr)
880
{
881
  if (sk_is_ipv4(s))
882
    return sk_leave_group4(s, maddr);
883
  else
884
    return sk_leave_group6(s, maddr);
885
}
886

    
887
/**
888
 * sk_setup_broadcast - enable broadcast for given socket
889
 * @s: socket
890
 *
891
 * Allow reception and transmission of broadcast packets for given datagram
892
 * socket. The socket must have defined @iface. For transmission, packets should
893
 * be sent to the @brd address of @iface.
894
 *
895
 * Result: 0 for success, -1 for an error.
896
 */
897

    
898
int
899
sk_setup_broadcast(sock *s)
900
{
901
  int y = 1;
902

    
903
  if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0)
904
    ERR("SO_BROADCAST");
905

    
906
  return 0;
907
}
908

    
909
/**
910
 * sk_set_ttl - set transmit TTL for given socket
911
 * @s: socket
912
 * @ttl: TTL value
913
 *
914
 * Set TTL for already opened connections when TTL was not set before. Useful
915
 * for accepted connections when different ones should have different TTL.
916
 *
917
 * Result: 0 for success, -1 for an error.
918
 */
919

    
920
int
921
sk_set_ttl(sock *s, int ttl)
922
{
923
  s->ttl = ttl;
924

    
925
  if (sk_is_ipv4(s))
926
    return sk_set_ttl4(s, ttl);
927
  else
928
    return sk_set_ttl6(s, ttl);
929
}
930

    
931
/**
932
 * sk_set_min_ttl - set minimal accepted TTL for given socket
933
 * @s: socket
934
 * @ttl: TTL value
935
 *
936
 * Set minimal accepted TTL for given socket. Can be used for TTL security
937
 * implementations.
938
 *
939
 * Result: 0 for success, -1 for an error.
940
 */
941

    
942
int
943
sk_set_min_ttl(sock *s, int ttl)
944
{
945
  if (sk_is_ipv4(s))
946
    return sk_set_min_ttl4(s, ttl);
947
  else
948
    return sk_set_min_ttl6(s, ttl);
949
}
950

    
951
#if 0
952
/**
953
 * sk_set_md5_auth - add / remove MD5 security association for given socket
954
 * @s: socket
955
 * @local: IP address of local side
956
 * @remote: IP address of remote side
957
 * @ifa: Interface for link-local IP address
958
 * @passwd: Password used for MD5 authentication
959
 * @setkey: Update also system SA/SP database
960
 *
961
 * In TCP MD5 handling code in kernel, there is a set of security associations
962
 * used for choosing password and other authentication parameters according to
963
 * the local and remote address. This function is useful for listening socket,
964
 * for active sockets it may be enough to set s->password field.
965
 *
966
 * When called with passwd != NULL, the new pair is added,
967
 * When called with passwd == NULL, the existing pair is removed.
968
 *
969
 * Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are
970
 * stored in global SA/SP database (but the behavior also must be enabled on
971
 * per-socket basis). In case of multiple sockets to the same neighbor, the
972
 * socket-specific state must be configured for each socket while global state
973
 * just once per src-dst pair. The @setkey argument controls whether the global
974
 * state (SA/SP database) is also updated.
975
 *
976
 * Result: 0 for success, -1 for an error.
977
 */
978

979
int
980
sk_set_md5_auth(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, char *passwd, int setkey)
981
{ DUMMY; }
982
#endif
983

    
984
/**
985
 * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
986
 * @s: socket
987
 * @offset: offset
988
 *
989
 * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
990
 * kernel will automatically fill it for outgoing packets and check it for
991
 * incoming packets. Should not be used on ICMPv6 sockets, where the position is
992
 * known to the kernel.
993
 *
994
 * Result: 0 for success, -1 for an error.
995
 */
996

    
997
int
998
sk_set_ipv6_checksum(sock *s, int offset)
999
{
1000
  if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
1001
    ERR("IPV6_CHECKSUM");
1002

    
1003
  return 0;
1004
}
1005

    
1006
int
1007
sk_set_icmp6_filter(sock *s, int p1, int p2)
1008
{
1009
  /* a bit of lame interface, but it is here only for Radv */
1010
  struct icmp6_filter f;
1011

    
1012
  ICMP6_FILTER_SETBLOCKALL(&f);
1013
  ICMP6_FILTER_SETPASS(p1, &f);
1014
  ICMP6_FILTER_SETPASS(p2, &f);
1015

    
1016
  if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
1017
    ERR("ICMP6_FILTER");
1018

    
1019
  return 0;
1020
}
1021

    
1022
void
1023
sk_log_error(sock *s, const char *p)
1024
{
1025
  log(L_ERR "%s: Socket error: %s%#m", p, s->err);
1026
}
1027

    
1028

    
1029
/*
1030
 *        Actual struct birdsock code
1031
 */
1032

    
1033
/* List that sockets are unlinked from in sk_free() */
static list sock_list;

/* Iteration cursors; sk_free() advances them when the socket they point to is freed */
static struct birdsock *current_sock;
static struct birdsock *stored_sock;
1036

    
1037
/* Return the socket following @s in its list, or NULL when @s is the last one. */
static inline sock *
sk_next(sock *s)
{
  node *succ = s->n.next;

  /* A successor without a next pointer is the tail sentinel, not a socket */
  return succ->next ? SKIP_BACK(sock, n, succ) : NULL;
}
1045

    
1046
static void
1047
sk_alloc_bufs(sock *s)
1048
{
1049
  if (!s->rbuf && s->rbsize)
1050
    s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
1051
  s->rpos = s->rbuf;
1052
  if (!s->tbuf && s->tbsize)
1053
    s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
1054
  s->tpos = s->ttx = s->tbuf;
1055
}
1056

    
1057
static void
1058
sk_free_bufs(sock *s)
1059
{
1060
  if (s->rbuf_alloc)
1061
  {
1062
    xfree(s->rbuf_alloc);
1063
    s->rbuf = s->rbuf_alloc = NULL;
1064
  }
1065
  if (s->tbuf_alloc)
1066
  {
1067
    xfree(s->tbuf_alloc);
1068
    s->tbuf = s->tbuf_alloc = NULL;
1069
  }
1070
}
1071

    
1072
static void
1073
sk_free(resource *r)
1074
{
1075
  sock *s = (sock *) r;
1076

    
1077
  sk_free_bufs(s);
1078
  if (s->fd >= 0)
1079
  {
1080
    close(s->fd);
1081

    
1082
    /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
1083
    if (s->flags & SKF_THREAD)
1084
      return;
1085

    
1086
    if (s == current_sock)
1087
      current_sock = sk_next(s);
1088
    if (s == stored_sock)
1089
      stored_sock = sk_next(s);
1090
    rem_node(&s->n);
1091
  }
1092
}
1093

    
1094
void
1095
sk_set_rbsize(sock *s, uint val)
1096
{
1097
  ASSERT(s->rbuf_alloc == s->rbuf);
1098

    
1099
  if (s->rbsize == val)
1100
    return;
1101

    
1102
  s->rbsize = val;
1103
  xfree(s->rbuf_alloc);
1104
  s->rbuf_alloc = xmalloc(val);
1105
  s->rpos = s->rbuf = s->rbuf_alloc;
1106
}
1107

    
1108
void
1109
sk_set_tbsize(sock *s, uint val)
1110
{
1111
  ASSERT(s->tbuf_alloc == s->tbuf);
1112

    
1113
  if (s->tbsize == val)
1114
    return;
1115

    
1116
  byte *old_tbuf = s->tbuf;
1117

    
1118
  s->tbsize = val;
1119
  s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val);
1120
  s->tpos = s->tbuf + (s->tpos - old_tbuf);
1121
  s->ttx  = s->tbuf + (s->ttx  - old_tbuf);
1122
}
1123

    
1124
void
1125
sk_set_tbuf(sock *s, void *tbuf)
1126
{
1127
  s->tbuf = tbuf ?: s->tbuf_alloc;
1128
  s->ttx = s->tpos = s->tbuf;
1129
}
1130

    
1131
void
1132
sk_reallocate(sock *s)
1133
{
1134
  sk_free_bufs(s);
1135
  sk_alloc_bufs(s);
1136
}
1137

    
1138
static void
1139
sk_dump(resource *r)
1140
{
1141
  sock *s = (sock *) r;
1142
  static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "DEL!" };
1143

    
1144
  debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
1145
        sk_type_names[s->type],
1146
        s->data,
1147
        s->saddr,
1148
        s->sport,
1149
        s->daddr,
1150
        s->dport,
1151
        s->tos,
1152
        s->ttl,
1153
        s->iface ? s->iface->name : "none");
1154
}
1155

    
1156
static struct resclass sk_class = {
1157
  "Socket",
1158
  sizeof(sock),
1159
  sk_free,
1160
  sk_dump,
1161
  NULL,
1162
  NULL
1163
};
1164

    
1165
/**
1166
 * sk_new - create a socket
1167
 * @p: pool
1168
 *
1169
 * This function creates a new socket resource. If you want to use it,
1170
 * you need to fill in all the required fields of the structure and
1171
 * call sk_open() to do the actual opening of the socket.
1172
 *
1173
 * The real function name is sock_new(), sk_new() is a macro wrapper
1174
 * to avoid collision with OpenSSL.
1175
 */
1176
sock *
1177
sock_new(pool *p)
1178
{
1179
  sock *s = ralloc(p, &sk_class);
1180
  s->pool = p;
1181
  // s->saddr = s->daddr = IPA_NONE;
1182
  s->tos = s->priority = s->ttl = -1;
1183
  s->fd = -1;
1184
  return s;
1185
}
1186

    
1187
static int
1188
sk_setup(sock *s)
1189
{
1190
  int y = 1;
1191
  int fd = s->fd;
1192

    
1193
  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
1194
    ERR("O_NONBLOCK");
1195

    
1196
  if (!s->af)
1197
    return 0;
1198

    
1199
  if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
1200
    s->flags |= SKF_PKTINFO;
1201

    
1202
#ifdef CONFIG_USE_HDRINCL
1203
  if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO))
1204
  {
1205
    s->flags &= ~SKF_PKTINFO;
1206
    s->flags |= SKF_HDRINCL;
1207
    if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0)
1208
      ERR("IP_HDRINCL");
1209
  }
1210
#endif
1211

    
1212
  if (s->iface)
1213
  {
1214
#ifdef SO_BINDTODEVICE
1215
    struct ifreq ifr = {};
1216
    strcpy(ifr.ifr_name, s->iface->name);
1217
    if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
1218
      ERR("SO_BINDTODEVICE");
1219
#endif
1220

    
1221
#ifdef CONFIG_UNIX_DONTROUTE
1222
    if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0)
1223
      ERR("SO_DONTROUTE");
1224
#endif
1225
  }
1226

    
1227
  if (s->priority >= 0)
1228
    if (sk_set_priority(s, s->priority) < 0)
1229
      return -1;
1230

    
1231
  if (sk_is_ipv4(s))
1232
  {
1233
    if (s->flags & SKF_LADDR_RX)
1234
      if (sk_request_cmsg4_pktinfo(s) < 0)
1235
        return -1;
1236

    
1237
    if (s->flags & SKF_TTL_RX)
1238
      if (sk_request_cmsg4_ttl(s) < 0)
1239
        return -1;
1240

    
1241
    if ((s->type == SK_UDP) || (s->type == SK_IP))
1242
      if (sk_disable_mtu_disc4(s) < 0)
1243
        return -1;
1244

    
1245
    if (s->ttl >= 0)
1246
      if (sk_set_ttl4(s, s->ttl) < 0)
1247
        return -1;
1248

    
1249
    if (s->tos >= 0)
1250
      if (sk_set_tos4(s, s->tos) < 0)
1251
        return -1;
1252
  }
1253

    
1254
  if (sk_is_ipv6(s))
1255
  {
1256
    if (s->flags & SKF_V6ONLY)
1257
      if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0)
1258
        ERR("IPV6_V6ONLY");
1259

    
1260
    if (s->flags & SKF_LADDR_RX)
1261
      if (sk_request_cmsg6_pktinfo(s) < 0)
1262
        return -1;
1263

    
1264
    if (s->flags & SKF_TTL_RX)
1265
      if (sk_request_cmsg6_ttl(s) < 0)
1266
        return -1;
1267

    
1268
    if ((s->type == SK_UDP) || (s->type == SK_IP))
1269
      if (sk_disable_mtu_disc6(s) < 0)
1270
        return -1;
1271

    
1272
    if (s->ttl >= 0)
1273
      if (sk_set_ttl6(s, s->ttl) < 0)
1274
        return -1;
1275

    
1276
    if (s->tos >= 0)
1277
      if (sk_set_tos6(s, s->tos) < 0)
1278
        return -1;
1279
  }
1280

    
1281
  return 0;
1282
}
1283

    
1284
static void
1285
sk_insert(sock *s)
1286
{
1287
  add_tail(&sock_list, &s->n);
1288
}
1289

    
1290
static void
1291
sk_tcp_connected(sock *s)
1292
{
1293
  sockaddr sa;
1294
  int sa_len = sizeof(sa);
1295

    
1296
  if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) ||
1297
      (sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0))
1298
    log(L_WARN "SOCK: Cannot get local IP address for TCP>");
1299

    
1300
  s->type = SK_TCP;
1301
  sk_alloc_bufs(s);
1302
  s->tx_hook(s);
1303
}
1304

    
1305
static int
1306
sk_passive_connected(sock *s, int type)
1307
{
1308
  sockaddr loc_sa, rem_sa;
1309
  int loc_sa_len = sizeof(loc_sa);
1310
  int rem_sa_len = sizeof(rem_sa);
1311

    
1312
  int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len);
1313
  if (fd < 0)
1314
  {
1315
    if ((errno != EINTR) && (errno != EAGAIN))
1316
      s->err_hook(s, errno);
1317
    return 0;
1318
  }
1319

    
1320
  sock *t = sk_new(s->pool);
1321
  t->type = type;
1322
  t->fd = fd;
1323
  t->af = s->af;
1324
  t->ttl = s->ttl;
1325
  t->tos = s->tos;
1326
  t->rbsize = s->rbsize;
1327
  t->tbsize = s->tbsize;
1328

    
1329
  if (type == SK_TCP)
1330
  {
1331
    if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) ||
1332
        (sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0))
1333
      log(L_WARN "SOCK: Cannot get local IP address for TCP<");
1334

    
1335
    if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0)
1336
      log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
1337
  }
1338

    
1339
  if (sk_setup(t) < 0)
1340
  {
1341
    /* FIXME: Call err_hook instead ? */
1342
    log(L_ERR "SOCK: Incoming connection: %s%#m", t->err);
1343

    
1344
    /* FIXME: handle it better in rfree() */
1345
    close(t->fd);
1346
    t->fd = -1;
1347
    rfree(t);
1348
    return 1;
1349
  }
1350

    
1351
  sk_insert(t);
1352
  sk_alloc_bufs(t);
1353
  s->rx_hook(t, 0);
1354
  return 1;
1355
}
1356

    
1357
/**
1358
 * sk_open - open a socket
1359
 * @s: socket
1360
 *
1361
 * This function takes a socket resource created by sk_new() and
1362
 * initialized by the user and binds a corresponding network connection
1363
 * to it.
1364
 *
1365
 * Result: 0 for success, -1 for an error.
1366
 */
1367
int
1368
sk_open(sock *s)
1369
{
1370
  int af = BIRD_AF;
1371
  int fd = -1;
1372
  int do_bind = 0;
1373
  int bind_port = 0;
1374
  ip_addr bind_addr = IPA_NONE;
1375
  sockaddr sa;
1376

    
1377
  switch (s->type)
1378
  {
1379
  case SK_TCP_ACTIVE:
1380
    s->ttx = "";                        /* Force s->ttx != s->tpos */
1381
    /* Fall thru */
1382
  case SK_TCP_PASSIVE:
1383
    fd = socket(af, SOCK_STREAM, IPPROTO_TCP);
1384
    bind_port = s->sport;
1385
    bind_addr = s->saddr;
1386
    do_bind = bind_port || ipa_nonzero(bind_addr);
1387
    break;
1388

    
1389
  case SK_UDP:
1390
    fd = socket(af, SOCK_DGRAM, IPPROTO_UDP);
1391
    bind_port = s->sport;
1392
    bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1393
    do_bind = 1;
1394
    break;
1395

    
1396
  case SK_IP:
1397
    fd = socket(af, SOCK_RAW, s->dport);
1398
    bind_port = 0;
1399
    bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1400
    do_bind = ipa_nonzero(bind_addr);
1401
    break;
1402

    
1403
  case SK_MAGIC:
1404
    af = 0;
1405
    fd = s->fd;
1406
    break;
1407

    
1408
  default:
1409
    bug("sk_open() called for invalid sock type %d", s->type);
1410
  }
1411

    
1412
  if (fd < 0)
1413
    ERR("socket");
1414

    
1415
  s->af = af;
1416
  s->fd = fd;
1417

    
1418
  if (sk_setup(s) < 0)
1419
    goto err;
1420

    
1421
  if (do_bind)
1422
  {
1423
    if (bind_port)
1424
    {
1425
      int y = 1;
1426

    
1427
      if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0)
1428
        ERR2("SO_REUSEADDR");
1429

    
1430
#ifdef CONFIG_NO_IFACE_BIND
1431
      /* Workaround missing ability to bind to an iface */
1432
      if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr))
1433
      {
1434
        if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0)
1435
          ERR2("SO_REUSEPORT");
1436
      }
1437
#endif
1438
    }
1439
    else
1440
      if (s->flags & SKF_HIGH_PORT)
1441
        if (sk_set_high_port(s) < 0)
1442
          log(L_WARN "Socket error: %s%#m", s->err);
1443

    
1444
    sockaddr_fill(&sa, af, bind_addr, s->iface, bind_port);
1445
    if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
1446
      ERR2("bind");
1447
  }
1448

    
1449
  if (s->password)
1450
    if (sk_set_md5_auth(s, s->saddr, s->daddr, s->iface, s->password, 0) < 0)
1451
      goto err;
1452

    
1453
  switch (s->type)
1454
  {
1455
  case SK_TCP_ACTIVE:
1456
    sockaddr_fill(&sa, af, s->daddr, s->iface, s->dport);
1457
    if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0)
1458
      sk_tcp_connected(s);
1459
    else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
1460
             errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
1461
      ERR2("connect");
1462
    break;
1463

    
1464
  case SK_TCP_PASSIVE:
1465
    if (listen(fd, 8) < 0)
1466
      ERR2("listen");
1467
    break;
1468

    
1469
  case SK_MAGIC:
1470
    break;
1471

    
1472
  default:
1473
    sk_alloc_bufs(s);
1474
  }
1475

    
1476
  if (!(s->flags & SKF_THREAD))
1477
    sk_insert(s);
1478
  return 0;
1479

    
1480
err:
1481
  close(fd);
1482
  s->fd = -1;
1483
  return -1;
1484
}
1485

    
1486
int
1487
sk_open_unix(sock *s, char *name)
1488
{
1489
  struct sockaddr_un sa;
1490
  int fd;
1491

    
1492
  /* We are sloppy during error (leak fd and not set s->err), but we die anyway */
1493

    
1494
  fd = socket(AF_UNIX, SOCK_STREAM, 0);
1495
  if (fd < 0)
1496
    return -1;
1497

    
1498
  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
1499
    return -1;
1500

    
1501
  /* Path length checked in test_old_bird() */
1502
  sa.sun_family = AF_UNIX;
1503
  strcpy(sa.sun_path, name);
1504

    
1505
  if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
1506
    return -1;
1507

    
1508
  if (listen(fd, 8) < 0)
1509
    return -1;
1510

    
1511
  s->fd = fd;
1512
  sk_insert(s);
1513
  return 0;
1514
}
1515

    
1516

    
1517
/* Control-message buffer sizes, large enough for either address family */
#define CMSG_RX_SPACE \
  MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)
#define CMSG_TX_SPACE \
  MAX(CMSG4_SPACE_PKTINFO, CMSG6_SPACE_PKTINFO)
1520

    
1521
static void
1522
sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
1523
{
1524
  if (sk_is_ipv4(s))
1525
    sk_prepare_cmsgs4(s, msg, cbuf, cbuflen);
1526
  else
1527
    sk_prepare_cmsgs6(s, msg, cbuf, cbuflen);
1528
}
1529

    
1530
static void
1531
sk_process_cmsgs(sock *s, struct msghdr *msg)
1532
{
1533
  struct cmsghdr *cm;
1534

    
1535
  s->laddr = IPA_NONE;
1536
  s->lifindex = 0;
1537
  s->rcv_ttl = -1;
1538

    
1539
  for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
1540
  {
1541
    if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s))
1542
    {
1543
      sk_process_cmsg4_pktinfo(s, cm);
1544
      sk_process_cmsg4_ttl(s, cm);
1545
    }
1546

    
1547
    if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s))
1548
    {
1549
      sk_process_cmsg6_pktinfo(s, cm);
1550
      sk_process_cmsg6_ttl(s, cm);
1551
    }
1552
  }
1553
}
1554

    
1555

    
1556
static inline int
1557
sk_sendmsg(sock *s)
1558
{
1559
  struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
1560
  byte cmsg_buf[CMSG_TX_SPACE];
1561
  sockaddr dst;
1562

    
1563
  sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport);
1564

    
1565
  struct msghdr msg = {
1566
    .msg_name = &dst.sa,
1567
    .msg_namelen = SA_LEN(dst),
1568
    .msg_iov = &iov,
1569
    .msg_iovlen = 1
1570
  };
1571

    
1572
#ifdef CONFIG_USE_HDRINCL
1573
  byte hdr[20];
1574
  struct iovec iov2[2] = { {hdr, 20}, iov };
1575

    
1576
  if (s->flags & SKF_HDRINCL)
1577
  {
1578
    sk_prepare_ip_header(s, hdr, iov.iov_len);
1579
    msg.msg_iov = iov2;
1580
    msg.msg_iovlen = 2;
1581
  }
1582
#endif
1583

    
1584
  if (s->flags & SKF_PKTINFO)
1585
    sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
1586

    
1587
  return sendmsg(s->fd, &msg, 0);
1588
}
1589

    
1590
static inline int
1591
sk_recvmsg(sock *s)
1592
{
1593
  struct iovec iov = {s->rbuf, s->rbsize};
1594
  byte cmsg_buf[CMSG_RX_SPACE];
1595
  sockaddr src;
1596

    
1597
  struct msghdr msg = {
1598
    .msg_name = &src.sa,
1599
    .msg_namelen = sizeof(src), // XXXX ??
1600
    .msg_iov = &iov,
1601
    .msg_iovlen = 1,
1602
    .msg_control = cmsg_buf,
1603
    .msg_controllen = sizeof(cmsg_buf),
1604
    .msg_flags = 0
1605
  };
1606

    
1607
  int rv = recvmsg(s->fd, &msg, 0);
1608
  if (rv < 0)
1609
    return rv;
1610

    
1611
  //ifdef IPV4
1612
  //  if (cf_type == SK_IP)
1613
  //    rv = ipv4_skip_header(pbuf, rv);
1614
  //endif
1615

    
1616
  sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport);
1617
  sk_process_cmsgs(s, &msg);
1618

    
1619
  if (msg.msg_flags & MSG_TRUNC)
1620
    s->flags |= SKF_TRUNCATED;
1621
  else
1622
    s->flags &= ~SKF_TRUNCATED;
1623

    
1624
  return rv;
1625
}
1626

    
1627

    
1628
/* Rewind both TX cursors to the buffer start, i.e. nothing is pending. */
static inline void
reset_tx_buffer(sock *s)
{
  s->ttx = s->tpos = s->tbuf;
}
1629

    
1630
static int
1631
sk_maybe_write(sock *s)
1632
{
1633
  int e;
1634

    
1635
  switch (s->type)
1636
  {
1637
  case SK_TCP:
1638
  case SK_MAGIC:
1639
  case SK_UNIX:
1640
    while (s->ttx != s->tpos)
1641
    {
1642
      e = write(s->fd, s->ttx, s->tpos - s->ttx);
1643

    
1644
      if (e < 0)
1645
      {
1646
        if (errno != EINTR && errno != EAGAIN)
1647
        {
1648
          reset_tx_buffer(s);
1649
          /* EPIPE is just a connection close notification during TX */
1650
          s->err_hook(s, (errno != EPIPE) ? errno : 0);
1651
          return -1;
1652
        }
1653
        return 0;
1654
      }
1655
      s->ttx += e;
1656
    }
1657
    reset_tx_buffer(s);
1658
    return 1;
1659

    
1660
  case SK_UDP:
1661
  case SK_IP:
1662
    {
1663
      if (s->tbuf == s->tpos)
1664
        return 1;
1665

    
1666
      e = sk_sendmsg(s);
1667

    
1668
      if (e < 0)
1669
      {
1670
        if (errno != EINTR && errno != EAGAIN)
1671
        {
1672
          reset_tx_buffer(s);
1673
          s->err_hook(s, errno);
1674
          return -1;
1675
        }
1676

    
1677
        if (!s->tx_hook)
1678
          reset_tx_buffer(s);
1679
        return 0;
1680
      }
1681
      reset_tx_buffer(s);
1682
      return 1;
1683
    }
1684
  default:
1685
    bug("sk_maybe_write: unknown socket type %d", s->type);
1686
  }
1687
}
1688

    
1689
int
1690
sk_rx_ready(sock *s)
1691
{
1692
  int rv;
1693
  struct pollfd pfd = { .fd = s->fd };
1694
  pfd.events |= POLLIN;
1695

    
1696
 redo:
1697
  rv = poll(&pfd, 1, 0);
1698

    
1699
  if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
1700
    goto redo;
1701

    
1702
  return rv;
1703
}
1704

    
1705
/**
1706
 * sk_send - send data to a socket
1707
 * @s: socket
1708
 * @len: number of bytes to send
1709
 *
1710
 * This function sends @len bytes of data prepared in the
1711
 * transmit buffer of the socket @s to the network connection.
1712
 * If the packet can be sent immediately, it does so and returns
1713
 * 1, else it queues the packet for later processing, returns 0
1714
 * and calls the @tx_hook of the socket when the transmission
1715
 * takes place.
1716
 */
1717
int
1718
sk_send(sock *s, unsigned len)
1719
{
1720
  s->ttx = s->tbuf;
1721
  s->tpos = s->tbuf + len;
1722
  return sk_maybe_write(s);
1723
}
1724

    
1725
/**
1726
 * sk_send_to - send data to a specific destination
1727
 * @s: socket
1728
 * @len: number of bytes to send
1729
 * @addr: IP address to send the packet to
1730
 * @port: port to send the packet to
1731
 *
1732
 * This is a sk_send() replacement for connection-less packet sockets
1733
 * which allows destination of the packet to be chosen dynamically.
1734
 * Raw IP sockets should use 0 for @port.
1735
 */
1736
int
1737
sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
1738
{
1739
  s->daddr = addr;
1740
  if (port)
1741
    s->dport = port;
1742

    
1743
  s->ttx = s->tbuf;
1744
  s->tpos = s->tbuf + len;
1745
  return sk_maybe_write(s);
1746
}
1747

    
1748
/*
1749
int
1750
sk_send_full(sock *s, unsigned len, struct iface *ifa,
1751
             ip_addr saddr, ip_addr daddr, unsigned dport)
1752
{
1753
  s->iface = ifa;
1754
  s->saddr = saddr;
1755
  s->daddr = daddr;
1756
  s->dport = dport;
1757
  s->ttx = s->tbuf;
1758
  s->tpos = s->tbuf + len;
1759
  return sk_maybe_write(s);
1760
}
1761
*/
1762

    
1763
 /* sk_read() and sk_write() are called from BFD's event loop */
1764

    
1765
int
1766
sk_read(sock *s, int revents)
1767
{
1768
  switch (s->type)
1769
  {
1770
  case SK_TCP_PASSIVE:
1771
    return sk_passive_connected(s, SK_TCP);
1772

    
1773
  case SK_UNIX_PASSIVE:
1774
    return sk_passive_connected(s, SK_UNIX);
1775

    
1776
  case SK_TCP:
1777
  case SK_UNIX:
1778
    {
1779
      int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
1780

    
1781
      if (c < 0)
1782
      {
1783
        if (errno != EINTR && errno != EAGAIN)
1784
          s->err_hook(s, errno);
1785
        else if (errno == EAGAIN && !(revents & POLLIN))
1786
        {
1787
          log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents);
1788
          s->err_hook(s, 0);
1789
        }
1790
      }
1791
      else if (!c)
1792
        s->err_hook(s, 0);
1793
      else
1794
      {
1795
        s->rpos += c;
1796
        if (s->rx_hook(s, s->rpos - s->rbuf))
1797
        {
1798
          /* We need to be careful since the socket could have been deleted by the hook */
1799
          if (current_sock == s)
1800
            s->rpos = s->rbuf;
1801
        }
1802
        return 1;
1803
      }
1804
      return 0;
1805
    }
1806

    
1807
  case SK_MAGIC:
1808
    return s->rx_hook(s, 0);
1809

    
1810
  default:
1811
    {
1812
      int e = sk_recvmsg(s);
1813

    
1814
      if (e < 0)
1815
      {
1816
        if (errno != EINTR && errno != EAGAIN)
1817
          s->err_hook(s, errno);
1818
        return 0;
1819
      }
1820

    
1821
      s->rpos = s->rbuf + e;
1822
      s->rx_hook(s, e);
1823
      return 1;
1824
    }
1825
  }
1826
}
1827

    
1828
int
1829
sk_write(sock *s)
1830
{
1831
  switch (s->type)
1832
  {
1833
  case SK_TCP_ACTIVE:
1834
    {
1835
      sockaddr sa;
1836
      sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
1837

    
1838
      if (connect(s->fd, &sa.sa, SA_LEN(sa)) >= 0 || errno == EISCONN)
1839
        sk_tcp_connected(s);
1840
      else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
1841
        s->err_hook(s, errno);
1842
      return 0;
1843
    }
1844

    
1845
  default:
1846
    if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
1847
    {
1848
      if (s->tx_hook)
1849
        s->tx_hook(s);
1850
      return 1;
1851
    }
1852
    return 0;
1853
  }
1854
}
1855

    
1856
void
1857
sk_dump_all(void)
1858
{
1859
  node *n;
1860
  sock *s;
1861

    
1862
  debug("Open sockets:\n");
1863
  WALK_LIST(n, sock_list)
1864
  {
1865
    s = SKIP_BACK(sock, n, n);
1866
    debug("%p ", s);
1867
    sk_dump(&s->r);
1868
  }
1869
  debug("\n");
1870
}
1871

    
1872

    
1873
/*
1874
 *        Internal event log and watchdog
1875
 */
1876

    
1877
#define EVENT_LOG_LENGTH 32
1878

    
1879
struct event_log_entry
1880
{
1881
  void *hook;
1882
  void *data;
1883
  btime timestamp;
1884
  btime duration;
1885
};
1886

    
1887
static struct event_log_entry event_log[EVENT_LOG_LENGTH];
1888
static struct event_log_entry *event_open;
1889
static int event_log_pos, event_log_num, watchdog_active;
1890
static btime last_time;
1891
static btime loop_time;
1892

    
1893
static void
1894
io_update_time(void)
1895
{
1896
  struct timespec ts;
1897
  int rv;
1898

    
1899
  if (!clock_monotonic_available)
1900
    return;
1901

    
1902
  /*
1903
   * This is third time-tracking procedure (after update_times() above and
1904
   * times_update() in BFD), dedicated to internal event log and latency
1905
   * tracking. Hopefully, we consolidate these sometimes.
1906
   */
1907

    
1908
  rv = clock_gettime(CLOCK_MONOTONIC, &ts);
1909
  if (rv < 0)
1910
    die("clock_gettime: %m");
1911

    
1912
  last_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000);
1913

    
1914
  if (event_open)
1915
  {
1916
    event_open->duration = last_time - event_open->timestamp;
1917

    
1918
    if (event_open->duration > config->latency_limit)
1919
      log(L_WARN "Event 0x%p 0x%p took %d ms",
1920
          event_open->hook, event_open->data, (int) (event_open->duration TO_MS));
1921

    
1922
    event_open = NULL;
1923
  }
1924
}
1925

    
1926
/**
1927
 * io_log_event - mark approaching event into event log
1928
 * @hook: event hook address
1929
 * @data: event data address
1930
 *
1931
 * Store info (hook, data, timestamp) about the following internal event into
1932
 * a circular event log (@event_log). When latency tracking is enabled, the log
1933
 * entry is kept open (in @event_open) so the duration can be filled later.
1934
 */
1935
void
1936
io_log_event(void *hook, void *data)
1937
{
1938
  if (config->latency_debug)
1939
    io_update_time();
1940

    
1941
  struct event_log_entry *en = event_log + event_log_pos;
1942

    
1943
  en->hook = hook;
1944
  en->data = data;
1945
  en->timestamp = last_time;
1946
  en->duration = 0;
1947

    
1948
  event_log_num++;
1949
  event_log_pos++;
1950
  event_log_pos %= EVENT_LOG_LENGTH;
1951

    
1952
  event_open = config->latency_debug ? en : NULL;
1953
}
1954

    
1955
static inline void
1956
io_close_event(void)
1957
{
1958
  if (event_open)
1959
    io_update_time();
1960
}
1961

    
1962
void
1963
io_log_dump(void)
1964
{
1965
  int i;
1966

    
1967
  log(L_DEBUG "Event log:");
1968
  for (i = 0; i < EVENT_LOG_LENGTH; i++)
1969
  {
1970
    struct event_log_entry *en = event_log + (event_log_pos + i) % EVENT_LOG_LENGTH;
1971
    if (en->hook)
1972
      log(L_DEBUG "  Event 0x%p 0x%p at %8d for %d ms", en->hook, en->data,
1973
          (int) ((last_time - en->timestamp) TO_MS), (int) (en->duration TO_MS));
1974
  }
1975
}
1976

    
1977
void
1978
watchdog_sigalrm(int sig UNUSED)
1979
{
1980
  /* Update last_time and duration, but skip latency check */
1981
  config->latency_limit = 0xffffffff;
1982
  io_update_time();
1983

    
1984
  /* We want core dump */
1985
  abort();
1986
}
1987

    
1988
static inline void
1989
watchdog_start1(void)
1990
{
1991
  io_update_time();
1992

    
1993
  loop_time = last_time;
1994
}
1995

    
1996
static inline void
1997
watchdog_start(void)
1998
{
1999
  io_update_time();
2000

    
2001
  loop_time = last_time;
2002
  event_log_num = 0;
2003

    
2004
  if (config->watchdog_timeout)
2005
  {
2006
    alarm(config->watchdog_timeout);
2007
    watchdog_active = 1;
2008
  }
2009
}
2010

    
2011
static inline void
2012
watchdog_stop(void)
2013
{
2014
  io_update_time();
2015

    
2016
  if (watchdog_active)
2017
  {
2018
    alarm(0);
2019
    watchdog_active = 0;
2020
  }
2021

    
2022
  btime duration = last_time - loop_time;
2023
  if (duration > config->watchdog_warning)
2024
    log(L_WARN "I/O loop cycle took %d ms for %d events",
2025
        (int) (duration TO_MS), event_log_num);
2026
}
2027

    
2028

    
2029
/*
2030
 *        Main I/O Loop
2031
 */
2032

    
2033
/* Asynchronous reconfiguration scheduled; checked and cleared by io_loop() */
volatile int async_config_flag;
/* Asynchronous dump scheduled; checked and cleared by io_loop() */
volatile int async_dump_flag;
2035

    
2036
void
2037
io_init(void)
2038
{
2039
  init_list(&near_timers);
2040
  init_list(&far_timers);
2041
  init_list(&sock_list);
2042
  init_list(&global_event_list);
2043
  krt_io_init();
2044
  init_times();
2045
  update_times();
2046
  boot_time = now;
2047
  srandom((int) now_real);
2048
}
2049

    
2050
/* Limit on consecutive io_loop() rounds that may skip the non-fast RX pass */
#define SHORT_LOOP_MAX 10
/* Rounds since the non-fast RX pass last ran; reset by io_loop() */
static int short_loops = 0;
2052

    
2053
/*
 * io_loop - the main I/O loop; never returns
 *
 * Each round: run global events, fire due timers, build the poll() set
 * from sock_list, handle asynchronous requests (config, dump, shutdown),
 * poll() and then serve the sockets in two passes -- first fast_rx
 * receivers and all transmitters, then (rate limited) the remaining
 * receivers.
 */
void
2054
io_loop(void)
2055
{
2056
  int poll_tout;
2057
  time_t tout;
2058
  int nfds, events, pout;
2059
  sock *s;
2060
  node *n;
2061
  /* Initial capacity of the pollfd array; doubled on demand below */
  int fdmax = 256;
2062
  struct pollfd *pfd = xmalloc(fdmax * sizeof(struct pollfd));
2063

    
2064
  watchdog_start1();
2065
  for(;;)
2066
    {
2067
      /* A nonzero result makes the poll() below non-blocking */
      events = ev_run_list(&global_event_list);
2068
    timers:
2069
      /* Fire timers until the nearest one lies in the future */
      update_times();
2070
      tout = tm_first_shot();
2071
      if (tout <= now)
2072
        {
2073
          tm_shot();
2074
          goto timers;
2075
        }
2076
      /* Sleep at most 3 seconds, or not at all when events are pending */
      poll_tout = (events ? 0 : MIN(tout - now, 3)) * 1000; /* Time in milliseconds */
2077

    
2078
      io_close_event();
2079

    
2080
      /* Build the poll set: one slot per socket that wants RX and/or TX */
      nfds = 0;
2081
      WALK_LIST(n, sock_list)
2082
        {
2083
          pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */
2084
          s = SKIP_BACK(sock, n, n);
2085
          if (s->rx_hook)
2086
            {
2087
              pfd[nfds].fd = s->fd;
2088
              pfd[nfds].events |= POLLIN;
2089
            }
2090
          /* TX is polled only when there are unsent data and a hook to notify */
          if (s->tx_hook && s->ttx != s->tpos)
2091
            {
2092
              pfd[nfds].fd = s->fd;
2093
              pfd[nfds].events |= POLLOUT;
2094
            }
2095
          /* s->index remembers the socket's slot; -1 means it is not polled */
          if (pfd[nfds].fd != -1)
2096
            {
2097
              s->index = nfds;
2098
              nfds++;
2099
            }
2100
          else
2101
            s->index = -1;
2102

    
2103
          /* Grow the array so that slot nfds is always available */
          if (nfds >= fdmax)
2104
            {
2105
              fdmax *= 2;
2106
              pfd = xrealloc(pfd, fdmax * sizeof(struct pollfd));
2107
            }
2108
        }
2109

    
2110
      /*
2111
       * Yes, this is racy. But even if the signal comes before this test
2112
       * and entering poll(), it gets caught on the next timer tick.
2113
       */
2114

    
2115
      /* Each asynchronous request is served alone, then the round restarts */
      if (async_config_flag)
2116
        {
2117
          io_log_event(async_config, NULL);
2118
          async_config();
2119
          async_config_flag = 0;
2120
          continue;
2121
        }
2122
      if (async_dump_flag)
2123
        {
2124
          io_log_event(async_dump, NULL);
2125
          async_dump();
2126
          async_dump_flag = 0;
2127
          continue;
2128
        }
2129
      if (async_shutdown_flag)
2130
        {
2131
          io_log_event(async_shutdown, NULL);
2132
          async_shutdown();
2133
          async_shutdown_flag = 0;
2134
          continue;
2135
        }
2136

    
2137
      /* And finally enter poll() to find active sockets */
2138
      /* The watchdog is stopped around poll() so waiting is not counted */
      watchdog_stop();
2139
      pout = poll(pfd, nfds, poll_tout);
2140
      watchdog_start();
2141

    
2142
      if (pout < 0)
2143
        {
2144
          if (errno == EINTR || errno == EAGAIN)
2145
            continue;
2146
          die("poll: %m");
2147
        }
2148
      if (pout)
2149
        {
2150
          /* guaranteed to be non-empty */
2151
          current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
2152

    
2153
          /* Pass 1: fast_rx receivers and all transmitters */
          while (current_sock)
2154
            {
2155
              /* Note: this local s shadows the function-level s */
              sock *s = current_sock;
2156
              if (s->index == -1)
2157
                {
2158
                  current_sock = sk_next(s);
2159
                  goto next;
2160
                }
2161

    
2162
              int e;
2163
              int steps;
2164

    
2165
              steps = MAX_STEPS;
2166
              if (s->fast_rx && (pfd[s->index].revents & (POLLIN | POLLHUP | POLLERR)) && s->rx_hook)
2167
                do
2168
                  {
2169
                    steps--;
2170
                    io_log_event(s->rx_hook, s->data);
2171
                    e = sk_read(s, pfd[s->index].revents);
2172
                    /* current_sock changed during the call: s must not be touched any more */
                    if (s != current_sock)
2173
                      goto next;
2174
                  }
2175
                while (e && s->rx_hook && steps);
2176

    
2177
              steps = MAX_STEPS;
2178
              if (pfd[s->index].revents & POLLOUT)
2179
                do
2180
                  {
2181
                    steps--;
2182
                    io_log_event(s->tx_hook, s->data);
2183
                    e = sk_write(s);
2184
                    if (s != current_sock)
2185
                      goto next;
2186
                  }
2187
                while (e && steps);
2188
              current_sock = sk_next(s);
2189
            next: ;
2190
            }
2191

    
2192
          /* While events are pending, pass 2 is skipped for up to
             SHORT_LOOP_MAX - 1 consecutive rounds */
          short_loops++;
2193
          if (events && (short_loops < SHORT_LOOP_MAX))
2194
            continue;
2195
          short_loops = 0;
2196

    
2197
          /* Pass 2: remaining receivers, at most MAX_RX_STEPS reads per
             round, resuming where the previous round stopped */
          int count = 0;
2198
          current_sock = stored_sock;
2199
          if (current_sock == NULL)
2200
            current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
2201

    
2202
          while (current_sock && count < MAX_RX_STEPS)
2203
            {
2204
              sock *s = current_sock;
2205
              if (s->index == -1)
2206
                {
2207
                  current_sock = sk_next(s);
2208
                  goto next2;
2209
                }
2210

    
2211
              if (!s->fast_rx && (pfd[s->index].revents & (POLLIN | POLLHUP | POLLERR)) && s->rx_hook)
2212
                {
2213
                  count++;
2214
                  io_log_event(s->rx_hook, s->data);
2215
                  sk_read(s, pfd[s->index].revents);
2216
                  if (s != current_sock)
2217
                      goto next2;
2218
                }
2219
              current_sock = sk_next(s);
2220
            next2: ;
2221
            }
2222

    
2223
          /* Remember where to continue in the next round */
          stored_sock = current_sock;
2224
        }
2225
    }
2226
}
2227

    
2228
/*
 * test_old_bird - refuse to start when another instance is listening
 * @path: path of the control socket
 *
 * Tries to connect to the UNIX socket at @path and dies when that
 * succeeds. Also dies when the probe socket cannot be created or @path
 * does not fit into sun_path.
 */
void
test_old_bird(char *path)
{
  struct sockaddr_un addr;
  int probe = socket(AF_UNIX, SOCK_STREAM, 0);

  if (probe < 0)
    die("Cannot create socket: %m");
  if (strlen(path) >= sizeof(addr.sun_path))
    die("Socket path too long");

  bzero(&addr, sizeof(addr));
  addr.sun_family = AF_UNIX;
  strcpy(addr.sun_path, path);

  /* A successful connect means somebody is serving this socket */
  if (connect(probe, (struct sockaddr *) &addr, SUN_LEN(&addr)) == 0)
    die("I found another BIRD running.");

  close(probe);
}