Statistics
| Branch: | Revision:

iof-bird-daemon / sysdep / unix / io.c @ 574b2324

History | View | Annotate | Download (47.4 KB)

1
/*
2
 *        BIRD Internet Routing Daemon -- Unix I/O
3
 *
4
 *        (c) 1998--2004 Martin Mares <mj@ucw.cz>
5
 *      (c) 2004       Ondrej Filip <feela@network.cz>
6
 *
7
 *        Can be freely distributed and used under the terms of the GNU GPL.
8
 */
9

    
10
/* Unfortunately, some glibc versions hide parts of RFC 3542 API
11
   if _GNU_SOURCE is not defined. */
12
#ifndef _GNU_SOURCE
13
#define _GNU_SOURCE
14
#endif
15

    
16
#include <stdio.h>
17
#include <stdlib.h>
18
#include <time.h>
19
#include <sys/time.h>
20
#include <sys/types.h>
21
#include <sys/socket.h>
22
#include <sys/uio.h>
23
#include <sys/un.h>
24
#include <poll.h>
25
#include <unistd.h>
26
#include <fcntl.h>
27
#include <errno.h>
28
#include <net/if.h>
29
#include <netinet/in.h>
30
#include <netinet/tcp.h>
31
#include <netinet/udp.h>
32
#include <netinet/icmp6.h>
33

    
34
#include "nest/bird.h"
35
#include "lib/lists.h"
36
#include "lib/resource.h"
37
#include "sysdep/unix/timer.h"
38
#include "lib/socket.h"
39
#include "lib/event.h"
40
#include "lib/timer.h"
41
#include "lib/string.h"
42
#include "nest/iface.h"
43
#include "conf/conf.h"
44

    
45
#include "sysdep/unix/unix.h"
46
#include CONFIG_INCLUDE_SYSIO_H
47

    
48
/* Maximum number of calls of tx handler for one socket in one
49
 * poll iteration. Should be small enough to not monopolize CPU by
50
 * one protocol instance.
51
 */
52
#define MAX_STEPS 4
53

    
54
/* Maximum number of calls of rx handler for all sockets in one poll
55
   iteration. RX callbacks are often much more costly so we limit
56
   this to gen small latencies */
57
#define MAX_RX_STEPS 4
58

    
59
/*
60
 *        Tracked Files
61
 */
62

    
63
struct rfile {
64
  resource r;
65
  FILE *f;
66
};
67

    
68
static void
69
rf_free(resource *r)
70
{
71
  struct rfile *a = (struct rfile *) r;
72

    
73
  fclose(a->f);
74
}
75

    
76
static void
77
rf_dump(resource *r)
78
{
79
  struct rfile *a = (struct rfile *) r;
80

    
81
  debug("(FILE *%p)\n", a->f);
82
}
83

    
84
static struct resclass rf_class = {
85
  "FILE",
86
  sizeof(struct rfile),
87
  rf_free,
88
  rf_dump,
89
  NULL,
90
  NULL
91
};
92

    
93
void *
94
tracked_fopen(pool *p, char *name, char *mode)
95
{
96
  FILE *f = fopen(name, mode);
97

    
98
  if (f)
99
    {
100
      struct rfile *r = ralloc(p, &rf_class);
101
      r->f = f;
102
    }
103
  return f;
104
}
105

    
106

    
107
/*
108
 *        Time clock
109
 */
110

    
111
btime boot_time;
112

    
113
void
114
times_init(struct timeloop *loop)
115
{
116
  struct timespec ts;
117
  int rv;
118

    
119
  rv = clock_gettime(CLOCK_MONOTONIC, &ts);
120
  if (rv < 0)
121
    die("Monotonic clock is missing");
122

    
123
  if ((ts.tv_sec < 0) || (((s64) ts.tv_sec) > ((s64) 1 << 40)))
124
    log(L_WARN "Monotonic clock is crazy");
125

    
126
  loop->last_time = ts.tv_sec S + ts.tv_nsec NS;
127
  loop->real_time = 0;
128
}
129

    
130
void
131
times_update(struct timeloop *loop)
132
{
133
  struct timespec ts;
134
  int rv;
135

    
136
  rv = clock_gettime(CLOCK_MONOTONIC, &ts);
137
  if (rv < 0)
138
    die("clock_gettime: %m");
139

    
140
  btime new_time = ts.tv_sec S + ts.tv_nsec NS;
141

    
142
  if (new_time < loop->last_time)
143
    log(L_ERR "Monotonic clock is broken");
144

    
145
  loop->last_time = new_time;
146
  loop->real_time = 0;
147
}
148

    
149
void
150
times_update_real_time(struct timeloop *loop)
151
{
152
  struct timespec ts;
153
  int rv;
154

    
155
  rv = clock_gettime(CLOCK_REALTIME, &ts);
156
  if (rv < 0)
157
    die("clock_gettime: %m");
158

    
159
  loop->real_time = ts.tv_sec S + ts.tv_nsec NS;
160
}
161

    
162

    
163
/**
164
 * DOC: Sockets
165
 *
166
 * Socket resources represent network connections. Their data structure (&socket)
167
 * contains a lot of fields defining the exact type of the socket, the local and
168
 * remote addresses and ports, pointers to socket buffers and finally pointers to
169
 * hook functions to be called when new data have arrived to the receive buffer
170
 * (@rx_hook), when the contents of the transmit buffer have been transmitted
171
 * (@tx_hook) and when an error or connection close occurs (@err_hook).
172
 *
173
 * Freeing of sockets from inside socket hooks is perfectly safe.
174
 */
175

    
176
#ifndef SOL_IP
177
#define SOL_IP IPPROTO_IP
178
#endif
179

    
180
#ifndef SOL_IPV6
181
#define SOL_IPV6 IPPROTO_IPV6
182
#endif
183

    
184
#ifndef SOL_ICMPV6
185
#define SOL_ICMPV6 IPPROTO_ICMPV6
186
#endif
187

    
188

    
189
/*
190
 *        Sockaddr helper functions
191
 */
192

    
193
static inline int UNUSED sockaddr_length(int af)
194
{ return (af == AF_INET) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); }
195

    
196
static inline void
197
sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, uint port)
198
{
199
  memset(sa, 0, sizeof(struct sockaddr_in));
200
#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
201
  sa->sin_len = sizeof(struct sockaddr_in);
202
#endif
203
  sa->sin_family = AF_INET;
204
  sa->sin_port = htons(port);
205
  sa->sin_addr = ipa_to_in4(a);
206
}
207

    
208
static inline void
209
sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port)
210
{
211
  memset(sa, 0, sizeof(struct sockaddr_in6));
212
#ifdef SIN6_LEN
213
  sa->sin6_len = sizeof(struct sockaddr_in6);
214
#endif
215
  sa->sin6_family = AF_INET6;
216
  sa->sin6_port = htons(port);
217
  sa->sin6_flowinfo = 0;
218
  sa->sin6_addr = ipa_to_in6(a);
219

    
220
  if (ifa && ipa_is_link_local(a))
221
    sa->sin6_scope_id = ifa->index;
222
}
223

    
224
void
225
sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port)
226
{
227
  if (af == AF_INET)
228
    sockaddr_fill4((struct sockaddr_in *) sa, a, port);
229
  else if (af == AF_INET6)
230
    sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port);
231
  else
232
    bug("Unknown AF");
233
}
234

    
235
static inline void
236
sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, uint *port)
237
{
238
  *port = ntohs(sa->sin_port);
239
  *a = ipa_from_in4(sa->sin_addr);
240
}
241

    
242
static inline void
243
sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port)
244
{
245
  *port = ntohs(sa->sin6_port);
246
  *a = ipa_from_in6(sa->sin6_addr);
247

    
248
  if (ifa && ipa_is_link_local(*a))
249
    *ifa = if_find_by_index(sa->sin6_scope_id);
250
}
251

    
252
int
253
sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port)
254
{
255
  if (sa->sa.sa_family != af)
256
    goto fail;
257

    
258
  if (af == AF_INET)
259
    sockaddr_read4((struct sockaddr_in *) sa, a, port);
260
  else if (af == AF_INET6)
261
    sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port);
262
  else
263
    goto fail;
264

    
265
  return 0;
266

    
267
 fail:
268
  *a = IPA_NONE;
269
  *port = 0;
270
  return -1;
271
}
272

    
273

    
274
/*
275
 *        IPv6 multicast syscalls
276
 */
277

    
278
/* Fortunately standardized in RFC 3493 */
279

    
280
#define INIT_MREQ6(maddr,ifa) \
281
  { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = ifa->index }
282

    
283
static inline int
284
sk_setup_multicast6(sock *s)
285
{
286
  int index = s->iface->index;
287
  int ttl = s->ttl;
288
  int n = 0;
289

    
290
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
291
    ERR("IPV6_MULTICAST_IF");
292

    
293
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0)
294
    ERR("IPV6_MULTICAST_HOPS");
295

    
296
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0)
297
    ERR("IPV6_MULTICAST_LOOP");
298

    
299
  return 0;
300
}
301

    
302
static inline int
303
sk_join_group6(sock *s, ip_addr maddr)
304
{
305
  struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
306

    
307
  if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0)
308
    ERR("IPV6_JOIN_GROUP");
309

    
310
  return 0;
311
}
312

    
313
static inline int
314
sk_leave_group6(sock *s, ip_addr maddr)
315
{
316
  struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
317

    
318
  if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0)
319
    ERR("IPV6_LEAVE_GROUP");
320

    
321
  return 0;
322
}
323

    
324

    
325
/*
326
 *        IPv6 packet control messages
327
 */
328

    
329
/* Also standardized, in RFC 3542 */
330

    
331
/*
332
 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
333
 * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
334
 * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
335
 * RFC and we use IPV6_PKTINFO.
336
 */
337
#ifndef IPV6_RECVPKTINFO
338
#define IPV6_RECVPKTINFO IPV6_PKTINFO
339
#endif
340
/*
341
 * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
342
 */
343
#ifndef IPV6_RECVHOPLIMIT
344
#define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
345
#endif
346

    
347

    
348
#define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
349
#define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
350

    
351
static inline int
352
sk_request_cmsg6_pktinfo(sock *s)
353
{
354
  int y = 1;
355

    
356
  if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0)
357
    ERR("IPV6_RECVPKTINFO");
358

    
359
  return 0;
360
}
361

    
362
static inline int
363
sk_request_cmsg6_ttl(sock *s)
364
{
365
  int y = 1;
366

    
367
  if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0)
368
    ERR("IPV6_RECVHOPLIMIT");
369

    
370
  return 0;
371
}
372

    
373
static inline void
374
sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm)
375
{
376
  if (cm->cmsg_type == IPV6_PKTINFO)
377
  {
378
    struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm);
379
    s->laddr = ipa_from_in6(pi->ipi6_addr);
380
    s->lifindex = pi->ipi6_ifindex;
381
  }
382
}
383

    
384
static inline void
385
sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm)
386
{
387
  if (cm->cmsg_type == IPV6_HOPLIMIT)
388
    s->rcv_ttl = * (int *) CMSG_DATA(cm);
389
}
390

    
391
static inline void
392
sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
393
{
394
  struct cmsghdr *cm;
395
  struct in6_pktinfo *pi;
396
  int controllen = 0;
397

    
398
  msg->msg_control = cbuf;
399
  msg->msg_controllen = cbuflen;
400

    
401
  cm = CMSG_FIRSTHDR(msg);
402
  cm->cmsg_level = SOL_IPV6;
403
  cm->cmsg_type = IPV6_PKTINFO;
404
  cm->cmsg_len = CMSG_LEN(sizeof(*pi));
405
  controllen += CMSG_SPACE(sizeof(*pi));
406

    
407
  pi = (struct in6_pktinfo *) CMSG_DATA(cm);
408
  pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
409
  pi->ipi6_addr = ipa_to_in6(s->saddr);
410

    
411
  msg->msg_controllen = controllen;
412
}
413

    
414

    
415
/*
416
 *        Miscellaneous socket syscalls
417
 */
418

    
419
static inline int
420
sk_set_ttl4(sock *s, int ttl)
421
{
422
  if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0)
423
    ERR("IP_TTL");
424

    
425
  return 0;
426
}
427

    
428
static inline int
429
sk_set_ttl6(sock *s, int ttl)
430
{
431
  if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0)
432
    ERR("IPV6_UNICAST_HOPS");
433

    
434
  return 0;
435
}
436

    
437
static inline int
438
sk_set_tos4(sock *s, int tos)
439
{
440
  if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0)
441
    ERR("IP_TOS");
442

    
443
  return 0;
444
}
445

    
446
static inline int
447
sk_set_tos6(sock *s, int tos)
448
{
449
  if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0)
450
    ERR("IPV6_TCLASS");
451

    
452
  return 0;
453
}
454

    
455
static inline int
456
sk_set_high_port(sock *s UNUSED)
457
{
458
  /* Port range setting is optional, ignore it if not supported */
459

    
460
#ifdef IP_PORTRANGE
461
  if (sk_is_ipv4(s))
462
  {
463
    int range = IP_PORTRANGE_HIGH;
464
    if (setsockopt(s->fd, SOL_IP, IP_PORTRANGE, &range, sizeof(range)) < 0)
465
      ERR("IP_PORTRANGE");
466
  }
467
#endif
468

    
469
#ifdef IPV6_PORTRANGE
470
  if (sk_is_ipv6(s))
471
  {
472
    int range = IPV6_PORTRANGE_HIGH;
473
    if (setsockopt(s->fd, SOL_IPV6, IPV6_PORTRANGE, &range, sizeof(range)) < 0)
474
      ERR("IPV6_PORTRANGE");
475
  }
476
#endif
477

    
478
  return 0;
479
}
480

    
481
static inline byte *
482
sk_skip_ip_header(byte *pkt, int *len)
483
{
484
  if ((*len < 20) || ((*pkt & 0xf0) != 0x40))
485
    return NULL;
486

    
487
  int hlen = (*pkt & 0x0f) * 4;
488
  if ((hlen < 20) || (hlen > *len))
489
    return NULL;
490

    
491
  *len -= hlen;
492
  return pkt + hlen;
493
}
494

    
495
byte *
496
sk_rx_buffer(sock *s, int *len)
497
{
498
  if (sk_is_ipv4(s) && (s->type == SK_IP))
499
    return sk_skip_ip_header(s->rbuf, len);
500
  else
501
    return s->rbuf;
502
}
503

    
504

    
505
/*
506
 *        Public socket functions
507
 */
508

    
509
/**
510
 * sk_setup_multicast - enable multicast for given socket
511
 * @s: socket
512
 *
513
 * Prepare transmission of multicast packets for given datagram socket.
514
 * The socket must have defined @iface.
515
 *
516
 * Result: 0 for success, -1 for an error.
517
 */
518

    
519
int
520
sk_setup_multicast(sock *s)
521
{
522
  ASSERT(s->iface);
523

    
524
  if (sk_is_ipv4(s))
525
    return sk_setup_multicast4(s);
526
  else
527
    return sk_setup_multicast6(s);
528
}
529

    
530
/**
531
 * sk_join_group - join multicast group for given socket
532
 * @s: socket
533
 * @maddr: multicast address
534
 *
535
 * Join multicast group for given datagram socket and associated interface.
536
 * The socket must have defined @iface.
537
 *
538
 * Result: 0 for success, -1 for an error.
539
 */
540

    
541
int
542
sk_join_group(sock *s, ip_addr maddr)
543
{
544
  if (sk_is_ipv4(s))
545
    return sk_join_group4(s, maddr);
546
  else
547
    return sk_join_group6(s, maddr);
548
}
549

    
550
/**
551
 * sk_leave_group - leave multicast group for given socket
552
 * @s: socket
553
 * @maddr: multicast address
554
 *
555
 * Leave multicast group for given datagram socket and associated interface.
556
 * The socket must have defined @iface.
557
 *
558
 * Result: 0 for success, -1 for an error.
559
 */
560

    
561
int
562
sk_leave_group(sock *s, ip_addr maddr)
563
{
564
  if (sk_is_ipv4(s))
565
    return sk_leave_group4(s, maddr);
566
  else
567
    return sk_leave_group6(s, maddr);
568
}
569

    
570
/**
571
 * sk_setup_broadcast - enable broadcast for given socket
572
 * @s: socket
573
 *
574
 * Allow reception and transmission of broadcast packets for given datagram
575
 * socket. The socket must have defined @iface. For transmission, packets should
576
 * be send to @brd address of @iface.
577
 *
578
 * Result: 0 for success, -1 for an error.
579
 */
580

    
581
int
582
sk_setup_broadcast(sock *s)
583
{
584
  int y = 1;
585

    
586
  if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0)
587
    ERR("SO_BROADCAST");
588

    
589
  return 0;
590
}
591

    
592
/**
593
 * sk_set_ttl - set transmit TTL for given socket
594
 * @s: socket
595
 * @ttl: TTL value
596
 *
597
 * Set TTL for already opened connections when TTL was not set before. Useful
598
 * for accepted connections when different ones should have different TTL.
599
 *
600
 * Result: 0 for success, -1 for an error.
601
 */
602

    
603
int
604
sk_set_ttl(sock *s, int ttl)
605
{
606
  s->ttl = ttl;
607

    
608
  if (sk_is_ipv4(s))
609
    return sk_set_ttl4(s, ttl);
610
  else
611
    return sk_set_ttl6(s, ttl);
612
}
613

    
614
/**
615
 * sk_set_min_ttl - set minimal accepted TTL for given socket
616
 * @s: socket
617
 * @ttl: TTL value
618
 *
619
 * Set minimal accepted TTL for given socket. Can be used for TTL security.
620
 * implementations.
621
 *
622
 * Result: 0 for success, -1 for an error.
623
 */
624

    
625
int
626
sk_set_min_ttl(sock *s, int ttl)
627
{
628
  if (sk_is_ipv4(s))
629
    return sk_set_min_ttl4(s, ttl);
630
  else
631
    return sk_set_min_ttl6(s, ttl);
632
}
633

    
634
#if 0
635
/**
636
 * sk_set_md5_auth - add / remove MD5 security association for given socket
637
 * @s: socket
638
 * @local: IP address of local side
639
 * @remote: IP address of remote side
640
 * @ifa: Interface for link-local IP address
641
 * @passwd: Password used for MD5 authentication
642
 * @setkey: Update also system SA/SP database
643
 *
644
 * In TCP MD5 handling code in kernel, there is a set of security associations
645
 * used for choosing password and other authentication parameters according to
646
 * the local and remote address. This function is useful for listening socket,
647
 * for active sockets it may be enough to set s->password field.
648
 *
649
 * When called with passwd != NULL, the new pair is added,
650
 * When called with passwd == NULL, the existing pair is removed.
651
 *
652
 * Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are
653
 * stored in global SA/SP database (but the behavior also must be enabled on
654
 * per-socket basis). In case of multiple sockets to the same neighbor, the
655
 * socket-specific state must be configured for each socket while global state
656
 * just once per src-dst pair. The @setkey argument controls whether the global
657
 * state (SA/SP database) is also updated.
658
 *
659
 * Result: 0 for success, -1 for an error.
660
 */
661

662
int
663
sk_set_md5_auth(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, char *passwd, int setkey)
664
{ DUMMY; }
665
#endif
666

    
667
/**
668
 * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
669
 * @s: socket
670
 * @offset: offset
671
 *
672
 * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
673
 * kernel will automatically fill it for outgoing packets and check it for
674
 * incoming packets. Should not be used on ICMPv6 sockets, where the position is
675
 * known to the kernel.
676
 *
677
 * Result: 0 for success, -1 for an error.
678
 */
679

    
680
int
681
sk_set_ipv6_checksum(sock *s, int offset)
682
{
683
  if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
684
    ERR("IPV6_CHECKSUM");
685

    
686
  return 0;
687
}
688

    
689
int
690
sk_set_icmp6_filter(sock *s, int p1, int p2)
691
{
692
  /* a bit of lame interface, but it is here only for Radv */
693
  struct icmp6_filter f;
694

    
695
  ICMP6_FILTER_SETBLOCKALL(&f);
696
  ICMP6_FILTER_SETPASS(p1, &f);
697
  ICMP6_FILTER_SETPASS(p2, &f);
698

    
699
  if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
700
    ERR("ICMP6_FILTER");
701

    
702
  return 0;
703
}
704

    
705
void
706
sk_log_error(sock *s, const char *p)
707
{
708
  log(L_ERR "%s: Socket error: %s%#m", p, s->err);
709
}
710

    
711

    
712
/*
713
 *        Actual struct birdsock code
714
 */
715

    
716
static list sock_list;
717
static struct birdsock *current_sock;
718
static struct birdsock *stored_sock;
719

    
720
static inline sock *
721
sk_next(sock *s)
722
{
723
  if (!s->n.next->next)
724
    return NULL;
725
  else
726
    return SKIP_BACK(sock, n, s->n.next);
727
}
728

    
729
static void
730
sk_alloc_bufs(sock *s)
731
{
732
  if (!s->rbuf && s->rbsize)
733
    s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
734
  s->rpos = s->rbuf;
735
  if (!s->tbuf && s->tbsize)
736
    s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
737
  s->tpos = s->ttx = s->tbuf;
738
}
739

    
740
static void
741
sk_free_bufs(sock *s)
742
{
743
  if (s->rbuf_alloc)
744
  {
745
    xfree(s->rbuf_alloc);
746
    s->rbuf = s->rbuf_alloc = NULL;
747
  }
748
  if (s->tbuf_alloc)
749
  {
750
    xfree(s->tbuf_alloc);
751
    s->tbuf = s->tbuf_alloc = NULL;
752
  }
753
}
754

    
755
#ifdef HAVE_LIBSSH
756
static void
757
sk_ssh_free(sock *s)
758
{
759
  struct ssh_sock *ssh = s->ssh;
760

    
761
  if (s->ssh == NULL)
762
    return;
763

    
764
  s->ssh = NULL;
765

    
766
  if (ssh->channel)
767
  {
768
    if (ssh_channel_is_open(ssh->channel))
769
      ssh_channel_close(ssh->channel);
770
    ssh_channel_free(ssh->channel);
771
    ssh->channel = NULL;
772
  }
773

    
774
  if (ssh->session)
775
  {
776
    ssh_disconnect(ssh->session);
777
    ssh_free(ssh->session);
778
    ssh->session = NULL;
779
  }
780
}
781
#endif
782

    
783
static void
784
sk_free(resource *r)
785
{
786
  sock *s = (sock *) r;
787

    
788
  sk_free_bufs(s);
789

    
790
#ifdef HAVE_LIBSSH
791
  if (s->type == SK_SSH || s->type == SK_SSH_ACTIVE)
792
    sk_ssh_free(s);
793
#endif
794

    
795
  if (s->fd < 0)
796
    return;
797

    
798
  /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
799
  if (!(s->flags & SKF_THREAD))
800
  {
801
    if (s == current_sock)
802
      current_sock = sk_next(s);
803
    if (s == stored_sock)
804
      stored_sock = sk_next(s);
805
    rem_node(&s->n);
806
  }
807

    
808
  if (s->type != SK_SSH && s->type != SK_SSH_ACTIVE)
809
    close(s->fd);
810

    
811
  s->fd = -1;
812
}
813

    
814
void
815
sk_set_rbsize(sock *s, uint val)
816
{
817
  ASSERT(s->rbuf_alloc == s->rbuf);
818

    
819
  if (s->rbsize == val)
820
    return;
821

    
822
  s->rbsize = val;
823
  xfree(s->rbuf_alloc);
824
  s->rbuf_alloc = xmalloc(val);
825
  s->rpos = s->rbuf = s->rbuf_alloc;
826
}
827

    
828
void
829
sk_set_tbsize(sock *s, uint val)
830
{
831
  ASSERT(s->tbuf_alloc == s->tbuf);
832

    
833
  if (s->tbsize == val)
834
    return;
835

    
836
  byte *old_tbuf = s->tbuf;
837

    
838
  s->tbsize = val;
839
  s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val);
840
  s->tpos = s->tbuf + (s->tpos - old_tbuf);
841
  s->ttx  = s->tbuf + (s->ttx  - old_tbuf);
842
}
843

    
844
void
845
sk_set_tbuf(sock *s, void *tbuf)
846
{
847
  s->tbuf = tbuf ?: s->tbuf_alloc;
848
  s->ttx = s->tpos = s->tbuf;
849
}
850

    
851
void
852
sk_reallocate(sock *s)
853
{
854
  sk_free_bufs(s);
855
  sk_alloc_bufs(s);
856
}
857

    
858
static void
859
sk_dump(resource *r)
860
{
861
  sock *s = (sock *) r;
862
  static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "SSH>", "SSH", "DEL!" };
863

    
864
  debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
865
        sk_type_names[s->type],
866
        s->data,
867
        s->saddr,
868
        s->sport,
869
        s->daddr,
870
        s->dport,
871
        s->tos,
872
        s->ttl,
873
        s->iface ? s->iface->name : "none");
874
}
875

    
876
static struct resclass sk_class = {
877
  "Socket",
878
  sizeof(sock),
879
  sk_free,
880
  sk_dump,
881
  NULL,
882
  NULL
883
};
884

    
885
/**
886
 * sk_new - create a socket
887
 * @p: pool
888
 *
889
 * This function creates a new socket resource. If you want to use it,
890
 * you need to fill in all the required fields of the structure and
891
 * call sk_open() to do the actual opening of the socket.
892
 *
893
 * The real function name is sock_new(), sk_new() is a macro wrapper
894
 * to avoid collision with OpenSSL.
895
 */
896
sock *
897
sock_new(pool *p)
898
{
899
  sock *s = ralloc(p, &sk_class);
900
  s->pool = p;
901
  // s->saddr = s->daddr = IPA_NONE;
902
  s->tos = s->priority = s->ttl = -1;
903
  s->fd = -1;
904
  return s;
905
}
906

    
907
static int
908
sk_setup(sock *s)
909
{
910
  int y = 1;
911
  int fd = s->fd;
912

    
913
  if (s->type == SK_SSH_ACTIVE)
914
    return 0;
915

    
916
  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
917
    ERR("O_NONBLOCK");
918

    
919
  if (!s->af)
920
    return 0;
921

    
922
  if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
923
    s->flags |= SKF_PKTINFO;
924

    
925
#ifdef CONFIG_USE_HDRINCL
926
  if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO))
927
  {
928
    s->flags &= ~SKF_PKTINFO;
929
    s->flags |= SKF_HDRINCL;
930
    if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0)
931
      ERR("IP_HDRINCL");
932
  }
933
#endif
934

    
935
  if (s->iface)
936
  {
937
#ifdef SO_BINDTODEVICE
938
    struct ifreq ifr = {};
939
    strcpy(ifr.ifr_name, s->iface->name);
940
    if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
941
      ERR("SO_BINDTODEVICE");
942
#endif
943

    
944
#ifdef CONFIG_UNIX_DONTROUTE
945
    if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0)
946
      ERR("SO_DONTROUTE");
947
#endif
948
  }
949

    
950
  if (s->priority >= 0)
951
    if (sk_set_priority(s, s->priority) < 0)
952
      return -1;
953

    
954
  if (sk_is_ipv4(s))
955
  {
956
    if (s->flags & SKF_LADDR_RX)
957
      if (sk_request_cmsg4_pktinfo(s) < 0)
958
        return -1;
959

    
960
    if (s->flags & SKF_TTL_RX)
961
      if (sk_request_cmsg4_ttl(s) < 0)
962
        return -1;
963

    
964
    if ((s->type == SK_UDP) || (s->type == SK_IP))
965
      if (sk_disable_mtu_disc4(s) < 0)
966
        return -1;
967

    
968
    if (s->ttl >= 0)
969
      if (sk_set_ttl4(s, s->ttl) < 0)
970
        return -1;
971

    
972
    if (s->tos >= 0)
973
      if (sk_set_tos4(s, s->tos) < 0)
974
        return -1;
975
  }
976

    
977
  if (sk_is_ipv6(s))
978
  {
979
    if ((s->type == SK_TCP_PASSIVE) || (s->type == SK_TCP_ACTIVE) || (s->type == SK_UDP))
980
      if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0)
981
        ERR("IPV6_V6ONLY");
982

    
983
    if (s->flags & SKF_LADDR_RX)
984
      if (sk_request_cmsg6_pktinfo(s) < 0)
985
        return -1;
986

    
987
    if (s->flags & SKF_TTL_RX)
988
      if (sk_request_cmsg6_ttl(s) < 0)
989
        return -1;
990

    
991
    if ((s->type == SK_UDP) || (s->type == SK_IP))
992
      if (sk_disable_mtu_disc6(s) < 0)
993
        return -1;
994

    
995
    if (s->ttl >= 0)
996
      if (sk_set_ttl6(s, s->ttl) < 0)
997
        return -1;
998

    
999
    if (s->tos >= 0)
1000
      if (sk_set_tos6(s, s->tos) < 0)
1001
        return -1;
1002
  }
1003

    
1004
  return 0;
1005
}
1006

    
1007
static void
1008
sk_insert(sock *s)
1009
{
1010
  add_tail(&sock_list, &s->n);
1011
}
1012

    
1013
static void
1014
sk_tcp_connected(sock *s)
1015
{
1016
  sockaddr sa;
1017
  int sa_len = sizeof(sa);
1018

    
1019
  if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) ||
1020
      (sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0))
1021
    log(L_WARN "SOCK: Cannot get local IP address for TCP>");
1022

    
1023
  s->type = SK_TCP;
1024
  sk_alloc_bufs(s);
1025
  s->tx_hook(s);
1026
}
1027

    
1028
#ifdef HAVE_LIBSSH
1029
static void
1030
sk_ssh_connected(sock *s)
1031
{
1032
  sk_alloc_bufs(s);
1033
  s->type = SK_SSH;
1034
  s->tx_hook(s);
1035
}
1036
#endif
1037

    
1038
static int
1039
sk_passive_connected(sock *s, int type)
1040
{
1041
  sockaddr loc_sa, rem_sa;
1042
  int loc_sa_len = sizeof(loc_sa);
1043
  int rem_sa_len = sizeof(rem_sa);
1044

    
1045
  int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len);
1046
  if (fd < 0)
1047
  {
1048
    if ((errno != EINTR) && (errno != EAGAIN))
1049
      s->err_hook(s, errno);
1050
    return 0;
1051
  }
1052

    
1053
  sock *t = sk_new(s->pool);
1054
  t->type = type;
1055
  t->af = s->af;
1056
  t->fd = fd;
1057
  t->ttl = s->ttl;
1058
  t->tos = s->tos;
1059
  t->rbsize = s->rbsize;
1060
  t->tbsize = s->tbsize;
1061

    
1062
  if (type == SK_TCP)
1063
  {
1064
    if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) ||
1065
        (sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0))
1066
      log(L_WARN "SOCK: Cannot get local IP address for TCP<");
1067

    
1068
    if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0)
1069
      log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
1070
  }
1071

    
1072
  if (sk_setup(t) < 0)
1073
  {
1074
    /* FIXME: Call err_hook instead ? */
1075
    log(L_ERR "SOCK: Incoming connection: %s%#m", t->err);
1076

    
1077
    /* FIXME: handle it better in rfree() */
1078
    close(t->fd);
1079
    t->fd = -1;
1080
    rfree(t);
1081
    return 1;
1082
  }
1083

    
1084
  sk_insert(t);
1085
  sk_alloc_bufs(t);
1086
  s->rx_hook(t, 0);
1087
  return 1;
1088
}
1089

    
1090
#ifdef HAVE_LIBSSH
1091
/*
1092
 * Return SSH_OK or SSH_AGAIN or SSH_ERROR
1093
 */
1094
static int
1095
sk_ssh_connect(sock *s)
1096
{
1097
  s->fd = ssh_get_fd(s->ssh->session);
1098

    
1099
  /* Big fall thru automata */
1100
  switch (s->ssh->state)
1101
  {
1102
  case SK_SSH_CONNECT:
1103
  {
1104
    switch (ssh_connect(s->ssh->session))
1105
    {
1106
    case SSH_AGAIN:
1107
      /* A quick look into libSSH shows that ssh_get_fd() should return non-(-1)
1108
       * after SSH_AGAIN is returned by ssh_connect(). This is however nowhere
1109
       * documented but our code relies on that.
1110
       */
1111
      return SSH_AGAIN;
1112

    
1113
    case SSH_OK:
1114
      break;
1115

    
1116
    default:
1117
      return SSH_ERROR;
1118
    }
1119
  }
1120

    
1121
  case SK_SSH_SERVER_KNOWN:
1122
  {
1123
    s->ssh->state = SK_SSH_SERVER_KNOWN;
1124

    
1125
    if (s->ssh->server_hostkey_path)
1126
    {
1127
      int server_identity_is_ok = 1;
1128

    
1129
      /* Check server identity */
1130
      switch (ssh_is_server_known(s->ssh->session))
1131
      {
1132
#define LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s,msg,args...) log(L_WARN "SSH Identity %s@%s:%u: " msg, (s)->ssh->username, (s)->host, (s)->dport, ## args);
1133
      case SSH_SERVER_KNOWN_OK:
1134
        /* The server is known and has not changed. */
1135
        break;
1136

    
1137
      case SSH_SERVER_NOT_KNOWN:
1138
        LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server is unknown, its public key was not found in the known host file %s", s->ssh->server_hostkey_path);
1139
        break;
1140

    
1141
      case SSH_SERVER_KNOWN_CHANGED:
1142
        LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server key has changed. Either you are under attack or the administrator changed the key.");
1143
        server_identity_is_ok = 0;
1144
        break;
1145

    
1146
      case SSH_SERVER_FILE_NOT_FOUND:
1147
        LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The known host file %s does not exist", s->ssh->server_hostkey_path);
1148
        server_identity_is_ok = 0;
1149
        break;
1150

    
1151
      case SSH_SERVER_ERROR:
1152
        LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "Some error happened");
1153
        server_identity_is_ok = 0;
1154
        break;
1155

    
1156
      case SSH_SERVER_FOUND_OTHER:
1157
        LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server gave use a key of a type while we had an other type recorded. " \
1158
                                             "It is a possible attack.");
1159
        server_identity_is_ok = 0;
1160
        break;
1161
      }
1162

    
1163
      if (!server_identity_is_ok)
1164
        return SSH_ERROR;
1165
    }
1166
  }
1167

    
1168
  case SK_SSH_USERAUTH:
1169
  {
1170
    s->ssh->state = SK_SSH_USERAUTH;
1171
    switch (ssh_userauth_publickey_auto(s->ssh->session, NULL, NULL))
1172
    {
1173
    case SSH_AUTH_AGAIN:
1174
      return SSH_AGAIN;
1175

    
1176
    case SSH_AUTH_SUCCESS:
1177
      break;
1178

    
1179
    default:
1180
      return SSH_ERROR;
1181
    }
1182
  }
1183

    
1184
  case SK_SSH_CHANNEL:
1185
  {
1186
    s->ssh->state = SK_SSH_CHANNEL;
1187
    s->ssh->channel = ssh_channel_new(s->ssh->session);
1188
    if (s->ssh->channel == NULL)
1189
      return SSH_ERROR;
1190
  }
1191

    
1192
  case SK_SSH_SESSION:
1193
  {
1194
    s->ssh->state = SK_SSH_SESSION;
1195
    switch (ssh_channel_open_session(s->ssh->channel))
1196
    {
1197
    case SSH_AGAIN:
1198
      return SSH_AGAIN;
1199

    
1200
    case SSH_OK:
1201
      break;
1202

    
1203
    default:
1204
      return SSH_ERROR;
1205
    }
1206
  }
1207

    
1208
  case SK_SSH_SUBSYSTEM:
1209
  {
1210
    s->ssh->state = SK_SSH_SUBSYSTEM;
1211
    if (s->ssh->subsystem)
1212
    {
1213
      switch (ssh_channel_request_subsystem(s->ssh->channel, s->ssh->subsystem))
1214
      {
1215
      case SSH_AGAIN:
1216
        return SSH_AGAIN;
1217

    
1218
      case SSH_OK:
1219
        break;
1220

    
1221
      default:
1222
        return SSH_ERROR;
1223
      }
1224
    }
1225
  }
1226

    
1227
  case SK_SSH_ESTABLISHED:
1228
    s->ssh->state = SK_SSH_ESTABLISHED;
1229
  }
1230

    
1231
  return SSH_OK;
1232
}
1233

    
1234
/*
1235
 * Return file descriptor number if success
1236
 * Return -1 if failed
1237
 */
1238
static int
1239
sk_open_ssh(sock *s)
1240
{
1241
  if (!s->ssh)
1242
    bug("sk_open() sock->ssh is not allocated");
1243

    
1244
  ssh_session sess = ssh_new();
1245
  if (sess == NULL)
1246
    ERR2("Cannot create a ssh session");
1247
  s->ssh->session = sess;
1248

    
1249
  const int verbosity = SSH_LOG_NOLOG;
1250
  ssh_options_set(sess, SSH_OPTIONS_LOG_VERBOSITY, &verbosity);
1251
  ssh_options_set(sess, SSH_OPTIONS_HOST, s->host);
1252
  ssh_options_set(sess, SSH_OPTIONS_PORT, &(s->dport));
1253
  /* TODO: Add SSH_OPTIONS_BINDADDR */
1254
  ssh_options_set(sess, SSH_OPTIONS_USER, s->ssh->username);
1255

    
1256
  if (s->ssh->server_hostkey_path)
1257
    ssh_options_set(sess, SSH_OPTIONS_KNOWNHOSTS, s->ssh->server_hostkey_path);
1258

    
1259
  if (s->ssh->client_privkey_path)
1260
    ssh_options_set(sess, SSH_OPTIONS_IDENTITY, s->ssh->client_privkey_path);
1261

    
1262
  ssh_set_blocking(sess, 0);
1263

    
1264
  switch (sk_ssh_connect(s))
1265
  {
1266
  case SSH_AGAIN:
1267
    break;
1268

    
1269
  case SSH_OK:
1270
    sk_ssh_connected(s);
1271
    break;
1272

    
1273
  case SSH_ERROR:
1274
    ERR2(ssh_get_error(sess));
1275
    break;
1276
  }
1277

    
1278
  return ssh_get_fd(sess);
1279

    
1280
 err:
1281
  return -1;
1282
}
1283
#endif
1284

    
1285
/**
1286
 * sk_open - open a socket
1287
 * @s: socket
1288
 *
1289
 * This function takes a socket resource created by sk_new() and
1290
 * initialized by the user and binds a corresponding network connection
1291
 * to it.
1292
 *
1293
 * Result: 0 for success, -1 for an error.
1294
 */
1295
int
1296
sk_open(sock *s)
1297
{
1298
  int af = AF_UNSPEC;
1299
  int fd = -1;
1300
  int do_bind = 0;
1301
  int bind_port = 0;
1302
  ip_addr bind_addr = IPA_NONE;
1303
  sockaddr sa;
1304

    
1305
  if (s->type <= SK_IP)
1306
  {
1307
    /*
1308
     * For TCP/IP sockets, Address family (IPv4 or IPv6) can be specified either
1309
     * explicitly (SK_IPV4 or SK_IPV6) or implicitly (based on saddr, daddr).
1310
     * But the specifications have to be consistent.
1311
     */
1312

    
1313
    switch (s->subtype)
1314
    {
1315
    case 0:
1316
      ASSERT(ipa_zero(s->saddr) || ipa_zero(s->daddr) ||
1317
             (ipa_is_ip4(s->saddr) == ipa_is_ip4(s->daddr)));
1318
      af = (ipa_is_ip4(s->saddr) || ipa_is_ip4(s->daddr)) ? AF_INET : AF_INET6;
1319
      break;
1320

    
1321
    case SK_IPV4:
1322
      ASSERT(ipa_zero(s->saddr) || ipa_is_ip4(s->saddr));
1323
      ASSERT(ipa_zero(s->daddr) || ipa_is_ip4(s->daddr));
1324
      af = AF_INET;
1325
      break;
1326

    
1327
    case SK_IPV6:
1328
      ASSERT(ipa_zero(s->saddr) || !ipa_is_ip4(s->saddr));
1329
      ASSERT(ipa_zero(s->daddr) || !ipa_is_ip4(s->daddr));
1330
      af = AF_INET6;
1331
      break;
1332

    
1333
    default:
1334
      bug("Invalid subtype %d", s->subtype);
1335
    }
1336
  }
1337

    
1338
  switch (s->type)
1339
  {
1340
  case SK_TCP_ACTIVE:
1341
    s->ttx = "";                        /* Force s->ttx != s->tpos */
1342
    /* Fall thru */
1343
  case SK_TCP_PASSIVE:
1344
    fd = socket(af, SOCK_STREAM, IPPROTO_TCP);
1345
    bind_port = s->sport;
1346
    bind_addr = s->saddr;
1347
    do_bind = bind_port || ipa_nonzero(bind_addr);
1348
    break;
1349

    
1350
#ifdef HAVE_LIBSSH
1351
  case SK_SSH_ACTIVE:
1352
    s->ttx = "";                        /* Force s->ttx != s->tpos */
1353
    fd = sk_open_ssh(s);
1354
    break;
1355
#endif
1356

    
1357
  case SK_UDP:
1358
    fd = socket(af, SOCK_DGRAM, IPPROTO_UDP);
1359
    bind_port = s->sport;
1360
    bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1361
    do_bind = 1;
1362
    break;
1363

    
1364
  case SK_IP:
1365
    fd = socket(af, SOCK_RAW, s->dport);
1366
    bind_port = 0;
1367
    bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1368
    do_bind = ipa_nonzero(bind_addr);
1369
    break;
1370

    
1371
  case SK_MAGIC:
1372
    af = 0;
1373
    fd = s->fd;
1374
    break;
1375

    
1376
  default:
1377
    bug("sk_open() called for invalid sock type %d", s->type);
1378
  }
1379

    
1380
  if (fd < 0)
1381
    ERR("socket");
1382

    
1383
  s->af = af;
1384
  s->fd = fd;
1385

    
1386
  if (sk_setup(s) < 0)
1387
    goto err;
1388

    
1389
  if (do_bind)
1390
  {
1391
    if (bind_port)
1392
    {
1393
      int y = 1;
1394

    
1395
      if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0)
1396
        ERR2("SO_REUSEADDR");
1397

    
1398
#ifdef CONFIG_NO_IFACE_BIND
1399
      /* Workaround missing ability to bind to an iface */
1400
      if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr))
1401
      {
1402
        if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0)
1403
          ERR2("SO_REUSEPORT");
1404
      }
1405
#endif
1406
    }
1407
    else
1408
      if (s->flags & SKF_HIGH_PORT)
1409
        if (sk_set_high_port(s) < 0)
1410
          log(L_WARN "Socket error: %s%#m", s->err);
1411

    
1412
    sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port);
1413
    if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
1414
      ERR2("bind");
1415
  }
1416

    
1417
  if (s->password)
1418
    if (sk_set_md5_auth(s, s->saddr, s->daddr, s->iface, s->password, 0) < 0)
1419
      goto err;
1420

    
1421
  switch (s->type)
1422
  {
1423
  case SK_TCP_ACTIVE:
1424
    sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
1425
    if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0)
1426
      sk_tcp_connected(s);
1427
    else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
1428
             errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
1429
      ERR2("connect");
1430
    break;
1431

    
1432
  case SK_TCP_PASSIVE:
1433
    if (listen(fd, 8) < 0)
1434
      ERR2("listen");
1435
    break;
1436

    
1437
  case SK_SSH_ACTIVE:
1438
  case SK_MAGIC:
1439
    break;
1440

    
1441
  default:
1442
    sk_alloc_bufs(s);
1443
  }
1444

    
1445
  if (!(s->flags & SKF_THREAD))
1446
    sk_insert(s);
1447

    
1448
  return 0;
1449

    
1450
err:
1451
  close(fd);
1452
  s->fd = -1;
1453
  return -1;
1454
}
1455

    
1456
int
1457
sk_open_unix(sock *s, char *name)
1458
{
1459
  struct sockaddr_un sa;
1460
  int fd;
1461

    
1462
  /* We are sloppy during error (leak fd and not set s->err), but we die anyway */
1463

    
1464
  fd = socket(AF_UNIX, SOCK_STREAM, 0);
1465
  if (fd < 0)
1466
    return -1;
1467

    
1468
  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
1469
    return -1;
1470

    
1471
  /* Path length checked in test_old_bird() */
1472
  sa.sun_family = AF_UNIX;
1473
  strcpy(sa.sun_path, name);
1474

    
1475
  if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
1476
    return -1;
1477

    
1478
  if (listen(fd, 8) < 0)
1479
    return -1;
1480

    
1481
  s->fd = fd;
1482
  sk_insert(s);
1483
  return 0;
1484
}
1485

    
1486

    
1487
#define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \
1488
                          CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)
1489
#define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO)
1490

    
1491
static void
1492
sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
1493
{
1494
  if (sk_is_ipv4(s))
1495
    sk_prepare_cmsgs4(s, msg, cbuf, cbuflen);
1496
  else
1497
    sk_prepare_cmsgs6(s, msg, cbuf, cbuflen);
1498
}
1499

    
1500
static void
1501
sk_process_cmsgs(sock *s, struct msghdr *msg)
1502
{
1503
  struct cmsghdr *cm;
1504

    
1505
  s->laddr = IPA_NONE;
1506
  s->lifindex = 0;
1507
  s->rcv_ttl = -1;
1508

    
1509
  for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
1510
  {
1511
    if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s))
1512
    {
1513
      sk_process_cmsg4_pktinfo(s, cm);
1514
      sk_process_cmsg4_ttl(s, cm);
1515
    }
1516

    
1517
    if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s))
1518
    {
1519
      sk_process_cmsg6_pktinfo(s, cm);
1520
      sk_process_cmsg6_ttl(s, cm);
1521
    }
1522
  }
1523
}
1524

    
1525

    
1526
static inline int
1527
sk_sendmsg(sock *s)
1528
{
1529
  struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
1530
  byte cmsg_buf[CMSG_TX_SPACE];
1531
  sockaddr dst;
1532

    
1533
  sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport);
1534

    
1535
  struct msghdr msg = {
1536
    .msg_name = &dst.sa,
1537
    .msg_namelen = SA_LEN(dst),
1538
    .msg_iov = &iov,
1539
    .msg_iovlen = 1
1540
  };
1541

    
1542
#ifdef CONFIG_USE_HDRINCL
1543
  byte hdr[20];
1544
  struct iovec iov2[2] = { {hdr, 20}, iov };
1545

    
1546
  if (s->flags & SKF_HDRINCL)
1547
  {
1548
    sk_prepare_ip_header(s, hdr, iov.iov_len);
1549
    msg.msg_iov = iov2;
1550
    msg.msg_iovlen = 2;
1551
  }
1552
#endif
1553

    
1554
  if (s->flags & SKF_PKTINFO)
1555
    sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
1556

    
1557
  return sendmsg(s->fd, &msg, 0);
1558
}
1559

    
1560
static inline int
1561
sk_recvmsg(sock *s)
1562
{
1563
  struct iovec iov = {s->rbuf, s->rbsize};
1564
  byte cmsg_buf[CMSG_RX_SPACE];
1565
  sockaddr src;
1566

    
1567
  struct msghdr msg = {
1568
    .msg_name = &src.sa,
1569
    .msg_namelen = sizeof(src), // XXXX ??
1570
    .msg_iov = &iov,
1571
    .msg_iovlen = 1,
1572
    .msg_control = cmsg_buf,
1573
    .msg_controllen = sizeof(cmsg_buf),
1574
    .msg_flags = 0
1575
  };
1576

    
1577
  int rv = recvmsg(s->fd, &msg, 0);
1578
  if (rv < 0)
1579
    return rv;
1580

    
1581
  //ifdef IPV4
1582
  //  if (cf_type == SK_IP)
1583
  //    rv = ipv4_skip_header(pbuf, rv);
1584
  //endif
1585

    
1586
  sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport);
1587
  sk_process_cmsgs(s, &msg);
1588

    
1589
  if (msg.msg_flags & MSG_TRUNC)
1590
    s->flags |= SKF_TRUNCATED;
1591
  else
1592
    s->flags &= ~SKF_TRUNCATED;
1593

    
1594
  return rv;
1595
}
1596

    
1597

    
1598
static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; }
1599

    
1600
static int
1601
sk_maybe_write(sock *s)
1602
{
1603
  int e;
1604

    
1605
  switch (s->type)
1606
  {
1607
  case SK_TCP:
1608
  case SK_MAGIC:
1609
  case SK_UNIX:
1610
    while (s->ttx != s->tpos)
1611
    {
1612
      e = write(s->fd, s->ttx, s->tpos - s->ttx);
1613

    
1614
      if (e < 0)
1615
      {
1616
        if (errno != EINTR && errno != EAGAIN)
1617
        {
1618
          reset_tx_buffer(s);
1619
          /* EPIPE is just a connection close notification during TX */
1620
          s->err_hook(s, (errno != EPIPE) ? errno : 0);
1621
          return -1;
1622
        }
1623
        return 0;
1624
      }
1625
      s->ttx += e;
1626
    }
1627
    reset_tx_buffer(s);
1628
    return 1;
1629

    
1630
#ifdef HAVE_LIBSSH
1631
  case SK_SSH:
1632
    while (s->ttx != s->tpos)
1633
    {
1634
      e = ssh_channel_write(s->ssh->channel, s->ttx, s->tpos - s->ttx);
1635

    
1636
      if (e < 0)
1637
      {
1638
        s->err = ssh_get_error(s->ssh->session);
1639
        s->err_hook(s, ssh_get_error_code(s->ssh->session));
1640

    
1641
        reset_tx_buffer(s);
1642
        /* EPIPE is just a connection close notification during TX */
1643
        s->err_hook(s, (errno != EPIPE) ? errno : 0);
1644
        return -1;
1645
      }
1646
      s->ttx += e;
1647
    }
1648
    reset_tx_buffer(s);
1649
    return 1;
1650
#endif
1651

    
1652
  case SK_UDP:
1653
  case SK_IP:
1654
    {
1655
      if (s->tbuf == s->tpos)
1656
        return 1;
1657

    
1658
      e = sk_sendmsg(s);
1659

    
1660
      if (e < 0)
1661
      {
1662
        if (errno != EINTR && errno != EAGAIN)
1663
        {
1664
          reset_tx_buffer(s);
1665
          s->err_hook(s, errno);
1666
          return -1;
1667
        }
1668

    
1669
        if (!s->tx_hook)
1670
          reset_tx_buffer(s);
1671
        return 0;
1672
      }
1673
      reset_tx_buffer(s);
1674
      return 1;
1675
    }
1676

    
1677
  default:
1678
    bug("sk_maybe_write: unknown socket type %d", s->type);
1679
  }
1680
}
1681

    
1682
int
1683
sk_rx_ready(sock *s)
1684
{
1685
  int rv;
1686
  struct pollfd pfd = { .fd = s->fd };
1687
  pfd.events |= POLLIN;
1688

    
1689
 redo:
1690
  rv = poll(&pfd, 1, 0);
1691

    
1692
  if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
1693
    goto redo;
1694

    
1695
  return rv;
1696
}
1697

    
1698
/**
1699
 * sk_send - send data to a socket
1700
 * @s: socket
1701
 * @len: number of bytes to send
1702
 *
1703
 * This function sends @len bytes of data prepared in the
1704
 * transmit buffer of the socket @s to the network connection.
1705
 * If the packet can be sent immediately, it does so and returns
1706
 * 1, else it queues the packet for later processing, returns 0
1707
 * and calls the @tx_hook of the socket when the tranmission
1708
 * takes place.
1709
 */
1710
int
1711
sk_send(sock *s, unsigned len)
1712
{
1713
  s->ttx = s->tbuf;
1714
  s->tpos = s->tbuf + len;
1715
  return sk_maybe_write(s);
1716
}
1717

    
1718
/**
1719
 * sk_send_to - send data to a specific destination
1720
 * @s: socket
1721
 * @len: number of bytes to send
1722
 * @addr: IP address to send the packet to
1723
 * @port: port to send the packet to
1724
 *
1725
 * This is a sk_send() replacement for connection-less packet sockets
1726
 * which allows destination of the packet to be chosen dynamically.
1727
 * Raw IP sockets should use 0 for @port.
1728
 */
1729
int
1730
sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
1731
{
1732
  s->daddr = addr;
1733
  if (port)
1734
    s->dport = port;
1735

    
1736
  s->ttx = s->tbuf;
1737
  s->tpos = s->tbuf + len;
1738
  return sk_maybe_write(s);
1739
}
1740

    
1741
/*
1742
int
1743
sk_send_full(sock *s, unsigned len, struct iface *ifa,
1744
             ip_addr saddr, ip_addr daddr, unsigned dport)
1745
{
1746
  s->iface = ifa;
1747
  s->saddr = saddr;
1748
  s->daddr = daddr;
1749
  s->dport = dport;
1750
  s->ttx = s->tbuf;
1751
  s->tpos = s->tbuf + len;
1752
  return sk_maybe_write(s);
1753
}
1754
*/
1755

    
1756
static void
1757
call_rx_hook(sock *s, int size)
1758
{
1759
  if (s->rx_hook(s, size))
1760
  {
1761
    /* We need to be careful since the socket could have been deleted by the hook */
1762
    if (current_sock == s)
1763
      s->rpos = s->rbuf;
1764
  }
1765
}
1766

    
1767
#ifdef HAVE_LIBSSH
1768
static int
1769
sk_read_ssh(sock *s)
1770
{
1771
  ssh_channel rchans[2] = { s->ssh->channel, NULL };
1772
  struct timeval timev = { 1, 0 };
1773

    
1774
  if (ssh_channel_select(rchans, NULL, NULL, &timev) == SSH_EINTR)
1775
    return 1; /* Try again */
1776

    
1777
  if (ssh_channel_is_eof(s->ssh->channel) != 0)
1778
  {
1779
    /* The remote side is closing the connection */
1780
    s->err_hook(s, 0);
1781
    return 0;
1782
  }
1783

    
1784
  if (rchans[0] == NULL)
1785
    return 0; /* No data is available on the socket */
1786

    
1787
  const uint used_bytes = s->rpos - s->rbuf;
1788
  const int read_bytes = ssh_channel_read_nonblocking(s->ssh->channel, s->rpos, s->rbsize - used_bytes, 0);
1789
  if (read_bytes > 0)
1790
  {
1791
    /* Received data */
1792
    s->rpos += read_bytes;
1793
    call_rx_hook(s, used_bytes + read_bytes);
1794
    return 1;
1795
  }
1796
  else if (read_bytes == 0)
1797
  {
1798
    if (ssh_channel_is_eof(s->ssh->channel) != 0)
1799
    {
1800
        /* The remote side is closing the connection */
1801
        s->err_hook(s, 0);
1802
    }
1803
  }
1804
  else
1805
  {
1806
    s->err = ssh_get_error(s->ssh->session);
1807
    s->err_hook(s, ssh_get_error_code(s->ssh->session));
1808
  }
1809

    
1810
  return 0; /* No data is available on the socket */
1811
}
1812
#endif
1813

    
1814
 /* sk_read() and sk_write() are called from BFD's event loop */
1815

    
1816
int
1817
sk_read(sock *s, int revents)
1818
{
1819
  switch (s->type)
1820
  {
1821
  case SK_TCP_PASSIVE:
1822
    return sk_passive_connected(s, SK_TCP);
1823

    
1824
  case SK_UNIX_PASSIVE:
1825
    return sk_passive_connected(s, SK_UNIX);
1826

    
1827
  case SK_TCP:
1828
  case SK_UNIX:
1829
    {
1830
      int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
1831

    
1832
      if (c < 0)
1833
      {
1834
        if (errno != EINTR && errno != EAGAIN)
1835
          s->err_hook(s, errno);
1836
        else if (errno == EAGAIN && !(revents & POLLIN))
1837
        {
1838
          log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents);
1839
          s->err_hook(s, 0);
1840
        }
1841
      }
1842
      else if (!c)
1843
        s->err_hook(s, 0);
1844
      else
1845
      {
1846
        s->rpos += c;
1847
        call_rx_hook(s, s->rpos - s->rbuf);
1848
        return 1;
1849
      }
1850
      return 0;
1851
    }
1852

    
1853
#ifdef HAVE_LIBSSH
1854
  case SK_SSH:
1855
    return sk_read_ssh(s);
1856
#endif
1857

    
1858
  case SK_MAGIC:
1859
    return s->rx_hook(s, 0);
1860

    
1861
  default:
1862
    {
1863
      int e = sk_recvmsg(s);
1864

    
1865
      if (e < 0)
1866
      {
1867
        if (errno != EINTR && errno != EAGAIN)
1868
          s->err_hook(s, errno);
1869
        return 0;
1870
      }
1871

    
1872
      s->rpos = s->rbuf + e;
1873
      s->rx_hook(s, e);
1874
      return 1;
1875
    }
1876
  }
1877
}
1878

    
1879
int
1880
sk_write(sock *s)
1881
{
1882
  switch (s->type)
1883
  {
1884
  case SK_TCP_ACTIVE:
1885
    {
1886
      sockaddr sa;
1887
      sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
1888

    
1889
      if (connect(s->fd, &sa.sa, SA_LEN(sa)) >= 0 || errno == EISCONN)
1890
        sk_tcp_connected(s);
1891
      else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
1892
        s->err_hook(s, errno);
1893
      return 0;
1894
    }
1895

    
1896
#ifdef HAVE_LIBSSH
1897
  case SK_SSH_ACTIVE:
1898
    {
1899
      switch (sk_ssh_connect(s))
1900
      {
1901
        case SSH_OK:
1902
          sk_ssh_connected(s);
1903
          break;
1904

    
1905
        case SSH_AGAIN:
1906
          return 1;
1907

    
1908
        case SSH_ERROR:
1909
          s->err = ssh_get_error(s->ssh->session);
1910
          s->err_hook(s, ssh_get_error_code(s->ssh->session));
1911
          break;
1912
      }
1913
      return 0;
1914
    }
1915
#endif
1916

    
1917
  default:
1918
    if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
1919
    {
1920
      if (s->tx_hook)
1921
        s->tx_hook(s);
1922
      return 1;
1923
    }
1924
    return 0;
1925
  }
1926
}
1927

    
1928
int sk_is_ipv4(sock *s)
1929
{ return s->af == AF_INET; }
1930

    
1931
int sk_is_ipv6(sock *s)
1932
{ return s->af == AF_INET6; }
1933

    
1934
void
1935
sk_err(sock *s, int revents)
1936
{
1937
  int se = 0, sse = sizeof(se);
1938
  if ((s->type != SK_MAGIC) && (revents & POLLERR))
1939
    if (getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &se, &sse) < 0)
1940
    {
1941
      log(L_ERR "IO: Socket error: SO_ERROR: %m");
1942
      se = 0;
1943
    }
1944

    
1945
  s->err_hook(s, se);
1946
}
1947

    
1948
void
1949
sk_dump_all(void)
1950
{
1951
  node *n;
1952
  sock *s;
1953

    
1954
  debug("Open sockets:\n");
1955
  WALK_LIST(n, sock_list)
1956
  {
1957
    s = SKIP_BACK(sock, n, n);
1958
    debug("%p ", s);
1959
    sk_dump(&s->r);
1960
  }
1961
  debug("\n");
1962
}
1963

    
1964

    
1965
/*
1966
 *        Internal event log and watchdog
1967
 */
1968

    
1969
#define EVENT_LOG_LENGTH 32
1970

    
1971
struct event_log_entry
1972
{
1973
  void *hook;
1974
  void *data;
1975
  btime timestamp;
1976
  btime duration;
1977
};
1978

    
1979
static struct event_log_entry event_log[EVENT_LOG_LENGTH];
1980
static struct event_log_entry *event_open;
1981
static int event_log_pos, event_log_num, watchdog_active;
1982
static btime last_time;
1983
static btime loop_time;
1984

    
1985
static void
1986
io_update_time(void)
1987
{
1988
  struct timespec ts;
1989
  int rv;
1990

    
1991
  /*
1992
   * This is third time-tracking procedure (after update_times() above and
1993
   * times_update() in BFD), dedicated to internal event log and latency
1994
   * tracking. Hopefully, we consolidate these sometimes.
1995
   */
1996

    
1997
  rv = clock_gettime(CLOCK_MONOTONIC, &ts);
1998
  if (rv < 0)
1999
    die("clock_gettime: %m");
2000

    
2001
  last_time = ts.tv_sec S + ts.tv_nsec NS;
2002

    
2003
  if (event_open)
2004
  {
2005
    event_open->duration = last_time - event_open->timestamp;
2006

    
2007
    if (event_open->duration > config->latency_limit)
2008
      log(L_WARN "Event 0x%p 0x%p took %d ms",
2009
          event_open->hook, event_open->data, (int) (event_open->duration TO_MS));
2010

    
2011
    event_open = NULL;
2012
  }
2013
}
2014

    
2015
/**
2016
 * io_log_event - mark approaching event into event log
2017
 * @hook: event hook address
2018
 * @data: event data address
2019
 *
2020
 * Store info (hook, data, timestamp) about the following internal event into
2021
 * a circular event log (@event_log). When latency tracking is enabled, the log
2022
 * entry is kept open (in @event_open) so the duration can be filled later.
2023
 */
2024
void
2025
io_log_event(void *hook, void *data)
2026
{
2027
  if (config->latency_debug)
2028
    io_update_time();
2029

    
2030
  struct event_log_entry *en = event_log + event_log_pos;
2031

    
2032
  en->hook = hook;
2033
  en->data = data;
2034
  en->timestamp = last_time;
2035
  en->duration = 0;
2036

    
2037
  event_log_num++;
2038
  event_log_pos++;
2039
  event_log_pos %= EVENT_LOG_LENGTH;
2040

    
2041
  event_open = config->latency_debug ? en : NULL;
2042
}
2043

    
2044
static inline void
2045
io_close_event(void)
2046
{
2047
  if (event_open)
2048
    io_update_time();
2049
}
2050

    
2051
void
2052
io_log_dump(void)
2053
{
2054
  int i;
2055

    
2056
  log(L_DEBUG "Event log:");
2057
  for (i = 0; i < EVENT_LOG_LENGTH; i++)
2058
  {
2059
    struct event_log_entry *en = event_log + (event_log_pos + i) % EVENT_LOG_LENGTH;
2060
    if (en->hook)
2061
      log(L_DEBUG "  Event 0x%p 0x%p at %8d for %d ms", en->hook, en->data,
2062
          (int) ((last_time - en->timestamp) TO_MS), (int) (en->duration TO_MS));
2063
  }
2064
}
2065

    
2066
void
2067
watchdog_sigalrm(int sig UNUSED)
2068
{
2069
  /* Update last_time and duration, but skip latency check */
2070
  config->latency_limit = 0xffffffff;
2071
  io_update_time();
2072

    
2073
  /* We want core dump */
2074
  abort();
2075
}
2076

    
2077
static inline void
2078
watchdog_start1(void)
2079
{
2080
  io_update_time();
2081

    
2082
  loop_time = last_time;
2083
}
2084

    
2085
static inline void
2086
watchdog_start(void)
2087
{
2088
  io_update_time();
2089

    
2090
  loop_time = last_time;
2091
  event_log_num = 0;
2092

    
2093
  if (config->watchdog_timeout)
2094
  {
2095
    alarm(config->watchdog_timeout);
2096
    watchdog_active = 1;
2097
  }
2098
}
2099

    
2100
static inline void
2101
watchdog_stop(void)
2102
{
2103
  io_update_time();
2104

    
2105
  if (watchdog_active)
2106
  {
2107
    alarm(0);
2108
    watchdog_active = 0;
2109
  }
2110

    
2111
  btime duration = last_time - loop_time;
2112
  if (duration > config->watchdog_warning)
2113
    log(L_WARN "I/O loop cycle took %d ms for %d events",
2114
        (int) (duration TO_MS), event_log_num);
2115
}
2116

    
2117

    
2118
/*
2119
 *        Main I/O Loop
2120
 */
2121

    
2122
volatile int async_config_flag;                /* Asynchronous reconfiguration/dump scheduled */
2123
volatile int async_dump_flag;
2124
volatile int async_shutdown_flag;
2125

    
2126
void
2127
io_init(void)
2128
{
2129
  init_list(&sock_list);
2130
  init_list(&global_event_list);
2131
  krt_io_init();
2132
  // XXX init_times();
2133
  // XXX update_times();
2134
  boot_time = current_time();
2135
  srandom((uint) (current_real_time() TO_S));
2136
}
2137

    
2138
static int short_loops = 0;
2139
#define SHORT_LOOP_MAX 10
2140

    
2141
void
2142
io_loop(void)
2143
{
2144
  int poll_tout, timeout;
2145
  int nfds, events, pout;
2146
  timer2 *t;
2147
  sock *s;
2148
  node *n;
2149
  int fdmax = 256;
2150
  struct pollfd *pfd = xmalloc(fdmax * sizeof(struct pollfd));
2151

    
2152
  watchdog_start1();
2153
  for(;;)
2154
    {
2155
      times_update(&main_timeloop);
2156
      events = ev_run_list(&global_event_list);
2157
      timers_fire(&main_timeloop);
2158
      io_close_event();
2159

    
2160
      // FIXME
2161
      poll_tout = (events ? 0 : 3000); /* Time in milliseconds */
2162
      if (t = timers_first(&main_timeloop))
2163
      {
2164
        times_update(&main_timeloop);
2165
        timeout = (tm2_remains(t) TO_MS) + 1;
2166
        poll_tout = MIN(poll_tout, timeout);
2167
      }
2168

    
2169
      nfds = 0;
2170
      WALK_LIST(n, sock_list)
2171
        {
2172
          pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */
2173
          s = SKIP_BACK(sock, n, n);
2174
          if (s->rx_hook)
2175
            {
2176
              pfd[nfds].fd = s->fd;
2177
              pfd[nfds].events |= POLLIN;
2178
            }
2179
          if (s->tx_hook && s->ttx != s->tpos)
2180
            {
2181
              pfd[nfds].fd = s->fd;
2182
              pfd[nfds].events |= POLLOUT;
2183
            }
2184
          if (pfd[nfds].fd != -1)
2185
            {
2186
              s->index = nfds;
2187
              nfds++;
2188
            }
2189
          else
2190
            s->index = -1;
2191

    
2192
          if (nfds >= fdmax)
2193
            {
2194
              fdmax *= 2;
2195
              pfd = xrealloc(pfd, fdmax * sizeof(struct pollfd));
2196
            }
2197
        }
2198

    
2199
      /*
2200
       * Yes, this is racy. But even if the signal comes before this test
2201
       * and entering poll(), it gets caught on the next timer tick.
2202
       */
2203

    
2204
      if (async_config_flag)
2205
        {
2206
          io_log_event(async_config, NULL);
2207
          async_config();
2208
          async_config_flag = 0;
2209
          continue;
2210
        }
2211
      if (async_dump_flag)
2212
        {
2213
          io_log_event(async_dump, NULL);
2214
          async_dump();
2215
          async_dump_flag = 0;
2216
          continue;
2217
        }
2218
      if (async_shutdown_flag)
2219
        {
2220
          io_log_event(async_shutdown, NULL);
2221
          async_shutdown();
2222
          async_shutdown_flag = 0;
2223
          continue;
2224
        }
2225

    
2226
      /* And finally enter poll() to find active sockets */
2227
      watchdog_stop();
2228
      pout = poll(pfd, nfds, poll_tout);
2229
      watchdog_start();
2230

    
2231
      if (pout < 0)
2232
        {
2233
          if (errno == EINTR || errno == EAGAIN)
2234
            continue;
2235
          die("poll: %m");
2236
        }
2237
      if (pout)
2238
        {
2239
          times_update(&main_timeloop);
2240

    
2241
          /* guaranteed to be non-empty */
2242
          current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
2243

    
2244
          while (current_sock)
2245
            {
2246
              sock *s = current_sock;
2247
              if (s->index == -1)
2248
                {
2249
                  current_sock = sk_next(s);
2250
                  goto next;
2251
                }
2252

    
2253
              int e;
2254
              int steps;
2255

    
2256
              steps = MAX_STEPS;
2257
              if (s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
2258
                do
2259
                  {
2260
                    steps--;
2261
                    io_log_event(s->rx_hook, s->data);
2262
                    e = sk_read(s, pfd[s->index].revents);
2263
                    if (s != current_sock)
2264
                      goto next;
2265
                  }
2266
                while (e && s->rx_hook && steps);
2267

    
2268
              steps = MAX_STEPS;
2269
              if (pfd[s->index].revents & POLLOUT)
2270
                do
2271
                  {
2272
                    steps--;
2273
                    io_log_event(s->tx_hook, s->data);
2274
                    e = sk_write(s);
2275
                    if (s != current_sock)
2276
                      goto next;
2277
                  }
2278
                while (e && steps);
2279

    
2280
              current_sock = sk_next(s);
2281
            next: ;
2282
            }
2283

    
2284
          short_loops++;
2285
          if (events && (short_loops < SHORT_LOOP_MAX))
2286
            continue;
2287
          short_loops = 0;
2288

    
2289
          int count = 0;
2290
          current_sock = stored_sock;
2291
          if (current_sock == NULL)
2292
            current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
2293

    
2294
          while (current_sock && count < MAX_RX_STEPS)
2295
            {
2296
              sock *s = current_sock;
2297
              if (s->index == -1)
2298
                {
2299
                  current_sock = sk_next(s);
2300
                  goto next2;
2301
                }
2302

    
2303
              if (!s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
2304
                {
2305
                  count++;
2306
                  io_log_event(s->rx_hook, s->data);
2307
                  sk_read(s, pfd[s->index].revents);
2308
                  if (s != current_sock)
2309
                    goto next2;
2310
                }
2311

    
2312
              if (pfd[s->index].revents & (POLLHUP | POLLERR))
2313
                {
2314
                  sk_err(s, pfd[s->index].revents);
2315
                  if (s != current_sock)
2316
                    goto next2;
2317
                }
2318

    
2319
              current_sock = sk_next(s);
2320
            next2: ;
2321
            }
2322

    
2323

    
2324
          stored_sock = current_sock;
2325
        }
2326
    }
2327
}
2328

    
2329
void
2330
test_old_bird(char *path)
2331
{
2332
  int fd;
2333
  struct sockaddr_un sa;
2334

    
2335
  fd = socket(AF_UNIX, SOCK_STREAM, 0);
2336
  if (fd < 0)
2337
    die("Cannot create socket: %m");
2338
  if (strlen(path) >= sizeof(sa.sun_path))
2339
    die("Socket path too long");
2340
  bzero(&sa, sizeof(sa));
2341
  sa.sun_family = AF_UNIX;
2342
  strcpy(sa.sun_path, path);
2343
  if (connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) == 0)
2344
    die("I found another BIRD running.");
2345
  close(fd);
2346
}