Statistics
| Branch: | Revision:

iof-bird / bird-2.0.1 / sysdep / unix / io.c @ 6b3f1a54

History | View | Annotate | Download (47.7 KB)

1
/*
2
 *        BIRD Internet Routing Daemon -- Unix I/O
3
 *
4
 *        (c) 1998--2004 Martin Mares <mj@ucw.cz>
5
 *      (c) 2004       Ondrej Filip <feela@network.cz>
6
 *
7
 *        Can be freely distributed and used under the terms of the GNU GPL.
8
 */
9

    
10
/* Unfortunately, some glibc versions hide parts of RFC 3542 API
11
   if _GNU_SOURCE is not defined. */
12
#ifndef _GNU_SOURCE
13
#define _GNU_SOURCE
14
#endif
15

    
16
#include <stdio.h>
17
#include <stdlib.h>
18
#include <time.h>
19
#include <sys/time.h>
20
#include <sys/types.h>
21
#include <sys/socket.h>
22
#include <sys/uio.h>
23
#include <sys/un.h>
24
#include <poll.h>
25
#include <unistd.h>
26
#include <fcntl.h>
27
#include <errno.h>
28
#include <net/if.h>
29
#include <netinet/in.h>
30
#include <netinet/tcp.h>
31
#include <netinet/udp.h>
32
#include <netinet/icmp6.h>
33

    
34
#include "nest/bird.h"
35
#include "lib/lists.h"
36
#include "lib/resource.h"
37
#include "lib/socket.h"
38
#include "lib/event.h"
39
#include "lib/timer.h"
40
#include "lib/string.h"
41
#include "nest/iface.h"
42
#include "conf/conf.h"
43

    
44
#include "sysdep/unix/unix.h"
45
#include CONFIG_INCLUDE_SYSIO_H
46

    
47
/* Maximum number of calls of tx handler for one socket in one
48
 * poll iteration. Should be small enough to not monopolize CPU by
49
 * one protocol instance.
50
 */
51
#define MAX_STEPS 4
52

    
53
/* Maximum number of calls of rx handler for all sockets in one poll
54
   iteration. RX callbacks are often much more costly so we limit
55
   this to gen small latencies */
56
#define MAX_RX_STEPS 4
57

    
58
/*
59
 *        Tracked Files
60
 */
61

    
62
struct rfile {
63
  resource r;
64
  FILE *f;
65
};
66

    
67
static void
68
rf_free(resource *r)
69
{
70
  struct rfile *a = (struct rfile *) r;
71

    
72
  fclose(a->f);
73
}
74

    
75
static void
76
rf_dump(resource *r)
77
{
78
  struct rfile *a = (struct rfile *) r;
79

    
80
  debug("(FILE *%p)\n", a->f);
81
}
82

    
83
static struct resclass rf_class = {
84
  "FILE",
85
  sizeof(struct rfile),
86
  rf_free,
87
  rf_dump,
88
  NULL,
89
  NULL
90
};
91

    
92
void *
93
tracked_fopen(pool *p, char *name, char *mode)
94
{
95
  FILE *f = fopen(name, mode);
96

    
97
  if (f)
98
    {
99
      struct rfile *r = ralloc(p, &rf_class);
100
      r->f = f;
101
    }
102
  return f;
103
}
104

    
105

    
106
/*
107
 *        Time clock
108
 */
109

    
110
btime boot_time;
111

    
112
void
113
times_init(struct timeloop *loop)
114
{
115
  struct timespec ts;
116
  int rv;
117

    
118
  rv = clock_gettime(CLOCK_MONOTONIC, &ts);
119
  if (rv < 0)
120
    die("Monotonic clock is missing");
121

    
122
  if ((ts.tv_sec < 0) || (((s64) ts.tv_sec) > ((s64) 1 << 40)))
123
    log(L_WARN "Monotonic clock is crazy");
124

    
125
  loop->last_time = ts.tv_sec S + ts.tv_nsec NS;
126
  loop->real_time = 0;
127
}
128

    
129
void
130
times_update(struct timeloop *loop)
131
{
132
  struct timespec ts;
133
  int rv;
134

    
135
  rv = clock_gettime(CLOCK_MONOTONIC, &ts);
136
  if (rv < 0)
137
    die("clock_gettime: %m");
138

    
139
  btime new_time = ts.tv_sec S + ts.tv_nsec NS;
140

    
141
  if (new_time < loop->last_time)
142
    log(L_ERR "Monotonic clock is broken");
143

    
144
  loop->last_time = new_time;
145
  loop->real_time = 0;
146
}
147

    
148
void
149
times_update_real_time(struct timeloop *loop)
150
{
151
  struct timespec ts;
152
  int rv;
153

    
154
  rv = clock_gettime(CLOCK_REALTIME, &ts);
155
  if (rv < 0)
156
    die("clock_gettime: %m");
157

    
158
  loop->real_time = ts.tv_sec S + ts.tv_nsec NS;
159
}
160

    
161

    
162
/**
163
 * DOC: Sockets
164
 *
165
 * Socket resources represent network connections. Their data structure (&socket)
166
 * contains a lot of fields defining the exact type of the socket, the local and
167
 * remote addresses and ports, pointers to socket buffers and finally pointers to
168
 * hook functions to be called when new data have arrived to the receive buffer
169
 * (@rx_hook), when the contents of the transmit buffer have been transmitted
170
 * (@tx_hook) and when an error or connection close occurs (@err_hook).
171
 *
172
 * Freeing of sockets from inside socket hooks is perfectly safe.
173
 */
174

    
175
#ifndef SOL_IP
176
#define SOL_IP IPPROTO_IP
177
#endif
178

    
179
#ifndef SOL_IPV6
180
#define SOL_IPV6 IPPROTO_IPV6
181
#endif
182

    
183
#ifndef SOL_ICMPV6
184
#define SOL_ICMPV6 IPPROTO_ICMPV6
185
#endif
186

    
187

    
188
/*
189
 *        Sockaddr helper functions
190
 */
191

    
192
static inline int UNUSED sockaddr_length(int af)
193
{ return (af == AF_INET) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); }
194

    
195
static inline void
196
sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, uint port)
197
{
198
  memset(sa, 0, sizeof(struct sockaddr_in));
199
#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
200
  sa->sin_len = sizeof(struct sockaddr_in);
201
#endif
202
  sa->sin_family = AF_INET;
203
  sa->sin_port = htons(port);
204
  sa->sin_addr = ipa_to_in4(a);
205
}
206

    
207
static inline void
208
sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port)
209
{
210
  memset(sa, 0, sizeof(struct sockaddr_in6));
211
#ifdef SIN6_LEN
212
  sa->sin6_len = sizeof(struct sockaddr_in6);
213
#endif
214
  sa->sin6_family = AF_INET6;
215
  sa->sin6_port = htons(port);
216
  sa->sin6_flowinfo = 0;
217
  sa->sin6_addr = ipa_to_in6(a);
218

    
219
  if (ifa && ipa_is_link_local(a))
220
    sa->sin6_scope_id = ifa->index;
221
}
222

    
223
void
224
sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port)
225
{
226
  if (af == AF_INET)
227
    sockaddr_fill4((struct sockaddr_in *) sa, a, port);
228
  else if (af == AF_INET6)
229
    sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port);
230
  else
231
    bug("Unknown AF");
232
}
233

    
234
static inline void
235
sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, uint *port)
236
{
237
  *port = ntohs(sa->sin_port);
238
  *a = ipa_from_in4(sa->sin_addr);
239
}
240

    
241
static inline void
242
sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port)
243
{
244
  *port = ntohs(sa->sin6_port);
245
  *a = ipa_from_in6(sa->sin6_addr);
246

    
247
  if (ifa && ipa_is_link_local(*a))
248
    *ifa = if_find_by_index(sa->sin6_scope_id);
249
}
250

    
251
int
252
sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port)
253
{
254
  if (sa->sa.sa_family != af)
255
    goto fail;
256

    
257
  if (af == AF_INET)
258
    sockaddr_read4((struct sockaddr_in *) sa, a, port);
259
  else if (af == AF_INET6)
260
    sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port);
261
  else
262
    goto fail;
263

    
264
  return 0;
265

    
266
 fail:
267
  *a = IPA_NONE;
268
  *port = 0;
269
  return -1;
270
}
271

    
272

    
273
/*
274
 *        IPv6 multicast syscalls
275
 */
276

    
277
/* Fortunately standardized in RFC 3493 */
278

    
279
#define INIT_MREQ6(maddr,ifa) \
280
  { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = ifa->index }
281

    
282
static inline int
283
sk_setup_multicast6(sock *s)
284
{
285
  int index = s->iface->index;
286
  int ttl = s->ttl;
287
  int n = 0;
288

    
289
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
290
    ERR("IPV6_MULTICAST_IF");
291

    
292
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0)
293
    ERR("IPV6_MULTICAST_HOPS");
294

    
295
  if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0)
296
    ERR("IPV6_MULTICAST_LOOP");
297

    
298
  return 0;
299
}
300

    
301
static inline int
302
sk_join_group6(sock *s, ip_addr maddr)
303
{
304
  struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
305

    
306
  if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0)
307
    ERR("IPV6_JOIN_GROUP");
308

    
309
  return 0;
310
}
311

    
312
static inline int
313
sk_leave_group6(sock *s, ip_addr maddr)
314
{
315
  struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
316

    
317
  if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0)
318
    ERR("IPV6_LEAVE_GROUP");
319

    
320
  return 0;
321
}
322

    
323

    
324
/*
325
 *        IPv6 packet control messages
326
 */
327

    
328
/* Also standardized, in RFC 3542 */
329

    
330
/*
331
 * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
332
 * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
333
 * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
334
 * RFC and we use IPV6_PKTINFO.
335
 */
336
#ifndef IPV6_RECVPKTINFO
337
#define IPV6_RECVPKTINFO IPV6_PKTINFO
338
#endif
339
/*
340
 * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
341
 */
342
#ifndef IPV6_RECVHOPLIMIT
343
#define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
344
#endif
345

    
346

    
347
#define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
348
#define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
349

    
350
static inline int
351
sk_request_cmsg6_pktinfo(sock *s)
352
{
353
  int y = 1;
354

    
355
  if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0)
356
    ERR("IPV6_RECVPKTINFO");
357

    
358
  return 0;
359
}
360

    
361
static inline int
362
sk_request_cmsg6_ttl(sock *s)
363
{
364
  int y = 1;
365

    
366
  if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0)
367
    ERR("IPV6_RECVHOPLIMIT");
368

    
369
  return 0;
370
}
371

    
372
static inline void
373
sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm)
374
{
375
  if (cm->cmsg_type == IPV6_PKTINFO)
376
  {
377
    struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm);
378
    s->laddr = ipa_from_in6(pi->ipi6_addr);
379
    s->lifindex = pi->ipi6_ifindex;
380
  }
381
}
382

    
383
static inline void
384
sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm)
385
{
386
  if (cm->cmsg_type == IPV6_HOPLIMIT)
387
    s->rcv_ttl = * (int *) CMSG_DATA(cm);
388
}
389

    
390
static inline void
391
sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
392
{
393
  struct cmsghdr *cm;
394
  struct in6_pktinfo *pi;
395
  int controllen = 0;
396

    
397
  msg->msg_control = cbuf;
398
  msg->msg_controllen = cbuflen;
399

    
400
  cm = CMSG_FIRSTHDR(msg);
401
  cm->cmsg_level = SOL_IPV6;
402
  cm->cmsg_type = IPV6_PKTINFO;
403
  cm->cmsg_len = CMSG_LEN(sizeof(*pi));
404
  controllen += CMSG_SPACE(sizeof(*pi));
405

    
406
  pi = (struct in6_pktinfo *) CMSG_DATA(cm);
407
  pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
408
  pi->ipi6_addr = ipa_to_in6(s->saddr);
409

    
410
  msg->msg_controllen = controllen;
411
}
412

    
413

    
414
/*
415
 *        Miscellaneous socket syscalls
416
 */
417

    
418
static inline int
419
sk_set_ttl4(sock *s, int ttl)
420
{
421
  if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0)
422
    ERR("IP_TTL");
423

    
424
  return 0;
425
}
426

    
427
static inline int
428
sk_set_ttl6(sock *s, int ttl)
429
{
430
  if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0)
431
    ERR("IPV6_UNICAST_HOPS");
432

    
433
  return 0;
434
}
435

    
436
static inline int
437
sk_set_tos4(sock *s, int tos)
438
{
439
  if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0)
440
    ERR("IP_TOS");
441

    
442
  return 0;
443
}
444

    
445
static inline int
446
sk_set_tos6(sock *s, int tos)
447
{
448
  if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0)
449
    ERR("IPV6_TCLASS");
450

    
451
  return 0;
452
}
453

    
454
static inline int
455
sk_set_high_port(sock *s UNUSED)
456
{
457
  /* Port range setting is optional, ignore it if not supported */
458

    
459
#ifdef IP_PORTRANGE
460
  if (sk_is_ipv4(s))
461
  {
462
    int range = IP_PORTRANGE_HIGH;
463
    if (setsockopt(s->fd, SOL_IP, IP_PORTRANGE, &range, sizeof(range)) < 0)
464
      ERR("IP_PORTRANGE");
465
  }
466
#endif
467

    
468
#ifdef IPV6_PORTRANGE
469
  if (sk_is_ipv6(s))
470
  {
471
    int range = IPV6_PORTRANGE_HIGH;
472
    if (setsockopt(s->fd, SOL_IPV6, IPV6_PORTRANGE, &range, sizeof(range)) < 0)
473
      ERR("IPV6_PORTRANGE");
474
  }
475
#endif
476

    
477
  return 0;
478
}
479

    
480
static inline byte *
481
sk_skip_ip_header(byte *pkt, int *len)
482
{
483
  if ((*len < 20) || ((*pkt & 0xf0) != 0x40))
484
    return NULL;
485

    
486
  int hlen = (*pkt & 0x0f) * 4;
487
  if ((hlen < 20) || (hlen > *len))
488
    return NULL;
489

    
490
  *len -= hlen;
491
  return pkt + hlen;
492
}
493

    
494
byte *
495
sk_rx_buffer(sock *s, int *len)
496
{
497
  if (sk_is_ipv4(s) && (s->type == SK_IP))
498
    return sk_skip_ip_header(s->rbuf, len);
499
  else
500
    return s->rbuf;
501
}
502

    
503

    
504
/*
505
 *        Public socket functions
506
 */
507

    
508
/**
509
 * sk_setup_multicast - enable multicast for given socket
510
 * @s: socket
511
 *
512
 * Prepare transmission of multicast packets for given datagram socket.
513
 * The socket must have defined @iface.
514
 *
515
 * Result: 0 for success, -1 for an error.
516
 */
517

    
518
int
519
sk_setup_multicast(sock *s)
520
{
521
  ASSERT(s->iface);
522

    
523
  if (sk_is_ipv4(s))
524
    return sk_setup_multicast4(s);
525
  else
526
    return sk_setup_multicast6(s);
527
}
528

    
529
/**
530
 * sk_join_group - join multicast group for given socket
531
 * @s: socket
532
 * @maddr: multicast address
533
 *
534
 * Join multicast group for given datagram socket and associated interface.
535
 * The socket must have defined @iface.
536
 *
537
 * Result: 0 for success, -1 for an error.
538
 */
539

    
540
int
541
sk_join_group(sock *s, ip_addr maddr)
542
{
543
  if (sk_is_ipv4(s))
544
    return sk_join_group4(s, maddr);
545
  else
546
    return sk_join_group6(s, maddr);
547
}
548

    
549
/**
550
 * sk_leave_group - leave multicast group for given socket
551
 * @s: socket
552
 * @maddr: multicast address
553
 *
554
 * Leave multicast group for given datagram socket and associated interface.
555
 * The socket must have defined @iface.
556
 *
557
 * Result: 0 for success, -1 for an error.
558
 */
559

    
560
int
561
sk_leave_group(sock *s, ip_addr maddr)
562
{
563
  if (sk_is_ipv4(s))
564
    return sk_leave_group4(s, maddr);
565
  else
566
    return sk_leave_group6(s, maddr);
567
}
568

    
569
/**
570
 * sk_setup_broadcast - enable broadcast for given socket
571
 * @s: socket
572
 *
573
 * Allow reception and transmission of broadcast packets for given datagram
574
 * socket. The socket must have defined @iface. For transmission, packets should
575
 * be send to @brd address of @iface.
576
 *
577
 * Result: 0 for success, -1 for an error.
578
 */
579

    
580
int
581
sk_setup_broadcast(sock *s)
582
{
583
  int y = 1;
584

    
585
  if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0)
586
    ERR("SO_BROADCAST");
587

    
588
  return 0;
589
}
590

    
591
/**
592
 * sk_set_ttl - set transmit TTL for given socket
593
 * @s: socket
594
 * @ttl: TTL value
595
 *
596
 * Set TTL for already opened connections when TTL was not set before. Useful
597
 * for accepted connections when different ones should have different TTL.
598
 *
599
 * Result: 0 for success, -1 for an error.
600
 */
601

    
602
int
603
sk_set_ttl(sock *s, int ttl)
604
{
605
  s->ttl = ttl;
606

    
607
  if (sk_is_ipv4(s))
608
    return sk_set_ttl4(s, ttl);
609
  else
610
    return sk_set_ttl6(s, ttl);
611
}
612

    
613
/**
614
 * sk_set_min_ttl - set minimal accepted TTL for given socket
615
 * @s: socket
616
 * @ttl: TTL value
617
 *
618
 * Set minimal accepted TTL for given socket. Can be used for TTL security.
619
 * implementations.
620
 *
621
 * Result: 0 for success, -1 for an error.
622
 */
623

    
624
int
625
sk_set_min_ttl(sock *s, int ttl)
626
{
627
  if (sk_is_ipv4(s))
628
    return sk_set_min_ttl4(s, ttl);
629
  else
630
    return sk_set_min_ttl6(s, ttl);
631
}
632

    
633
#if 0
634
/**
635
 * sk_set_md5_auth - add / remove MD5 security association for given socket
636
 * @s: socket
637
 * @local: IP address of local side
638
 * @remote: IP address of remote side
639
 * @ifa: Interface for link-local IP address
640
 * @passwd: Password used for MD5 authentication
641
 * @setkey: Update also system SA/SP database
642
 *
643
 * In TCP MD5 handling code in kernel, there is a set of security associations
644
 * used for choosing password and other authentication parameters according to
645
 * the local and remote address. This function is useful for listening socket,
646
 * for active sockets it may be enough to set s->password field.
647
 *
648
 * When called with passwd != NULL, the new pair is added,
649
 * When called with passwd == NULL, the existing pair is removed.
650
 *
651
 * Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are
652
 * stored in global SA/SP database (but the behavior also must be enabled on
653
 * per-socket basis). In case of multiple sockets to the same neighbor, the
654
 * socket-specific state must be configured for each socket while global state
655
 * just once per src-dst pair. The @setkey argument controls whether the global
656
 * state (SA/SP database) is also updated.
657
 *
658
 * Result: 0 for success, -1 for an error.
659
 */
660

661
int
662
sk_set_md5_auth(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, char *passwd, int setkey)
663
{ DUMMY; }
664
#endif
665

    
666
/**
667
 * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
668
 * @s: socket
669
 * @offset: offset
670
 *
671
 * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
672
 * kernel will automatically fill it for outgoing packets and check it for
673
 * incoming packets. Should not be used on ICMPv6 sockets, where the position is
674
 * known to the kernel.
675
 *
676
 * Result: 0 for success, -1 for an error.
677
 */
678

    
679
int
680
sk_set_ipv6_checksum(sock *s, int offset)
681
{
682
  if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
683
    ERR("IPV6_CHECKSUM");
684

    
685
  return 0;
686
}
687

    
688
int
689
sk_set_icmp6_filter(sock *s, int p1, int p2)
690
{
691
  /* a bit of lame interface, but it is here only for Radv */
692
  struct icmp6_filter f;
693

    
694
  ICMP6_FILTER_SETBLOCKALL(&f);
695
  ICMP6_FILTER_SETPASS(p1, &f);
696
  ICMP6_FILTER_SETPASS(p2, &f);
697

    
698
  if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
699
    ERR("ICMP6_FILTER");
700

    
701
  return 0;
702
}
703

    
704
void
705
sk_log_error(sock *s, const char *p)
706
{
707
  log(L_ERR "%s: Socket error: %s%#m", p, s->err);
708
}
709

    
710

    
711
/*
712
 *        Actual struct birdsock code
713
 */
714

    
715
static list sock_list;
716
static struct birdsock *current_sock;
717
static struct birdsock *stored_sock;
718

    
719
static inline sock *
720
sk_next(sock *s)
721
{
722
  if (!s->n.next->next)
723
    return NULL;
724
  else
725
    return SKIP_BACK(sock, n, s->n.next);
726
}
727

    
728
static void
729
sk_alloc_bufs(sock *s)
730
{
731
  if (!s->rbuf && s->rbsize)
732
    s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
733
  s->rpos = s->rbuf;
734
  if (!s->tbuf && s->tbsize)
735
    s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
736
  s->tpos = s->ttx = s->tbuf;
737
}
738

    
739
static void
740
sk_free_bufs(sock *s)
741
{
742
  if (s->rbuf_alloc)
743
  {
744
    xfree(s->rbuf_alloc);
745
    s->rbuf = s->rbuf_alloc = NULL;
746
  }
747
  if (s->tbuf_alloc)
748
  {
749
    xfree(s->tbuf_alloc);
750
    s->tbuf = s->tbuf_alloc = NULL;
751
  }
752
}
753

    
754
#ifdef HAVE_LIBSSH
755
static void
756
sk_ssh_free(sock *s)
757
{
758
  struct ssh_sock *ssh = s->ssh;
759

    
760
  if (s->ssh == NULL)
761
    return;
762

    
763
  s->ssh = NULL;
764

    
765
  if (ssh->channel)
766
  {
767
    if (ssh_channel_is_open(ssh->channel))
768
      ssh_channel_close(ssh->channel);
769
    ssh_channel_free(ssh->channel);
770
    ssh->channel = NULL;
771
  }
772

    
773
  if (ssh->session)
774
  {
775
    ssh_disconnect(ssh->session);
776
    ssh_free(ssh->session);
777
    ssh->session = NULL;
778
  }
779
}
780
#endif
781

    
782
static void
783
sk_free(resource *r)
784
{
785
  sock *s = (sock *) r;
786

    
787
  sk_free_bufs(s);
788

    
789
#ifdef HAVE_LIBSSH
790
  if (s->type == SK_SSH || s->type == SK_SSH_ACTIVE)
791
    sk_ssh_free(s);
792
#endif
793

    
794
  if (s->fd < 0)
795
    return;
796

    
797
  /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
798
  if (!(s->flags & SKF_THREAD))
799
  {
800
    if (s == current_sock)
801
      current_sock = sk_next(s);
802
    if (s == stored_sock)
803
      stored_sock = sk_next(s);
804
    rem_node(&s->n);
805
  }
806

    
807
  if (s->type != SK_SSH && s->type != SK_SSH_ACTIVE)
808
    close(s->fd);
809

    
810
  s->fd = -1;
811
}
812

    
813
void
814
sk_set_rbsize(sock *s, uint val)
815
{
816
  ASSERT(s->rbuf_alloc == s->rbuf);
817

    
818
  if (s->rbsize == val)
819
    return;
820

    
821
  s->rbsize = val;
822
  xfree(s->rbuf_alloc);
823
  s->rbuf_alloc = xmalloc(val);
824
  s->rpos = s->rbuf = s->rbuf_alloc;
825
}
826

    
827
void
828
sk_set_tbsize(sock *s, uint val)
829
{
830
  ASSERT(s->tbuf_alloc == s->tbuf);
831

    
832
  if (s->tbsize == val)
833
    return;
834

    
835
  byte *old_tbuf = s->tbuf;
836

    
837
  s->tbsize = val;
838
  s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val);
839
  s->tpos = s->tbuf + (s->tpos - old_tbuf);
840
  s->ttx  = s->tbuf + (s->ttx  - old_tbuf);
841
}
842

    
843
void
844
sk_set_tbuf(sock *s, void *tbuf)
845
{
846
  s->tbuf = tbuf ?: s->tbuf_alloc;
847
  s->ttx = s->tpos = s->tbuf;
848
}
849

    
850
void
851
sk_reallocate(sock *s)
852
{
853
  sk_free_bufs(s);
854
  sk_alloc_bufs(s);
855
}
856

    
857
static void
858
sk_dump(resource *r)
859
{
860
  sock *s = (sock *) r;
861
  static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "SSH>", "SSH", "DEL!" };
862

    
863
  debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
864
        sk_type_names[s->type],
865
        s->data,
866
        s->saddr,
867
        s->sport,
868
        s->daddr,
869
        s->dport,
870
        s->tos,
871
        s->ttl,
872
        s->iface ? s->iface->name : "none");
873
}
874

    
875
static struct resclass sk_class = {
876
  "Socket",
877
  sizeof(sock),
878
  sk_free,
879
  sk_dump,
880
  NULL,
881
  NULL
882
};
883

    
884
/**
885
 * sk_new - create a socket
886
 * @p: pool
887
 *
888
 * This function creates a new socket resource. If you want to use it,
889
 * you need to fill in all the required fields of the structure and
890
 * call sk_open() to do the actual opening of the socket.
891
 *
892
 * The real function name is sock_new(), sk_new() is a macro wrapper
893
 * to avoid collision with OpenSSL.
894
 */
895
sock *
896
sock_new(pool *p)
897
{
898
  sock *s = ralloc(p, &sk_class);
899
  s->pool = p;
900
  // s->saddr = s->daddr = IPA_NONE;
901
  s->tos = s->priority = s->ttl = -1;
902
  s->fd = -1;
903
  return s;
904
}
905

    
906
static int
907
sk_setup(sock *s)
908
{
909
  int y = 1;
910
  int fd = s->fd;
911

    
912
  if (s->type == SK_SSH_ACTIVE)
913
    return 0;
914

    
915
  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
916
    ERR("O_NONBLOCK");
917

    
918
  if (!s->af)
919
    return 0;
920

    
921
  if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
922
    s->flags |= SKF_PKTINFO;
923

    
924
#ifdef CONFIG_USE_HDRINCL
925
  if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO))
926
  {
927
    s->flags &= ~SKF_PKTINFO;
928
    s->flags |= SKF_HDRINCL;
929
    if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0)
930
      ERR("IP_HDRINCL");
931
  }
932
#endif
933

    
934
  if (s->vrf && !s->iface)
935
  {
936
    /* Bind socket to associated VRF interface.
937
       This is Linux-specific, but so is SO_BINDTODEVICE. */
938
#ifdef SO_BINDTODEVICE
939
    struct ifreq ifr = {};
940
    strcpy(ifr.ifr_name, s->vrf->name);
941
    if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
942
      ERR("SO_BINDTODEVICE");
943
#endif
944
  }
945

    
946
  if (s->iface)
947
  {
948
#ifdef SO_BINDTODEVICE
949
    struct ifreq ifr = {};
950
    strcpy(ifr.ifr_name, s->iface->name);
951
    if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
952
      ERR("SO_BINDTODEVICE");
953
#endif
954

    
955
#ifdef CONFIG_UNIX_DONTROUTE
956
    if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0)
957
      ERR("SO_DONTROUTE");
958
#endif
959
  }
960

    
961
  if (s->priority >= 0)
962
    if (sk_set_priority(s, s->priority) < 0)
963
      return -1;
964

    
965
  if (sk_is_ipv4(s))
966
  {
967
    if (s->flags & SKF_LADDR_RX)
968
      if (sk_request_cmsg4_pktinfo(s) < 0)
969
        return -1;
970

    
971
    if (s->flags & SKF_TTL_RX)
972
      if (sk_request_cmsg4_ttl(s) < 0)
973
        return -1;
974

    
975
    if ((s->type == SK_UDP) || (s->type == SK_IP))
976
      if (sk_disable_mtu_disc4(s) < 0)
977
        return -1;
978

    
979
    if (s->ttl >= 0)
980
      if (sk_set_ttl4(s, s->ttl) < 0)
981
        return -1;
982

    
983
    if (s->tos >= 0)
984
      if (sk_set_tos4(s, s->tos) < 0)
985
        return -1;
986
  }
987

    
988
  if (sk_is_ipv6(s))
989
  {
990
    if ((s->type == SK_TCP_PASSIVE) || (s->type == SK_TCP_ACTIVE) || (s->type == SK_UDP))
991
      if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0)
992
        ERR("IPV6_V6ONLY");
993

    
994
    if (s->flags & SKF_LADDR_RX)
995
      if (sk_request_cmsg6_pktinfo(s) < 0)
996
        return -1;
997

    
998
    if (s->flags & SKF_TTL_RX)
999
      if (sk_request_cmsg6_ttl(s) < 0)
1000
        return -1;
1001

    
1002
    if ((s->type == SK_UDP) || (s->type == SK_IP))
1003
      if (sk_disable_mtu_disc6(s) < 0)
1004
        return -1;
1005

    
1006
    if (s->ttl >= 0)
1007
      if (sk_set_ttl6(s, s->ttl) < 0)
1008
        return -1;
1009

    
1010
    if (s->tos >= 0)
1011
      if (sk_set_tos6(s, s->tos) < 0)
1012
        return -1;
1013
  }
1014

    
1015
  return 0;
1016
}
1017

    
1018
static void
1019
sk_insert(sock *s)
1020
{
1021
  add_tail(&sock_list, &s->n);
1022
}
1023

    
1024
static void
1025
sk_tcp_connected(sock *s)
1026
{
1027
  sockaddr sa;
1028
  int sa_len = sizeof(sa);
1029

    
1030
  if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) ||
1031
      (sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0))
1032
    log(L_WARN "SOCK: Cannot get local IP address for TCP>");
1033

    
1034
  s->type = SK_TCP;
1035
  sk_alloc_bufs(s);
1036
  s->tx_hook(s);
1037
}
1038

    
1039
#ifdef HAVE_LIBSSH
1040
static void
1041
sk_ssh_connected(sock *s)
1042
{
1043
  sk_alloc_bufs(s);
1044
  s->type = SK_SSH;
1045
  s->tx_hook(s);
1046
}
1047
#endif
1048

    
1049
static int
1050
sk_passive_connected(sock *s, int type)
1051
{
1052
  sockaddr loc_sa, rem_sa;
1053
  int loc_sa_len = sizeof(loc_sa);
1054
  int rem_sa_len = sizeof(rem_sa);
1055

    
1056
  int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len);
1057
  if (fd < 0)
1058
  {
1059
    if ((errno != EINTR) && (errno != EAGAIN))
1060
      s->err_hook(s, errno);
1061
    return 0;
1062
  }
1063

    
1064
  sock *t = sk_new(s->pool);
1065
  t->type = type;
1066
  t->af = s->af;
1067
  t->fd = fd;
1068
  t->ttl = s->ttl;
1069
  t->tos = s->tos;
1070
  t->rbsize = s->rbsize;
1071
  t->tbsize = s->tbsize;
1072

    
1073
  if (type == SK_TCP)
1074
  {
1075
    if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) ||
1076
        (sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0))
1077
      log(L_WARN "SOCK: Cannot get local IP address for TCP<");
1078

    
1079
    if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0)
1080
      log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
1081
  }
1082

    
1083
  if (sk_setup(t) < 0)
1084
  {
1085
    /* FIXME: Call err_hook instead ? */
1086
    log(L_ERR "SOCK: Incoming connection: %s%#m", t->err);
1087

    
1088
    /* FIXME: handle it better in rfree() */
1089
    close(t->fd);
1090
    t->fd = -1;
1091
    rfree(t);
1092
    return 1;
1093
  }
1094

    
1095
  sk_insert(t);
1096
  sk_alloc_bufs(t);
1097
  s->rx_hook(t, 0);
1098
  return 1;
1099
}
1100

    
1101
#ifdef HAVE_LIBSSH
1102
/*
1103
 * Return SSH_OK or SSH_AGAIN or SSH_ERROR
1104
 */
1105
static int
1106
sk_ssh_connect(sock *s)
1107
{
1108
  s->fd = ssh_get_fd(s->ssh->session);
1109

    
1110
  /* Big fall thru automata */
1111
  switch (s->ssh->state)
1112
  {
1113
  case SK_SSH_CONNECT:
1114
  {
1115
    switch (ssh_connect(s->ssh->session))
1116
    {
1117
    case SSH_AGAIN:
1118
      /* A quick look into libSSH shows that ssh_get_fd() should return non-(-1)
1119
       * after SSH_AGAIN is returned by ssh_connect(). This is however nowhere
1120
       * documented but our code relies on that.
1121
       */
1122
      return SSH_AGAIN;
1123

    
1124
    case SSH_OK:
1125
      break;
1126

    
1127
    default:
1128
      return SSH_ERROR;
1129
    }
1130
  }
1131

    
1132
  case SK_SSH_SERVER_KNOWN:
1133
  {
1134
    s->ssh->state = SK_SSH_SERVER_KNOWN;
1135

    
1136
    if (s->ssh->server_hostkey_path)
1137
    {
1138
      int server_identity_is_ok = 1;
1139

    
1140
      /* Check server identity */
1141
      switch (ssh_is_server_known(s->ssh->session))
1142
      {
1143
#define LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s,msg,args...) log(L_WARN "SSH Identity %s@%s:%u: " msg, (s)->ssh->username, (s)->host, (s)->dport, ## args);
1144
      case SSH_SERVER_KNOWN_OK:
1145
        /* The server is known and has not changed. */
1146
        break;
1147

    
1148
      case SSH_SERVER_NOT_KNOWN:
1149
        LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server is unknown, its public key was not found in the known host file %s", s->ssh->server_hostkey_path);
1150
        break;
1151

    
1152
      case SSH_SERVER_KNOWN_CHANGED:
1153
        LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server key has changed. Either you are under attack or the administrator changed the key.");
1154
        server_identity_is_ok = 0;
1155
        break;
1156

    
1157
      case SSH_SERVER_FILE_NOT_FOUND:
1158
        LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The known host file %s does not exist", s->ssh->server_hostkey_path);
1159
        server_identity_is_ok = 0;
1160
        break;
1161

    
1162
      case SSH_SERVER_ERROR:
1163
        LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "Some error happened");
1164
        server_identity_is_ok = 0;
1165
        break;
1166

    
1167
      case SSH_SERVER_FOUND_OTHER:
1168
        LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server gave use a key of a type while we had an other type recorded. " \
1169
                                             "It is a possible attack.");
1170
        server_identity_is_ok = 0;
1171
        break;
1172
      }
1173

    
1174
      if (!server_identity_is_ok)
1175
        return SSH_ERROR;
1176
    }
1177
  }
1178

    
1179
  case SK_SSH_USERAUTH:
1180
  {
1181
    s->ssh->state = SK_SSH_USERAUTH;
1182
    switch (ssh_userauth_publickey_auto(s->ssh->session, NULL, NULL))
1183
    {
1184
    case SSH_AUTH_AGAIN:
1185
      return SSH_AGAIN;
1186

    
1187
    case SSH_AUTH_SUCCESS:
1188
      break;
1189

    
1190
    default:
1191
      return SSH_ERROR;
1192
    }
1193
  }
1194

    
1195
  case SK_SSH_CHANNEL:
1196
  {
1197
    s->ssh->state = SK_SSH_CHANNEL;
1198
    s->ssh->channel = ssh_channel_new(s->ssh->session);
1199
    if (s->ssh->channel == NULL)
1200
      return SSH_ERROR;
1201
  }
1202

    
1203
  case SK_SSH_SESSION:
1204
  {
1205
    s->ssh->state = SK_SSH_SESSION;
1206
    switch (ssh_channel_open_session(s->ssh->channel))
1207
    {
1208
    case SSH_AGAIN:
1209
      return SSH_AGAIN;
1210

    
1211
    case SSH_OK:
1212
      break;
1213

    
1214
    default:
1215
      return SSH_ERROR;
1216
    }
1217
  }
1218

    
1219
  case SK_SSH_SUBSYSTEM:
1220
  {
1221
    s->ssh->state = SK_SSH_SUBSYSTEM;
1222
    if (s->ssh->subsystem)
1223
    {
1224
      switch (ssh_channel_request_subsystem(s->ssh->channel, s->ssh->subsystem))
1225
      {
1226
      case SSH_AGAIN:
1227
        return SSH_AGAIN;
1228

    
1229
      case SSH_OK:
1230
        break;
1231

    
1232
      default:
1233
        return SSH_ERROR;
1234
      }
1235
    }
1236
  }
1237

    
1238
  case SK_SSH_ESTABLISHED:
1239
    s->ssh->state = SK_SSH_ESTABLISHED;
1240
  }
1241

    
1242
  return SSH_OK;
1243
}
1244

    
1245
/*
1246
 * Return file descriptor number if success
1247
 * Return -1 if failed
1248
 */
1249
static int
1250
sk_open_ssh(sock *s)
1251
{
1252
  if (!s->ssh)
1253
    bug("sk_open() sock->ssh is not allocated");
1254

    
1255
  ssh_session sess = ssh_new();
1256
  if (sess == NULL)
1257
    ERR2("Cannot create a ssh session");
1258
  s->ssh->session = sess;
1259

    
1260
  const int verbosity = SSH_LOG_NOLOG;
1261
  ssh_options_set(sess, SSH_OPTIONS_LOG_VERBOSITY, &verbosity);
1262
  ssh_options_set(sess, SSH_OPTIONS_HOST, s->host);
1263
  ssh_options_set(sess, SSH_OPTIONS_PORT, &(s->dport));
1264
  /* TODO: Add SSH_OPTIONS_BINDADDR */
1265
  ssh_options_set(sess, SSH_OPTIONS_USER, s->ssh->username);
1266

    
1267
  if (s->ssh->server_hostkey_path)
1268
    ssh_options_set(sess, SSH_OPTIONS_KNOWNHOSTS, s->ssh->server_hostkey_path);
1269

    
1270
  if (s->ssh->client_privkey_path)
1271
    ssh_options_set(sess, SSH_OPTIONS_IDENTITY, s->ssh->client_privkey_path);
1272

    
1273
  ssh_set_blocking(sess, 0);
1274

    
1275
  switch (sk_ssh_connect(s))
1276
  {
1277
  case SSH_AGAIN:
1278
    break;
1279

    
1280
  case SSH_OK:
1281
    sk_ssh_connected(s);
1282
    break;
1283

    
1284
  case SSH_ERROR:
1285
    ERR2(ssh_get_error(sess));
1286
    break;
1287
  }
1288

    
1289
  return ssh_get_fd(sess);
1290

    
1291
 err:
1292
  return -1;
1293
}
1294
#endif
1295

    
1296
/**
1297
 * sk_open - open a socket
1298
 * @s: socket
1299
 *
1300
 * This function takes a socket resource created by sk_new() and
1301
 * initialized by the user and binds a corresponding network connection
1302
 * to it.
1303
 *
1304
 * Result: 0 for success, -1 for an error.
1305
 */
1306
int
1307
sk_open(sock *s)
1308
{
1309
  int af = AF_UNSPEC;
1310
  int fd = -1;
1311
  int do_bind = 0;
1312
  int bind_port = 0;
1313
  ip_addr bind_addr = IPA_NONE;
1314
  sockaddr sa;
1315

    
1316
  if (s->type <= SK_IP)
1317
  {
1318
    /*
1319
     * For TCP/IP sockets, Address family (IPv4 or IPv6) can be specified either
1320
     * explicitly (SK_IPV4 or SK_IPV6) or implicitly (based on saddr, daddr).
1321
     * But the specifications have to be consistent.
1322
     */
1323

    
1324
    switch (s->subtype)
1325
    {
1326
    case 0:
1327
      ASSERT(ipa_zero(s->saddr) || ipa_zero(s->daddr) ||
1328
             (ipa_is_ip4(s->saddr) == ipa_is_ip4(s->daddr)));
1329
      af = (ipa_is_ip4(s->saddr) || ipa_is_ip4(s->daddr)) ? AF_INET : AF_INET6;
1330
      break;
1331

    
1332
    case SK_IPV4:
1333
      ASSERT(ipa_zero(s->saddr) || ipa_is_ip4(s->saddr));
1334
      ASSERT(ipa_zero(s->daddr) || ipa_is_ip4(s->daddr));
1335
      af = AF_INET;
1336
      break;
1337

    
1338
    case SK_IPV6:
1339
      ASSERT(ipa_zero(s->saddr) || !ipa_is_ip4(s->saddr));
1340
      ASSERT(ipa_zero(s->daddr) || !ipa_is_ip4(s->daddr));
1341
      af = AF_INET6;
1342
      break;
1343

    
1344
    default:
1345
      bug("Invalid subtype %d", s->subtype);
1346
    }
1347
  }
1348

    
1349
  switch (s->type)
1350
  {
1351
  case SK_TCP_ACTIVE:
1352
    s->ttx = "";                        /* Force s->ttx != s->tpos */
1353
    /* Fall thru */
1354
  case SK_TCP_PASSIVE:
1355
    fd = socket(af, SOCK_STREAM, IPPROTO_TCP);
1356
    bind_port = s->sport;
1357
    bind_addr = s->saddr;
1358
    do_bind = bind_port || ipa_nonzero(bind_addr);
1359
    break;
1360

    
1361
#ifdef HAVE_LIBSSH
1362
  case SK_SSH_ACTIVE:
1363
    s->ttx = "";                        /* Force s->ttx != s->tpos */
1364
    fd = sk_open_ssh(s);
1365
    break;
1366
#endif
1367

    
1368
  case SK_UDP:
1369
    fd = socket(af, SOCK_DGRAM, IPPROTO_UDP);
1370
    bind_port = s->sport;
1371
    bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1372
    do_bind = 1;
1373
    break;
1374

    
1375
  case SK_IP:
1376
    fd = socket(af, SOCK_RAW, s->dport);
1377
    bind_port = 0;
1378
    bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
1379
    do_bind = ipa_nonzero(bind_addr);
1380
    break;
1381

    
1382
  case SK_MAGIC:
1383
    af = 0;
1384
    fd = s->fd;
1385
    break;
1386

    
1387
  default:
1388
    bug("sk_open() called for invalid sock type %d", s->type);
1389
  }
1390

    
1391
  if (fd < 0)
1392
    ERR("socket");
1393

    
1394
  s->af = af;
1395
  s->fd = fd;
1396

    
1397
  if (sk_setup(s) < 0)
1398
    goto err;
1399

    
1400
  if (do_bind)
1401
  {
1402
    if (bind_port)
1403
    {
1404
      int y = 1;
1405

    
1406
      if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0)
1407
        ERR2("SO_REUSEADDR");
1408

    
1409
#ifdef CONFIG_NO_IFACE_BIND
1410
      /* Workaround missing ability to bind to an iface */
1411
      if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr))
1412
      {
1413
        if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0)
1414
          ERR2("SO_REUSEPORT");
1415
      }
1416
#endif
1417
    }
1418
    else
1419
      if (s->flags & SKF_HIGH_PORT)
1420
        if (sk_set_high_port(s) < 0)
1421
          log(L_WARN "Socket error: %s%#m", s->err);
1422

    
1423
    sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port);
1424
    if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
1425
      ERR2("bind");
1426
  }
1427

    
1428
  if (s->password)
1429
    if (sk_set_md5_auth(s, s->saddr, s->daddr, s->iface, s->password, 0) < 0)
1430
      goto err;
1431

    
1432
  switch (s->type)
1433
  {
1434
  case SK_TCP_ACTIVE:
1435
    sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
1436
    if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0)
1437
      sk_tcp_connected(s);
1438
    else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
1439
             errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
1440
      ERR2("connect");
1441
    break;
1442

    
1443
  case SK_TCP_PASSIVE:
1444
    if (listen(fd, 8) < 0)
1445
      ERR2("listen");
1446
    break;
1447

    
1448
  case SK_SSH_ACTIVE:
1449
  case SK_MAGIC:
1450
    break;
1451

    
1452
  default:
1453
    sk_alloc_bufs(s);
1454
  }
1455

    
1456
  if (!(s->flags & SKF_THREAD))
1457
    sk_insert(s);
1458

    
1459
  return 0;
1460

    
1461
err:
1462
  close(fd);
1463
  s->fd = -1;
1464
  return -1;
1465
}
1466

    
1467
int
1468
sk_open_unix(sock *s, char *name)
1469
{
1470
  struct sockaddr_un sa;
1471
  int fd;
1472

    
1473
  /* We are sloppy during error (leak fd and not set s->err), but we die anyway */
1474

    
1475
  fd = socket(AF_UNIX, SOCK_STREAM, 0);
1476
  if (fd < 0)
1477
    return -1;
1478

    
1479
  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
1480
    return -1;
1481

    
1482
  /* Path length checked in test_old_bird() */
1483
  sa.sun_family = AF_UNIX;
1484
  strcpy(sa.sun_path, name);
1485

    
1486
  if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
1487
    return -1;
1488

    
1489
  if (listen(fd, 8) < 0)
1490
    return -1;
1491

    
1492
  s->fd = fd;
1493
  sk_insert(s);
1494
  return 0;
1495
}
1496

    
1497

    
1498
#define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \
1499
                          CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)
1500
#define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO)
1501

    
1502
static void
1503
sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
1504
{
1505
  if (sk_is_ipv4(s))
1506
    sk_prepare_cmsgs4(s, msg, cbuf, cbuflen);
1507
  else
1508
    sk_prepare_cmsgs6(s, msg, cbuf, cbuflen);
1509
}
1510

    
1511
static void
1512
sk_process_cmsgs(sock *s, struct msghdr *msg)
1513
{
1514
  struct cmsghdr *cm;
1515

    
1516
  s->laddr = IPA_NONE;
1517
  s->lifindex = 0;
1518
  s->rcv_ttl = -1;
1519

    
1520
  for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
1521
  {
1522
    if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s))
1523
    {
1524
      sk_process_cmsg4_pktinfo(s, cm);
1525
      sk_process_cmsg4_ttl(s, cm);
1526
    }
1527

    
1528
    if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s))
1529
    {
1530
      sk_process_cmsg6_pktinfo(s, cm);
1531
      sk_process_cmsg6_ttl(s, cm);
1532
    }
1533
  }
1534
}
1535

    
1536

    
1537
static inline int
1538
sk_sendmsg(sock *s)
1539
{
1540
  struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
1541
  byte cmsg_buf[CMSG_TX_SPACE];
1542
  sockaddr dst;
1543

    
1544
  sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport);
1545

    
1546
  struct msghdr msg = {
1547
    .msg_name = &dst.sa,
1548
    .msg_namelen = SA_LEN(dst),
1549
    .msg_iov = &iov,
1550
    .msg_iovlen = 1
1551
  };
1552

    
1553
#ifdef CONFIG_USE_HDRINCL
1554
  byte hdr[20];
1555
  struct iovec iov2[2] = { {hdr, 20}, iov };
1556

    
1557
  if (s->flags & SKF_HDRINCL)
1558
  {
1559
    sk_prepare_ip_header(s, hdr, iov.iov_len);
1560
    msg.msg_iov = iov2;
1561
    msg.msg_iovlen = 2;
1562
  }
1563
#endif
1564

    
1565
  if (s->flags & SKF_PKTINFO)
1566
    sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
1567

    
1568
  return sendmsg(s->fd, &msg, 0);
1569
}
1570

    
1571
static inline int
1572
sk_recvmsg(sock *s)
1573
{
1574
  struct iovec iov = {s->rbuf, s->rbsize};
1575
  byte cmsg_buf[CMSG_RX_SPACE];
1576
  sockaddr src;
1577

    
1578
  struct msghdr msg = {
1579
    .msg_name = &src.sa,
1580
    .msg_namelen = sizeof(src), // XXXX ??
1581
    .msg_iov = &iov,
1582
    .msg_iovlen = 1,
1583
    .msg_control = cmsg_buf,
1584
    .msg_controllen = sizeof(cmsg_buf),
1585
    .msg_flags = 0
1586
  };
1587

    
1588
  int rv = recvmsg(s->fd, &msg, 0);
1589
  if (rv < 0)
1590
    return rv;
1591

    
1592
  //ifdef IPV4
1593
  //  if (cf_type == SK_IP)
1594
  //    rv = ipv4_skip_header(pbuf, rv);
1595
  //endif
1596

    
1597
  sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport);
1598
  sk_process_cmsgs(s, &msg);
1599

    
1600
  if (msg.msg_flags & MSG_TRUNC)
1601
    s->flags |= SKF_TRUNCATED;
1602
  else
1603
    s->flags &= ~SKF_TRUNCATED;
1604

    
1605
  return rv;
1606
}
1607

    
1608

    
1609
static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; }
1610

    
1611
static int
1612
sk_maybe_write(sock *s)
1613
{
1614
  int e;
1615

    
1616
  switch (s->type)
1617
  {
1618
  case SK_TCP:
1619
  case SK_MAGIC:
1620
  case SK_UNIX:
1621
    while (s->ttx != s->tpos)
1622
    {
1623
      e = write(s->fd, s->ttx, s->tpos - s->ttx);
1624

    
1625
      if (e < 0)
1626
      {
1627
        if (errno != EINTR && errno != EAGAIN)
1628
        {
1629
          reset_tx_buffer(s);
1630
          /* EPIPE is just a connection close notification during TX */
1631
          s->err_hook(s, (errno != EPIPE) ? errno : 0);
1632
          return -1;
1633
        }
1634
        return 0;
1635
      }
1636
      s->ttx += e;
1637
    }
1638
    reset_tx_buffer(s);
1639
    return 1;
1640

    
1641
#ifdef HAVE_LIBSSH
1642
  case SK_SSH:
1643
    while (s->ttx != s->tpos)
1644
    {
1645
      e = ssh_channel_write(s->ssh->channel, s->ttx, s->tpos - s->ttx);
1646

    
1647
      if (e < 0)
1648
      {
1649
        s->err = ssh_get_error(s->ssh->session);
1650
        s->err_hook(s, ssh_get_error_code(s->ssh->session));
1651

    
1652
        reset_tx_buffer(s);
1653
        /* EPIPE is just a connection close notification during TX */
1654
        s->err_hook(s, (errno != EPIPE) ? errno : 0);
1655
        return -1;
1656
      }
1657
      s->ttx += e;
1658
    }
1659
    reset_tx_buffer(s);
1660
    return 1;
1661
#endif
1662

    
1663
  case SK_UDP:
1664
  case SK_IP:
1665
    {
1666
      if (s->tbuf == s->tpos)
1667
        return 1;
1668

    
1669
      e = sk_sendmsg(s);
1670

    
1671
      if (e < 0)
1672
      {
1673
        if (errno != EINTR && errno != EAGAIN)
1674
        {
1675
          reset_tx_buffer(s);
1676
          s->err_hook(s, errno);
1677
          return -1;
1678
        }
1679

    
1680
        if (!s->tx_hook)
1681
          reset_tx_buffer(s);
1682
        return 0;
1683
      }
1684
      reset_tx_buffer(s);
1685
      return 1;
1686
    }
1687

    
1688
  default:
1689
    bug("sk_maybe_write: unknown socket type %d", s->type);
1690
  }
1691
}
1692

    
1693
int
1694
sk_rx_ready(sock *s)
1695
{
1696
  int rv;
1697
  struct pollfd pfd = { .fd = s->fd };
1698
  pfd.events |= POLLIN;
1699

    
1700
 redo:
1701
  rv = poll(&pfd, 1, 0);
1702

    
1703
  if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
1704
    goto redo;
1705

    
1706
  return rv;
1707
}
1708

    
1709
/**
1710
 * sk_send - send data to a socket
1711
 * @s: socket
1712
 * @len: number of bytes to send
1713
 *
1714
 * This function sends @len bytes of data prepared in the
1715
 * transmit buffer of the socket @s to the network connection.
1716
 * If the packet can be sent immediately, it does so and returns
1717
 * 1, else it queues the packet for later processing, returns 0
1718
 * and calls the @tx_hook of the socket when the tranmission
1719
 * takes place.
1720
 */
1721
int
1722
sk_send(sock *s, unsigned len)
1723
{
1724
  s->ttx = s->tbuf;
1725
  s->tpos = s->tbuf + len;
1726
  return sk_maybe_write(s);
1727
}
1728

    
1729
/**
1730
 * sk_send_to - send data to a specific destination
1731
 * @s: socket
1732
 * @len: number of bytes to send
1733
 * @addr: IP address to send the packet to
1734
 * @port: port to send the packet to
1735
 *
1736
 * This is a sk_send() replacement for connection-less packet sockets
1737
 * which allows destination of the packet to be chosen dynamically.
1738
 * Raw IP sockets should use 0 for @port.
1739
 */
1740
int
1741
sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
1742
{
1743
  s->daddr = addr;
1744
  if (port)
1745
    s->dport = port;
1746

    
1747
  s->ttx = s->tbuf;
1748
  s->tpos = s->tbuf + len;
1749
  return sk_maybe_write(s);
1750
}
1751

    
1752
/*
1753
int
1754
sk_send_full(sock *s, unsigned len, struct iface *ifa,
1755
             ip_addr saddr, ip_addr daddr, unsigned dport)
1756
{
1757
  s->iface = ifa;
1758
  s->saddr = saddr;
1759
  s->daddr = daddr;
1760
  s->dport = dport;
1761
  s->ttx = s->tbuf;
1762
  s->tpos = s->tbuf + len;
1763
  return sk_maybe_write(s);
1764
}
1765
*/
1766

    
1767
static void
1768
call_rx_hook(sock *s, int size)
1769
{
1770
  if (s->rx_hook(s, size))
1771
  {
1772
    /* We need to be careful since the socket could have been deleted by the hook */
1773
    if (current_sock == s)
1774
      s->rpos = s->rbuf;
1775
  }
1776
}
1777

    
1778
#ifdef HAVE_LIBSSH
1779
static int
1780
sk_read_ssh(sock *s)
1781
{
1782
  ssh_channel rchans[2] = { s->ssh->channel, NULL };
1783
  struct timeval timev = { 1, 0 };
1784

    
1785
  if (ssh_channel_select(rchans, NULL, NULL, &timev) == SSH_EINTR)
1786
    return 1; /* Try again */
1787

    
1788
  if (ssh_channel_is_eof(s->ssh->channel) != 0)
1789
  {
1790
    /* The remote side is closing the connection */
1791
    s->err_hook(s, 0);
1792
    return 0;
1793
  }
1794

    
1795
  if (rchans[0] == NULL)
1796
    return 0; /* No data is available on the socket */
1797

    
1798
  const uint used_bytes = s->rpos - s->rbuf;
1799
  const int read_bytes = ssh_channel_read_nonblocking(s->ssh->channel, s->rpos, s->rbsize - used_bytes, 0);
1800
  if (read_bytes > 0)
1801
  {
1802
    /* Received data */
1803
    s->rpos += read_bytes;
1804
    call_rx_hook(s, used_bytes + read_bytes);
1805
    return 1;
1806
  }
1807
  else if (read_bytes == 0)
1808
  {
1809
    if (ssh_channel_is_eof(s->ssh->channel) != 0)
1810
    {
1811
        /* The remote side is closing the connection */
1812
        s->err_hook(s, 0);
1813
    }
1814
  }
1815
  else
1816
  {
1817
    s->err = ssh_get_error(s->ssh->session);
1818
    s->err_hook(s, ssh_get_error_code(s->ssh->session));
1819
  }
1820

    
1821
  return 0; /* No data is available on the socket */
1822
}
1823
#endif
1824

    
1825
 /* sk_read() and sk_write() are called from BFD's event loop */
1826

    
1827
int
1828
sk_read(sock *s, int revents)
1829
{
1830
  switch (s->type)
1831
  {
1832
  case SK_TCP_PASSIVE:
1833
    return sk_passive_connected(s, SK_TCP);
1834

    
1835
  case SK_UNIX_PASSIVE:
1836
    return sk_passive_connected(s, SK_UNIX);
1837

    
1838
  case SK_TCP:
1839
  case SK_UNIX:
1840
    {
1841
      int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
1842

    
1843
      if (c < 0)
1844
      {
1845
        if (errno != EINTR && errno != EAGAIN)
1846
          s->err_hook(s, errno);
1847
        else if (errno == EAGAIN && !(revents & POLLIN))
1848
        {
1849
          log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents);
1850
          s->err_hook(s, 0);
1851
        }
1852
      }
1853
      else if (!c)
1854
        s->err_hook(s, 0);
1855
      else
1856
      {
1857
        s->rpos += c;
1858
        call_rx_hook(s, s->rpos - s->rbuf);
1859
        return 1;
1860
      }
1861
      return 0;
1862
    }
1863

    
1864
#ifdef HAVE_LIBSSH
1865
  case SK_SSH:
1866
    return sk_read_ssh(s);
1867
#endif
1868

    
1869
  case SK_MAGIC:
1870
    return s->rx_hook(s, 0);
1871

    
1872
  default:
1873
    {
1874
      int e = sk_recvmsg(s);
1875

    
1876
      if (e < 0)
1877
      {
1878
        if (errno != EINTR && errno != EAGAIN)
1879
          s->err_hook(s, errno);
1880
        return 0;
1881
      }
1882

    
1883
      s->rpos = s->rbuf + e;
1884
      s->rx_hook(s, e);
1885
      return 1;
1886
    }
1887
  }
1888
}
1889

    
1890
int
1891
sk_write(sock *s)
1892
{
1893
  switch (s->type)
1894
  {
1895
  case SK_TCP_ACTIVE:
1896
    {
1897
      sockaddr sa;
1898
      sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
1899

    
1900
      if (connect(s->fd, &sa.sa, SA_LEN(sa)) >= 0 || errno == EISCONN)
1901
        sk_tcp_connected(s);
1902
      else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
1903
        s->err_hook(s, errno);
1904
      return 0;
1905
    }
1906

    
1907
#ifdef HAVE_LIBSSH
1908
  case SK_SSH_ACTIVE:
1909
    {
1910
      switch (sk_ssh_connect(s))
1911
      {
1912
        case SSH_OK:
1913
          sk_ssh_connected(s);
1914
          break;
1915

    
1916
        case SSH_AGAIN:
1917
          return 1;
1918

    
1919
        case SSH_ERROR:
1920
          s->err = ssh_get_error(s->ssh->session);
1921
          s->err_hook(s, ssh_get_error_code(s->ssh->session));
1922
          break;
1923
      }
1924
      return 0;
1925
    }
1926
#endif
1927

    
1928
  default:
1929
    if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
1930
    {
1931
      if (s->tx_hook)
1932
        s->tx_hook(s);
1933
      return 1;
1934
    }
1935
    return 0;
1936
  }
1937
}
1938

    
1939
int sk_is_ipv4(sock *s)
1940
{ return s->af == AF_INET; }
1941

    
1942
int sk_is_ipv6(sock *s)
1943
{ return s->af == AF_INET6; }
1944

    
1945
void
1946
sk_err(sock *s, int revents)
1947
{
1948
  int se = 0, sse = sizeof(se);
1949
  if ((s->type != SK_MAGIC) && (revents & POLLERR))
1950
    if (getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &se, &sse) < 0)
1951
    {
1952
      log(L_ERR "IO: Socket error: SO_ERROR: %m");
1953
      se = 0;
1954
    }
1955

    
1956
  s->err_hook(s, se);
1957
}
1958

    
1959
void
1960
sk_dump_all(void)
1961
{
1962
  node *n;
1963
  sock *s;
1964

    
1965
  debug("Open sockets:\n");
1966
  WALK_LIST(n, sock_list)
1967
  {
1968
    s = SKIP_BACK(sock, n, n);
1969
    debug("%p ", s);
1970
    sk_dump(&s->r);
1971
  }
1972
  debug("\n");
1973
}
1974

    
1975

    
1976
/*
1977
 *        Internal event log and watchdog
1978
 */
1979

    
1980
#define EVENT_LOG_LENGTH 32
1981

    
1982
struct event_log_entry
1983
{
1984
  void *hook;
1985
  void *data;
1986
  btime timestamp;
1987
  btime duration;
1988
};
1989

    
1990
static struct event_log_entry event_log[EVENT_LOG_LENGTH];
1991
static struct event_log_entry *event_open;
1992
static int event_log_pos, event_log_num, watchdog_active;
1993
static btime last_time;
1994
static btime loop_time;
1995

    
1996
static void
1997
io_update_time(void)
1998
{
1999
  struct timespec ts;
2000
  int rv;
2001

    
2002
  /*
2003
   * This is third time-tracking procedure (after update_times() above and
2004
   * times_update() in BFD), dedicated to internal event log and latency
2005
   * tracking. Hopefully, we consolidate these sometimes.
2006
   */
2007

    
2008
  rv = clock_gettime(CLOCK_MONOTONIC, &ts);
2009
  if (rv < 0)
2010
    die("clock_gettime: %m");
2011

    
2012
  last_time = ts.tv_sec S + ts.tv_nsec NS;
2013

    
2014
  if (event_open)
2015
  {
2016
    event_open->duration = last_time - event_open->timestamp;
2017

    
2018
    if (event_open->duration > config->latency_limit)
2019
      log(L_WARN "Event 0x%p 0x%p took %d ms",
2020
          event_open->hook, event_open->data, (int) (event_open->duration TO_MS));
2021

    
2022
    event_open = NULL;
2023
  }
2024
}
2025

    
2026
/**
2027
 * io_log_event - mark approaching event into event log
2028
 * @hook: event hook address
2029
 * @data: event data address
2030
 *
2031
 * Store info (hook, data, timestamp) about the following internal event into
2032
 * a circular event log (@event_log). When latency tracking is enabled, the log
2033
 * entry is kept open (in @event_open) so the duration can be filled later.
2034
 */
2035
void
2036
io_log_event(void *hook, void *data)
2037
{
2038
  if (config->latency_debug)
2039
    io_update_time();
2040

    
2041
  struct event_log_entry *en = event_log + event_log_pos;
2042

    
2043
  en->hook = hook;
2044
  en->data = data;
2045
  en->timestamp = last_time;
2046
  en->duration = 0;
2047

    
2048
  event_log_num++;
2049
  event_log_pos++;
2050
  event_log_pos %= EVENT_LOG_LENGTH;
2051

    
2052
  event_open = config->latency_debug ? en : NULL;
2053
}
2054

    
2055
static inline void
2056
io_close_event(void)
2057
{
2058
  if (event_open)
2059
    io_update_time();
2060
}
2061

    
2062
void
2063
io_log_dump(void)
2064
{
2065
  int i;
2066

    
2067
  log(L_DEBUG "Event log:");
2068
  for (i = 0; i < EVENT_LOG_LENGTH; i++)
2069
  {
2070
    struct event_log_entry *en = event_log + (event_log_pos + i) % EVENT_LOG_LENGTH;
2071
    if (en->hook)
2072
      log(L_DEBUG "  Event 0x%p 0x%p at %8d for %d ms", en->hook, en->data,
2073
          (int) ((last_time - en->timestamp) TO_MS), (int) (en->duration TO_MS));
2074
  }
2075
}
2076

    
2077
void
2078
watchdog_sigalrm(int sig UNUSED)
2079
{
2080
  /* Update last_time and duration, but skip latency check */
2081
  config->latency_limit = 0xffffffff;
2082
  io_update_time();
2083

    
2084
  /* We want core dump */
2085
  abort();
2086
}
2087

    
2088
static inline void
2089
watchdog_start1(void)
2090
{
2091
  io_update_time();
2092

    
2093
  loop_time = last_time;
2094
}
2095

    
2096
static inline void
2097
watchdog_start(void)
2098
{
2099
  io_update_time();
2100

    
2101
  loop_time = last_time;
2102
  event_log_num = 0;
2103

    
2104
  if (config->watchdog_timeout)
2105
  {
2106
    alarm(config->watchdog_timeout);
2107
    watchdog_active = 1;
2108
  }
2109
}
2110

    
2111
static inline void
2112
watchdog_stop(void)
2113
{
2114
  io_update_time();
2115

    
2116
  if (watchdog_active)
2117
  {
2118
    alarm(0);
2119
    watchdog_active = 0;
2120
  }
2121

    
2122
  btime duration = last_time - loop_time;
2123
  if (duration > config->watchdog_warning)
2124
    log(L_WARN "I/O loop cycle took %d ms for %d events",
2125
        (int) (duration TO_MS), event_log_num);
2126
}
2127

    
2128

    
2129
/*
2130
 *        Main I/O Loop
2131
 */
2132

    
2133
volatile int async_config_flag;                /* Asynchronous reconfiguration/dump scheduled */
2134
volatile int async_dump_flag;
2135
volatile int async_shutdown_flag;
2136

    
2137
void
2138
io_init(void)
2139
{
2140
  init_list(&sock_list);
2141
  init_list(&global_event_list);
2142
  krt_io_init();
2143
  // XXX init_times();
2144
  // XXX update_times();
2145
  boot_time = current_time();
2146
  srandom((uint) (current_real_time() TO_S));
2147
}
2148

    
2149
static int short_loops = 0;
2150
#define SHORT_LOOP_MAX 10
2151

    
2152
void
2153
io_loop(void)
2154
{
2155
  int poll_tout, timeout;
2156
  int nfds, events, pout;
2157
  timer *t;
2158
  sock *s;
2159
  node *n;
2160
  int fdmax = 256;
2161
  struct pollfd *pfd = xmalloc(fdmax * sizeof(struct pollfd));
2162

    
2163
  watchdog_start1();
2164
  for(;;)
2165
    {
2166
      times_update(&main_timeloop);
2167
      events = ev_run_list(&global_event_list);
2168
      timers_fire(&main_timeloop);
2169
      io_close_event();
2170

    
2171
      // FIXME
2172
      poll_tout = (events ? 0 : 3000); /* Time in milliseconds */
2173
      if (t = timers_first(&main_timeloop))
2174
      {
2175
        times_update(&main_timeloop);
2176
        timeout = (tm_remains(t) TO_MS) + 1;
2177
        poll_tout = MIN(poll_tout, timeout);
2178
      }
2179

    
2180
      nfds = 0;
2181
      WALK_LIST(n, sock_list)
2182
        {
2183
          pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */
2184
          s = SKIP_BACK(sock, n, n);
2185
          if (s->rx_hook)
2186
            {
2187
              pfd[nfds].fd = s->fd;
2188
              pfd[nfds].events |= POLLIN;
2189
            }
2190
          if (s->tx_hook && s->ttx != s->tpos)
2191
            {
2192
              pfd[nfds].fd = s->fd;
2193
              pfd[nfds].events |= POLLOUT;
2194
            }
2195
          if (pfd[nfds].fd != -1)
2196
            {
2197
              s->index = nfds;
2198
              nfds++;
2199
            }
2200
          else
2201
            s->index = -1;
2202

    
2203
          if (nfds >= fdmax)
2204
            {
2205
              fdmax *= 2;
2206
              pfd = xrealloc(pfd, fdmax * sizeof(struct pollfd));
2207
            }
2208
        }
2209

    
2210
      /*
2211
       * Yes, this is racy. But even if the signal comes before this test
2212
       * and entering poll(), it gets caught on the next timer tick.
2213
       */
2214

    
2215
      if (async_config_flag)
2216
        {
2217
          io_log_event(async_config, NULL);
2218
          async_config();
2219
          async_config_flag = 0;
2220
          continue;
2221
        }
2222
      if (async_dump_flag)
2223
        {
2224
          io_log_event(async_dump, NULL);
2225
          async_dump();
2226
          async_dump_flag = 0;
2227
          continue;
2228
        }
2229
      if (async_shutdown_flag)
2230
        {
2231
          io_log_event(async_shutdown, NULL);
2232
          async_shutdown();
2233
          async_shutdown_flag = 0;
2234
          continue;
2235
        }
2236

    
2237
      /* And finally enter poll() to find active sockets */
2238
      watchdog_stop();
2239
      pout = poll(pfd, nfds, poll_tout);
2240
      watchdog_start();
2241

    
2242
      if (pout < 0)
2243
        {
2244
          if (errno == EINTR || errno == EAGAIN)
2245
            continue;
2246
          die("poll: %m");
2247
        }
2248
      if (pout)
2249
        {
2250
          times_update(&main_timeloop);
2251

    
2252
          /* guaranteed to be non-empty */
2253
          current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
2254

    
2255
          while (current_sock)
2256
            {
2257
              sock *s = current_sock;
2258
              if (s->index == -1)
2259
                {
2260
                  current_sock = sk_next(s);
2261
                  goto next;
2262
                }
2263

    
2264
              int e;
2265
              int steps;
2266

    
2267
              steps = MAX_STEPS;
2268
              if (s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
2269
                do
2270
                  {
2271
                    steps--;
2272
                    io_log_event(s->rx_hook, s->data);
2273
                    e = sk_read(s, pfd[s->index].revents);
2274
                    if (s != current_sock)
2275
                      goto next;
2276
                  }
2277
                while (e && s->rx_hook && steps);
2278

    
2279
              steps = MAX_STEPS;
2280
              if (pfd[s->index].revents & POLLOUT)
2281
                do
2282
                  {
2283
                    steps--;
2284
                    io_log_event(s->tx_hook, s->data);
2285
                    e = sk_write(s);
2286
                    if (s != current_sock)
2287
                      goto next;
2288
                  }
2289
                while (e && steps);
2290

    
2291
              current_sock = sk_next(s);
2292
            next: ;
2293
            }
2294

    
2295
          short_loops++;
2296
          if (events && (short_loops < SHORT_LOOP_MAX))
2297
            continue;
2298
          short_loops = 0;
2299

    
2300
          int count = 0;
2301
          current_sock = stored_sock;
2302
          if (current_sock == NULL)
2303
            current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
2304

    
2305
          while (current_sock && count < MAX_RX_STEPS)
2306
            {
2307
              sock *s = current_sock;
2308
              if (s->index == -1)
2309
                {
2310
                  current_sock = sk_next(s);
2311
                  goto next2;
2312
                }
2313

    
2314
              if (!s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
2315
                {
2316
                  count++;
2317
                  io_log_event(s->rx_hook, s->data);
2318
                  sk_read(s, pfd[s->index].revents);
2319
                  if (s != current_sock)
2320
                    goto next2;
2321
                }
2322

    
2323
              if (pfd[s->index].revents & (POLLHUP | POLLERR))
2324
                {
2325
                  sk_err(s, pfd[s->index].revents);
2326
                  if (s != current_sock)
2327
                    goto next2;
2328
                }
2329

    
2330
              current_sock = sk_next(s);
2331
            next2: ;
2332
            }
2333

    
2334

    
2335
          stored_sock = current_sock;
2336
        }
2337
    }
2338
}
2339

    
2340
void
2341
test_old_bird(char *path)
2342
{
2343
  int fd;
2344
  struct sockaddr_un sa;
2345

    
2346
  fd = socket(AF_UNIX, SOCK_STREAM, 0);
2347
  if (fd < 0)
2348
    die("Cannot create socket: %m");
2349
  if (strlen(path) >= sizeof(sa.sun_path))
2350
    die("Socket path too long");
2351
  bzero(&sa, sizeof(sa));
2352
  sa.sun_family = AF_UNIX;
2353
  strcpy(sa.sun_path, path);
2354
  if (connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) == 0)
2355
    die("I found another BIRD running.");
2356
  close(fd);
2357
}