iof-bird-daemon / sysdep / unix / io.c @ e0835db4
History | View | Annotate | Download (48.4 KB)
1 |
/*
|
---|---|
2 |
* BIRD Internet Routing Daemon -- Unix I/O
|
3 |
*
|
4 |
* (c) 1998--2004 Martin Mares <mj@ucw.cz>
|
5 |
* (c) 2004 Ondrej Filip <feela@network.cz>
|
6 |
*
|
7 |
* Can be freely distributed and used under the terms of the GNU GPL.
|
8 |
*/
|
9 |
|
10 |
/* Unfortunately, some glibc versions hide parts of RFC 3542 API
|
11 |
if _GNU_SOURCE is not defined. */
|
12 |
#ifndef _GNU_SOURCE
|
13 |
#define _GNU_SOURCE
|
14 |
#endif
|
15 |
|
16 |
#include <stdio.h> |
17 |
#include <stdlib.h> |
18 |
#include <time.h> |
19 |
#include <sys/time.h> |
20 |
#include <sys/types.h> |
21 |
#include <sys/socket.h> |
22 |
#include <sys/uio.h> |
23 |
#include <sys/un.h> |
24 |
#include <poll.h> |
25 |
#include <unistd.h> |
26 |
#include <fcntl.h> |
27 |
#include <errno.h> |
28 |
#include <net/if.h> |
29 |
#include <netinet/in.h> |
30 |
#include <netinet/tcp.h> |
31 |
#include <netinet/udp.h> |
32 |
#include <netinet/icmp6.h> |
33 |
|
34 |
#include "nest/bird.h" |
35 |
#include "lib/lists.h" |
36 |
#include "lib/resource.h" |
37 |
#include "lib/socket.h" |
38 |
#include "lib/event.h" |
39 |
#include "lib/timer.h" |
40 |
#include "lib/string.h" |
41 |
#include "nest/iface.h" |
42 |
#include "conf/conf.h" |
43 |
|
44 |
#include "sysdep/unix/unix.h" |
45 |
#include CONFIG_INCLUDE_SYSIO_H
|
46 |
|
47 |
/* Maximum number of calls of tx handler for one socket in one
|
48 |
* poll iteration. Should be small enough to not monopolize CPU by
|
49 |
* one protocol instance.
|
50 |
*/
|
51 |
#define MAX_STEPS 4 |
52 |
|
53 |
/* Maximum number of calls of rx handler for all sockets in one poll
|
54 |
iteration. RX callbacks are often much more costly so we limit
|
55 |
this to gen small latencies */
|
56 |
#define MAX_RX_STEPS 4 |
57 |
|
58 |
|
59 |
/*
|
60 |
* Tracked Files
|
61 |
*/
|
62 |
|
63 |
struct rfile {
|
64 |
resource r; |
65 |
FILE *f; |
66 |
}; |
67 |
|
68 |
static void |
69 |
rf_free(resource *r) |
70 |
{ |
71 |
struct rfile *a = (struct rfile *) r; |
72 |
|
73 |
fclose(a->f); |
74 |
} |
75 |
|
76 |
static void |
77 |
rf_dump(resource *r) |
78 |
{ |
79 |
struct rfile *a = (struct rfile *) r; |
80 |
|
81 |
debug("(FILE *%p)\n", a->f);
|
82 |
} |
83 |
|
84 |
static struct resclass rf_class = { |
85 |
"FILE",
|
86 |
sizeof(struct rfile), |
87 |
rf_free, |
88 |
rf_dump, |
89 |
NULL,
|
90 |
NULL
|
91 |
}; |
92 |
|
93 |
struct rfile *
|
94 |
rf_open(pool *p, char *name, char *mode) |
95 |
{ |
96 |
FILE *f = fopen(name, mode); |
97 |
|
98 |
if (!f)
|
99 |
return NULL; |
100 |
|
101 |
struct rfile *r = ralloc(p, &rf_class);
|
102 |
r->f = f; |
103 |
return r;
|
104 |
} |
105 |
|
106 |
void *
|
107 |
rf_file(struct rfile *f)
|
108 |
{ |
109 |
return f->f;
|
110 |
} |
111 |
|
112 |
int
|
113 |
rf_fileno(struct rfile *f)
|
114 |
{ |
115 |
return fileno(f->f);
|
116 |
} |
117 |
|
118 |
|
119 |
/*
|
120 |
* Time clock
|
121 |
*/
|
122 |
|
123 |
btime boot_time; |
124 |
|
125 |
void
|
126 |
times_init(struct timeloop *loop)
|
127 |
{ |
128 |
struct timespec ts;
|
129 |
int rv;
|
130 |
|
131 |
rv = clock_gettime(CLOCK_MONOTONIC, &ts); |
132 |
if (rv < 0) |
133 |
die("Monotonic clock is missing");
|
134 |
|
135 |
if ((ts.tv_sec < 0) || (((s64) ts.tv_sec) > ((s64) 1 << 40))) |
136 |
log(L_WARN "Monotonic clock is crazy");
|
137 |
|
138 |
loop->last_time = ts.tv_sec S + ts.tv_nsec NS; |
139 |
loop->real_time = 0;
|
140 |
} |
141 |
|
142 |
void
|
143 |
times_update(struct timeloop *loop)
|
144 |
{ |
145 |
struct timespec ts;
|
146 |
int rv;
|
147 |
|
148 |
rv = clock_gettime(CLOCK_MONOTONIC, &ts); |
149 |
if (rv < 0) |
150 |
die("clock_gettime: %m");
|
151 |
|
152 |
btime new_time = ts.tv_sec S + ts.tv_nsec NS; |
153 |
|
154 |
if (new_time < loop->last_time)
|
155 |
log(L_ERR "Monotonic clock is broken");
|
156 |
|
157 |
loop->last_time = new_time; |
158 |
loop->real_time = 0;
|
159 |
} |
160 |
|
161 |
void
|
162 |
times_update_real_time(struct timeloop *loop)
|
163 |
{ |
164 |
struct timespec ts;
|
165 |
int rv;
|
166 |
|
167 |
rv = clock_gettime(CLOCK_REALTIME, &ts); |
168 |
if (rv < 0) |
169 |
die("clock_gettime: %m");
|
170 |
|
171 |
loop->real_time = ts.tv_sec S + ts.tv_nsec NS; |
172 |
} |
173 |
|
174 |
|
175 |
/**
|
176 |
* DOC: Sockets
|
177 |
*
|
178 |
* Socket resources represent network connections. Their data structure (&socket)
|
179 |
* contains a lot of fields defining the exact type of the socket, the local and
|
180 |
* remote addresses and ports, pointers to socket buffers and finally pointers to
|
181 |
* hook functions to be called when new data have arrived to the receive buffer
|
182 |
* (@rx_hook), when the contents of the transmit buffer have been transmitted
|
183 |
* (@tx_hook) and when an error or connection close occurs (@err_hook).
|
184 |
*
|
185 |
* Freeing of sockets from inside socket hooks is perfectly safe.
|
186 |
*/
|
187 |
|
188 |
#ifndef SOL_IP
|
189 |
#define SOL_IP IPPROTO_IP
|
190 |
#endif
|
191 |
|
192 |
#ifndef SOL_IPV6
|
193 |
#define SOL_IPV6 IPPROTO_IPV6
|
194 |
#endif
|
195 |
|
196 |
#ifndef SOL_ICMPV6
|
197 |
#define SOL_ICMPV6 IPPROTO_ICMPV6
|
198 |
#endif
|
199 |
|
200 |
|
201 |
/*
|
202 |
* Sockaddr helper functions
|
203 |
*/
|
204 |
|
205 |
static inline int UNUSED sockaddr_length(int af) |
206 |
{ return (af == AF_INET) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); } |
207 |
|
208 |
static inline void |
209 |
sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, uint port)
|
210 |
{ |
211 |
memset(sa, 0, sizeof(struct sockaddr_in)); |
212 |
#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
|
213 |
sa->sin_len = sizeof(struct sockaddr_in); |
214 |
#endif
|
215 |
sa->sin_family = AF_INET; |
216 |
sa->sin_port = htons(port); |
217 |
sa->sin_addr = ipa_to_in4(a); |
218 |
} |
219 |
|
220 |
static inline void |
221 |
sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port) |
222 |
{ |
223 |
memset(sa, 0, sizeof(struct sockaddr_in6)); |
224 |
#ifdef SIN6_LEN
|
225 |
sa->sin6_len = sizeof(struct sockaddr_in6); |
226 |
#endif
|
227 |
sa->sin6_family = AF_INET6; |
228 |
sa->sin6_port = htons(port); |
229 |
sa->sin6_flowinfo = 0;
|
230 |
sa->sin6_addr = ipa_to_in6(a); |
231 |
|
232 |
if (ifa && ipa_is_link_local(a))
|
233 |
sa->sin6_scope_id = ifa->index; |
234 |
} |
235 |
|
236 |
void
|
237 |
sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port) |
238 |
{ |
239 |
if (af == AF_INET)
|
240 |
sockaddr_fill4((struct sockaddr_in *) sa, a, port);
|
241 |
else if (af == AF_INET6) |
242 |
sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port);
|
243 |
else
|
244 |
bug("Unknown AF");
|
245 |
} |
246 |
|
247 |
static inline void |
248 |
sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, uint *port)
|
249 |
{ |
250 |
*port = ntohs(sa->sin_port); |
251 |
*a = ipa_from_in4(sa->sin_addr); |
252 |
} |
253 |
|
254 |
static inline void |
255 |
sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port) |
256 |
{ |
257 |
*port = ntohs(sa->sin6_port); |
258 |
*a = ipa_from_in6(sa->sin6_addr); |
259 |
|
260 |
if (ifa && ipa_is_link_local(*a))
|
261 |
*ifa = if_find_by_index(sa->sin6_scope_id); |
262 |
} |
263 |
|
264 |
int
|
265 |
sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port) |
266 |
{ |
267 |
if (sa->sa.sa_family != af)
|
268 |
goto fail;
|
269 |
|
270 |
if (af == AF_INET)
|
271 |
sockaddr_read4((struct sockaddr_in *) sa, a, port);
|
272 |
else if (af == AF_INET6) |
273 |
sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port);
|
274 |
else
|
275 |
goto fail;
|
276 |
|
277 |
return 0; |
278 |
|
279 |
fail:
|
280 |
*a = IPA_NONE; |
281 |
*port = 0;
|
282 |
return -1; |
283 |
} |
284 |
|
285 |
|
286 |
/*
|
287 |
* IPv6 multicast syscalls
|
288 |
*/
|
289 |
|
290 |
/* Fortunately standardized in RFC 3493 */
|
291 |
|
292 |
#define INIT_MREQ6(maddr,ifa) \
|
293 |
{ .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = ifa->index } |
294 |
|
295 |
static inline int |
296 |
sk_setup_multicast6(sock *s) |
297 |
{ |
298 |
int index = s->iface->index;
|
299 |
int ttl = s->ttl;
|
300 |
int n = 0; |
301 |
|
302 |
if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0) |
303 |
ERR("IPV6_MULTICAST_IF");
|
304 |
|
305 |
if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0) |
306 |
ERR("IPV6_MULTICAST_HOPS");
|
307 |
|
308 |
if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0) |
309 |
ERR("IPV6_MULTICAST_LOOP");
|
310 |
|
311 |
return 0; |
312 |
} |
313 |
|
314 |
static inline int |
315 |
sk_join_group6(sock *s, ip_addr maddr) |
316 |
{ |
317 |
struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
|
318 |
|
319 |
if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0) |
320 |
ERR("IPV6_JOIN_GROUP");
|
321 |
|
322 |
return 0; |
323 |
} |
324 |
|
325 |
static inline int |
326 |
sk_leave_group6(sock *s, ip_addr maddr) |
327 |
{ |
328 |
struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
|
329 |
|
330 |
if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0) |
331 |
ERR("IPV6_LEAVE_GROUP");
|
332 |
|
333 |
return 0; |
334 |
} |
335 |
|
336 |
|
337 |
/*
|
338 |
* IPv6 packet control messages
|
339 |
*/
|
340 |
|
341 |
/* Also standardized, in RFC 3542 */
|
342 |
|
343 |
/*
|
344 |
* RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
|
345 |
* type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
|
346 |
* don't have IPV6_RECVPKTINFO we suppose the OS implements the older
|
347 |
* RFC and we use IPV6_PKTINFO.
|
348 |
*/
|
349 |
#ifndef IPV6_RECVPKTINFO
|
350 |
#define IPV6_RECVPKTINFO IPV6_PKTINFO
|
351 |
#endif
|
352 |
/*
|
353 |
* Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
|
354 |
*/
|
355 |
#ifndef IPV6_RECVHOPLIMIT
|
356 |
#define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
|
357 |
#endif
|
358 |
|
359 |
|
360 |
#define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo)) |
361 |
#define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int)) |
362 |
|
363 |
static inline int |
364 |
sk_request_cmsg6_pktinfo(sock *s) |
365 |
{ |
366 |
int y = 1; |
367 |
|
368 |
if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0) |
369 |
ERR("IPV6_RECVPKTINFO");
|
370 |
|
371 |
return 0; |
372 |
} |
373 |
|
374 |
static inline int |
375 |
sk_request_cmsg6_ttl(sock *s) |
376 |
{ |
377 |
int y = 1; |
378 |
|
379 |
if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0) |
380 |
ERR("IPV6_RECVHOPLIMIT");
|
381 |
|
382 |
return 0; |
383 |
} |
384 |
|
385 |
static inline void |
386 |
sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm)
|
387 |
{ |
388 |
if (cm->cmsg_type == IPV6_PKTINFO)
|
389 |
{ |
390 |
struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm); |
391 |
s->laddr = ipa_from_in6(pi->ipi6_addr); |
392 |
s->lifindex = pi->ipi6_ifindex; |
393 |
} |
394 |
} |
395 |
|
396 |
static inline void |
397 |
sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm)
|
398 |
{ |
399 |
if (cm->cmsg_type == IPV6_HOPLIMIT)
|
400 |
s->rcv_ttl = * (int *) CMSG_DATA(cm);
|
401 |
} |
402 |
|
403 |
static inline void |
404 |
sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) |
405 |
{ |
406 |
struct cmsghdr *cm;
|
407 |
struct in6_pktinfo *pi;
|
408 |
int controllen = 0; |
409 |
|
410 |
msg->msg_control = cbuf; |
411 |
msg->msg_controllen = cbuflen; |
412 |
|
413 |
cm = CMSG_FIRSTHDR(msg); |
414 |
cm->cmsg_level = SOL_IPV6; |
415 |
cm->cmsg_type = IPV6_PKTINFO; |
416 |
cm->cmsg_len = CMSG_LEN(sizeof(*pi));
|
417 |
controllen += CMSG_SPACE(sizeof(*pi));
|
418 |
|
419 |
pi = (struct in6_pktinfo *) CMSG_DATA(cm);
|
420 |
pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
|
421 |
pi->ipi6_addr = ipa_to_in6(s->saddr); |
422 |
|
423 |
msg->msg_controllen = controllen; |
424 |
} |
425 |
|
426 |
|
427 |
/*
|
428 |
* Miscellaneous socket syscalls
|
429 |
*/
|
430 |
|
431 |
static inline int |
432 |
sk_set_ttl4(sock *s, int ttl)
|
433 |
{ |
434 |
if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0) |
435 |
ERR("IP_TTL");
|
436 |
|
437 |
return 0; |
438 |
} |
439 |
|
440 |
static inline int |
441 |
sk_set_ttl6(sock *s, int ttl)
|
442 |
{ |
443 |
if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0) |
444 |
ERR("IPV6_UNICAST_HOPS");
|
445 |
|
446 |
return 0; |
447 |
} |
448 |
|
449 |
static inline int |
450 |
sk_set_tos4(sock *s, int tos)
|
451 |
{ |
452 |
if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0) |
453 |
ERR("IP_TOS");
|
454 |
|
455 |
return 0; |
456 |
} |
457 |
|
458 |
static inline int |
459 |
sk_set_tos6(sock *s, int tos)
|
460 |
{ |
461 |
if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0) |
462 |
ERR("IPV6_TCLASS");
|
463 |
|
464 |
return 0; |
465 |
} |
466 |
|
467 |
static inline int |
468 |
sk_set_high_port(sock *s UNUSED) |
469 |
{ |
470 |
/* Port range setting is optional, ignore it if not supported */
|
471 |
|
472 |
#ifdef IP_PORTRANGE
|
473 |
if (sk_is_ipv4(s))
|
474 |
{ |
475 |
int range = IP_PORTRANGE_HIGH;
|
476 |
if (setsockopt(s->fd, SOL_IP, IP_PORTRANGE, &range, sizeof(range)) < 0) |
477 |
ERR("IP_PORTRANGE");
|
478 |
} |
479 |
#endif
|
480 |
|
481 |
#ifdef IPV6_PORTRANGE
|
482 |
if (sk_is_ipv6(s))
|
483 |
{ |
484 |
int range = IPV6_PORTRANGE_HIGH;
|
485 |
if (setsockopt(s->fd, SOL_IPV6, IPV6_PORTRANGE, &range, sizeof(range)) < 0) |
486 |
ERR("IPV6_PORTRANGE");
|
487 |
} |
488 |
#endif
|
489 |
|
490 |
return 0; |
491 |
} |
492 |
|
493 |
static inline byte * |
494 |
sk_skip_ip_header(byte *pkt, int *len)
|
495 |
{ |
496 |
if ((*len < 20) || ((*pkt & 0xf0) != 0x40)) |
497 |
return NULL; |
498 |
|
499 |
int hlen = (*pkt & 0x0f) * 4; |
500 |
if ((hlen < 20) || (hlen > *len)) |
501 |
return NULL; |
502 |
|
503 |
*len -= hlen; |
504 |
return pkt + hlen;
|
505 |
} |
506 |
|
507 |
byte * |
508 |
sk_rx_buffer(sock *s, int *len)
|
509 |
{ |
510 |
if (sk_is_ipv4(s) && (s->type == SK_IP))
|
511 |
return sk_skip_ip_header(s->rbuf, len);
|
512 |
else
|
513 |
return s->rbuf;
|
514 |
} |
515 |
|
516 |
|
517 |
/*
|
518 |
* Public socket functions
|
519 |
*/
|
520 |
|
521 |
/**
|
522 |
* sk_setup_multicast - enable multicast for given socket
|
523 |
* @s: socket
|
524 |
*
|
525 |
* Prepare transmission of multicast packets for given datagram socket.
|
526 |
* The socket must have defined @iface.
|
527 |
*
|
528 |
* Result: 0 for success, -1 for an error.
|
529 |
*/
|
530 |
|
531 |
int
|
532 |
sk_setup_multicast(sock *s) |
533 |
{ |
534 |
ASSERT(s->iface); |
535 |
|
536 |
if (sk_is_ipv4(s))
|
537 |
return sk_setup_multicast4(s);
|
538 |
else
|
539 |
return sk_setup_multicast6(s);
|
540 |
} |
541 |
|
542 |
/**
|
543 |
* sk_join_group - join multicast group for given socket
|
544 |
* @s: socket
|
545 |
* @maddr: multicast address
|
546 |
*
|
547 |
* Join multicast group for given datagram socket and associated interface.
|
548 |
* The socket must have defined @iface.
|
549 |
*
|
550 |
* Result: 0 for success, -1 for an error.
|
551 |
*/
|
552 |
|
553 |
int
|
554 |
sk_join_group(sock *s, ip_addr maddr) |
555 |
{ |
556 |
if (sk_is_ipv4(s))
|
557 |
return sk_join_group4(s, maddr);
|
558 |
else
|
559 |
return sk_join_group6(s, maddr);
|
560 |
} |
561 |
|
562 |
/**
|
563 |
* sk_leave_group - leave multicast group for given socket
|
564 |
* @s: socket
|
565 |
* @maddr: multicast address
|
566 |
*
|
567 |
* Leave multicast group for given datagram socket and associated interface.
|
568 |
* The socket must have defined @iface.
|
569 |
*
|
570 |
* Result: 0 for success, -1 for an error.
|
571 |
*/
|
572 |
|
573 |
int
|
574 |
sk_leave_group(sock *s, ip_addr maddr) |
575 |
{ |
576 |
if (sk_is_ipv4(s))
|
577 |
return sk_leave_group4(s, maddr);
|
578 |
else
|
579 |
return sk_leave_group6(s, maddr);
|
580 |
} |
581 |
|
582 |
/**
|
583 |
* sk_setup_broadcast - enable broadcast for given socket
|
584 |
* @s: socket
|
585 |
*
|
586 |
* Allow reception and transmission of broadcast packets for given datagram
|
587 |
* socket. The socket must have defined @iface. For transmission, packets should
|
588 |
* be send to @brd address of @iface.
|
589 |
*
|
590 |
* Result: 0 for success, -1 for an error.
|
591 |
*/
|
592 |
|
593 |
int
|
594 |
sk_setup_broadcast(sock *s) |
595 |
{ |
596 |
int y = 1; |
597 |
|
598 |
if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0) |
599 |
ERR("SO_BROADCAST");
|
600 |
|
601 |
return 0; |
602 |
} |
603 |
|
604 |
/**
|
605 |
* sk_set_ttl - set transmit TTL for given socket
|
606 |
* @s: socket
|
607 |
* @ttl: TTL value
|
608 |
*
|
609 |
* Set TTL for already opened connections when TTL was not set before. Useful
|
610 |
* for accepted connections when different ones should have different TTL.
|
611 |
*
|
612 |
* Result: 0 for success, -1 for an error.
|
613 |
*/
|
614 |
|
615 |
int
|
616 |
sk_set_ttl(sock *s, int ttl)
|
617 |
{ |
618 |
s->ttl = ttl; |
619 |
|
620 |
if (sk_is_ipv4(s))
|
621 |
return sk_set_ttl4(s, ttl);
|
622 |
else
|
623 |
return sk_set_ttl6(s, ttl);
|
624 |
} |
625 |
|
626 |
/**
|
627 |
* sk_set_min_ttl - set minimal accepted TTL for given socket
|
628 |
* @s: socket
|
629 |
* @ttl: TTL value
|
630 |
*
|
631 |
* Set minimal accepted TTL for given socket. Can be used for TTL security.
|
632 |
* implementations.
|
633 |
*
|
634 |
* Result: 0 for success, -1 for an error.
|
635 |
*/
|
636 |
|
637 |
int
|
638 |
sk_set_min_ttl(sock *s, int ttl)
|
639 |
{ |
640 |
if (sk_is_ipv4(s))
|
641 |
return sk_set_min_ttl4(s, ttl);
|
642 |
else
|
643 |
return sk_set_min_ttl6(s, ttl);
|
644 |
} |
645 |
|
646 |
#if 0
|
647 |
/**
|
648 |
* sk_set_md5_auth - add / remove MD5 security association for given socket
|
649 |
* @s: socket
|
650 |
* @local: IP address of local side
|
651 |
* @remote: IP address of remote side
|
652 |
* @ifa: Interface for link-local IP address
|
653 |
* @passwd: Password used for MD5 authentication
|
654 |
* @setkey: Update also system SA/SP database
|
655 |
*
|
656 |
* In TCP MD5 handling code in kernel, there is a set of security associations
|
657 |
* used for choosing password and other authentication parameters according to
|
658 |
* the local and remote address. This function is useful for listening socket,
|
659 |
* for active sockets it may be enough to set s->password field.
|
660 |
*
|
661 |
* When called with passwd != NULL, the new pair is added,
|
662 |
* When called with passwd == NULL, the existing pair is removed.
|
663 |
*
|
664 |
* Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are
|
665 |
* stored in global SA/SP database (but the behavior also must be enabled on
|
666 |
* per-socket basis). In case of multiple sockets to the same neighbor, the
|
667 |
* socket-specific state must be configured for each socket while global state
|
668 |
* just once per src-dst pair. The @setkey argument controls whether the global
|
669 |
* state (SA/SP database) is also updated.
|
670 |
*
|
671 |
* Result: 0 for success, -1 for an error.
|
672 |
*/
|
673 |
|
674 |
int
|
675 |
sk_set_md5_auth(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, char *passwd, int setkey)
|
676 |
{ DUMMY; }
|
677 |
#endif
|
678 |
|
679 |
/**
|
680 |
* sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
|
681 |
* @s: socket
|
682 |
* @offset: offset
|
683 |
*
|
684 |
* Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
|
685 |
* kernel will automatically fill it for outgoing packets and check it for
|
686 |
* incoming packets. Should not be used on ICMPv6 sockets, where the position is
|
687 |
* known to the kernel.
|
688 |
*
|
689 |
* Result: 0 for success, -1 for an error.
|
690 |
*/
|
691 |
|
692 |
int
|
693 |
sk_set_ipv6_checksum(sock *s, int offset)
|
694 |
{ |
695 |
if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0) |
696 |
ERR("IPV6_CHECKSUM");
|
697 |
|
698 |
return 0; |
699 |
} |
700 |
|
701 |
int
|
702 |
sk_set_icmp6_filter(sock *s, int p1, int p2) |
703 |
{ |
704 |
/* a bit of lame interface, but it is here only for Radv */
|
705 |
struct icmp6_filter f;
|
706 |
|
707 |
ICMP6_FILTER_SETBLOCKALL(&f); |
708 |
ICMP6_FILTER_SETPASS(p1, &f); |
709 |
ICMP6_FILTER_SETPASS(p2, &f); |
710 |
|
711 |
if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0) |
712 |
ERR("ICMP6_FILTER");
|
713 |
|
714 |
return 0; |
715 |
} |
716 |
|
717 |
void
|
718 |
sk_log_error(sock *s, const char *p) |
719 |
{ |
720 |
log(L_ERR "%s: Socket error: %s%#m", p, s->err);
|
721 |
} |
722 |
|
723 |
|
724 |
/*
|
725 |
* Actual struct birdsock code
|
726 |
*/
|
727 |
|
728 |
static list sock_list;
|
729 |
static struct birdsock *current_sock; |
730 |
static struct birdsock *stored_sock; |
731 |
|
732 |
static inline sock * |
733 |
sk_next(sock *s) |
734 |
{ |
735 |
if (!s->n.next->next)
|
736 |
return NULL; |
737 |
else
|
738 |
return SKIP_BACK(sock, n, s->n.next);
|
739 |
} |
740 |
|
741 |
static void |
742 |
sk_alloc_bufs(sock *s) |
743 |
{ |
744 |
if (!s->rbuf && s->rbsize)
|
745 |
s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize); |
746 |
s->rpos = s->rbuf; |
747 |
if (!s->tbuf && s->tbsize)
|
748 |
s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize); |
749 |
s->tpos = s->ttx = s->tbuf; |
750 |
} |
751 |
|
752 |
static void |
753 |
sk_free_bufs(sock *s) |
754 |
{ |
755 |
if (s->rbuf_alloc)
|
756 |
{ |
757 |
xfree(s->rbuf_alloc); |
758 |
s->rbuf = s->rbuf_alloc = NULL;
|
759 |
} |
760 |
if (s->tbuf_alloc)
|
761 |
{ |
762 |
xfree(s->tbuf_alloc); |
763 |
s->tbuf = s->tbuf_alloc = NULL;
|
764 |
} |
765 |
} |
766 |
|
767 |
#ifdef HAVE_LIBSSH
|
768 |
static void |
769 |
sk_ssh_free(sock *s) |
770 |
{ |
771 |
struct ssh_sock *ssh = s->ssh;
|
772 |
|
773 |
if (s->ssh == NULL) |
774 |
return;
|
775 |
|
776 |
s->ssh = NULL;
|
777 |
|
778 |
if (ssh->channel)
|
779 |
{ |
780 |
if (ssh_channel_is_open(ssh->channel))
|
781 |
ssh_channel_close(ssh->channel); |
782 |
ssh_channel_free(ssh->channel); |
783 |
ssh->channel = NULL;
|
784 |
} |
785 |
|
786 |
if (ssh->session)
|
787 |
{ |
788 |
ssh_disconnect(ssh->session); |
789 |
ssh_free(ssh->session); |
790 |
ssh->session = NULL;
|
791 |
} |
792 |
} |
793 |
#endif
|
794 |
|
795 |
static void |
796 |
sk_free(resource *r) |
797 |
{ |
798 |
sock *s = (sock *) r; |
799 |
|
800 |
sk_free_bufs(s); |
801 |
|
802 |
#ifdef HAVE_LIBSSH
|
803 |
if (s->type == SK_SSH || s->type == SK_SSH_ACTIVE)
|
804 |
sk_ssh_free(s); |
805 |
#endif
|
806 |
|
807 |
if (s->fd < 0) |
808 |
return;
|
809 |
|
810 |
/* FIXME: we should call sk_stop() for SKF_THREAD sockets */
|
811 |
if (!(s->flags & SKF_THREAD))
|
812 |
{ |
813 |
if (s == current_sock)
|
814 |
current_sock = sk_next(s); |
815 |
if (s == stored_sock)
|
816 |
stored_sock = sk_next(s); |
817 |
rem_node(&s->n); |
818 |
} |
819 |
|
820 |
if (s->type != SK_SSH && s->type != SK_SSH_ACTIVE)
|
821 |
close(s->fd); |
822 |
|
823 |
s->fd = -1;
|
824 |
} |
825 |
|
826 |
void
|
827 |
sk_set_rbsize(sock *s, uint val) |
828 |
{ |
829 |
ASSERT(s->rbuf_alloc == s->rbuf); |
830 |
|
831 |
if (s->rbsize == val)
|
832 |
return;
|
833 |
|
834 |
s->rbsize = val; |
835 |
xfree(s->rbuf_alloc); |
836 |
s->rbuf_alloc = xmalloc(val); |
837 |
s->rpos = s->rbuf = s->rbuf_alloc; |
838 |
} |
839 |
|
840 |
void
|
841 |
sk_set_tbsize(sock *s, uint val) |
842 |
{ |
843 |
ASSERT(s->tbuf_alloc == s->tbuf); |
844 |
|
845 |
if (s->tbsize == val)
|
846 |
return;
|
847 |
|
848 |
byte *old_tbuf = s->tbuf; |
849 |
|
850 |
s->tbsize = val; |
851 |
s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val); |
852 |
s->tpos = s->tbuf + (s->tpos - old_tbuf); |
853 |
s->ttx = s->tbuf + (s->ttx - old_tbuf); |
854 |
} |
855 |
|
856 |
void
|
857 |
sk_set_tbuf(sock *s, void *tbuf)
|
858 |
{ |
859 |
s->tbuf = tbuf ?: s->tbuf_alloc; |
860 |
s->ttx = s->tpos = s->tbuf; |
861 |
} |
862 |
|
863 |
void
|
864 |
sk_reallocate(sock *s) |
865 |
{ |
866 |
sk_free_bufs(s); |
867 |
sk_alloc_bufs(s); |
868 |
} |
869 |
|
870 |
static void |
871 |
sk_dump(resource *r) |
872 |
{ |
873 |
sock *s = (sock *) r; |
874 |
static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "SSH>", "SSH", "DEL!" }; |
875 |
|
876 |
debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
|
877 |
sk_type_names[s->type], |
878 |
s->data, |
879 |
s->saddr, |
880 |
s->sport, |
881 |
s->daddr, |
882 |
s->dport, |
883 |
s->tos, |
884 |
s->ttl, |
885 |
s->iface ? s->iface->name : "none");
|
886 |
} |
887 |
|
888 |
static struct resclass sk_class = { |
889 |
"Socket",
|
890 |
sizeof(sock),
|
891 |
sk_free, |
892 |
sk_dump, |
893 |
NULL,
|
894 |
NULL
|
895 |
}; |
896 |
|
897 |
/**
|
898 |
* sk_new - create a socket
|
899 |
* @p: pool
|
900 |
*
|
901 |
* This function creates a new socket resource. If you want to use it,
|
902 |
* you need to fill in all the required fields of the structure and
|
903 |
* call sk_open() to do the actual opening of the socket.
|
904 |
*
|
905 |
* The real function name is sock_new(), sk_new() is a macro wrapper
|
906 |
* to avoid collision with OpenSSL.
|
907 |
*/
|
908 |
sock * |
909 |
sock_new(pool *p) |
910 |
{ |
911 |
sock *s = ralloc(p, &sk_class); |
912 |
s->pool = p; |
913 |
// s->saddr = s->daddr = IPA_NONE;
|
914 |
s->tos = s->priority = s->ttl = -1;
|
915 |
s->fd = -1;
|
916 |
return s;
|
917 |
} |
918 |
|
919 |
static int |
920 |
sk_setup(sock *s) |
921 |
{ |
922 |
int y = 1; |
923 |
int fd = s->fd;
|
924 |
|
925 |
if (s->type == SK_SSH_ACTIVE)
|
926 |
return 0; |
927 |
|
928 |
if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) |
929 |
ERR("O_NONBLOCK");
|
930 |
|
931 |
if (!s->af)
|
932 |
return 0; |
933 |
|
934 |
if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
|
935 |
s->flags |= SKF_PKTINFO; |
936 |
|
937 |
#ifdef CONFIG_USE_HDRINCL
|
938 |
if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO))
|
939 |
{ |
940 |
s->flags &= ~SKF_PKTINFO; |
941 |
s->flags |= SKF_HDRINCL; |
942 |
if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0) |
943 |
ERR("IP_HDRINCL");
|
944 |
} |
945 |
#endif
|
946 |
|
947 |
if (s->vrf && !s->iface)
|
948 |
{ |
949 |
/* Bind socket to associated VRF interface.
|
950 |
This is Linux-specific, but so is SO_BINDTODEVICE. */
|
951 |
#ifdef SO_BINDTODEVICE
|
952 |
struct ifreq ifr = {};
|
953 |
strcpy(ifr.ifr_name, s->vrf->name); |
954 |
if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0) |
955 |
ERR("SO_BINDTODEVICE");
|
956 |
#endif
|
957 |
} |
958 |
|
959 |
if (s->iface)
|
960 |
{ |
961 |
#ifdef SO_BINDTODEVICE
|
962 |
struct ifreq ifr = {};
|
963 |
strcpy(ifr.ifr_name, s->iface->name); |
964 |
if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0) |
965 |
ERR("SO_BINDTODEVICE");
|
966 |
#endif
|
967 |
|
968 |
#ifdef CONFIG_UNIX_DONTROUTE
|
969 |
if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0) |
970 |
ERR("SO_DONTROUTE");
|
971 |
#endif
|
972 |
} |
973 |
|
974 |
if (sk_is_ipv4(s))
|
975 |
{ |
976 |
if (s->flags & SKF_LADDR_RX)
|
977 |
if (sk_request_cmsg4_pktinfo(s) < 0) |
978 |
return -1; |
979 |
|
980 |
if (s->flags & SKF_TTL_RX)
|
981 |
if (sk_request_cmsg4_ttl(s) < 0) |
982 |
return -1; |
983 |
|
984 |
if ((s->type == SK_UDP) || (s->type == SK_IP))
|
985 |
if (sk_disable_mtu_disc4(s) < 0) |
986 |
return -1; |
987 |
|
988 |
if (s->ttl >= 0) |
989 |
if (sk_set_ttl4(s, s->ttl) < 0) |
990 |
return -1; |
991 |
|
992 |
if (s->tos >= 0) |
993 |
if (sk_set_tos4(s, s->tos) < 0) |
994 |
return -1; |
995 |
} |
996 |
|
997 |
if (sk_is_ipv6(s))
|
998 |
{ |
999 |
if ((s->type == SK_TCP_PASSIVE) || (s->type == SK_TCP_ACTIVE) || (s->type == SK_UDP))
|
1000 |
if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0) |
1001 |
ERR("IPV6_V6ONLY");
|
1002 |
|
1003 |
if (s->flags & SKF_LADDR_RX)
|
1004 |
if (sk_request_cmsg6_pktinfo(s) < 0) |
1005 |
return -1; |
1006 |
|
1007 |
if (s->flags & SKF_TTL_RX)
|
1008 |
if (sk_request_cmsg6_ttl(s) < 0) |
1009 |
return -1; |
1010 |
|
1011 |
if ((s->type == SK_UDP) || (s->type == SK_IP))
|
1012 |
if (sk_disable_mtu_disc6(s) < 0) |
1013 |
return -1; |
1014 |
|
1015 |
if (s->ttl >= 0) |
1016 |
if (sk_set_ttl6(s, s->ttl) < 0) |
1017 |
return -1; |
1018 |
|
1019 |
if (s->tos >= 0) |
1020 |
if (sk_set_tos6(s, s->tos) < 0) |
1021 |
return -1; |
1022 |
} |
1023 |
|
1024 |
/* Must be after sk_set_tos4() as setting ToS on Linux also mangles priority */
|
1025 |
if (s->priority >= 0) |
1026 |
if (sk_set_priority(s, s->priority) < 0) |
1027 |
return -1; |
1028 |
|
1029 |
return 0; |
1030 |
} |
1031 |
|
1032 |
static void |
1033 |
sk_insert(sock *s) |
1034 |
{ |
1035 |
add_tail(&sock_list, &s->n); |
1036 |
} |
1037 |
|
1038 |
static void |
1039 |
sk_tcp_connected(sock *s) |
1040 |
{ |
1041 |
sockaddr sa; |
1042 |
int sa_len = sizeof(sa); |
1043 |
|
1044 |
if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) || |
1045 |
(sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0))
|
1046 |
log(L_WARN "SOCK: Cannot get local IP address for TCP>");
|
1047 |
|
1048 |
s->type = SK_TCP; |
1049 |
sk_alloc_bufs(s); |
1050 |
s->tx_hook(s); |
1051 |
} |
1052 |
|
1053 |
#ifdef HAVE_LIBSSH
|
1054 |
static void |
1055 |
sk_ssh_connected(sock *s) |
1056 |
{ |
1057 |
sk_alloc_bufs(s); |
1058 |
s->type = SK_SSH; |
1059 |
s->tx_hook(s); |
1060 |
} |
1061 |
#endif
|
1062 |
|
1063 |
static int |
1064 |
sk_passive_connected(sock *s, int type)
|
1065 |
{ |
1066 |
sockaddr loc_sa, rem_sa; |
1067 |
int loc_sa_len = sizeof(loc_sa); |
1068 |
int rem_sa_len = sizeof(rem_sa); |
1069 |
|
1070 |
int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len); |
1071 |
if (fd < 0) |
1072 |
{ |
1073 |
if ((errno != EINTR) && (errno != EAGAIN))
|
1074 |
s->err_hook(s, errno); |
1075 |
return 0; |
1076 |
} |
1077 |
|
1078 |
sock *t = sk_new(s->pool); |
1079 |
t->type = type; |
1080 |
t->data = s->data; |
1081 |
t->af = s->af; |
1082 |
t->fd = fd; |
1083 |
t->ttl = s->ttl; |
1084 |
t->tos = s->tos; |
1085 |
t->vrf = s->vrf; |
1086 |
t->rbsize = s->rbsize; |
1087 |
t->tbsize = s->tbsize; |
1088 |
|
1089 |
if (type == SK_TCP)
|
1090 |
{ |
1091 |
if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) || |
1092 |
(sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0))
|
1093 |
log(L_WARN "SOCK: Cannot get local IP address for TCP<");
|
1094 |
|
1095 |
if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0) |
1096 |
log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
|
1097 |
} |
1098 |
|
1099 |
if (sk_setup(t) < 0) |
1100 |
{ |
1101 |
/* FIXME: Call err_hook instead ? */
|
1102 |
log(L_ERR "SOCK: Incoming connection: %s%#m", t->err);
|
1103 |
|
1104 |
/* FIXME: handle it better in rfree() */
|
1105 |
close(t->fd); |
1106 |
t->fd = -1;
|
1107 |
rfree(t); |
1108 |
return 1; |
1109 |
} |
1110 |
|
1111 |
sk_insert(t); |
1112 |
sk_alloc_bufs(t); |
1113 |
s->rx_hook(t, 0);
|
1114 |
return 1; |
1115 |
} |
1116 |
|
1117 |
#ifdef HAVE_LIBSSH
|
1118 |
/*
|
1119 |
* Return SSH_OK or SSH_AGAIN or SSH_ERROR
|
1120 |
*/
|
1121 |
static int |
1122 |
sk_ssh_connect(sock *s) |
1123 |
{ |
1124 |
s->fd = ssh_get_fd(s->ssh->session); |
1125 |
|
1126 |
/* Big fall thru automata */
|
1127 |
switch (s->ssh->state)
|
1128 |
{ |
1129 |
case SK_SSH_CONNECT:
|
1130 |
{ |
1131 |
switch (ssh_connect(s->ssh->session))
|
1132 |
{ |
1133 |
case SSH_AGAIN:
|
1134 |
/* A quick look into libSSH shows that ssh_get_fd() should return non-(-1)
|
1135 |
* after SSH_AGAIN is returned by ssh_connect(). This is however nowhere
|
1136 |
* documented but our code relies on that.
|
1137 |
*/
|
1138 |
return SSH_AGAIN;
|
1139 |
|
1140 |
case SSH_OK:
|
1141 |
break;
|
1142 |
|
1143 |
default:
|
1144 |
return SSH_ERROR;
|
1145 |
} |
1146 |
} /* fallthrough */
|
1147 |
|
1148 |
case SK_SSH_SERVER_KNOWN:
|
1149 |
{ |
1150 |
s->ssh->state = SK_SSH_SERVER_KNOWN; |
1151 |
|
1152 |
if (s->ssh->server_hostkey_path)
|
1153 |
{ |
1154 |
int server_identity_is_ok = 1; |
1155 |
|
1156 |
/* Check server identity */
|
1157 |
switch (ssh_is_server_known(s->ssh->session))
|
1158 |
{ |
1159 |
#define LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s,msg,args...) log(L_WARN "SSH Identity %s@%s:%u: " msg, (s)->ssh->username, (s)->host, (s)->dport, ## args); |
1160 |
case SSH_SERVER_KNOWN_OK:
|
1161 |
/* The server is known and has not changed. */
|
1162 |
break;
|
1163 |
|
1164 |
case SSH_SERVER_NOT_KNOWN:
|
1165 |
LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server is unknown, its public key was not found in the known host file %s", s->ssh->server_hostkey_path);
|
1166 |
break;
|
1167 |
|
1168 |
case SSH_SERVER_KNOWN_CHANGED:
|
1169 |
LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server key has changed. Either you are under attack or the administrator changed the key.");
|
1170 |
server_identity_is_ok = 0;
|
1171 |
break;
|
1172 |
|
1173 |
case SSH_SERVER_FILE_NOT_FOUND:
|
1174 |
LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The known host file %s does not exist", s->ssh->server_hostkey_path);
|
1175 |
server_identity_is_ok = 0;
|
1176 |
break;
|
1177 |
|
1178 |
case SSH_SERVER_ERROR:
|
1179 |
LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "Some error happened");
|
1180 |
server_identity_is_ok = 0;
|
1181 |
break;
|
1182 |
|
1183 |
case SSH_SERVER_FOUND_OTHER:
|
1184 |
LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server gave use a key of a type while we had an other type recorded. " \
|
1185 |
"It is a possible attack.");
|
1186 |
server_identity_is_ok = 0;
|
1187 |
break;
|
1188 |
} |
1189 |
|
1190 |
if (!server_identity_is_ok)
|
1191 |
return SSH_ERROR;
|
1192 |
} |
1193 |
} /* fallthrough */
|
1194 |
|
1195 |
case SK_SSH_USERAUTH:
|
1196 |
{ |
1197 |
s->ssh->state = SK_SSH_USERAUTH; |
1198 |
switch (ssh_userauth_publickey_auto(s->ssh->session, NULL, NULL)) |
1199 |
{ |
1200 |
case SSH_AUTH_AGAIN:
|
1201 |
return SSH_AGAIN;
|
1202 |
|
1203 |
case SSH_AUTH_SUCCESS:
|
1204 |
break;
|
1205 |
|
1206 |
default:
|
1207 |
return SSH_ERROR;
|
1208 |
} |
1209 |
} /* fallthrough */
|
1210 |
|
1211 |
case SK_SSH_CHANNEL:
|
1212 |
{ |
1213 |
s->ssh->state = SK_SSH_CHANNEL; |
1214 |
s->ssh->channel = ssh_channel_new(s->ssh->session); |
1215 |
if (s->ssh->channel == NULL) |
1216 |
return SSH_ERROR;
|
1217 |
} /* fallthrough */
|
1218 |
|
1219 |
case SK_SSH_SESSION:
|
1220 |
{ |
1221 |
s->ssh->state = SK_SSH_SESSION; |
1222 |
switch (ssh_channel_open_session(s->ssh->channel))
|
1223 |
{ |
1224 |
case SSH_AGAIN:
|
1225 |
return SSH_AGAIN;
|
1226 |
|
1227 |
case SSH_OK:
|
1228 |
break;
|
1229 |
|
1230 |
default:
|
1231 |
return SSH_ERROR;
|
1232 |
} |
1233 |
} /* fallthrough */
|
1234 |
|
1235 |
case SK_SSH_SUBSYSTEM:
|
1236 |
{ |
1237 |
s->ssh->state = SK_SSH_SUBSYSTEM; |
1238 |
if (s->ssh->subsystem)
|
1239 |
{ |
1240 |
switch (ssh_channel_request_subsystem(s->ssh->channel, s->ssh->subsystem))
|
1241 |
{ |
1242 |
case SSH_AGAIN:
|
1243 |
return SSH_AGAIN;
|
1244 |
|
1245 |
case SSH_OK:
|
1246 |
break;
|
1247 |
|
1248 |
default:
|
1249 |
return SSH_ERROR;
|
1250 |
} |
1251 |
} |
1252 |
} /* fallthrough */
|
1253 |
|
1254 |
case SK_SSH_ESTABLISHED:
|
1255 |
s->ssh->state = SK_SSH_ESTABLISHED; |
1256 |
} |
1257 |
|
1258 |
return SSH_OK;
|
1259 |
} |
1260 |
|
1261 |
/*
|
1262 |
* Return file descriptor number if success
|
1263 |
* Return -1 if failed
|
1264 |
*/
|
1265 |
static int |
1266 |
sk_open_ssh(sock *s) |
1267 |
{ |
1268 |
if (!s->ssh)
|
1269 |
bug("sk_open() sock->ssh is not allocated");
|
1270 |
|
1271 |
ssh_session sess = ssh_new(); |
1272 |
if (sess == NULL) |
1273 |
ERR2("Cannot create a ssh session");
|
1274 |
s->ssh->session = sess; |
1275 |
|
1276 |
const int verbosity = SSH_LOG_NOLOG; |
1277 |
ssh_options_set(sess, SSH_OPTIONS_LOG_VERBOSITY, &verbosity); |
1278 |
ssh_options_set(sess, SSH_OPTIONS_HOST, s->host); |
1279 |
ssh_options_set(sess, SSH_OPTIONS_PORT, &(s->dport)); |
1280 |
/* TODO: Add SSH_OPTIONS_BINDADDR */
|
1281 |
ssh_options_set(sess, SSH_OPTIONS_USER, s->ssh->username); |
1282 |
|
1283 |
if (s->ssh->server_hostkey_path)
|
1284 |
ssh_options_set(sess, SSH_OPTIONS_KNOWNHOSTS, s->ssh->server_hostkey_path); |
1285 |
|
1286 |
if (s->ssh->client_privkey_path)
|
1287 |
ssh_options_set(sess, SSH_OPTIONS_IDENTITY, s->ssh->client_privkey_path); |
1288 |
|
1289 |
ssh_set_blocking(sess, 0);
|
1290 |
|
1291 |
switch (sk_ssh_connect(s))
|
1292 |
{ |
1293 |
case SSH_AGAIN:
|
1294 |
break;
|
1295 |
|
1296 |
case SSH_OK:
|
1297 |
sk_ssh_connected(s); |
1298 |
break;
|
1299 |
|
1300 |
case SSH_ERROR:
|
1301 |
ERR2(ssh_get_error(sess)); |
1302 |
break;
|
1303 |
} |
1304 |
|
1305 |
return ssh_get_fd(sess);
|
1306 |
|
1307 |
err:
|
1308 |
return -1; |
1309 |
} |
1310 |
#endif
|
1311 |
|
1312 |
/**
|
1313 |
* sk_open - open a socket
|
1314 |
* @s: socket
|
1315 |
*
|
1316 |
* This function takes a socket resource created by sk_new() and
|
1317 |
* initialized by the user and binds a corresponding network connection
|
1318 |
* to it.
|
1319 |
*
|
1320 |
* Result: 0 for success, -1 for an error.
|
1321 |
*/
|
1322 |
int
|
1323 |
sk_open(sock *s) |
1324 |
{ |
1325 |
int af = AF_UNSPEC;
|
1326 |
int fd = -1; |
1327 |
int do_bind = 0; |
1328 |
int bind_port = 0; |
1329 |
ip_addr bind_addr = IPA_NONE; |
1330 |
sockaddr sa; |
1331 |
|
1332 |
if (s->type <= SK_IP)
|
1333 |
{ |
1334 |
/*
|
1335 |
* For TCP/IP sockets, Address family (IPv4 or IPv6) can be specified either
|
1336 |
* explicitly (SK_IPV4 or SK_IPV6) or implicitly (based on saddr, daddr).
|
1337 |
* But the specifications have to be consistent.
|
1338 |
*/
|
1339 |
|
1340 |
switch (s->subtype)
|
1341 |
{ |
1342 |
case 0: |
1343 |
ASSERT(ipa_zero(s->saddr) || ipa_zero(s->daddr) || |
1344 |
(ipa_is_ip4(s->saddr) == ipa_is_ip4(s->daddr))); |
1345 |
af = (ipa_is_ip4(s->saddr) || ipa_is_ip4(s->daddr)) ? AF_INET : AF_INET6; |
1346 |
break;
|
1347 |
|
1348 |
case SK_IPV4:
|
1349 |
ASSERT(ipa_zero(s->saddr) || ipa_is_ip4(s->saddr)); |
1350 |
ASSERT(ipa_zero(s->daddr) || ipa_is_ip4(s->daddr)); |
1351 |
af = AF_INET; |
1352 |
break;
|
1353 |
|
1354 |
case SK_IPV6:
|
1355 |
ASSERT(ipa_zero(s->saddr) || !ipa_is_ip4(s->saddr)); |
1356 |
ASSERT(ipa_zero(s->daddr) || !ipa_is_ip4(s->daddr)); |
1357 |
af = AF_INET6; |
1358 |
break;
|
1359 |
|
1360 |
default:
|
1361 |
bug("Invalid subtype %d", s->subtype);
|
1362 |
} |
1363 |
} |
1364 |
|
1365 |
switch (s->type)
|
1366 |
{ |
1367 |
case SK_TCP_ACTIVE:
|
1368 |
s->ttx = ""; /* Force s->ttx != s->tpos */ |
1369 |
/* Fall thru */
|
1370 |
case SK_TCP_PASSIVE:
|
1371 |
fd = socket(af, SOCK_STREAM, IPPROTO_TCP); |
1372 |
bind_port = s->sport; |
1373 |
bind_addr = s->saddr; |
1374 |
do_bind = bind_port || ipa_nonzero(bind_addr); |
1375 |
break;
|
1376 |
|
1377 |
#ifdef HAVE_LIBSSH
|
1378 |
case SK_SSH_ACTIVE:
|
1379 |
s->ttx = ""; /* Force s->ttx != s->tpos */ |
1380 |
fd = sk_open_ssh(s); |
1381 |
break;
|
1382 |
#endif
|
1383 |
|
1384 |
case SK_UDP:
|
1385 |
fd = socket(af, SOCK_DGRAM, IPPROTO_UDP); |
1386 |
bind_port = s->sport; |
1387 |
bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE; |
1388 |
do_bind = 1;
|
1389 |
break;
|
1390 |
|
1391 |
case SK_IP:
|
1392 |
fd = socket(af, SOCK_RAW, s->dport); |
1393 |
bind_port = 0;
|
1394 |
bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE; |
1395 |
do_bind = ipa_nonzero(bind_addr); |
1396 |
break;
|
1397 |
|
1398 |
case SK_MAGIC:
|
1399 |
af = 0;
|
1400 |
fd = s->fd; |
1401 |
break;
|
1402 |
|
1403 |
default:
|
1404 |
bug("sk_open() called for invalid sock type %d", s->type);
|
1405 |
} |
1406 |
|
1407 |
if (fd < 0) |
1408 |
ERR("socket");
|
1409 |
|
1410 |
s->af = af; |
1411 |
s->fd = fd; |
1412 |
|
1413 |
if (sk_setup(s) < 0) |
1414 |
goto err;
|
1415 |
|
1416 |
if (do_bind)
|
1417 |
{ |
1418 |
if (bind_port)
|
1419 |
{ |
1420 |
int y = 1; |
1421 |
|
1422 |
if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0) |
1423 |
ERR2("SO_REUSEADDR");
|
1424 |
|
1425 |
#ifdef CONFIG_NO_IFACE_BIND
|
1426 |
/* Workaround missing ability to bind to an iface */
|
1427 |
if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr))
|
1428 |
{ |
1429 |
if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0) |
1430 |
ERR2("SO_REUSEPORT");
|
1431 |
} |
1432 |
#endif
|
1433 |
} |
1434 |
else
|
1435 |
if (s->flags & SKF_HIGH_PORT)
|
1436 |
if (sk_set_high_port(s) < 0) |
1437 |
log(L_WARN "Socket error: %s%#m", s->err);
|
1438 |
|
1439 |
sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port); |
1440 |
if (bind(fd, &sa.sa, SA_LEN(sa)) < 0) |
1441 |
ERR2("bind");
|
1442 |
} |
1443 |
|
1444 |
if (s->password)
|
1445 |
if (sk_set_md5_auth(s, s->saddr, s->daddr, s->iface, s->password, 0) < 0) |
1446 |
goto err;
|
1447 |
|
1448 |
switch (s->type)
|
1449 |
{ |
1450 |
case SK_TCP_ACTIVE:
|
1451 |
sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport); |
1452 |
if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0) |
1453 |
sk_tcp_connected(s); |
1454 |
else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS && |
1455 |
errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH) |
1456 |
ERR2("connect");
|
1457 |
break;
|
1458 |
|
1459 |
case SK_TCP_PASSIVE:
|
1460 |
if (listen(fd, 8) < 0) |
1461 |
ERR2("listen");
|
1462 |
break;
|
1463 |
|
1464 |
case SK_SSH_ACTIVE:
|
1465 |
case SK_MAGIC:
|
1466 |
break;
|
1467 |
|
1468 |
default:
|
1469 |
sk_alloc_bufs(s); |
1470 |
} |
1471 |
|
1472 |
if (!(s->flags & SKF_THREAD))
|
1473 |
sk_insert(s); |
1474 |
|
1475 |
return 0; |
1476 |
|
1477 |
err:
|
1478 |
close(fd); |
1479 |
s->fd = -1;
|
1480 |
return -1; |
1481 |
} |
1482 |
|
1483 |
int
|
1484 |
sk_open_unix(sock *s, char *name)
|
1485 |
{ |
1486 |
struct sockaddr_un sa;
|
1487 |
int fd;
|
1488 |
|
1489 |
/* We are sloppy during error (leak fd and not set s->err), but we die anyway */
|
1490 |
|
1491 |
fd = socket(AF_UNIX, SOCK_STREAM, 0);
|
1492 |
if (fd < 0) |
1493 |
return -1; |
1494 |
|
1495 |
if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) |
1496 |
return -1; |
1497 |
|
1498 |
/* Path length checked in test_old_bird() */
|
1499 |
sa.sun_family = AF_UNIX; |
1500 |
strcpy(sa.sun_path, name); |
1501 |
|
1502 |
if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0) |
1503 |
return -1; |
1504 |
|
1505 |
if (listen(fd, 8) < 0) |
1506 |
return -1; |
1507 |
|
1508 |
s->fd = fd; |
1509 |
sk_insert(s); |
1510 |
return 0; |
1511 |
} |
1512 |
|
1513 |
|
1514 |
#define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \
|
1515 |
CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL) |
1516 |
#define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO)
|
1517 |
|
1518 |
static void |
1519 |
sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) |
1520 |
{ |
1521 |
if (sk_is_ipv4(s))
|
1522 |
sk_prepare_cmsgs4(s, msg, cbuf, cbuflen); |
1523 |
else
|
1524 |
sk_prepare_cmsgs6(s, msg, cbuf, cbuflen); |
1525 |
} |
1526 |
|
1527 |
static void |
1528 |
sk_process_cmsgs(sock *s, struct msghdr *msg)
|
1529 |
{ |
1530 |
struct cmsghdr *cm;
|
1531 |
|
1532 |
s->laddr = IPA_NONE; |
1533 |
s->lifindex = 0;
|
1534 |
s->rcv_ttl = -1;
|
1535 |
|
1536 |
for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm)) |
1537 |
{ |
1538 |
if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s))
|
1539 |
{ |
1540 |
sk_process_cmsg4_pktinfo(s, cm); |
1541 |
sk_process_cmsg4_ttl(s, cm); |
1542 |
} |
1543 |
|
1544 |
if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s))
|
1545 |
{ |
1546 |
sk_process_cmsg6_pktinfo(s, cm); |
1547 |
sk_process_cmsg6_ttl(s, cm); |
1548 |
} |
1549 |
} |
1550 |
} |
1551 |
|
1552 |
|
1553 |
static inline int |
1554 |
sk_sendmsg(sock *s) |
1555 |
{ |
1556 |
struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
|
1557 |
byte cmsg_buf[CMSG_TX_SPACE]; |
1558 |
sockaddr dst; |
1559 |
int flags = 0; |
1560 |
|
1561 |
sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport); |
1562 |
|
1563 |
struct msghdr msg = {
|
1564 |
.msg_name = &dst.sa, |
1565 |
.msg_namelen = SA_LEN(dst), |
1566 |
.msg_iov = &iov, |
1567 |
.msg_iovlen = 1
|
1568 |
}; |
1569 |
|
1570 |
#ifdef CONFIG_DONTROUTE_UNICAST
|
1571 |
/* FreeBSD silently changes TTL to 1 when MSG_DONTROUTE is used, therefore we
|
1572 |
cannot use it for other cases (e.g. when TTL security is used). */
|
1573 |
if (ipa_is_ip4(s->daddr) && ip4_is_unicast(ipa_to_ip4(s->daddr)) && (s->ttl == 1)) |
1574 |
flags = MSG_DONTROUTE; |
1575 |
#endif
|
1576 |
|
1577 |
#ifdef CONFIG_USE_HDRINCL
|
1578 |
byte hdr[20];
|
1579 |
struct iovec iov2[2] = { {hdr, 20}, iov }; |
1580 |
|
1581 |
if (s->flags & SKF_HDRINCL)
|
1582 |
{ |
1583 |
sk_prepare_ip_header(s, hdr, iov.iov_len); |
1584 |
msg.msg_iov = iov2; |
1585 |
msg.msg_iovlen = 2;
|
1586 |
} |
1587 |
#endif
|
1588 |
|
1589 |
if (s->flags & SKF_PKTINFO)
|
1590 |
sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
|
1591 |
|
1592 |
return sendmsg(s->fd, &msg, flags);
|
1593 |
} |
1594 |
|
1595 |
static inline int |
1596 |
sk_recvmsg(sock *s) |
1597 |
{ |
1598 |
struct iovec iov = {s->rbuf, s->rbsize};
|
1599 |
byte cmsg_buf[CMSG_RX_SPACE]; |
1600 |
sockaddr src; |
1601 |
|
1602 |
struct msghdr msg = {
|
1603 |
.msg_name = &src.sa, |
1604 |
.msg_namelen = sizeof(src), // XXXX ?? |
1605 |
.msg_iov = &iov, |
1606 |
.msg_iovlen = 1,
|
1607 |
.msg_control = cmsg_buf, |
1608 |
.msg_controllen = sizeof(cmsg_buf),
|
1609 |
.msg_flags = 0
|
1610 |
}; |
1611 |
|
1612 |
int rv = recvmsg(s->fd, &msg, 0); |
1613 |
if (rv < 0) |
1614 |
return rv;
|
1615 |
|
1616 |
//ifdef IPV4
|
1617 |
// if (cf_type == SK_IP)
|
1618 |
// rv = ipv4_skip_header(pbuf, rv);
|
1619 |
//endif
|
1620 |
|
1621 |
sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport);
|
1622 |
sk_process_cmsgs(s, &msg); |
1623 |
|
1624 |
if (msg.msg_flags & MSG_TRUNC)
|
1625 |
s->flags |= SKF_TRUNCATED; |
1626 |
else
|
1627 |
s->flags &= ~SKF_TRUNCATED; |
1628 |
|
1629 |
return rv;
|
1630 |
} |
1631 |
|
1632 |
|
1633 |
static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; } |
1634 |
|
1635 |
static int |
1636 |
sk_maybe_write(sock *s) |
1637 |
{ |
1638 |
int e;
|
1639 |
|
1640 |
switch (s->type)
|
1641 |
{ |
1642 |
case SK_TCP:
|
1643 |
case SK_MAGIC:
|
1644 |
case SK_UNIX:
|
1645 |
while (s->ttx != s->tpos)
|
1646 |
{ |
1647 |
e = write(s->fd, s->ttx, s->tpos - s->ttx); |
1648 |
|
1649 |
if (e < 0) |
1650 |
{ |
1651 |
if (errno != EINTR && errno != EAGAIN)
|
1652 |
{ |
1653 |
reset_tx_buffer(s); |
1654 |
/* EPIPE is just a connection close notification during TX */
|
1655 |
s->err_hook(s, (errno != EPIPE) ? errno : 0);
|
1656 |
return -1; |
1657 |
} |
1658 |
return 0; |
1659 |
} |
1660 |
s->ttx += e; |
1661 |
} |
1662 |
reset_tx_buffer(s); |
1663 |
return 1; |
1664 |
|
1665 |
#ifdef HAVE_LIBSSH
|
1666 |
case SK_SSH:
|
1667 |
while (s->ttx != s->tpos)
|
1668 |
{ |
1669 |
e = ssh_channel_write(s->ssh->channel, s->ttx, s->tpos - s->ttx); |
1670 |
|
1671 |
if (e < 0) |
1672 |
{ |
1673 |
s->err = ssh_get_error(s->ssh->session); |
1674 |
s->err_hook(s, ssh_get_error_code(s->ssh->session)); |
1675 |
|
1676 |
reset_tx_buffer(s); |
1677 |
/* EPIPE is just a connection close notification during TX */
|
1678 |
s->err_hook(s, (errno != EPIPE) ? errno : 0);
|
1679 |
return -1; |
1680 |
} |
1681 |
s->ttx += e; |
1682 |
} |
1683 |
reset_tx_buffer(s); |
1684 |
return 1; |
1685 |
#endif
|
1686 |
|
1687 |
case SK_UDP:
|
1688 |
case SK_IP:
|
1689 |
{ |
1690 |
if (s->tbuf == s->tpos)
|
1691 |
return 1; |
1692 |
|
1693 |
e = sk_sendmsg(s); |
1694 |
|
1695 |
if (e < 0) |
1696 |
{ |
1697 |
if (errno != EINTR && errno != EAGAIN)
|
1698 |
{ |
1699 |
reset_tx_buffer(s); |
1700 |
s->err_hook(s, errno); |
1701 |
return -1; |
1702 |
} |
1703 |
|
1704 |
if (!s->tx_hook)
|
1705 |
reset_tx_buffer(s); |
1706 |
return 0; |
1707 |
} |
1708 |
reset_tx_buffer(s); |
1709 |
return 1; |
1710 |
} |
1711 |
|
1712 |
default:
|
1713 |
bug("sk_maybe_write: unknown socket type %d", s->type);
|
1714 |
} |
1715 |
} |
1716 |
|
1717 |
int
|
1718 |
sk_rx_ready(sock *s) |
1719 |
{ |
1720 |
int rv;
|
1721 |
struct pollfd pfd = { .fd = s->fd };
|
1722 |
pfd.events |= POLLIN; |
1723 |
|
1724 |
redo:
|
1725 |
rv = poll(&pfd, 1, 0); |
1726 |
|
1727 |
if ((rv < 0) && (errno == EINTR || errno == EAGAIN)) |
1728 |
goto redo;
|
1729 |
|
1730 |
return rv;
|
1731 |
} |
1732 |
|
1733 |
/**
|
1734 |
* sk_send - send data to a socket
|
1735 |
* @s: socket
|
1736 |
* @len: number of bytes to send
|
1737 |
*
|
1738 |
* This function sends @len bytes of data prepared in the
|
1739 |
* transmit buffer of the socket @s to the network connection.
|
1740 |
* If the packet can be sent immediately, it does so and returns
|
1741 |
* 1, else it queues the packet for later processing, returns 0
|
1742 |
* and calls the @tx_hook of the socket when the tranmission
|
1743 |
* takes place.
|
1744 |
*/
|
1745 |
int
|
1746 |
sk_send(sock *s, unsigned len)
|
1747 |
{ |
1748 |
s->ttx = s->tbuf; |
1749 |
s->tpos = s->tbuf + len; |
1750 |
return sk_maybe_write(s);
|
1751 |
} |
1752 |
|
1753 |
/**
|
1754 |
* sk_send_to - send data to a specific destination
|
1755 |
* @s: socket
|
1756 |
* @len: number of bytes to send
|
1757 |
* @addr: IP address to send the packet to
|
1758 |
* @port: port to send the packet to
|
1759 |
*
|
1760 |
* This is a sk_send() replacement for connection-less packet sockets
|
1761 |
* which allows destination of the packet to be chosen dynamically.
|
1762 |
* Raw IP sockets should use 0 for @port.
|
1763 |
*/
|
1764 |
int
|
1765 |
sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port) |
1766 |
{ |
1767 |
s->daddr = addr; |
1768 |
if (port)
|
1769 |
s->dport = port; |
1770 |
|
1771 |
s->ttx = s->tbuf; |
1772 |
s->tpos = s->tbuf + len; |
1773 |
return sk_maybe_write(s);
|
1774 |
} |
1775 |
|
1776 |
/*
|
1777 |
int
|
1778 |
sk_send_full(sock *s, unsigned len, struct iface *ifa,
|
1779 |
ip_addr saddr, ip_addr daddr, unsigned dport)
|
1780 |
{
|
1781 |
s->iface = ifa;
|
1782 |
s->saddr = saddr;
|
1783 |
s->daddr = daddr;
|
1784 |
s->dport = dport;
|
1785 |
s->ttx = s->tbuf;
|
1786 |
s->tpos = s->tbuf + len;
|
1787 |
return sk_maybe_write(s);
|
1788 |
}
|
1789 |
*/
|
1790 |
|
1791 |
static void |
1792 |
call_rx_hook(sock *s, int size)
|
1793 |
{ |
1794 |
if (s->rx_hook(s, size))
|
1795 |
{ |
1796 |
/* We need to be careful since the socket could have been deleted by the hook */
|
1797 |
if (current_sock == s)
|
1798 |
s->rpos = s->rbuf; |
1799 |
} |
1800 |
} |
1801 |
|
1802 |
#ifdef HAVE_LIBSSH
|
1803 |
static int |
1804 |
sk_read_ssh(sock *s) |
1805 |
{ |
1806 |
ssh_channel rchans[2] = { s->ssh->channel, NULL }; |
1807 |
struct timeval timev = { 1, 0 }; |
1808 |
|
1809 |
if (ssh_channel_select(rchans, NULL, NULL, &timev) == SSH_EINTR) |
1810 |
return 1; /* Try again */ |
1811 |
|
1812 |
if (ssh_channel_is_eof(s->ssh->channel) != 0) |
1813 |
{ |
1814 |
/* The remote side is closing the connection */
|
1815 |
s->err_hook(s, 0);
|
1816 |
return 0; |
1817 |
} |
1818 |
|
1819 |
if (rchans[0] == NULL) |
1820 |
return 0; /* No data is available on the socket */ |
1821 |
|
1822 |
const uint used_bytes = s->rpos - s->rbuf;
|
1823 |
const int read_bytes = ssh_channel_read_nonblocking(s->ssh->channel, s->rpos, s->rbsize - used_bytes, 0); |
1824 |
if (read_bytes > 0) |
1825 |
{ |
1826 |
/* Received data */
|
1827 |
s->rpos += read_bytes; |
1828 |
call_rx_hook(s, used_bytes + read_bytes); |
1829 |
return 1; |
1830 |
} |
1831 |
else if (read_bytes == 0) |
1832 |
{ |
1833 |
if (ssh_channel_is_eof(s->ssh->channel) != 0) |
1834 |
{ |
1835 |
/* The remote side is closing the connection */
|
1836 |
s->err_hook(s, 0);
|
1837 |
} |
1838 |
} |
1839 |
else
|
1840 |
{ |
1841 |
s->err = ssh_get_error(s->ssh->session); |
1842 |
s->err_hook(s, ssh_get_error_code(s->ssh->session)); |
1843 |
} |
1844 |
|
1845 |
return 0; /* No data is available on the socket */ |
1846 |
} |
1847 |
#endif
|
1848 |
|
1849 |
/* sk_read() and sk_write() are called from BFD's event loop */
|
1850 |
|
1851 |
int
|
1852 |
sk_read(sock *s, int revents)
|
1853 |
{ |
1854 |
switch (s->type)
|
1855 |
{ |
1856 |
case SK_TCP_PASSIVE:
|
1857 |
return sk_passive_connected(s, SK_TCP);
|
1858 |
|
1859 |
case SK_UNIX_PASSIVE:
|
1860 |
return sk_passive_connected(s, SK_UNIX);
|
1861 |
|
1862 |
case SK_TCP:
|
1863 |
case SK_UNIX:
|
1864 |
{ |
1865 |
int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
|
1866 |
|
1867 |
if (c < 0) |
1868 |
{ |
1869 |
if (errno != EINTR && errno != EAGAIN)
|
1870 |
s->err_hook(s, errno); |
1871 |
else if (errno == EAGAIN && !(revents & POLLIN)) |
1872 |
{ |
1873 |
log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents);
|
1874 |
s->err_hook(s, 0);
|
1875 |
} |
1876 |
} |
1877 |
else if (!c) |
1878 |
s->err_hook(s, 0);
|
1879 |
else
|
1880 |
{ |
1881 |
s->rpos += c; |
1882 |
call_rx_hook(s, s->rpos - s->rbuf); |
1883 |
return 1; |
1884 |
} |
1885 |
return 0; |
1886 |
} |
1887 |
|
1888 |
#ifdef HAVE_LIBSSH
|
1889 |
case SK_SSH:
|
1890 |
return sk_read_ssh(s);
|
1891 |
#endif
|
1892 |
|
1893 |
case SK_MAGIC:
|
1894 |
return s->rx_hook(s, 0); |
1895 |
|
1896 |
default:
|
1897 |
{ |
1898 |
int e = sk_recvmsg(s);
|
1899 |
|
1900 |
if (e < 0) |
1901 |
{ |
1902 |
if (errno != EINTR && errno != EAGAIN)
|
1903 |
s->err_hook(s, errno); |
1904 |
return 0; |
1905 |
} |
1906 |
|
1907 |
s->rpos = s->rbuf + e; |
1908 |
s->rx_hook(s, e); |
1909 |
return 1; |
1910 |
} |
1911 |
} |
1912 |
} |
1913 |
|
1914 |
int
|
1915 |
sk_write(sock *s) |
1916 |
{ |
1917 |
switch (s->type)
|
1918 |
{ |
1919 |
case SK_TCP_ACTIVE:
|
1920 |
{ |
1921 |
sockaddr sa; |
1922 |
sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport); |
1923 |
|
1924 |
if (connect(s->fd, &sa.sa, SA_LEN(sa)) >= 0 || errno == EISCONN) |
1925 |
sk_tcp_connected(s); |
1926 |
else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS) |
1927 |
s->err_hook(s, errno); |
1928 |
return 0; |
1929 |
} |
1930 |
|
1931 |
#ifdef HAVE_LIBSSH
|
1932 |
case SK_SSH_ACTIVE:
|
1933 |
{ |
1934 |
switch (sk_ssh_connect(s))
|
1935 |
{ |
1936 |
case SSH_OK:
|
1937 |
sk_ssh_connected(s); |
1938 |
break;
|
1939 |
|
1940 |
case SSH_AGAIN:
|
1941 |
return 1; |
1942 |
|
1943 |
case SSH_ERROR:
|
1944 |
s->err = ssh_get_error(s->ssh->session); |
1945 |
s->err_hook(s, ssh_get_error_code(s->ssh->session)); |
1946 |
break;
|
1947 |
} |
1948 |
return 0; |
1949 |
} |
1950 |
#endif
|
1951 |
|
1952 |
default:
|
1953 |
if (s->ttx != s->tpos && sk_maybe_write(s) > 0) |
1954 |
{ |
1955 |
if (s->tx_hook)
|
1956 |
s->tx_hook(s); |
1957 |
return 1; |
1958 |
} |
1959 |
return 0; |
1960 |
} |
1961 |
} |
1962 |
|
1963 |
int sk_is_ipv4(sock *s)
|
1964 |
{ return s->af == AF_INET; }
|
1965 |
|
1966 |
int sk_is_ipv6(sock *s)
|
1967 |
{ return s->af == AF_INET6; }
|
1968 |
|
1969 |
void
|
1970 |
sk_err(sock *s, int revents)
|
1971 |
{ |
1972 |
int se = 0, sse = sizeof(se); |
1973 |
if ((s->type != SK_MAGIC) && (revents & POLLERR))
|
1974 |
if (getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &se, &sse) < 0) |
1975 |
{ |
1976 |
log(L_ERR "IO: Socket error: SO_ERROR: %m");
|
1977 |
se = 0;
|
1978 |
} |
1979 |
|
1980 |
s->err_hook(s, se); |
1981 |
} |
1982 |
|
1983 |
void
|
1984 |
sk_dump_all(void)
|
1985 |
{ |
1986 |
node *n; |
1987 |
sock *s; |
1988 |
|
1989 |
debug("Open sockets:\n");
|
1990 |
WALK_LIST(n, sock_list) |
1991 |
{ |
1992 |
s = SKIP_BACK(sock, n, n); |
1993 |
debug("%p ", s);
|
1994 |
sk_dump(&s->r); |
1995 |
} |
1996 |
debug("\n");
|
1997 |
} |
1998 |
|
1999 |
|
2000 |
/*
|
2001 |
* Internal event log and watchdog
|
2002 |
*/
|
2003 |
|
2004 |
#define EVENT_LOG_LENGTH 32 |
2005 |
|
2006 |
struct event_log_entry
|
2007 |
{ |
2008 |
void *hook;
|
2009 |
void *data;
|
2010 |
btime timestamp; |
2011 |
btime duration; |
2012 |
}; |
2013 |
|
2014 |
static struct event_log_entry event_log[EVENT_LOG_LENGTH]; |
2015 |
static struct event_log_entry *event_open; |
2016 |
static int event_log_pos, event_log_num, watchdog_active; |
2017 |
static btime last_time;
|
2018 |
static btime loop_time;
|
2019 |
|
2020 |
static void |
2021 |
io_update_time(void)
|
2022 |
{ |
2023 |
struct timespec ts;
|
2024 |
int rv;
|
2025 |
|
2026 |
/*
|
2027 |
* This is third time-tracking procedure (after update_times() above and
|
2028 |
* times_update() in BFD), dedicated to internal event log and latency
|
2029 |
* tracking. Hopefully, we consolidate these sometimes.
|
2030 |
*/
|
2031 |
|
2032 |
rv = clock_gettime(CLOCK_MONOTONIC, &ts); |
2033 |
if (rv < 0) |
2034 |
die("clock_gettime: %m");
|
2035 |
|
2036 |
last_time = ts.tv_sec S + ts.tv_nsec NS; |
2037 |
|
2038 |
if (event_open)
|
2039 |
{ |
2040 |
event_open->duration = last_time - event_open->timestamp; |
2041 |
|
2042 |
if (event_open->duration > config->latency_limit)
|
2043 |
log(L_WARN "Event 0x%p 0x%p took %d ms",
|
2044 |
event_open->hook, event_open->data, (int) (event_open->duration TO_MS));
|
2045 |
|
2046 |
event_open = NULL;
|
2047 |
} |
2048 |
} |
2049 |
|
2050 |
/**
|
2051 |
* io_log_event - mark approaching event into event log
|
2052 |
* @hook: event hook address
|
2053 |
* @data: event data address
|
2054 |
*
|
2055 |
* Store info (hook, data, timestamp) about the following internal event into
|
2056 |
* a circular event log (@event_log). When latency tracking is enabled, the log
|
2057 |
* entry is kept open (in @event_open) so the duration can be filled later.
|
2058 |
*/
|
2059 |
void
|
2060 |
io_log_event(void *hook, void *data) |
2061 |
{ |
2062 |
if (config->latency_debug)
|
2063 |
io_update_time(); |
2064 |
|
2065 |
struct event_log_entry *en = event_log + event_log_pos;
|
2066 |
|
2067 |
en->hook = hook; |
2068 |
en->data = data; |
2069 |
en->timestamp = last_time; |
2070 |
en->duration = 0;
|
2071 |
|
2072 |
event_log_num++; |
2073 |
event_log_pos++; |
2074 |
event_log_pos %= EVENT_LOG_LENGTH; |
2075 |
|
2076 |
event_open = config->latency_debug ? en : NULL;
|
2077 |
} |
2078 |
|
2079 |
static inline void |
2080 |
io_close_event(void)
|
2081 |
{ |
2082 |
if (event_open)
|
2083 |
io_update_time(); |
2084 |
} |
2085 |
|
2086 |
void
|
2087 |
io_log_dump(void)
|
2088 |
{ |
2089 |
int i;
|
2090 |
|
2091 |
log(L_DEBUG "Event log:");
|
2092 |
for (i = 0; i < EVENT_LOG_LENGTH; i++) |
2093 |
{ |
2094 |
struct event_log_entry *en = event_log + (event_log_pos + i) % EVENT_LOG_LENGTH;
|
2095 |
if (en->hook)
|
2096 |
log(L_DEBUG " Event 0x%p 0x%p at %8d for %d ms", en->hook, en->data,
|
2097 |
(int) ((last_time - en->timestamp) TO_MS), (int) (en->duration TO_MS)); |
2098 |
} |
2099 |
} |
2100 |
|
2101 |
void
|
2102 |
watchdog_sigalrm(int sig UNUSED)
|
2103 |
{ |
2104 |
/* Update last_time and duration, but skip latency check */
|
2105 |
config->latency_limit = 0xffffffff;
|
2106 |
io_update_time(); |
2107 |
|
2108 |
/* We want core dump */
|
2109 |
abort(); |
2110 |
} |
2111 |
|
2112 |
static inline void |
2113 |
watchdog_start1(void)
|
2114 |
{ |
2115 |
io_update_time(); |
2116 |
|
2117 |
loop_time = last_time; |
2118 |
} |
2119 |
|
2120 |
static inline void |
2121 |
watchdog_start(void)
|
2122 |
{ |
2123 |
io_update_time(); |
2124 |
|
2125 |
loop_time = last_time; |
2126 |
event_log_num = 0;
|
2127 |
|
2128 |
if (config->watchdog_timeout)
|
2129 |
{ |
2130 |
alarm(config->watchdog_timeout); |
2131 |
watchdog_active = 1;
|
2132 |
} |
2133 |
} |
2134 |
|
2135 |
static inline void |
2136 |
watchdog_stop(void)
|
2137 |
{ |
2138 |
io_update_time(); |
2139 |
|
2140 |
if (watchdog_active)
|
2141 |
{ |
2142 |
alarm(0);
|
2143 |
watchdog_active = 0;
|
2144 |
} |
2145 |
|
2146 |
btime duration = last_time - loop_time; |
2147 |
if (duration > config->watchdog_warning)
|
2148 |
log(L_WARN "I/O loop cycle took %d ms for %d events",
|
2149 |
(int) (duration TO_MS), event_log_num);
|
2150 |
} |
2151 |
|
2152 |
|
2153 |
/*
|
2154 |
* Main I/O Loop
|
2155 |
*/
|
2156 |
|
2157 |
volatile int async_config_flag; /* Asynchronous reconfiguration/dump scheduled */ |
2158 |
volatile int async_dump_flag; |
2159 |
volatile int async_shutdown_flag; |
2160 |
|
2161 |
void
|
2162 |
io_init(void)
|
2163 |
{ |
2164 |
init_list(&sock_list); |
2165 |
init_list(&global_event_list); |
2166 |
krt_io_init(); |
2167 |
// XXX init_times();
|
2168 |
// XXX update_times();
|
2169 |
boot_time = current_time(); |
2170 |
|
2171 |
u64 now = (u64) current_real_time(); |
2172 |
srandom((uint) (now ^ (now >> 32)));
|
2173 |
} |
2174 |
|
2175 |
static int short_loops = 0; |
2176 |
#define SHORT_LOOP_MAX 10 |
2177 |
|
2178 |
void
|
2179 |
io_loop(void)
|
2180 |
{ |
2181 |
int poll_tout, timeout;
|
2182 |
int nfds, events, pout;
|
2183 |
timer *t; |
2184 |
sock *s; |
2185 |
node *n; |
2186 |
int fdmax = 256; |
2187 |
struct pollfd *pfd = xmalloc(fdmax * sizeof(struct pollfd)); |
2188 |
|
2189 |
watchdog_start1(); |
2190 |
for(;;)
|
2191 |
{ |
2192 |
times_update(&main_timeloop); |
2193 |
events = ev_run_list(&global_event_list); |
2194 |
timers_fire(&main_timeloop); |
2195 |
io_close_event(); |
2196 |
|
2197 |
// FIXME
|
2198 |
poll_tout = (events ? 0 : 3000); /* Time in milliseconds */ |
2199 |
if (t = timers_first(&main_timeloop))
|
2200 |
{ |
2201 |
times_update(&main_timeloop); |
2202 |
timeout = (tm_remains(t) TO_MS) + 1;
|
2203 |
poll_tout = MIN(poll_tout, timeout); |
2204 |
} |
2205 |
|
2206 |
nfds = 0;
|
2207 |
WALK_LIST(n, sock_list) |
2208 |
{ |
2209 |
pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */ |
2210 |
s = SKIP_BACK(sock, n, n); |
2211 |
if (s->rx_hook)
|
2212 |
{ |
2213 |
pfd[nfds].fd = s->fd; |
2214 |
pfd[nfds].events |= POLLIN; |
2215 |
} |
2216 |
if (s->tx_hook && s->ttx != s->tpos)
|
2217 |
{ |
2218 |
pfd[nfds].fd = s->fd; |
2219 |
pfd[nfds].events |= POLLOUT; |
2220 |
} |
2221 |
if (pfd[nfds].fd != -1) |
2222 |
{ |
2223 |
s->index = nfds; |
2224 |
nfds++; |
2225 |
} |
2226 |
else
|
2227 |
s->index = -1;
|
2228 |
|
2229 |
if (nfds >= fdmax)
|
2230 |
{ |
2231 |
fdmax *= 2;
|
2232 |
pfd = xrealloc(pfd, fdmax * sizeof(struct pollfd)); |
2233 |
} |
2234 |
} |
2235 |
|
2236 |
/*
|
2237 |
* Yes, this is racy. But even if the signal comes before this test
|
2238 |
* and entering poll(), it gets caught on the next timer tick.
|
2239 |
*/
|
2240 |
|
2241 |
if (async_config_flag)
|
2242 |
{ |
2243 |
io_log_event(async_config, NULL);
|
2244 |
async_config(); |
2245 |
async_config_flag = 0;
|
2246 |
continue;
|
2247 |
} |
2248 |
if (async_dump_flag)
|
2249 |
{ |
2250 |
io_log_event(async_dump, NULL);
|
2251 |
async_dump(); |
2252 |
async_dump_flag = 0;
|
2253 |
continue;
|
2254 |
} |
2255 |
if (async_shutdown_flag)
|
2256 |
{ |
2257 |
io_log_event(async_shutdown, NULL);
|
2258 |
async_shutdown(); |
2259 |
async_shutdown_flag = 0;
|
2260 |
continue;
|
2261 |
} |
2262 |
|
2263 |
/* And finally enter poll() to find active sockets */
|
2264 |
watchdog_stop(); |
2265 |
pout = poll(pfd, nfds, poll_tout); |
2266 |
watchdog_start(); |
2267 |
|
2268 |
if (pout < 0) |
2269 |
{ |
2270 |
if (errno == EINTR || errno == EAGAIN)
|
2271 |
continue;
|
2272 |
die("poll: %m");
|
2273 |
} |
2274 |
if (pout)
|
2275 |
{ |
2276 |
times_update(&main_timeloop); |
2277 |
|
2278 |
/* guaranteed to be non-empty */
|
2279 |
current_sock = SKIP_BACK(sock, n, HEAD(sock_list)); |
2280 |
|
2281 |
while (current_sock)
|
2282 |
{ |
2283 |
sock *s = current_sock; |
2284 |
if (s->index == -1) |
2285 |
{ |
2286 |
current_sock = sk_next(s); |
2287 |
goto next;
|
2288 |
} |
2289 |
|
2290 |
int e;
|
2291 |
int steps;
|
2292 |
|
2293 |
steps = MAX_STEPS; |
2294 |
if (s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
|
2295 |
do
|
2296 |
{ |
2297 |
steps--; |
2298 |
io_log_event(s->rx_hook, s->data); |
2299 |
e = sk_read(s, pfd[s->index].revents); |
2300 |
if (s != current_sock)
|
2301 |
goto next;
|
2302 |
} |
2303 |
while (e && s->rx_hook && steps);
|
2304 |
|
2305 |
steps = MAX_STEPS; |
2306 |
if (pfd[s->index].revents & POLLOUT)
|
2307 |
do
|
2308 |
{ |
2309 |
steps--; |
2310 |
io_log_event(s->tx_hook, s->data); |
2311 |
e = sk_write(s); |
2312 |
if (s != current_sock)
|
2313 |
goto next;
|
2314 |
} |
2315 |
while (e && steps);
|
2316 |
|
2317 |
current_sock = sk_next(s); |
2318 |
next: ;
|
2319 |
} |
2320 |
|
2321 |
short_loops++; |
2322 |
if (events && (short_loops < SHORT_LOOP_MAX))
|
2323 |
continue;
|
2324 |
short_loops = 0;
|
2325 |
|
2326 |
int count = 0; |
2327 |
current_sock = stored_sock; |
2328 |
if (current_sock == NULL) |
2329 |
current_sock = SKIP_BACK(sock, n, HEAD(sock_list)); |
2330 |
|
2331 |
while (current_sock && count < MAX_RX_STEPS)
|
2332 |
{ |
2333 |
sock *s = current_sock; |
2334 |
if (s->index == -1) |
2335 |
{ |
2336 |
current_sock = sk_next(s); |
2337 |
goto next2;
|
2338 |
} |
2339 |
|
2340 |
if (!s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
|
2341 |
{ |
2342 |
count++; |
2343 |
io_log_event(s->rx_hook, s->data); |
2344 |
sk_read(s, pfd[s->index].revents); |
2345 |
if (s != current_sock)
|
2346 |
goto next2;
|
2347 |
} |
2348 |
|
2349 |
if (pfd[s->index].revents & (POLLHUP | POLLERR))
|
2350 |
{ |
2351 |
sk_err(s, pfd[s->index].revents); |
2352 |
if (s != current_sock)
|
2353 |
goto next2;
|
2354 |
} |
2355 |
|
2356 |
current_sock = sk_next(s); |
2357 |
next2: ;
|
2358 |
} |
2359 |
|
2360 |
|
2361 |
stored_sock = current_sock; |
2362 |
} |
2363 |
} |
2364 |
} |
2365 |
|
2366 |
void
|
2367 |
test_old_bird(char *path)
|
2368 |
{ |
2369 |
int fd;
|
2370 |
struct sockaddr_un sa;
|
2371 |
|
2372 |
fd = socket(AF_UNIX, SOCK_STREAM, 0);
|
2373 |
if (fd < 0) |
2374 |
die("Cannot create socket: %m");
|
2375 |
if (strlen(path) >= sizeof(sa.sun_path)) |
2376 |
die("Socket path too long");
|
2377 |
bzero(&sa, sizeof(sa));
|
2378 |
sa.sun_family = AF_UNIX; |
2379 |
strcpy(sa.sun_path, path); |
2380 |
if (connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) == 0) |
2381 |
die("I found another BIRD running.");
|
2382 |
close(fd); |
2383 |
} |