
mininet / util / sch_htb-ofbuf / sch_htb.c @ 9c6620d8


1
#define OFBUF (1)
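/* OFBUF is the compile-time switch for the overflow-buffer variant of HTB
 * used by Mininet: when enabled, packets that a leaf qdisc refuses in
 * htb_enqueue() are parked on a per-qdisc overflow list (q->ofbuf) instead
 * of being dropped, and htb_dequeue() later re-enqueues one of them; see
 * struct htb_sched, htb_enqueue() and htb_dequeue() below.
 */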
2
/*
3
 * net/sched/sch_htb.c        Hierarchical token bucket, feed tree version
4
 *
5
 *                This program is free software; you can redistribute it and/or
6
 *                modify it under the terms of the GNU General Public License
7
 *                as published by the Free Software Foundation; either version
8
 *                2 of the License, or (at your option) any later version.
9
 *
10
 * Authors:        Martin Devera, <devik@cdi.cz>
11
 *
12
 * Credits (in time order) for older HTB versions:
13
 *              Stef Coene <stef.coene@docum.org>
14
 *                        HTB support at LARTC mailing list
15
 *                Ondrej Kraus, <krauso@barr.cz>
16
 *                        found missing INIT_QDISC(htb)
17
 *                Vladimir Smelhaus, Aamer Akhter, Bert Hubert
18
 *                        helped a lot to locate nasty class stall bug
19
 *                Andi Kleen, Jamal Hadi, Bert Hubert
20
 *                        code review and helpful comments on shaping
21
 *                Tomasz Wrona, <tw@eter.tym.pl>
22
 *                        created test case so that I was able to fix nasty bug
23
 *                Wilfried Weissmann
24
 *                        spotted bug in dequeue code and helped with fix
25
 *                Jiri Fojtasek
26
 *                        fixed requeue routine
27
 *                and many others. thanks.
28
 */
29
#include <linux/module.h>
30
#include <linux/moduleparam.h>
31
#include <linux/types.h>
32
#include <linux/kernel.h>
33
#include <linux/string.h>
34
#include <linux/errno.h>
35
#include <linux/skbuff.h>
36
#include <linux/list.h>
37
#include <linux/compiler.h>
38
#include <linux/rbtree.h>
39
#include <linux/workqueue.h>
40
#include <linux/slab.h>
41
#include <net/netlink.h>
42
#include <net/pkt_sched.h>
43

    
44
/* HTB algorithm.
45
    Author: devik@cdi.cz
46
    ========================================================================
47
    HTB is like TBF with multiple classes. It is also similar to CBQ because
48
    it allows assigning a priority to each class in the hierarchy.
49
    In fact it is another implementation of Floyd's formal sharing.
50

51
    Levels:
52
    Each class is assigned a level. A leaf ALWAYS has level 0 and root
53
    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
54
    one less than their parent.
55
*/
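/* Illustration only (device name and classids assumed): such a hierarchy is
 * normally built from user space with the tc tool, e.g.
 *
 *   tc qdisc add dev eth0 root handle 1: htb default 20
 *   tc class add dev eth0 parent 1:  classid 1:1  htb rate 10mbit ceil 10mbit
 *   tc class add dev eth0 parent 1:1 classid 1:10 htb rate 6mbit  ceil 10mbit
 *   tc class add dev eth0 parent 1:1 classid 1:20 htb rate 4mbit  ceil 10mbit
 *
 * Here 1:10 and 1:20 are level-0 leaves that may borrow up to their ceil
 * from parent 1:1, and unclassified traffic falls back to default class 1:20.
 */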
56

    
57
static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
58
#define HTB_VER 0x30011                /* major must be matched with number supplied by TC as version */
59

    
60
#if HTB_VER >> 16 != TC_HTB_PROTOVER
61
#error "Mismatched sch_htb.c and pkt_sch.h"
62
#endif
63

    
64
/* Module parameter and sysfs export */
65
module_param    (htb_hysteresis, int, 0640);
66
MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
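/* Illustration only: with mode 0640 the parameter is also visible in sysfs,
 * so hysteresis can be toggled at runtime with e.g.
 *   echo 1 > /sys/module/sch_htb/parameters/htb_hysteresis
 * (path assumes the scheduler is built as the usual sch_htb module).
 */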
67

    
68
/* used internally to keep the status of a single class */
69
enum htb_cmode {
70
        HTB_CANT_SEND,                /* class can't send and can't borrow */
71
        HTB_MAY_BORROW,                /* class can't send but may borrow */
72
        HTB_CAN_SEND                /* class can send */
73
};
74

    
75
/* interior & leaf nodes; props specific to leaves are marked L: */
76
struct htb_class {
77
        struct Qdisc_class_common common;
78
        /* general class parameters */
79
        struct gnet_stats_basic_packed bstats;
80
        struct gnet_stats_queue qstats;
81
        struct gnet_stats_rate_est rate_est;
82
        struct tc_htb_xstats xstats;        /* our special stats */
83
        int refcnt;                /* usage count of this class */
84

    
85
        /* topology */
86
        int level;                /* our level (see above) */
87
        unsigned int children;
88
        struct htb_class *parent;        /* parent class */
89

    
90
        int prio;                /* these two are used only by leaves... */
91
        int quantum;                /* but stored for parent-to-leaf return */
92

    
93
        union {
94
                struct htb_class_leaf {
95
                        struct Qdisc *q;
96
                        int deficit[TC_HTB_MAXDEPTH];
97
                        struct list_head drop_list;
98
                } leaf;
99
                struct htb_class_inner {
100
                        struct rb_root feed[TC_HTB_NUMPRIO];        /* feed trees */
101
                        struct rb_node *ptr[TC_HTB_NUMPRIO];        /* current class ptr */
102
                        /* When class changes from state 1->2 and disconnects from
103
                         * parent's feed then we lose the ptr value and start from the
104
                         * first child again. Here we store classid of the
105
                         * last valid ptr (used when ptr is NULL).
106
                         */
107
                        u32 last_ptr_id[TC_HTB_NUMPRIO];
108
                } inner;
109
        } un;
110
        struct rb_node node[TC_HTB_NUMPRIO];        /* node for self or feed tree */
111
        struct rb_node pq_node;        /* node for event queue */
112
        psched_time_t pq_key;
113

    
114
        int prio_activity;        /* for which prios are we active */
115
        enum htb_cmode cmode;        /* current mode of the class */
116

    
117
        /* class attached filters */
118
        struct tcf_proto *filter_list;
119
        int filter_cnt;
120

    
121
        /* token bucket parameters */
122
        struct qdisc_rate_table *rate;        /* rate table of the class itself */
123
        struct qdisc_rate_table *ceil;        /* ceiling rate (limits borrows too) */
124
        long buffer, cbuffer;        /* token bucket depth/rate */
125
        psched_tdiff_t mbuffer;        /* max wait time */
126
        long tokens, ctokens;        /* current number of tokens */
127
        psched_time_t t_c;        /* checkpoint time */
128
};
129

    
130
struct htb_sched {
131
        struct Qdisc_class_hash clhash;
132
        struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
133

    
134
        /* self list - roots of self generating tree */
135
        struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
136
        int row_mask[TC_HTB_MAXDEPTH];
137
        struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
138
        u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
139

    
140
        /* self wait list - roots of wait PQs per row */
141
        struct rb_root wait_pq[TC_HTB_MAXDEPTH];
142

    
143
        /* time of nearest event per level (row) */
144
        psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
145

    
146
        int defcls;                /* class where unclassified flows go to */
147

    
148
        /* filters for qdisc itself */
149
        struct tcf_proto *filter_list;
150

    
151
        int rate2quantum;        /* quant = rate / rate2quantum */
152
        psched_time_t now;        /* cached dequeue time */
153
        struct qdisc_watchdog watchdog;
154

    
155
        /* non-shaped skbs; let them go directly through */
156
        struct sk_buff_head direct_queue;
157
        int direct_qlen;        /* max qlen of above */
158

    
159
        long direct_pkts;
160

    
161
#if OFBUF
162
        /* overflow buffer */
163
        struct sk_buff_head ofbuf;
164
        int ofbuf_queued;        /* # packets queued in above */
165
#endif
166

    
167
#define HTB_WARN_TOOMANYEVENTS        0x1
168
        unsigned int warned;        /* only one warning */
169
        struct work_struct work;
170
};
171

    
172
/* find class in global hash table using given handle */
173
static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
174
{
175
        struct htb_sched *q = qdisc_priv(sch);
176
        struct Qdisc_class_common *clc;
177

    
178
        clc = qdisc_class_find(&q->clhash, handle);
179
        if (clc == NULL)
180
                return NULL;
181
        return container_of(clc, struct htb_class, common);
182
}
183

    
184
/**
185
 * htb_classify - classify a packet into class
186
 *
187
 * It returns NULL if the packet should be dropped or -1 if the packet
188
 * should be passed directly through. In all other cases the leaf class is returned.
189
 * We allow direct class selection by classid in priority. Then we examine
190
 * filters in qdisc and in inner nodes (if higher filter points to the inner
191
 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
192
 * internal fifo (direct). These packets then go directly through. If we still
193
 * have no valid leaf we try to use the MAJOR:default leaf. If that is still
194
 * unsuccessful, we finish and return the direct queue.
195
 */
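/* Example of the order described above (handles assumed for illustration):
 * with qdisc 1: and default class 20, skb->priority == 1:0 selects
 * HTB_DIRECT, a priority naming an existing leaf such as 1:10 selects that
 * leaf directly, otherwise the filter chains are walked (recursing into
 * inner classes), and a packet matching nothing ends up in leaf 1:20 or,
 * failing that, in the direct queue.
 */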
196
#define HTB_DIRECT ((struct htb_class *)-1L)
197

    
198
static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
199
                                      int *qerr)
200
{
201
        struct htb_sched *q = qdisc_priv(sch);
202
        struct htb_class *cl;
203
        struct tcf_result res;
204
        struct tcf_proto *tcf;
205
        int result;
206

    
207
        /* allow selecting the class by setting skb->priority to a valid classid;
208
         * note that nfmark can be used too by attaching filter fw with no
209
         * rules in it
210
         */
211
        if (skb->priority == sch->handle)
212
                return HTB_DIRECT;        /* X:0 (direct flow) selected */
213
        cl = htb_find(skb->priority, sch);
214
        if (cl && cl->level == 0)
215
                return cl;
216

    
217
        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
218
        tcf = q->filter_list;
219
        while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
220
#ifdef CONFIG_NET_CLS_ACT
221
                switch (result) {
222
                case TC_ACT_QUEUED:
223
                case TC_ACT_STOLEN:
224
                        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
225
                case TC_ACT_SHOT:
226
                        return NULL;
227
                }
228
#endif
229
                cl = (void *)res.class;
230
                if (!cl) {
231
                        if (res.classid == sch->handle)
232
                                return HTB_DIRECT;        /* X:0 (direct flow) */
233
                        cl = htb_find(res.classid, sch);
234
                        if (!cl)
235
                                break;        /* filter selected invalid classid */
236
                }
237
                if (!cl->level)
238
                        return cl;        /* we hit leaf; return it */
239

    
240
                /* we have got inner class; apply inner filter chain */
241
                tcf = cl->filter_list;
242
        }
243
        /* classification failed; try to use default class */
244
        cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
245
        if (!cl || cl->level)
246
                return HTB_DIRECT;        /* bad default .. this is safe bet */
247
        return cl;
248
}
249

    
250
/**
251
 * htb_add_to_id_tree - adds class to the round robin list
252
 *
253
 * Routine adds class to the list (actually tree) sorted by classid.
254
 * Make sure that the class is not already on such a list for the given prio.
255
 */
256
static void htb_add_to_id_tree(struct rb_root *root,
257
                               struct htb_class *cl, int prio)
258
{
259
        struct rb_node **p = &root->rb_node, *parent = NULL;
260

    
261
        while (*p) {
262
                struct htb_class *c;
263
                parent = *p;
264
                c = rb_entry(parent, struct htb_class, node[prio]);
265

    
266
                if (cl->common.classid > c->common.classid)
267
                        p = &parent->rb_right;
268
                else
269
                        p = &parent->rb_left;
270
        }
271
        rb_link_node(&cl->node[prio], parent, p);
272
        rb_insert_color(&cl->node[prio], root);
273
}
274

    
275
/**
276
 * htb_add_to_wait_tree - adds class to the event queue with delay
277
 *
278
 * The class is added to the priority event queue to indicate that the class will
279
 * change its mode in cl->pq_key microseconds. Make sure that class is not
280
 * already in the queue.
281
 */
282
static void htb_add_to_wait_tree(struct htb_sched *q,
283
                                 struct htb_class *cl, long delay)
284
{
285
        struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
286

    
287
        cl->pq_key = q->now + delay;
288
        if (cl->pq_key == q->now)
289
                cl->pq_key++;
290

    
291
        /* update the nearest event cache */
292
        if (q->near_ev_cache[cl->level] > cl->pq_key)
293
                q->near_ev_cache[cl->level] = cl->pq_key;
294

    
295
        while (*p) {
296
                struct htb_class *c;
297
                parent = *p;
298
                c = rb_entry(parent, struct htb_class, pq_node);
299
                if (cl->pq_key >= c->pq_key)
300
                        p = &parent->rb_right;
301
                else
302
                        p = &parent->rb_left;
303
        }
304
        rb_link_node(&cl->pq_node, parent, p);
305
        rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
306
}
307

    
308
/**
309
 * htb_next_rb_node - finds next node in binary tree
310
 *
311
 * When we are past last key we return NULL.
312
 * Average complexity is 2 steps per call.
313
 */
314
static inline void htb_next_rb_node(struct rb_node **n)
315
{
316
        *n = rb_next(*n);
317
}
318

    
319
/**
320
 * htb_add_class_to_row - add class to its row
321
 *
322
 * The class is added to row at priorities marked in mask.
323
 * It does nothing if mask == 0.
324
 */
325
static inline void htb_add_class_to_row(struct htb_sched *q,
326
                                        struct htb_class *cl, int mask)
327
{
328
        q->row_mask[cl->level] |= mask;
329
        while (mask) {
330
                int prio = ffz(~mask);
331
                mask &= ~(1 << prio);
332
                htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
333
        }
334
}
335

    
336
/* If this triggers, it is a bug in this code, but it need not be fatal */
337
static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
338
{
339
        if (RB_EMPTY_NODE(rb)) {
340
                WARN_ON(1);
341
        } else {
342
                rb_erase(rb, root);
343
                RB_CLEAR_NODE(rb);
344
        }
345
}
346

    
347

    
348
/**
349
 * htb_remove_class_from_row - removes class from its row
350
 *
351
 * The class is removed from row at priorities marked in mask.
352
 * It does nothing if mask == 0.
353
 */
354
static inline void htb_remove_class_from_row(struct htb_sched *q,
355
                                                 struct htb_class *cl, int mask)
356
{
357
        int m = 0;
358

    
359
        while (mask) {
360
                int prio = ffz(~mask);
361

    
362
                mask &= ~(1 << prio);
363
                if (q->ptr[cl->level][prio] == cl->node + prio)
364
                        htb_next_rb_node(q->ptr[cl->level] + prio);
365

    
366
                htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
367
                if (!q->row[cl->level][prio].rb_node)
368
                        m |= 1 << prio;
369
        }
370
        q->row_mask[cl->level] &= ~m;
371
}
372

    
373
/**
374
 * htb_activate_prios - creates an active class's feed chain
375
 *
376
 * The class is connected to ancestors and/or appropriate rows
377
 * for the priorities it participates in. cl->cmode must be the new
378
 * (activated) mode. It does nothing if cl->prio_activity == 0.
379
 */
380
static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
381
{
382
        struct htb_class *p = cl->parent;
383
        long m, mask = cl->prio_activity;
384

    
385
        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
386
                m = mask;
387
                while (m) {
388
                        int prio = ffz(~m);
389
                        m &= ~(1 << prio);
390

    
391
                        if (p->un.inner.feed[prio].rb_node)
392
                                /* parent already has its feed in use, so
393
                                 * reset the bit in mask as the parent is already ok
394
                                 */
395
                                mask &= ~(1 << prio);
396

    
397
                        htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
398
                }
399
                p->prio_activity |= mask;
400
                cl = p;
401
                p = cl->parent;
402

    
403
        }
404
        if (cl->cmode == HTB_CAN_SEND && mask)
405
                htb_add_class_to_row(q, cl, mask);
406
}
407

    
408
/**
409
 * htb_deactivate_prios - remove class from feed chain
410
 *
411
 * cl->cmode must represent old mode (before deactivation). It does
412
 * nothing if cl->prio_activity == 0. Class is removed from all feed
413
 * chains and rows.
414
 */
415
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
416
{
417
        struct htb_class *p = cl->parent;
418
        long m, mask = cl->prio_activity;
419

    
420
        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
421
                m = mask;
422
                mask = 0;
423
                while (m) {
424
                        int prio = ffz(~m);
425
                        m &= ~(1 << prio);
426

    
427
                        if (p->un.inner.ptr[prio] == cl->node + prio) {
428
                                /* we are removing child which is pointed to from
429
                                 * parent feed - forget the pointer but remember
430
                                 * classid
431
                                 */
432
                                p->un.inner.last_ptr_id[prio] = cl->common.classid;
433
                                p->un.inner.ptr[prio] = NULL;
434
                        }
435

    
436
                        htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);
437

    
438
                        if (!p->un.inner.feed[prio].rb_node)
439
                                mask |= 1 << prio;
440
                }
441

    
442
                p->prio_activity &= ~mask;
443
                cl = p;
444
                p = cl->parent;
445

    
446
        }
447
        if (cl->cmode == HTB_CAN_SEND && mask)
448
                htb_remove_class_from_row(q, cl, mask);
449
}
450

    
451
static inline long htb_lowater(const struct htb_class *cl)
452
{
453
        if (htb_hysteresis)
454
                return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
455
        else
456
                return 0;
457
}
458
static inline long htb_hiwater(const struct htb_class *cl)
459
{
460
        if (htb_hysteresis)
461
                return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
462
        else
463
                return 0;
464
}
465

    
466

    
467
/**
468
 * htb_class_mode - computes and returns current class mode
469
 *
470
 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
471
 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
472
 * from now to time when cl will change its state.
473
 * It is also worth noting that the class mode doesn't change simply
474
 * at cl->{c,}tokens == 0 but rather there can be hysteresis in the
475
 * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
476
 * mode transitions per time unit. The speed gain is about 1/6.
477
 */
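/* Worked example of the hysteresis above: with htb_hysteresis enabled, a
 * class in HTB_CAN_SEND is not demoted to HTB_MAY_BORROW until cl->tokens
 * falls below -cl->buffer (htb_hiwater), and a class not already in
 * HTB_CANT_SEND does not enter it until cl->ctokens falls below
 * -cl->cbuffer (htb_lowater); without hysteresis both thresholds are 0.
 */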
478
static inline enum htb_cmode
479
htb_class_mode(struct htb_class *cl, long *diff)
480
{
481
        long toks;
482

    
483
        if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
484
                *diff = -toks;
485
                return HTB_CANT_SEND;
486
        }
487

    
488
        if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
489
                return HTB_CAN_SEND;
490

    
491
        *diff = -toks;
492
        return HTB_MAY_BORROW;
493
}
494

    
495
/**
496
 * htb_change_class_mode - changes class's mode
497
 *
498
 * This should be the only way to change a class's mode under normal
499
 * circumstances. Routine will update feed lists linkage, change the mode
500
 * and add class to the wait event queue if appropriate. New mode should
501
 * be different from old one and cl->pq_key has to be valid if changing
502
 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
503
 */
504
static void
505
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
506
{
507
        enum htb_cmode new_mode = htb_class_mode(cl, diff);
508

    
509
        if (new_mode == cl->cmode)
510
                return;
511

    
512
        if (cl->prio_activity) {        /* not necessary: speed optimization */
513
                if (cl->cmode != HTB_CANT_SEND)
514
                        htb_deactivate_prios(q, cl);
515
                cl->cmode = new_mode;
516
                if (new_mode != HTB_CANT_SEND)
517
                        htb_activate_prios(q, cl);
518
        } else
519
                cl->cmode = new_mode;
520
}
521

    
522
/**
523
 * htb_activate - inserts leaf cl into appropriate active feeds
524
 *
525
 * Routine learns (new) priority of leaf and activates feed chain
526
 * for the prio. It can safely be called on an already active leaf.
527
 * It also adds leaf into droplist.
528
 */
529
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
530
{
531
        WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
532

    
533
        if (!cl->prio_activity) {
534
                cl->prio_activity = 1 << cl->prio;
535
                htb_activate_prios(q, cl);
536
                list_add_tail(&cl->un.leaf.drop_list,
537
                              q->drops + cl->prio);
538
        }
539
}
540

    
541
/**
542
 * htb_deactivate - remove leaf cl from active feeds
543
 *
544
 * Make sure that the leaf is active. In other words, it can't be called
545
 * with non-active leaf. It also removes class from the drop list.
546
 */
547
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
548
{
549
        WARN_ON(!cl->prio_activity);
550

    
551
        htb_deactivate_prios(q, cl);
552
        cl->prio_activity = 0;
553
        list_del_init(&cl->un.leaf.drop_list);
554
}
555

    
556
static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
557
{
558
        int uninitialized_var(ret);
559
        struct htb_sched *q = qdisc_priv(sch);
560
        struct htb_class *cl = htb_classify(skb, sch, &ret);
561

    
562
#if OFBUF
563
    if(cl != HTB_DIRECT && cl)
564
        skb_get(skb);
565
#endif
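    /* (ofbuf) The extra reference taken above keeps the skb alive even if
     * the leaf's qdisc_enqueue() fails and frees it, so the packet can
     * still be parked on q->ofbuf below; on the success path the extra
     * reference is dropped again with kfree_skb().
     */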
566

    
567
        if (cl == HTB_DIRECT) {
568
                /* enqueue to helper queue */
569
                if (q->direct_queue.qlen < q->direct_qlen) {
570
                        __skb_queue_tail(&q->direct_queue, skb);
571
                        q->direct_pkts++;
572
                } else {
573
                        kfree_skb(skb);
574
                        sch->qstats.drops++;
575
                        return NET_XMIT_DROP;
576
                }
577
#ifdef CONFIG_NET_CLS_ACT
578
        } else if (!cl) {
579
                if (ret & __NET_XMIT_BYPASS)
580
                        sch->qstats.drops++;
581
                kfree_skb(skb);
582
                return ret;
583
#endif
584
        } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
585
        /* We shouldn't drop this, but enqueue it into ofbuf */
586
        // TODO: is skb actually valid?
587
        // Ans: looks like qdisc_enqueue will end up freeing the packet
588
        // if enqueue failed.  So we should incr refcnt before calling qdisc_enqueue...
589
#if OFBUF
590
        __skb_queue_tail(&q->ofbuf, skb);
591
        q->ofbuf_queued++;
592
#else 
593
                if (net_xmit_drop_count(ret)) {
594
                        sch->qstats.drops++;
595
                        cl->qstats.drops++;
596
                }
597
                return ret;
598
#endif
599
        } else {
600
                bstats_update(&cl->bstats, skb);
601
                htb_activate(q, cl);
602
#if OFBUF
603
        kfree_skb(skb);
604
#endif
605
        }
606

    
607
        sch->q.qlen++;
608
        return NET_XMIT_SUCCESS;
609
}
610

    
611
static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, long diff)
612
{
613
        long toks = diff + cl->tokens;
614

    
615
        if (toks > cl->buffer)
616
                toks = cl->buffer;
617
        toks -= (long) qdisc_l2t(cl->rate, bytes);
618
        if (toks <= -cl->mbuffer)
619
                toks = 1 - cl->mbuffer;
620

    
621
        cl->tokens = toks;
622
}
623

    
624
static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, long diff)
625
{
626
        long toks = diff + cl->ctokens;
627

    
628
        if (toks > cl->cbuffer)
629
                toks = cl->cbuffer;
630
        toks -= (long) qdisc_l2t(cl->ceil, bytes);
631
        if (toks <= -cl->mbuffer)
632
                toks = 1 - cl->mbuffer;
633

    
634
        cl->ctokens = toks;
635
}
636

    
637
/**
638
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
639
 *
640
 * Routine assumes that packet "bytes" long was dequeued from leaf cl
641
 * borrowing from "level". It accounts bytes to ceil leaky bucket for
642
 * leaf and all ancestors and to rate bucket for ancestors at levels
643
 * "level" and higher. It also handles possible change of mode resulting
644
 * from the update. Note that mode can also increase here (MAY_BORROW to
645
 * CAN_SEND) because we can use a more precise clock than the event queue here.
646
 * In such case we remove class from event queue first.
647
 */
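/* Illustrative walk-through of the rule above: if a level-0 leaf sent a
 * packet by borrowing from its level-1 parent (level == 1), the leaf
 * (cl->level < level) is counted as a borrower and only has its tokens
 * advanced by diff, while the parent and all higher ancestors
 * (cl->level >= level) are charged qdisc_l2t(rate, bytes) against their
 * rate bucket; every class on the path is always charged against its ceil
 * bucket via htb_accnt_ctokens().
 */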
648
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
649
                             int level, struct sk_buff *skb)
650
{
651
        int bytes = qdisc_pkt_len(skb);
652
        enum htb_cmode old_mode;
653
        long diff;
654

    
655
        while (cl) {
656
                diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
657
                if (cl->level >= level) {
658
                        if (cl->level == level)
659
                                cl->xstats.lends++;
660
                        htb_accnt_tokens(cl, bytes, diff);
661
                } else {
662
                        cl->xstats.borrows++;
663
                        cl->tokens += diff;        /* we moved t_c; update tokens */
664
                }
665
                htb_accnt_ctokens(cl, bytes, diff);
666
                cl->t_c = q->now;
667

    
668
                old_mode = cl->cmode;
669
                diff = 0;
670
                htb_change_class_mode(q, cl, &diff);
671
                if (old_mode != cl->cmode) {
672
                        if (old_mode != HTB_CAN_SEND)
673
                                htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
674
                        if (cl->cmode != HTB_CAN_SEND)
675
                                htb_add_to_wait_tree(q, cl, diff);
676
                }
677

    
678
                /* update basic stats except for leaves which are already updated */
679
                if (cl->level)
680
                        bstats_update(&cl->bstats, skb);
681

    
682
                cl = cl->parent;
683
        }
684
}
685

    
686
/**
687
 * htb_do_events - make mode changes to classes at the level
688
 *
689
 * Scans event queue for pending events and applies them. Returns time of
690
 * next pending event (0 for no event in pq, q->now for too many events).
691
 * Note: Applied are events that have cl->pq_key <= q->now.
692
 */
693
static psched_time_t htb_do_events(struct htb_sched *q, int level,
694
                                   unsigned long start)
695
{
696
        /* don't run for longer than 2 jiffies; 2 is used instead of
697
         * 1 to simplify things when jiffy is going to be incremented
698
         * too soon
699
         */
700
        unsigned long stop_at = start + 2;
701
        while (time_before(jiffies, stop_at)) {
702
                struct htb_class *cl;
703
                long diff;
704
                struct rb_node *p = rb_first(&q->wait_pq[level]);
705

    
706
                if (!p)
707
                        return 0;
708

    
709
                cl = rb_entry(p, struct htb_class, pq_node);
710
                if (cl->pq_key > q->now)
711
                        return cl->pq_key;
712

    
713
                htb_safe_rb_erase(p, q->wait_pq + level);
714
                diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
715
                htb_change_class_mode(q, cl, &diff);
716
                if (cl->cmode != HTB_CAN_SEND)
717
                        htb_add_to_wait_tree(q, cl, diff);
718
        }
719

    
720
        /* too much load - let's continue after a break for scheduling */
721
        if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
722
                pr_warning("htb: too many events!\n");
723
                q->warned |= HTB_WARN_TOOMANYEVENTS;
724
        }
725

    
726
        return q->now;
727
}
728

    
729
/* Returns class->node+prio from the id-tree where the class's id is >= id. NULL
730
 * if no such one exists.
731
 */
732
static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
733
                                              u32 id)
734
{
735
        struct rb_node *r = NULL;
736
        while (n) {
737
                struct htb_class *cl =
738
                    rb_entry(n, struct htb_class, node[prio]);
739

    
740
                if (id > cl->common.classid) {
741
                        n = n->rb_right;
742
                } else if (id < cl->common.classid) {
743
                        r = n;
744
                        n = n->rb_left;
745
                } else {
746
                        return n;
747
                }
748
        }
749
        return r;
750
}
751

    
752
/**
753
 * htb_lookup_leaf - returns next leaf class in DRR order
754
 *
755
 * Find the leaf that the current feed pointer points to.
756
 */
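/* The walk below keeps an explicit stack (one entry per tree level): it
 * starts at the row's self feed tree and descends through each inner
 * class's per-prio feed tree until it reaches a level-0 class, resuming
 * from the stored ptr/last_ptr_id hints so the DRR position is preserved
 * across calls.
 */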
757
static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
758
                                         struct rb_node **pptr, u32 * pid)
759
{
760
        int i;
761
        struct {
762
                struct rb_node *root;
763
                struct rb_node **pptr;
764
                u32 *pid;
765
        } stk[TC_HTB_MAXDEPTH], *sp = stk;
766

    
767
        BUG_ON(!tree->rb_node);
768
        sp->root = tree->rb_node;
769
        sp->pptr = pptr;
770
        sp->pid = pid;
771

    
772
        for (i = 0; i < 65535; i++) {
773
                if (!*sp->pptr && *sp->pid) {
774
                        /* ptr was invalidated but id is valid - try to recover
775
                         * the original or next ptr
776
                         */
777
                        *sp->pptr =
778
                            htb_id_find_next_upper(prio, sp->root, *sp->pid);
779
                }
780
                *sp->pid = 0;        /* ptr is valid now, so remove this hint as it
781
                                 * can become out of date quickly
782
                                 */
783
                if (!*sp->pptr) {        /* we are at right end; rewind & go up */
784
                        *sp->pptr = sp->root;
785
                        while ((*sp->pptr)->rb_left)
786
                                *sp->pptr = (*sp->pptr)->rb_left;
787
                        if (sp > stk) {
788
                                sp--;
789
                                if (!*sp->pptr) {
790
                                        WARN_ON(1);
791
                                        return NULL;
792
                                }
793
                                htb_next_rb_node(sp->pptr);
794
                        }
795
                } else {
796
                        struct htb_class *cl;
797
                        cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
798
                        if (!cl->level)
799
                                return cl;
800
                        (++sp)->root = cl->un.inner.feed[prio].rb_node;
801
                        sp->pptr = cl->un.inner.ptr + prio;
802
                        sp->pid = cl->un.inner.last_ptr_id + prio;
803
                }
804
        }
805
        WARN_ON(1);
806
        return NULL;
807
}
808

    
809
/* dequeues packet at given priority and level; call only if
810
 * you are sure that there is active class at prio/level
811
 */
812
static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
813
                                        int level)
814
{
815
        struct sk_buff *skb = NULL;
816
        struct htb_class *cl, *start;
817
        /* look initial class up in the row */
818
        start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
819
                                     q->ptr[level] + prio,
820
                                     q->last_ptr_id[level] + prio);
821

    
822
        do {
823
next:
824
                if (unlikely(!cl))
825
                        return NULL;
826

    
827
                /* class can be empty - it is unlikely but can be true if leaf
828
                 * qdisc drops packets in enqueue routine or if someone used
829
                 * graft operation on the leaf since last dequeue;
830
                 * simply deactivate and skip such class
831
                 */
832
                if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
833
                        struct htb_class *next;
834
                        htb_deactivate(q, cl);
835

    
836
                        /* row/level might become empty */
837
                        if ((q->row_mask[level] & (1 << prio)) == 0)
838
                                return NULL;
839

    
840
                        next = htb_lookup_leaf(q->row[level] + prio,
841
                                               prio, q->ptr[level] + prio,
842
                                               q->last_ptr_id[level] + prio);
843

    
844
                        if (cl == start)        /* fix start if we just deleted it */
845
                                start = next;
846
                        cl = next;
847
                        goto next;
848
                }
849

    
850
                skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
851
                if (likely(skb != NULL))
852
                        break;
853

    
854
                qdisc_warn_nonwc("htb", cl->un.leaf.q);
855
                htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
856
                                  ptr[0]) + prio);
857
                cl = htb_lookup_leaf(q->row[level] + prio, prio,
858
                                     q->ptr[level] + prio,
859
                                     q->last_ptr_id[level] + prio);
860

    
861
        } while (cl != start);
862

    
863
        if (likely(skb != NULL)) {
864
                cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
865
                if (cl->un.leaf.deficit[level] < 0) {
866
                        cl->un.leaf.deficit[level] += cl->quantum;
867
                        htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
868
                                          ptr[0]) + prio);
869
                }
870
                /* this used to be after charge_class but this constellation
871
                 * gives us slightly better performance
872
                 */
873
                if (!cl->un.leaf.q->q.qlen)
874
                        htb_deactivate(q, cl);
875
                htb_charge_class(q, cl, level, skb);
876
        }
877
        return skb;
878
}
879

    
880
static struct sk_buff *htb_dequeue(struct Qdisc *sch)
881
{
882
        struct sk_buff *skb;
883
        struct htb_sched *q = qdisc_priv(sch);
884
        int level;
885
        psched_time_t next_event;
886
        unsigned long start_at;
887
    u32 r, i;
888
    struct sk_buff *pkt;
889

    
890
        /* try to dequeue direct packets as high prio (!) to minimize cpu work */
891
        skb = __skb_dequeue(&q->direct_queue);
892
        if (skb != NULL) {
893
ok:
894
                qdisc_bstats_update(sch, skb);
895
                qdisc_unthrottled(sch);
896
                sch->q.qlen--;
897
#if OFBUF
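        /* A packet just left the qdisc, so there is room again: pick one
         * buffered overflow packet at random, push it back through the
         * normal htb_enqueue() path and drop the rest of the overflow list.
         */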
898
        if(q->ofbuf_queued > 0) {
899
            i = 0;
900
            r = net_random() % q->ofbuf_queued;
901
            // enqueue the rth packet and drop the rest
902
            while((pkt = __skb_dequeue(&q->ofbuf)) != NULL) {
903
                if(i == r) {
904
                    // the chosen one
905
                    htb_enqueue(pkt, sch);
906
                } else {
907
                    kfree_skb(pkt);
908
                }
909
                i++;
910
            }
911
            q->ofbuf_queued = 0;
912
        }
913
#endif
914
                return skb;
915
        }
916

    
917
        if (!sch->q.qlen)
918
                goto fin;
919
        q->now = psched_get_time();
920
        start_at = jiffies;
921

    
922
        next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
923

    
924
        for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
925
                /* common case optimization - skip event handler quickly */
926
                int m;
927
                psched_time_t event;
928

    
929
                if (q->now >= q->near_ev_cache[level]) {
930
                        event = htb_do_events(q, level, start_at);
931
                        if (!event)
932
                                event = q->now + PSCHED_TICKS_PER_SEC;
933
                        q->near_ev_cache[level] = event;
934
                } else
935
                        event = q->near_ev_cache[level];
936

    
937
                if (next_event > event)
938
                        next_event = event;
939

    
940
                m = ~q->row_mask[level];
941
                while (m != (int)(-1)) {
942
                        int prio = ffz(m);
943

    
944
                        m |= 1 << prio;
945
                        skb = htb_dequeue_tree(q, prio, level);
946
                        if (likely(skb != NULL))
947
                                goto ok;
948
                }
949
        }
950
        sch->qstats.overlimits++;
951
        if (likely(next_event > q->now))
952
                qdisc_watchdog_schedule(&q->watchdog, next_event);
953
        else
954
                schedule_work(&q->work);
955
fin:
956
        return skb;
957
}
958

    
959
/* try to drop from each class (by prio) until one succeeds */
960
static unsigned int htb_drop(struct Qdisc *sch)
961
{
962
        struct htb_sched *q = qdisc_priv(sch);
963
        int prio;
964

    
965
        for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
966
                struct list_head *p;
967
                list_for_each(p, q->drops + prio) {
968
                        struct htb_class *cl = list_entry(p, struct htb_class,
969
                                                          un.leaf.drop_list);
970
                        unsigned int len;
971
                        if (cl->un.leaf.q->ops->drop &&
972
                            (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
973
                                sch->q.qlen--;
974
                                if (!cl->un.leaf.q->q.qlen)
975
                                        htb_deactivate(q, cl);
976
                                return len;
977
                        }
978
                }
979
        }
980
        return 0;
981
}
982

    
983
/* reset all classes */
984
/* always called under BH & queue lock */
985
static void htb_reset(struct Qdisc *sch)
986
{
987
        struct htb_sched *q = qdisc_priv(sch);
988
        struct htb_class *cl;
989
        struct hlist_node *n;
990
        unsigned int i;
991

    
992
        for (i = 0; i < q->clhash.hashsize; i++) {
993
                hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
994
                        if (cl->level)
995
                                memset(&cl->un.inner, 0, sizeof(cl->un.inner));
996
                        else {
997
                                if (cl->un.leaf.q)
998
                                        qdisc_reset(cl->un.leaf.q);
999
                                INIT_LIST_HEAD(&cl->un.leaf.drop_list);
1000
                        }
1001
                        cl->prio_activity = 0;
1002
                        cl->cmode = HTB_CAN_SEND;
1003

    
1004
                }
1005
        }
1006
        qdisc_watchdog_cancel(&q->watchdog);
1007
        __skb_queue_purge(&q->direct_queue);
1008
        sch->q.qlen = 0;
1009
#if OFBUF
1010
    __skb_queue_purge(&q->ofbuf);
1011
    q->ofbuf_queued = 0;
1012
#endif
1013
        memset(q->row, 0, sizeof(q->row));
1014
        memset(q->row_mask, 0, sizeof(q->row_mask));
1015
        memset(q->wait_pq, 0, sizeof(q->wait_pq));
1016
        memset(q->ptr, 0, sizeof(q->ptr));
1017
        for (i = 0; i < TC_HTB_NUMPRIO; i++)
1018
                INIT_LIST_HEAD(q->drops + i);
1019
}
1020

    
1021
static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
1022
        [TCA_HTB_PARMS]        = { .len = sizeof(struct tc_htb_opt) },
1023
        [TCA_HTB_INIT]        = { .len = sizeof(struct tc_htb_glob) },
1024
        [TCA_HTB_CTAB]        = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1025
        [TCA_HTB_RTAB]        = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1026
};
1027

    
1028
static void htb_work_func(struct work_struct *work)
1029
{
1030
        struct htb_sched *q = container_of(work, struct htb_sched, work);
1031
        struct Qdisc *sch = q->watchdog.qdisc;
1032

    
1033
        __netif_schedule(qdisc_root(sch));
1034
}
1035

    
1036
static int htb_init(struct Qdisc *sch, struct nlattr *opt)
1037
{
1038
        struct htb_sched *q = qdisc_priv(sch);
1039
        struct nlattr *tb[TCA_HTB_INIT + 1];
1040
        struct tc_htb_glob *gopt;
1041
        int err;
1042
        int i;
1043

    
1044
        if (!opt)
1045
                return -EINVAL;
1046

    
1047
        err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
1048
        if (err < 0)
1049
                return err;
1050

    
1051
        if (tb[TCA_HTB_INIT] == NULL) {
1052
                pr_err("HTB: hey probably you have bad tc tool ?\n");
1053
                return -EINVAL;
1054
        }
1055
        gopt = nla_data(tb[TCA_HTB_INIT]);
1056
        if (gopt->version != HTB_VER >> 16) {
1057
                pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n",
1058
                       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
1059
                return -EINVAL;
1060
        }
1061

    
1062
        err = qdisc_class_hash_init(&q->clhash);
1063
        if (err < 0)
1064
                return err;
1065
        for (i = 0; i < TC_HTB_NUMPRIO; i++)
1066
                INIT_LIST_HEAD(q->drops + i);
1067

    
1068
        qdisc_watchdog_init(&q->watchdog, sch);
1069
        INIT_WORK(&q->work, htb_work_func);
1070
        skb_queue_head_init(&q->direct_queue);
1071

    
1072
#if OFBUF
1073
    skb_queue_head_init(&q->ofbuf);
1074
    q->ofbuf_queued = 0;
1075
#endif
1076

    
1077
        q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1078

    
1079
        if (q->direct_qlen < 2)        /* some devices have zero tx_queue_len */
1080
                q->direct_qlen = 2;
1081

    
1082
        if ((q->rate2quantum = gopt->rate2quantum) < 1)
1083
                q->rate2quantum = 1;
1084
        q->defcls = gopt->defcls;
1085

    
1086
        return 0;
1087
}
1088

    
1089
static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1090
{
1091
        spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1092
        struct htb_sched *q = qdisc_priv(sch);
1093
        struct nlattr *nest;
1094
        struct tc_htb_glob gopt;
1095

    
1096
        spin_lock_bh(root_lock);
1097

    
1098
        gopt.direct_pkts = q->direct_pkts;
1099
        gopt.version = HTB_VER;
1100
        gopt.rate2quantum = q->rate2quantum;
1101
        gopt.defcls = q->defcls;
1102
        gopt.debug = 0;
1103

    
1104
        nest = nla_nest_start(skb, TCA_OPTIONS);
1105
        if (nest == NULL)
1106
                goto nla_put_failure;
1107
        NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
1108
        nla_nest_end(skb, nest);
1109

    
1110
        spin_unlock_bh(root_lock);
1111
        return skb->len;
1112

    
1113
nla_put_failure:
1114
        spin_unlock_bh(root_lock);
1115
        nla_nest_cancel(skb, nest);
1116
        return -1;
1117
}
1118

    
1119
static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1120
                          struct sk_buff *skb, struct tcmsg *tcm)
1121
{
1122
        struct htb_class *cl = (struct htb_class *)arg;
1123
        spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1124
        struct nlattr *nest;
1125
        struct tc_htb_opt opt;
1126

    
1127
        spin_lock_bh(root_lock);
1128
        tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1129
        tcm->tcm_handle = cl->common.classid;
1130
        if (!cl->level && cl->un.leaf.q)
1131
                tcm->tcm_info = cl->un.leaf.q->handle;
1132

    
1133
        nest = nla_nest_start(skb, TCA_OPTIONS);
1134
        if (nest == NULL)
1135
                goto nla_put_failure;
1136

    
1137
        memset(&opt, 0, sizeof(opt));
1138

    
1139
        opt.rate = cl->rate->rate;
1140
        opt.buffer = cl->buffer;
1141
        opt.ceil = cl->ceil->rate;
1142
        opt.cbuffer = cl->cbuffer;
1143
        opt.quantum = cl->quantum;
1144
        opt.prio = cl->prio;
1145
        opt.level = cl->level;
1146
        NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
1147

    
1148
        nla_nest_end(skb, nest);
1149
        spin_unlock_bh(root_lock);
1150
        return skb->len;
1151

    
1152
nla_put_failure:
1153
        spin_unlock_bh(root_lock);
1154
        nla_nest_cancel(skb, nest);
1155
        return -1;
1156
}
1157

    
1158
static int
1159
htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1160
{
1161
        struct htb_class *cl = (struct htb_class *)arg;
1162

    
1163
        if (!cl->level && cl->un.leaf.q)
1164
                cl->qstats.qlen = cl->un.leaf.q->q.qlen;
1165
        cl->xstats.tokens = cl->tokens;
1166
        cl->xstats.ctokens = cl->ctokens;
1167

    
1168
        if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1169
            gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
1170
            gnet_stats_copy_queue(d, &cl->qstats) < 0)
1171
                return -1;
1172

    
1173
        return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1174
}
1175

    
1176
static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1177
                     struct Qdisc **old)
1178
{
1179
        struct htb_class *cl = (struct htb_class *)arg;
1180

    
1181
        if (cl->level)
1182
                return -EINVAL;
1183
        if (new == NULL &&
1184
            (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1185
                                     cl->common.classid)) == NULL)
1186
                return -ENOBUFS;
1187

    
1188
        sch_tree_lock(sch);
1189
        *old = cl->un.leaf.q;
1190
        cl->un.leaf.q = new;
1191
        if (*old != NULL) {
1192
                qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1193
                qdisc_reset(*old);
1194
        }
1195
        sch_tree_unlock(sch);
1196
        return 0;
1197
}
1198

    
1199
static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1200
{
1201
        struct htb_class *cl = (struct htb_class *)arg;
1202
        return !cl->level ? cl->un.leaf.q : NULL;
1203
}
1204

    
1205
static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1206
{
1207
        struct htb_class *cl = (struct htb_class *)arg;
1208

    
1209
        if (cl->un.leaf.q->q.qlen == 0)
1210
                htb_deactivate(qdisc_priv(sch), cl);
1211
}
1212

    
1213
static unsigned long htb_get(struct Qdisc *sch, u32 classid)
1214
{
1215
        struct htb_class *cl = htb_find(classid, sch);
1216
        if (cl)
1217
                cl->refcnt++;
1218
        return (unsigned long)cl;
1219
}
1220

    
1221
static inline int htb_parent_last_child(struct htb_class *cl)
1222
{
1223
        if (!cl->parent)
1224
                /* the root class */
1225
                return 0;
1226
        if (cl->parent->children > 1)
1227
                /* not the last child */
1228
                return 0;
1229
        return 1;
1230
}
1231

    
1232
static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
1233
                               struct Qdisc *new_q)
1234
{
1235
        struct htb_class *parent = cl->parent;
1236

    
1237
        WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);
1238

    
1239
        if (parent->cmode != HTB_CAN_SEND)
1240
                htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);
1241

    
1242
        parent->level = 0;
1243
        memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1244
        INIT_LIST_HEAD(&parent->un.leaf.drop_list);
1245
        parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
1246
        parent->tokens = parent->buffer;
1247
        parent->ctokens = parent->cbuffer;
1248
        parent->t_c = psched_get_time();
1249
        parent->cmode = HTB_CAN_SEND;
1250
}
1251

    
1252
static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1253
{
1254
        if (!cl->level) {
1255
                WARN_ON(!cl->un.leaf.q);
1256
                qdisc_destroy(cl->un.leaf.q);
1257
        }
1258
        gen_kill_estimator(&cl->bstats, &cl->rate_est);
1259
        qdisc_put_rtab(cl->rate);
1260
        qdisc_put_rtab(cl->ceil);
1261

    
1262
        tcf_destroy_chain(&cl->filter_list);
1263
        kfree(cl);
1264
}
1265

    
1266
static void htb_destroy(struct Qdisc *sch)
1267
{
1268
        struct htb_sched *q = qdisc_priv(sch);
1269
        struct hlist_node *n, *next;
1270
        struct htb_class *cl;
1271
        unsigned int i;
1272

    
1273
        cancel_work_sync(&q->work);
1274
        qdisc_watchdog_cancel(&q->watchdog);
1275
        /* This line used to be after htb_destroy_class call below
1276
         * and surprisingly it worked in 2.4. But it must precede it
1277
         * because filters need their target class alive to be able to call
1278
         * unbind_filter on it (without Oops).
1279
         */
1280
        tcf_destroy_chain(&q->filter_list);
1281

    
1282
        for (i = 0; i < q->clhash.hashsize; i++) {
1283
                hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
1284
                        tcf_destroy_chain(&cl->filter_list);
1285
        }
1286
        for (i = 0; i < q->clhash.hashsize; i++) {
1287
                hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
1288
                                          common.hnode)
1289
                        htb_destroy_class(sch, cl);
1290
        }
1291
        qdisc_class_hash_destroy(&q->clhash);
1292
        __skb_queue_purge(&q->direct_queue);
1293
#if OFBUF
1294
    __skb_queue_purge(&q->ofbuf);
1295
    q->ofbuf_queued = 0;
1296
#endif
1297
}
1298

    
1299
static int htb_delete(struct Qdisc *sch, unsigned long arg)
1300
{
1301
        struct htb_sched *q = qdisc_priv(sch);
1302
        struct htb_class *cl = (struct htb_class *)arg;
1303
        unsigned int qlen;
1304
        struct Qdisc *new_q = NULL;
1305
        int last_child = 0;
1306

    
1307
        // TODO: why don't we allow deleting a subtree ? references ? does
1308
        // tc subsys guarantee us that in htb_destroy it holds no class
1309
        // refs so that we can remove children safely there ?
1310
        if (cl->children || cl->filter_cnt)
1311
                return -EBUSY;
1312

    
1313
        if (!cl->level && htb_parent_last_child(cl)) {
1314
                new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1315
                                          cl->parent->common.classid);
1316
                last_child = 1;
1317
        }
1318

    
1319
        sch_tree_lock(sch);
1320

    
1321
        if (!cl->level) {
1322
                qlen = cl->un.leaf.q->q.qlen;
1323
                qdisc_reset(cl->un.leaf.q);
1324
                qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
1325
        }
1326

    
1327
        /* delete from hash and active; remainder in destroy_class */
1328
        qdisc_class_hash_remove(&q->clhash, &cl->common);
1329
        if (cl->parent)
1330
                cl->parent->children--;
1331

    
1332
        if (cl->prio_activity)
1333
                htb_deactivate(q, cl);
1334

    
1335
        if (cl->cmode != HTB_CAN_SEND)
1336
                htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
1337

    
1338
        if (last_child)
1339
                htb_parent_to_leaf(q, cl, new_q);
1340

    
1341
        BUG_ON(--cl->refcnt == 0);
1342
        /*
1343
         * This shouldn't happen: we "hold" one cops->get() when called
1344
         * from tc_ctl_tclass; the destroy method is done from cops->put().
1345
         */
1346

    
1347
        sch_tree_unlock(sch);
1348
        return 0;
1349
}
1350

    
1351
static void htb_put(struct Qdisc *sch, unsigned long arg)
1352
{
1353
        struct htb_class *cl = (struct htb_class *)arg;
1354

    
1355
        if (--cl->refcnt == 0)
1356
                htb_destroy_class(sch, cl);
1357
}
1358

    
1359
static int htb_change_class(struct Qdisc *sch, u32 classid,
1360
                            u32 parentid, struct nlattr **tca,
1361
                            unsigned long *arg)
1362
{
1363
        int err = -EINVAL;
1364
        struct htb_sched *q = qdisc_priv(sch);
1365
        struct htb_class *cl = (struct htb_class *)*arg, *parent;
1366
        struct nlattr *opt = tca[TCA_OPTIONS];
1367
        struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
1368
        struct nlattr *tb[__TCA_HTB_MAX];
1369
        struct tc_htb_opt *hopt;
1370

    
1371
        /* extract all subattrs from opt attr */
1372
        if (!opt)
1373
                goto failure;
1374

    
1375
        err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
1376
        if (err < 0)
1377
                goto failure;
1378

    
1379
        err = -EINVAL;
1380
        if (tb[TCA_HTB_PARMS] == NULL)
1381
                goto failure;
1382

    
1383
        parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1384

    
1385
        hopt = nla_data(tb[TCA_HTB_PARMS]);
1386

    
1387
        rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
1388
        ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
1389
        if (!rtab || !ctab)
1390
                goto failure;
1391

    
1392
        if (!cl) {                /* new class */
                struct Qdisc *new_q;
                int prio;
                struct {
                        struct nlattr                nla;
                        struct gnet_estimator        opt;
                } est = {
                        .nla = {
                                .nla_len        = nla_attr_size(sizeof(est.opt)),
                                .nla_type        = TCA_RATE,
                        },
                        .opt = {
                                /* 4s interval, 16s averaging constant */
                                .interval        = 2,
                                .ewma_log        = 2,
                        },
                };

                /* check for valid classid */
                if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
                    htb_find(classid, sch))
                        goto failure;

                /* check maximal depth */
                if (parent && parent->parent && parent->parent->level < 2) {
                        pr_err("htb: tree is too deep\n");
                        goto failure;
                }
                err = -ENOBUFS;
                cl = kzalloc(sizeof(*cl), GFP_KERNEL);
                if (!cl)
                        goto failure;

                err = gen_new_estimator(&cl->bstats, &cl->rate_est,
                                        qdisc_root_sleeping_lock(sch),
                                        tca[TCA_RATE] ? : &est.nla);
                if (err) {
                        kfree(cl);
                        goto failure;
                }

                cl->refcnt = 1;
                cl->children = 0;
                INIT_LIST_HEAD(&cl->un.leaf.drop_list);
                RB_CLEAR_NODE(&cl->pq_node);

                for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
                        RB_CLEAR_NODE(&cl->node[prio]);

                /* create the leaf qdisc early because it uses kmalloc(GFP_KERNEL),
                 * which can't be used inside of sch_tree_lock
                 * -- thanks to Karlis Peisenieks
                 */
                new_q = qdisc_create_dflt(sch->dev_queue,
                                          &pfifo_qdisc_ops, classid);
                sch_tree_lock(sch);
                if (parent && !parent->level) {
                        unsigned int qlen = parent->un.leaf.q->q.qlen;

                        /* turn parent into inner node */
                        qdisc_reset(parent->un.leaf.q);
                        qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
                        qdisc_destroy(parent->un.leaf.q);
                        if (parent->prio_activity)
                                htb_deactivate(q, parent);

                        /* remove from evt list because of level change */
                        if (parent->cmode != HTB_CAN_SEND) {
                                htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
                                parent->cmode = HTB_CAN_SEND;
                        }
                        parent->level = (parent->parent ? parent->parent->level
                                         : TC_HTB_MAXDEPTH) - 1;
                        memset(&parent->un.inner, 0, sizeof(parent->un.inner));
                }
                /* leaf (we) needs elementary qdisc */
                cl->un.leaf.q = new_q ? new_q : &noop_qdisc;

                cl->common.classid = classid;
                cl->parent = parent;

                /* set class to be in HTB_CAN_SEND state */
                cl->tokens = hopt->buffer;
                cl->ctokens = hopt->cbuffer;
                cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC;        /* 1min */
                cl->t_c = psched_get_time();
                cl->cmode = HTB_CAN_SEND;

                /* attach to the hash list and parent's family */
                qdisc_class_hash_insert(&q->clhash, &cl->common);
                if (parent)
                        parent->children++;
        } else {
                if (tca[TCA_RATE]) {
                        err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
                                                    qdisc_root_sleeping_lock(sch),
                                                    tca[TCA_RATE]);
                        if (err)
                                return err;
                }
                sch_tree_lock(sch);
        }

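        /* Common parameter update path for both new and existing classes.
         * For leaves, quantum is derived from rate / r2q and clamped to a
         * sane range unless the user set it explicitly; prio is capped at
         * TC_HTB_NUMPRIO - 1.
         */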
        /* There used to be a nasty bug here: we have to check that the node
         * is really a leaf before changing cl->un.leaf!
         */
        if (!cl->level) {
                cl->quantum = rtab->rate.rate / q->rate2quantum;
                if (!hopt->quantum && cl->quantum < 1000) {
                        pr_warning(
                               "HTB: quantum of class %X is small. Consider r2q change.\n",
                               cl->common.classid);
                        cl->quantum = 1000;
                }
                if (!hopt->quantum && cl->quantum > 200000) {
                        pr_warning(
                               "HTB: quantum of class %X is big. Consider r2q change.\n",
                               cl->common.classid);
                        cl->quantum = 200000;
                }
                if (hopt->quantum)
                        cl->quantum = hopt->quantum;
                if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
                        cl->prio = TC_HTB_NUMPRIO - 1;
        }

        cl->buffer = hopt->buffer;
        cl->cbuffer = hopt->cbuffer;
        if (cl->rate)
                qdisc_put_rtab(cl->rate);
        cl->rate = rtab;
        if (cl->ceil)
                qdisc_put_rtab(cl->ceil);
        cl->ceil = ctab;
        sch_tree_unlock(sch);

        qdisc_class_hash_grow(sch, &q->clhash);

        *arg = (unsigned long)cl;
        return 0;

failure:
        if (rtab)
                qdisc_put_rtab(rtab);
        if (ctab)
                qdisc_put_rtab(ctab);
        return err;
}

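/* htb_find_tcf - return the filter list that tc filters attach to: the
 * per-class list when a class is given, otherwise the qdisc-wide list.
 */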
static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
{
        struct htb_sched *q = qdisc_priv(sch);
        struct htb_class *cl = (struct htb_class *)arg;
        struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;

        return fl;
}

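/* htb_bind_filter - record that a tc filter now refers to this class by
 * bumping filter_cnt (see the historical note below on why leaves are
 * allowed here too).
 */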
static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
                                     u32 classid)
{
        struct htb_class *cl = htb_find(classid, sch);

        /*if (cl && !cl->level) return 0;
         * The line above used to be there to prevent attaching filters to
         * leaves. But at least the tc_index filter uses this just to get the
         * class for other reasons, so we have to allow it.
         * ----
         * 19.6.2002 As Werner explained, it is OK - bind filter is just
         * another way to "lock" the class - unlike "get", this lock can
         * be broken by the class during destroy, IIUC.
         */
        if (cl)
                cl->filter_cnt++;
        return (unsigned long)cl;
}

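/* htb_unbind_filter - the counterpart of htb_bind_filter(): a filter no
 * longer refers to this class.
 */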
static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
        struct htb_class *cl = (struct htb_class *)arg;

        if (cl)
                cl->filter_cnt--;
}

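/* htb_walk - iterate over every class in the hash for class dumps; the
 * walker's skip/count/stop fields let the core resume partial dumps.
 */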
static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
        struct htb_sched *q = qdisc_priv(sch);
        struct htb_class *cl;
        struct hlist_node *n;
        unsigned int i;

        if (arg->stop)
                return;

        for (i = 0; i < q->clhash.hashsize; i++) {
                hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
                        if (arg->count < arg->skip) {
                                arg->count++;
                                continue;
                        }
                        if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
                                arg->stop = 1;
                                return;
                        }
                        arg->count++;
                }
        }
}

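/* Class-level operations exported to the scheduler core. */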
static const struct Qdisc_class_ops htb_class_ops = {
        .graft                =        htb_graft,
        .leaf                =        htb_leaf,
        .qlen_notify        =        htb_qlen_notify,
        .get                =        htb_get,
        .put                =        htb_put,
        .change                =        htb_change_class,
        .delete                =        htb_delete,
        .walk                =        htb_walk,
        .tcf_chain        =        htb_find_tcf,
        .bind_tcf        =        htb_bind_filter,
        .unbind_tcf        =        htb_unbind_filter,
        .dump                =        htb_dump_class,
        .dump_stats        =        htb_dump_class_stats,
};

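/* Qdisc operations; "htb" is the identifier userspace (tc) uses to select
 * this scheduler.
 */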
static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
        .cl_ops                =        &htb_class_ops,
        .id                =        "htb",
        .priv_size        =        sizeof(struct htb_sched),
        .enqueue        =        htb_enqueue,
        .dequeue        =        htb_dequeue,
        .peek                =        qdisc_peek_dequeued,
        .drop                =        htb_drop,
        .init                =        htb_init,
        .reset                =        htb_reset,
        .destroy        =        htb_destroy,
        .dump                =        htb_dump,
        .owner                =        THIS_MODULE,
};

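/* Register/unregister the "htb" qdisc with the packet scheduler core at
 * module load/unload.
 */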
static int __init htb_module_init(void)
{
        return register_qdisc(&htb_qdisc_ops);
}
static void __exit htb_module_exit(void)
{
        unregister_qdisc(&htb_qdisc_ops);
}

module_init(htb_module_init)
module_exit(htb_module_exit)
MODULE_LICENSE("GPL");