Revision 9c6620d8

View differences:

util/sch_htb-ofbuf/Makefile

obj-m = sch_htb.o
KVERSION = $(shell uname -r)
all:
	        make -C /lib/modules/$(KVERSION)/build M=$(PWD) modules
clean:
	        make -C /lib/modules/$(KVERSION)/build M=$(PWD) clean
util/sch_htb-ofbuf/README

Modified sch_htb implementation with ofbuf support.

To compile, just type make.  To use this module instead
of regular sch_htb, do:

0. make
1. rmmod sch_htb
2. insmod ./sch_htb.ko

To revert, just rmmod sch_htb.
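
Since the module registers as the regular "htb" qdisc, existing tc htb setups
work unchanged once it is loaded. A minimal usage sketch (the interface name
and rates below are placeholders, not part of this repository):

    tc qdisc add dev eth0 root handle 1: htb default 10
    tc class add dev eth0 parent 1: classid 1:10 htb rate 10mbit ceil 10mbit
    tc qdisc del dev eth0 root    # tear down before rmmod sch_htb
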
util/sch_htb-ofbuf/sch_htb.c

#define OFBUF (1)
/*
 * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Martin Devera, <devik@cdi.cz>
 *
 * Credits (in time order) for older HTB versions:
 *              Stef Coene <stef.coene@docum.org>
 *			HTB support at LARTC mailing list
 *		Ondrej Kraus, <krauso@barr.cz>
 *			found missing INIT_QDISC(htb)
 *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
 *			helped a lot to locate nasty class stall bug
 *		Andi Kleen, Jamal Hadi, Bert Hubert
 *			code review and helpful comments on shaping
 *		Tomasz Wrona, <tw@eter.tym.pl>
 *			created test case so that I was able to fix nasty bug
 *		Wilfried Weissmann
 *			spotted bug in dequeue code and helped with fix
 *		Jiri Fojtasek
 *			fixed requeue routine
 *		and many others. thanks.
 */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

/* HTB algorithm.
    Author: devik@cdi.cz
    ========================================================================
    HTB is like TBF with multiple classes. It is also similar to CBQ because
    it allows assigning a priority to each class in the hierarchy.
    In fact it is another implementation of Floyd's formal sharing.

    Levels:
    Each class is assigned a level. A leaf ALWAYS has level 0 and root
    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
    one less than their parent.
*/

static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */

#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif

/* Module parameter and sysfs export */
module_param    (htb_hysteresis, int, 0640);
MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");

/* used internally to keep status of single class */
enum htb_cmode {
	HTB_CANT_SEND,		/* class can't send and can't borrow */
	HTB_MAY_BORROW,		/* class can't send but may borrow */
	HTB_CAN_SEND		/* class can send */
};

/* interior & leaf nodes; props specific to leaves are marked L: */
struct htb_class {
	struct Qdisc_class_common common;
	/* general class parameters */
	struct gnet_stats_basic_packed bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_htb_xstats xstats;	/* our special stats */
	int refcnt;		/* usage count of this class */

	/* topology */
	int level;		/* our level (see above) */
	unsigned int children;
	struct htb_class *parent;	/* parent class */

	int prio;		/* these two are used only by leaves... */
	int quantum;		/* but stored for parent-to-leaf return */

	union {
		struct htb_class_leaf {
			struct Qdisc *q;
			int deficit[TC_HTB_MAXDEPTH];
			struct list_head drop_list;
		} leaf;
		struct htb_class_inner {
			struct rb_root feed[TC_HTB_NUMPRIO];	/* feed trees */
			struct rb_node *ptr[TC_HTB_NUMPRIO];	/* current class ptr */
			/* When class changes from state 1->2 and disconnects from
			 * parent's feed then we lost ptr value and start from the
			 * first child again. Here we store classid of the
			 * last valid ptr (used when ptr is NULL).
			 */
			u32 last_ptr_id[TC_HTB_NUMPRIO];
		} inner;
	} un;
	struct rb_node node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
	struct rb_node pq_node;	/* node for event queue */
	psched_time_t pq_key;

	int prio_activity;	/* for which prios are we active */
	enum htb_cmode cmode;	/* current mode of the class */

	/* class attached filters */
	struct tcf_proto *filter_list;
	int filter_cnt;

	/* token bucket parameters */
	struct qdisc_rate_table *rate;	/* rate table of the class itself */
	struct qdisc_rate_table *ceil;	/* ceiling rate (limits borrows too) */
	long buffer, cbuffer;	/* token bucket depth/rate */
	psched_tdiff_t mbuffer;	/* max wait time */
	long tokens, ctokens;	/* current number of tokens */
	psched_time_t t_c;	/* checkpoint time */
};

struct htb_sched {
	struct Qdisc_class_hash clhash;
	struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */

	/* self list - roots of self generating tree */
	struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	int row_mask[TC_HTB_MAXDEPTH];
	struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
	u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];

	/* self wait list - roots of wait PQs per row */
	struct rb_root wait_pq[TC_HTB_MAXDEPTH];

	/* time of nearest event per level (row) */
	psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];

	int defcls;		/* class where unclassified flows go to */

	/* filters for qdisc itself */
	struct tcf_proto *filter_list;

	int rate2quantum;	/* quant = rate / rate2quantum */
	psched_time_t now;	/* cached dequeue time */
	struct qdisc_watchdog watchdog;

	/* non shaped skbs; let them go directly thru */
	struct sk_buff_head direct_queue;
	int direct_qlen;	/* max qlen of above */

	long direct_pkts;

#if OFBUF
	/* overflow buffer */
	struct sk_buff_head ofbuf;
	int ofbuf_queued;	/* # packets queued in above */
#endif

#define HTB_WARN_TOOMANYEVENTS	0x1
	unsigned int warned;	/* only one warning */
	struct work_struct work;
};

/* find class in global hash table using given handle */
static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, handle);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct htb_class, common);
}

/**
 * htb_classify - classify a packet into class
 *
 * It returns NULL if the packet should be dropped or -1 if the packet
 * should be passed directly thru. In all other cases leaf class is returned.
 * We allow direct class selection by classid in priority. Then we examine
 * filters in qdisc and in inner nodes (if higher filter points to the inner
 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
 * internal fifo (direct). These packets then go directly thru. If we still
 * have no valid leaf we try to use MAJOR:default leaf. If still unsuccessful,
 * we finish and return the direct queue.
 */
#define HTB_DIRECT ((struct htb_class *)-1L)

static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct tcf_result res;
	struct tcf_proto *tcf;
	int result;

	/* allow to select class by setting skb->priority to valid classid;
	 * note that nfmark can be used too by attaching filter fw with no
	 * rules in it
	 */
	if (skb->priority == sch->handle)
		return HTB_DIRECT;	/* X:0 (direct flow) selected */
	cl = htb_find(skb->priority, sch);
	if (cl && cl->level == 0)
		return cl;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	tcf = q->filter_list;
	while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		cl = (void *)res.class;
		if (!cl) {
			if (res.classid == sch->handle)
				return HTB_DIRECT;	/* X:0 (direct flow) */
			cl = htb_find(res.classid, sch);
			if (!cl)
				break;	/* filter selected invalid classid */
		}
		if (!cl->level)
			return cl;	/* we hit leaf; return it */

		/* we have got inner class; apply inner filter chain */
		tcf = cl->filter_list;
	}
	/* classification failed; try to use default class */
	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
	if (!cl || cl->level)
		return HTB_DIRECT;	/* bad default .. this is safe bet */
	return cl;
}

/**
 * htb_add_to_id_tree - adds class to the round robin list
 *
 * Routine adds class to the list (actually tree) sorted by classid.
 * Make sure that class is not already on such list for given prio.
 */
static void htb_add_to_id_tree(struct rb_root *root,
			       struct htb_class *cl, int prio)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, node[prio]);

		if (cl->common.classid > c->common.classid)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->node[prio], parent, p);
	rb_insert_color(&cl->node[prio], root);
}

/**
 * htb_add_to_wait_tree - adds class to the event queue with delay
 *
 * The class is added to priority event queue to indicate that class will
 * change its mode in cl->pq_key microseconds. Make sure that class is not
 * already in the queue.
 */
static void htb_add_to_wait_tree(struct htb_sched *q,
				 struct htb_class *cl, long delay)
{
	struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;

	cl->pq_key = q->now + delay;
	if (cl->pq_key == q->now)
		cl->pq_key++;

	/* update the nearest event cache */
	if (q->near_ev_cache[cl->level] > cl->pq_key)
		q->near_ev_cache[cl->level] = cl->pq_key;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, pq_node);
		if (cl->pq_key >= c->pq_key)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->pq_node, parent, p);
	rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
}

/**
 * htb_next_rb_node - finds next node in binary tree
 *
 * When we are past last key we return NULL.
 * Average complexity is 2 steps per call.
 */
static inline void htb_next_rb_node(struct rb_node **n)
{
	*n = rb_next(*n);
}

/**
 * htb_add_class_to_row - add class to its row
 *
 * The class is added to row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_add_class_to_row(struct htb_sched *q,
					struct htb_class *cl, int mask)
{
	q->row_mask[cl->level] |= mask;
	while (mask) {
		int prio = ffz(~mask);
		mask &= ~(1 << prio);
		htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
	}
}

/* If this triggers, it is a bug in this code, but it need not be fatal */
static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
{
	if (RB_EMPTY_NODE(rb)) {
		WARN_ON(1);
	} else {
		rb_erase(rb, root);
		RB_CLEAR_NODE(rb);
	}
}


/**
 * htb_remove_class_from_row - removes class from its row
 *
 * The class is removed from row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_remove_class_from_row(struct htb_sched *q,
						 struct htb_class *cl, int mask)
{
	int m = 0;

	while (mask) {
		int prio = ffz(~mask);

		mask &= ~(1 << prio);
		if (q->ptr[cl->level][prio] == cl->node + prio)
			htb_next_rb_node(q->ptr[cl->level] + prio);

		htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
		if (!q->row[cl->level][prio].rb_node)
			m |= 1 << prio;
	}
	q->row_mask[cl->level] &= ~m;
}

/**
 * htb_activate_prios - creates active class's feed chain
 *
 * The class is connected to ancestors and/or appropriate rows
 * for priorities it is participating on. cl->cmode must be new
 * (activated) mode. It does nothing if cl->prio_activity == 0.
 */
static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.feed[prio].rb_node)
				/* parent already has its feed in use so that
				 * reset bit in mask as parent is already ok
				 */
				mask &= ~(1 << prio);

			htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
		}
		p->prio_activity |= mask;
		cl = p;
		p = cl->parent;

	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_add_class_to_row(q, cl, mask);
}

/**
 * htb_deactivate_prios - remove class from feed chain
 *
 * cl->cmode must represent old mode (before deactivation). It does
 * nothing if cl->prio_activity == 0. Class is removed from all feed
 * chains and rows.
 */
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		mask = 0;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->un.inner.ptr[prio] == cl->node + prio) {
				/* we are removing child which is pointed to from
				 * parent feed - forget the pointer but remember
				 * classid
				 */
				p->un.inner.last_ptr_id[prio] = cl->common.classid;
				p->un.inner.ptr[prio] = NULL;
			}

			htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);

			if (!p->un.inner.feed[prio].rb_node)
				mask |= 1 << prio;
		}

		p->prio_activity &= ~mask;
		cl = p;
		p = cl->parent;

	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_remove_class_from_row(q, cl, mask);
}

static inline long htb_lowater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
	else
		return 0;
}
static inline long htb_hiwater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
	else
		return 0;
}


/**
 * htb_class_mode - computes and returns current class mode
 *
 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
 * from now to time when cl will change its state.
 * Also it is worth to note that class mode doesn't change simply
 * at cl->{c,}tokens == 0 but there can rather be hysteresis of
 * 0 .. -cl->{c,}buffer range. It is meant to limit number of
 * mode transitions per time unit. The speed gain is about 1/6.
 */
static inline enum htb_cmode
htb_class_mode(struct htb_class *cl, long *diff)
{
	long toks;

	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
		*diff = -toks;
		return HTB_CANT_SEND;
	}

	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
		return HTB_CAN_SEND;

	*diff = -toks;
	return HTB_MAY_BORROW;
}

/**
 * htb_change_class_mode - changes class's mode
 *
 * This should be the only way how to change class's mode under normal
 * circumstances. Routine will update feed lists linkage, change mode
 * and add class to the wait event queue if appropriate. New mode should
 * be different from old one and cl->pq_key has to be valid if changing
 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
{
	enum htb_cmode new_mode = htb_class_mode(cl, diff);

	if (new_mode == cl->cmode)
		return;

	if (cl->prio_activity) {	/* not necessary: speed optimization */
		if (cl->cmode != HTB_CANT_SEND)
			htb_deactivate_prios(q, cl);
		cl->cmode = new_mode;
		if (new_mode != HTB_CANT_SEND)
			htb_activate_prios(q, cl);
	} else
		cl->cmode = new_mode;
}

/**
 * htb_activate - inserts leaf cl into appropriate active feeds
 *
 * Routine learns (new) priority of leaf and activates feed chain
 * for the prio. It can be called on already active leaf safely.
 * It also adds leaf into droplist.
 */
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);

	if (!cl->prio_activity) {
		cl->prio_activity = 1 << cl->prio;
		htb_activate_prios(q, cl);
		list_add_tail(&cl->un.leaf.drop_list,
			      q->drops + cl->prio);
	}
}

/**
 * htb_deactivate - remove leaf cl from active feeds
 *
 * Make sure that leaf is active. In other words it can't be called
 * with non-active leaf. It also removes class from the drop list.
 */
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(!cl->prio_activity);

	htb_deactivate_prios(q, cl);
	cl->prio_activity = 0;
	list_del_init(&cl->un.leaf.drop_list);
}

static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	int uninitialized_var(ret);
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = htb_classify(skb, sch, &ret);

#if OFBUF
    if(cl != HTB_DIRECT && cl)
        skb_get(skb);
#endif
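	/* Editor's note on the OFBUF path above: skb_get() takes an extra
	 * reference so the skb stays valid even if qdisc_enqueue() below fails
	 * and releases its own reference; the failed packet can then be parked
	 * in q->ofbuf instead of being freed.  On a successful enqueue the
	 * extra reference is dropped again with kfree_skb() further down.
	 */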

	if (cl == HTB_DIRECT) {
		/* enqueue to helper queue */
		if (q->direct_queue.qlen < q->direct_qlen) {
			__skb_queue_tail(&q->direct_queue, skb);
			q->direct_pkts++;
		} else {
			kfree_skb(skb);
			sch->qstats.drops++;
			return NET_XMIT_DROP;
		}
#ifdef CONFIG_NET_CLS_ACT
	} else if (!cl) {
		if (ret & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
#endif
	} else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
        /* We shouldn't drop this, but enqueue it into ofbuf */
        // TODO: is skb actually valid?
        // Ans: looks like qdisc_enqueue will end up freeing the packet
        // if enqueue failed.  So we should incr refcnt before calling qdisc_enqueue...
#if OFBUF
        __skb_queue_tail(&q->ofbuf, skb);
        q->ofbuf_queued++;
#else
		if (net_xmit_drop_count(ret)) {
			sch->qstats.drops++;
			cl->qstats.drops++;
		}
		return ret;
#endif
	} else {
		bstats_update(&cl->bstats, skb);
		htb_activate(q, cl);
#if OFBUF
        kfree_skb(skb);
#endif
	}

	sch->q.qlen++;
	return NET_XMIT_SUCCESS;
}

static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, long diff)
{
	long toks = diff + cl->tokens;

	if (toks > cl->buffer)
		toks = cl->buffer;
	toks -= (long) qdisc_l2t(cl->rate, bytes);
	if (toks <= -cl->mbuffer)
		toks = 1 - cl->mbuffer;

	cl->tokens = toks;
}

static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, long diff)
{
	long toks = diff + cl->ctokens;

	if (toks > cl->cbuffer)
		toks = cl->cbuffer;
	toks -= (long) qdisc_l2t(cl->ceil, bytes);
	if (toks <= -cl->mbuffer)
		toks = 1 - cl->mbuffer;

	cl->ctokens = toks;
}

/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 *
 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 * leaf and all ancestors and to rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use more precise clock than event queue here.
 * In such case we remove class from event queue first.
 */
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
			     int level, struct sk_buff *skb)
{
	int bytes = qdisc_pkt_len(skb);
	enum htb_cmode old_mode;
	long diff;

	while (cl) {
		diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
		if (cl->level >= level) {
			if (cl->level == level)
				cl->xstats.lends++;
			htb_accnt_tokens(cl, bytes, diff);
		} else {
			cl->xstats.borrows++;
			cl->tokens += diff;	/* we moved t_c; update tokens */
		}
		htb_accnt_ctokens(cl, bytes, diff);
		cl->t_c = q->now;

		old_mode = cl->cmode;
		diff = 0;
		htb_change_class_mode(q, cl, &diff);
		if (old_mode != cl->cmode) {
			if (old_mode != HTB_CAN_SEND)
				htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
			if (cl->cmode != HTB_CAN_SEND)
				htb_add_to_wait_tree(q, cl, diff);
		}

		/* update basic stats except for leaves which are already updated */
		if (cl->level)
			bstats_update(&cl->bstats, skb);

		cl = cl->parent;
	}
}

/**
 * htb_do_events - make mode changes to classes at the level
 *
 * Scans event queue for pending events and applies them. Returns time of
 * next pending event (0 for no event in pq, q->now for too many events).
 * Note: Applied are events whose cl->pq_key <= q->now.
 */
static psched_time_t htb_do_events(struct htb_sched *q, int level,
				   unsigned long start)
{
	/* don't run for longer than 2 jiffies; 2 is used instead of
	 * 1 to simplify things when jiffy is going to be incremented
	 * too soon
	 */
	unsigned long stop_at = start + 2;
	while (time_before(jiffies, stop_at)) {
		struct htb_class *cl;
		long diff;
		struct rb_node *p = rb_first(&q->wait_pq[level]);

		if (!p)
			return 0;

		cl = rb_entry(p, struct htb_class, pq_node);
		if (cl->pq_key > q->now)
			return cl->pq_key;

		htb_safe_rb_erase(p, q->wait_pq + level);
		diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
		htb_change_class_mode(q, cl, &diff);
		if (cl->cmode != HTB_CAN_SEND)
			htb_add_to_wait_tree(q, cl, diff);
	}

	/* too much load - let's continue after a break for scheduling */
	if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
		pr_warning("htb: too many events!\n");
		q->warned |= HTB_WARN_TOOMANYEVENTS;
	}

	return q->now;
}

/* Returns class->node+prio from id-tree where class's id is >= id. NULL
 * if no such one exists.
 */
static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
					      u32 id)
{
	struct rb_node *r = NULL;
	while (n) {
		struct htb_class *cl =
		    rb_entry(n, struct htb_class, node[prio]);

		if (id > cl->common.classid) {
			n = n->rb_right;
		} else if (id < cl->common.classid) {
			r = n;
			n = n->rb_left;
		} else {
			return n;
		}
	}
	return r;
}

/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 *
 * Find leaf where current feed pointers points to.
 */
static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
					 struct rb_node **pptr, u32 * pid)
{
	int i;
	struct {
		struct rb_node *root;
		struct rb_node **pptr;
		u32 *pid;
	} stk[TC_HTB_MAXDEPTH], *sp = stk;

	BUG_ON(!tree->rb_node);
	sp->root = tree->rb_node;
	sp->pptr = pptr;
	sp->pid = pid;

	for (i = 0; i < 65535; i++) {
		if (!*sp->pptr && *sp->pid) {
			/* ptr was invalidated but id is valid - try to recover
			 * the original or next ptr
			 */
			*sp->pptr =
			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
		}
		*sp->pid = 0;	/* ptr is valid now so that remove this hint as it
				 * can become out of date quickly
				 */
		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
			*sp->pptr = sp->root;
			while ((*sp->pptr)->rb_left)
				*sp->pptr = (*sp->pptr)->rb_left;
			if (sp > stk) {
				sp--;
				if (!*sp->pptr) {
					WARN_ON(1);
					return NULL;
				}
				htb_next_rb_node(sp->pptr);
			}
		} else {
			struct htb_class *cl;
			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
			if (!cl->level)
				return cl;
			(++sp)->root = cl->un.inner.feed[prio].rb_node;
			sp->pptr = cl->un.inner.ptr + prio;
			sp->pid = cl->un.inner.last_ptr_id + prio;
		}
	}
	WARN_ON(1);
	return NULL;
}

/* dequeues packet at given priority and level; call only if
 * you are sure that there is active class at prio/level
 */
static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
					int level)
{
	struct sk_buff *skb = NULL;
	struct htb_class *cl, *start;
	/* look initial class up in the row */
	start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
				     q->ptr[level] + prio,
				     q->last_ptr_id[level] + prio);

	do {
next:
		if (unlikely(!cl))
			return NULL;

		/* class can be empty - it is unlikely but can be true if leaf
		 * qdisc drops packets in enqueue routine or if someone used
		 * graft operation on the leaf since last dequeue;
		 * simply deactivate and skip such class
		 */
		if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
			struct htb_class *next;
			htb_deactivate(q, cl);

			/* row/level might become empty */
			if ((q->row_mask[level] & (1 << prio)) == 0)
				return NULL;

			next = htb_lookup_leaf(q->row[level] + prio,
					       prio, q->ptr[level] + prio,
					       q->last_ptr_id[level] + prio);

			if (cl == start)	/* fix start if we just deleted it */
				start = next;
			cl = next;
			goto next;
		}

		skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
		if (likely(skb != NULL))
			break;

		qdisc_warn_nonwc("htb", cl->un.leaf.q);
		htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
				  ptr[0]) + prio);
		cl = htb_lookup_leaf(q->row[level] + prio, prio,
				     q->ptr[level] + prio,
				     q->last_ptr_id[level] + prio);

	} while (cl != start);

	if (likely(skb != NULL)) {
		cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
		if (cl->un.leaf.deficit[level] < 0) {
			cl->un.leaf.deficit[level] += cl->quantum;
			htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
					  ptr[0]) + prio);
		}
		/* this used to be after charge_class but this constellation
		 * gives us slightly better performance
		 */
		if (!cl->un.leaf.q->q.qlen)
			htb_deactivate(q, cl);
		htb_charge_class(q, cl, level, skb);
	}
	return skb;
}

static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct htb_sched *q = qdisc_priv(sch);
	int level;
	psched_time_t next_event;
	unsigned long start_at;
    u32 r, i;
    struct sk_buff *pkt;

	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
	skb = __skb_dequeue(&q->direct_queue);
	if (skb != NULL) {
ok:
		qdisc_bstats_update(sch, skb);
		qdisc_unthrottled(sch);
		sch->q.qlen--;
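		/* Editor's note: the block below drains the overflow buffer each
		 * time a packet is successfully dequeued (the ok: path): one
		 * buffered packet is picked at random and re-enqueued via
		 * htb_enqueue(), and the remaining ones are dropped.
		 */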
#if OFBUF
        if(q->ofbuf_queued > 0) {
            i = 0;
            r = net_random() % q->ofbuf_queued;
            // enqueue the rth packet and drop the rest
            while((pkt = __skb_dequeue(&q->ofbuf)) != NULL) {
                if(i == r) {
                    // the chosen one
                    htb_enqueue(pkt, sch);
                } else {
                    kfree_skb(pkt);
                }
                i++;
            }
            q->ofbuf_queued = 0;
        }
#endif
		return skb;
	}

	if (!sch->q.qlen)
		goto fin;
	q->now = psched_get_time();
	start_at = jiffies;

	next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;

	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
		/* common case optimization - skip event handler quickly */
		int m;
		psched_time_t event;

		if (q->now >= q->near_ev_cache[level]) {
			event = htb_do_events(q, level, start_at);
			if (!event)
				event = q->now + PSCHED_TICKS_PER_SEC;
			q->near_ev_cache[level] = event;
		} else
			event = q->near_ev_cache[level];

		if (next_event > event)
			next_event = event;

		m = ~q->row_mask[level];
		while (m != (int)(-1)) {
			int prio = ffz(m);

			m |= 1 << prio;
			skb = htb_dequeue_tree(q, prio, level);
			if (likely(skb != NULL))
				goto ok;
		}
	}
	sch->qstats.overlimits++;
	if (likely(next_event > q->now))
		qdisc_watchdog_schedule(&q->watchdog, next_event);
	else
		schedule_work(&q->work);
fin:
	return skb;
}

/* try to drop from each class (by prio) until one succeeds */
static unsigned int htb_drop(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	int prio;

	for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
		struct list_head *p;
		list_for_each(p, q->drops + prio) {
			struct htb_class *cl = list_entry(p, struct htb_class,
							  un.leaf.drop_list);
			unsigned int len;
			if (cl->un.leaf.q->ops->drop &&
			    (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
				sch->q.qlen--;
				if (!cl->un.leaf.q->q.qlen)
					htb_deactivate(q, cl);
				return len;
			}
		}
	}
	return 0;
}

/* reset all classes */
/* always called under BH & queue lock */
static void htb_reset(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct hlist_node *n;
	unsigned int i;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
			if (cl->level)
				memset(&cl->un.inner, 0, sizeof(cl->un.inner));
			else {
				if (cl->un.leaf.q)
					qdisc_reset(cl->un.leaf.q);
				INIT_LIST_HEAD(&cl->un.leaf.drop_list);
			}
			cl->prio_activity = 0;
			cl->cmode = HTB_CAN_SEND;

		}
	}
	qdisc_watchdog_cancel(&q->watchdog);
	__skb_queue_purge(&q->direct_queue);
	sch->q.qlen = 0;
#if OFBUF
    __skb_queue_purge(&q->ofbuf);
    q->ofbuf_queued = 0;
#endif
	memset(q->row, 0, sizeof(q->row));
	memset(q->row_mask, 0, sizeof(q->row_mask));
	memset(q->wait_pq, 0, sizeof(q->wait_pq));
	memset(q->ptr, 0, sizeof(q->ptr));
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);
}

static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
	[TCA_HTB_PARMS]	= { .len = sizeof(struct tc_htb_opt) },
	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },
	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
};

static void htb_work_func(struct work_struct *work)
{
	struct htb_sched *q = container_of(work, struct htb_sched, work);
	struct Qdisc *sch = q->watchdog.qdisc;

	__netif_schedule(qdisc_root(sch));
}

static int htb_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_HTB_INIT + 1];
	struct tc_htb_glob *gopt;
	int err;
	int i;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
	if (err < 0)
		return err;

	if (tb[TCA_HTB_INIT] == NULL) {
		pr_err("HTB: hey probably you have bad tc tool ?\n");
		return -EINVAL;
	}
	gopt = nla_data(tb[TCA_HTB_INIT]);
	if (gopt->version != HTB_VER >> 16) {
		pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n",
		       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
		return -EINVAL;
	}

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		return err;
	for (i = 0; i < TC_HTB_NUMPRIO; i++)
		INIT_LIST_HEAD(q->drops + i);

	qdisc_watchdog_init(&q->watchdog, sch);
	INIT_WORK(&q->work, htb_work_func);
	skb_queue_head_init(&q->direct_queue);

#if OFBUF
    skb_queue_head_init(&q->ofbuf);
    q->ofbuf_queued = 0;
#endif

	q->direct_qlen = qdisc_dev(sch)->tx_queue_len;

	if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */
		q->direct_qlen = 2;

	if ((q->rate2quantum = gopt->rate2quantum) < 1)
		q->rate2quantum = 1;
	q->defcls = gopt->defcls;

	return 0;
}

static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_htb_glob gopt;

	spin_lock_bh(root_lock);

	gopt.direct_pkts = q->direct_pkts;
	gopt.version = HTB_VER;
	gopt.rate2quantum = q->rate2quantum;
	gopt.defcls = q->defcls;
	gopt.debug = 0;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
	nla_nest_end(skb, nest);

	spin_unlock_bh(root_lock);
	return skb->len;

nla_put_failure:
	spin_unlock_bh(root_lock);
	nla_nest_cancel(skb, nest);
	return -1;
}

static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct htb_class *cl = (struct htb_class *)arg;
	spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
	struct nlattr *nest;
	struct tc_htb_opt opt;

	spin_lock_bh(root_lock);
	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	if (!cl->level && cl->un.leaf.q)
		tcm->tcm_info = cl->un.leaf.q->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	memset(&opt, 0, sizeof(opt));

	opt.rate = cl->rate->rate;
	opt.buffer = cl->buffer;
	opt.ceil = cl->ceil->rate;
	opt.cbuffer = cl->cbuffer;
	opt.quantum = cl->quantum;
	opt.prio = cl->prio;
	opt.level = cl->level;
	NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);

	nla_nest_end(skb, nest);
	spin_unlock_bh(root_lock);
	return skb->len;

nla_put_failure:
	spin_unlock_bh(root_lock);
	nla_nest_cancel(skb, nest);
	return -1;
}

static int
htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (!cl->level && cl->un.leaf.q)
		cl->qstats.qlen = cl->un.leaf.q->q.qlen;
	cl->xstats.tokens = cl->tokens;
	cl->xstats.ctokens = cl->ctokens;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl->level)
		return -EINVAL;
	if (new == NULL &&
	    (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
				     cl->common.classid)) == NULL)
		return -ENOBUFS;

	sch_tree_lock(sch);
	*old = cl->un.leaf.q;
	cl->un.leaf.q = new;
	if (*old != NULL) {
		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
		qdisc_reset(*old);
	}
	sch_tree_unlock(sch);
	return 0;
}

static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;
	return !cl->level ? cl->un.leaf.q : NULL;
}

static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl->un.leaf.q->q.qlen == 0)
		htb_deactivate(qdisc_priv(sch), cl);
}

static unsigned long htb_get(struct Qdisc *sch, u32 classid)
{
	struct htb_class *cl = htb_find(classid, sch);
	if (cl)
		cl->refcnt++;
	return (unsigned long)cl;
}

static inline int htb_parent_last_child(struct htb_class *cl)
{
	if (!cl->parent)
		/* the root class */
		return 0;
	if (cl->parent->children > 1)
		/* not the last child */
		return 0;
	return 1;
}

static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
			       struct Qdisc *new_q)
{
	struct htb_class *parent = cl->parent;

	WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);

	if (parent->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);

	parent->level = 0;
	memset(&parent->un.inner, 0, sizeof(parent->un.inner));
	INIT_LIST_HEAD(&parent->un.leaf.drop_list);
	parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
	parent->tokens = parent->buffer;
	parent->ctokens = parent->cbuffer;
	parent->t_c = psched_get_time();
	parent->cmode = HTB_CAN_SEND;
}

static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
	if (!cl->level) {
		WARN_ON(!cl->un.leaf.q);
		qdisc_destroy(cl->un.leaf.q);
	}
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	qdisc_put_rtab(cl->rate);
	qdisc_put_rtab(cl->ceil);

	tcf_destroy_chain(&cl->filter_list);
	kfree(cl);
}

static void htb_destroy(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct hlist_node *n, *next;
	struct htb_class *cl;
	unsigned int i;

	cancel_work_sync(&q->work);
	qdisc_watchdog_cancel(&q->watchdog);
	/* This line used to be after htb_destroy_class call below
	 * and surprisingly it worked in 2.4. But it must precede it
	 * because filter need its target class alive to be able to call
	 * unbind_filter on it (without Oops).
	 */
	tcf_destroy_chain(&q->filter_list);

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
			tcf_destroy_chain(&cl->filter_list);
	}
	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
					  common.hnode)
			htb_destroy_class(sch, cl);
	}
	qdisc_class_hash_destroy(&q->clhash);
	__skb_queue_purge(&q->direct_queue);
#if OFBUF
    __skb_queue_purge(&q->ofbuf);
    q->ofbuf_queued = 0;
#endif
}

static int htb_delete(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	unsigned int qlen;
	struct Qdisc *new_q = NULL;
	int last_child = 0;

	// TODO: why don't allow to delete subtree ? references ? does
	// tc subsys guarantee us that in htb_destroy it holds no class
	// refs so that we can remove children safely there ?
	if (cl->children || cl->filter_cnt)
		return -EBUSY;

	if (!cl->level && htb_parent_last_child(cl)) {
		new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
					  cl->parent->common.classid);
		last_child = 1;
	}

	sch_tree_lock(sch);

	if (!cl->level) {
		qlen = cl->un.leaf.q->q.qlen;
		qdisc_reset(cl->un.leaf.q);
		qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
	}

	/* delete from hash and active; remainder in destroy_class */
	qdisc_class_hash_remove(&q->clhash, &cl->common);
	if (cl->parent)
		cl->parent->children--;

	if (cl->prio_activity)
		htb_deactivate(q, cl);

	if (cl->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);

	if (last_child)
		htb_parent_to_leaf(q, cl, new_q);

	BUG_ON(--cl->refcnt == 0);
	/*
	 * This shouldn't happen: we "hold" one cops->get() when called
	 * from tc_ctl_tclass; the destroy method is done from cops->put().
	 */

	sch_tree_unlock(sch);
	return 0;
}

static void htb_put(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (--cl->refcnt == 0)
		htb_destroy_class(sch, cl);
}

static int htb_change_class(struct Qdisc *sch, u32 classid,
			    u32 parentid, struct nlattr **tca,
			    unsigned long *arg)
{
	int err = -EINVAL;
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)*arg, *parent;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
	struct nlattr *tb[__TCA_HTB_MAX];
	struct tc_htb_opt *hopt;

	/* extract all subattrs from opt attr */
	if (!opt)
		goto failure;

	err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
	if (err < 0)
		goto failure;

	err = -EINVAL;
	if (tb[TCA_HTB_PARMS] == NULL)
		goto failure;

	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);

	hopt = nla_data(tb[TCA_HTB_PARMS]);

	rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
	ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
	if (!rtab || !ctab)
		goto failure;

	if (!cl) {		/* new class */
		struct Qdisc *new_q;
		int prio;
		struct {
			struct nlattr		nla;
			struct gnet_estimator	opt;
		} est = {
			.nla = {
				.nla_len	= nla_attr_size(sizeof(est.opt)),
				.nla_type	= TCA_RATE,
			},
			.opt = {
				/* 4s interval, 16s averaging constant */
				.interval	= 2,
				.ewma_log	= 2,
			},
		};

		/* check for valid classid */
		if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
		    htb_find(classid, sch))
			goto failure;

		/* check maximal depth */
		if (parent && parent->parent && parent->parent->level < 2) {
			pr_err("htb: tree is too deep\n");
			goto failure;
		}
		err = -ENOBUFS;
		cl = kzalloc(sizeof(*cl), GFP_KERNEL);
		if (!cl)
			goto failure;

		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
					qdisc_root_sleeping_lock(sch),
					tca[TCA_RATE] ? : &est.nla);
		if (err) {
			kfree(cl);
			goto failure;
		}

		cl->refcnt = 1;
		cl->children = 0;
		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
		RB_CLEAR_NODE(&cl->pq_node);

		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
			RB_CLEAR_NODE(&cl->node[prio]);

		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
		 * so that can't be used inside of sch_tree_lock
		 * -- thanks to Karlis Peisenieks
		 */
		new_q = qdisc_create_dflt(sch->dev_queue,
					  &pfifo_qdisc_ops, classid);
		sch_tree_lock(sch);
		if (parent && !parent->level) {
			unsigned int qlen = parent->un.leaf.q->q.qlen;

			/* turn parent into inner node */
			qdisc_reset(parent->un.leaf.q);
			qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
			qdisc_destroy(parent->un.leaf.q);
			if (parent->prio_activity)
				htb_deactivate(q, parent);

			/* remove from evt list because of level change */
			if (parent->cmode != HTB_CAN_SEND) {
				htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
				parent->cmode = HTB_CAN_SEND;
... This diff was truncated because it exceeds the maximum size that can be displayed.
