提交 67159f1c 编写于 作者: M Michal Privoznik

bandwidth: Create hierarchical shaping classes

These classes can borrow unused bandwidth. Basically,
only egress qdiscs can have classes, therefore we can
do this kind of traffic shaping only on the host's outgoing
traffic, that is, the domain's incoming traffic.
上级 ec6474b2
...@@ -341,7 +341,8 @@ static int virLXCProcessSetupInterfaceBridged(virConnectPtr conn, ...@@ -341,7 +341,8 @@ static int virLXCProcessSetupInterfaceBridged(virConnectPtr conn,
goto cleanup; goto cleanup;
if (virNetDevBandwidthSet(net->ifname, if (virNetDevBandwidthSet(net->ifname,
virDomainNetGetActualBandwidth(net)) < 0) { virDomainNetGetActualBandwidth(net),
false) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR, virReportError(VIR_ERR_INTERNAL_ERROR,
_("cannot set bandwidth limits on %s"), _("cannot set bandwidth limits on %s"),
net->ifname); net->ifname);
......
...@@ -2459,7 +2459,8 @@ networkStartNetworkVirtual(struct network_driver *driver, ...@@ -2459,7 +2459,8 @@ networkStartNetworkVirtual(struct network_driver *driver,
VIR_FORCE_CLOSE(tapfd); VIR_FORCE_CLOSE(tapfd);
} }
if (virNetDevBandwidthSet(network->def->bridge, network->def->bandwidth) < 0) { if (virNetDevBandwidthSet(network->def->bridge,
network->def->bandwidth, true) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR, virReportError(VIR_ERR_INTERNAL_ERROR,
_("cannot set bandwidth limits on %s"), _("cannot set bandwidth limits on %s"),
network->def->bridge); network->def->bridge);
......
...@@ -292,7 +292,8 @@ qemuNetworkIfaceConnect(virDomainDefPtr def, ...@@ -292,7 +292,8 @@ qemuNetworkIfaceConnect(virDomainDefPtr def,
if (tapfd >= 0 && if (tapfd >= 0 &&
virNetDevBandwidthSet(net->ifname, virNetDevBandwidthSet(net->ifname,
virDomainNetGetActualBandwidth(net)) < 0) { virDomainNetGetActualBandwidth(net),
false) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR, virReportError(VIR_ERR_INTERNAL_ERROR,
_("cannot set bandwidth limits on %s"), _("cannot set bandwidth limits on %s"),
net->ifname); net->ifname);
......
...@@ -9034,7 +9034,7 @@ qemuDomainSetInterfaceParameters(virDomainPtr dom, ...@@ -9034,7 +9034,7 @@ qemuDomainSetInterfaceParameters(virDomainPtr dom,
sizeof(*newBandwidth->out)); sizeof(*newBandwidth->out));
} }
if (virNetDevBandwidthSet(net->ifname, newBandwidth) < 0) { if (virNetDevBandwidthSet(net->ifname, newBandwidth, false) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR, virReportError(VIR_ERR_INTERNAL_ERROR,
_("cannot set bandwidth limits on %s"), _("cannot set bandwidth limits on %s"),
device); device);
......
...@@ -45,17 +45,21 @@ virNetDevBandwidthFree(virNetDevBandwidthPtr def) ...@@ -45,17 +45,21 @@ virNetDevBandwidthFree(virNetDevBandwidthPtr def)
* virNetDevBandwidthSet: * virNetDevBandwidthSet:
* @ifname: on which interface * @ifname: on which interface
* @bandwidth: rates to set (may be NULL) * @bandwidth: rates to set (may be NULL)
* @hierarchical_class: whether to create hierarchical class
* *
* This function enables QoS on specified interface * This function enables QoS on specified interface
* and set given traffic limits for both, incoming * and set given traffic limits for both, incoming
* and outgoing traffic. Any previous setting get * and outgoing traffic. Any previous setting get
* overwritten. * overwritten. If @hierarchical_class is TRUE, create
* hierarchical class. It is used to guarantee minimal
* throughput ('floor' attribute in NIC).
* *
* Return 0 on success, -1 otherwise. * Return 0 on success, -1 otherwise.
*/ */
int int
virNetDevBandwidthSet(const char *ifname, virNetDevBandwidthSet(const char *ifname,
virNetDevBandwidthPtr bandwidth) virNetDevBandwidthPtr bandwidth,
bool hierarchical_class)
{ {
int ret = -1; int ret = -1;
virCommandPtr cmd = NULL; virCommandPtr cmd = NULL;
...@@ -71,7 +75,7 @@ virNetDevBandwidthSet(const char *ifname, ...@@ -71,7 +75,7 @@ virNetDevBandwidthSet(const char *ifname,
virNetDevBandwidthClear(ifname); virNetDevBandwidthClear(ifname);
if (bandwidth->in) { if (bandwidth->in && bandwidth->in->average) {
if (virAsprintf(&average, "%llukbps", bandwidth->in->average) < 0) if (virAsprintf(&average, "%llukbps", bandwidth->in->average) < 0)
goto cleanup; goto cleanup;
if (bandwidth->in->peak && if (bandwidth->in->peak &&
...@@ -83,15 +87,89 @@ virNetDevBandwidthSet(const char *ifname, ...@@ -83,15 +87,89 @@ virNetDevBandwidthSet(const char *ifname,
cmd = virCommandNew(TC); cmd = virCommandNew(TC);
virCommandAddArgList(cmd, "qdisc", "add", "dev", ifname, "root", virCommandAddArgList(cmd, "qdisc", "add", "dev", ifname, "root",
"handle", "1:", "htb", "default", "1", NULL); "handle", "1:", "htb", "default",
hierarchical_class ? "2" : "1", NULL);
if (virCommandRun(cmd, NULL) < 0) if (virCommandRun(cmd, NULL) < 0)
goto cleanup; goto cleanup;
/* If we are creating a hierarchical class, all non guaranteed traffic
* goes to the 1:2 class which will adjust 'rate' dynamically as NICs
* with guaranteed throughput are plugged and unplugged. Class 1:1
* exists so we don't exceed the maximum limit for the network. For each
* NIC with guaranteed throughput a separate classid will be created.
* NB '1:' is just a shorter notation of '1:0'.
*
* To get a picture how this works:
*
* +-----+ +---------+ +-----------+ +-----------+ +-----+
* | | | qdisc | | class 1:1 | | class 1:2 | | |
* | NIC | | def 1:2 | | rate | | rate | | sfq |
* | | --> | | --> | peak | -+-> | peak | --> | |
* +-----+ +---------+ +-----------+ | +-----------+ +-----+
* |
* | +-----------+ +-----+
* | | class 1:3 | | |
* | | rate | | sfq |
* +-> | peak | --> | |
* | +-----------+ +-----+
* ...
* | +-----------+ +-----+
* | | class 1:n | | |
* | | rate | | sfq |
* +-> | peak | --> | |
* +-----------+ +-----+
*
* After the routing decision, when it is clear a packet is to be sent
* via a particular NIC, it is sent to the root qdisc (queueing
* discipline). In this case HTB (Hierarchical Token Bucket). It has
* only one direct child class (with id 1:1) which shapes the overall
* rate that is sent through the NIC. This class has at least one child
* (1:2) which is meant for all non-privileged (non guaranteed) traffic
* from all domains. Then, for each interface with guaranteed
* throughput, a separate class (1:n) is created. Imagine a class is a
* box. Whenever a packet ends up in a class it is stored in this box
* until the kernel sends it, then it is removed from box. Packets are
* placed into boxes based on rules (filters) - e.g. depending on
* destination IP/MAC address. If there is no rule to be applied, the
* root qdisc has a default where such packets go (1:2 in this case).
* Packets come in over and over again and boxes get filled more and
* more. Imagine that kernel sends packets just once a second. So it
* starts to traverse through this tree. It starts with the root qdisc
* and through 1:1 it gets to 1:2. It sends packets up to 1:2's 'rate'.
* Then it moves to 1:3 and again sends packets up to 1:3's 'rate'. The
* whole process is repeated until 1:n is processed. So now we have
* ensured each class its guaranteed bandwidth. If the sum of sent data
* doesn't exceed the 'rate' in 1:1 class, we can go further and send
* more packets. The rest of available bandwidth is distributed to the
* 1:2,1:3...1:n classes by ratio of their 'rate'. As soon as the root
* 'rate' limit is reached or there are no more packets to send, we stop
* sending and wait another second. Each class has an SFQ qdisc which
* shuffles packets in boxes stochastically, so one sender cannot
* starve others.
*
* Therefore, whenever we want to plug in a new guaranteed interface, we
* need to create a new class and adjust the 'rate' of the 1:2 class.
* When unplugging we do the exact opposite - remove the associated
* class, and adjust the 'rate'.
*
* This description is rather long, but it is still a good idea to read
* it before you dig into the code.
*/
if (hierarchical_class) {
virCommandFree(cmd);
cmd = virCommandNew(TC);
virCommandAddArgList(cmd, "class", "add", "dev", ifname, "parent",
"1:", "classid", "1:1", "htb", "rate", average,
"ceil", peak ? peak : average, NULL);
if (virCommandRun(cmd, NULL) < 0)
goto cleanup;
}
virCommandFree(cmd); virCommandFree(cmd);
cmd = virCommandNew(TC); cmd = virCommandNew(TC);
virCommandAddArgList(cmd,"class", "add", "dev", ifname, "parent", virCommandAddArgList(cmd,"class", "add", "dev", ifname, "parent",
"1:", "classid", "1:1", "htb", NULL); hierarchical_class ? "1:1" : "1:", "classid",
virCommandAddArgList(cmd, "rate", average, NULL); hierarchical_class ? "1:2" : "1:1", "htb",
"rate", average, NULL);
if (peak) if (peak)
virCommandAddArgList(cmd, "ceil", peak, NULL); virCommandAddArgList(cmd, "ceil", peak, NULL);
...@@ -104,7 +182,8 @@ virNetDevBandwidthSet(const char *ifname, ...@@ -104,7 +182,8 @@ virNetDevBandwidthSet(const char *ifname,
virCommandFree(cmd); virCommandFree(cmd);
cmd = virCommandNew(TC); cmd = virCommandNew(TC);
virCommandAddArgList(cmd, "qdisc", "add", "dev", ifname, "parent", virCommandAddArgList(cmd, "qdisc", "add", "dev", ifname, "parent",
"1:1", "handle", "2:", "sfq", "perturb", hierarchical_class ? "1:2" : "1:1",
"handle", "2:", "sfq", "perturb",
"10", NULL); "10", NULL);
if (virCommandRun(cmd, NULL) < 0) if (virCommandRun(cmd, NULL) < 0)
......
...@@ -42,7 +42,9 @@ struct _virNetDevBandwidth { ...@@ -42,7 +42,9 @@ struct _virNetDevBandwidth {
void virNetDevBandwidthFree(virNetDevBandwidthPtr def); void virNetDevBandwidthFree(virNetDevBandwidthPtr def);
int virNetDevBandwidthSet(const char *ifname, virNetDevBandwidthPtr bandwidth) int virNetDevBandwidthSet(const char *ifname,
virNetDevBandwidthPtr bandwidth,
bool hierarchical_class)
ATTRIBUTE_NONNULL(1) ATTRIBUTE_RETURN_CHECK; ATTRIBUTE_NONNULL(1) ATTRIBUTE_RETURN_CHECK;
int virNetDevBandwidthClear(const char *ifname) int virNetDevBandwidthClear(const char *ifname)
ATTRIBUTE_NONNULL(1); ATTRIBUTE_NONNULL(1);
......
...@@ -925,7 +925,7 @@ create_name: ...@@ -925,7 +925,7 @@ create_name:
rc = 0; rc = 0;
} }
if (virNetDevBandwidthSet(cr_ifname, bandwidth) < 0) { if (virNetDevBandwidthSet(cr_ifname, bandwidth, false) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR, virReportError(VIR_ERR_INTERNAL_ERROR,
_("cannot set bandwidth limits on %s"), _("cannot set bandwidth limits on %s"),
cr_ifname); cr_ifname);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册