Running
After the dpdk sample application has been built, it can be run with the following command. In my environment two NICs are bound to dpdk, and the host has 64 cores across 2 NUMA nodes.
```
./build/load_balancer -l 3-7 -n 4 -- --rx "(0,0,3),(1,0,3)" --tx "(0,3),(1,3)" --w "4,5,6,7" --lpm "100.200.1.0/24=>0; 100.200.2.0/24=>1;" --pos-lb 29
```
The parameters are:
- --rx "(PORT, QUEUE, LCORE), ...": The list of NIC RX ports and queues handled by the I/O RX lcores. This parameter also implicitly defines the list of I/O RX lcores. This is a mandatory parameter.
- --tx "(PORT, LCORE), ...": The list of NIC TX ports handled by the I/O TX lcores. This parameter also implicitly defines the list of I/O TX lcores. This is a mandatory parameter.
- --w "LCORE, ...": The list of the worker lcores. This is a mandatory parameter.
- --lpm "IP / PREFIX => PORT; ...": The list of LPM rules used by the worker lcores for packet forwarding. This is a mandatory parameter.
- --pos-lb POS: The position of the 1-byte field within the input packet used by the I/O RX lcores to identify the worker lcore for the current packet. This field needs to be within the first 64 bytes of the input packet.
So the command above means: use cores 3-7 and 4 memory channels. RX uses core 3 with queue 0 to receive packets on port 0 and port 1; TX also uses core 3 to transmit packets on port 0 and port 1, so core 3 is the lcore handling the I/O queues. Cores 4, 5, 6 and 7 serve as worker cores, with worker IDs 0-3 mapping to cores 4, 5, 6 and 7 respectively. Core 3 both hands received packets off to the workers and collects the processed packets back from them for transmission. --lpm specifies the LPM rules the worker cores use for packet forwarding, i.e. the L3 routing rules: which subnets are forwarded to port 0 and which to port 1. --pos-lb specifies the offset of the 1-byte field within the input packet that the I/O RX lcore (core 3 here) uses to identify the worker core for the current packet; this field must lie within the first 64 bytes of the packet. The value 29 here is the byte offset from the start of the frame: after the 14-byte Ethernet header it lands on offset 15 of the IPv4 header, i.e. the last byte of the source IP address.
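For reference, worker selection on the I/O RX lcore boils down to reading that one byte and masking it with the worker count. A minimal sketch (the function and variable names are illustrative, not the sample's):

```c
#include <stdint.h>
#include <rte_mbuf.h>

/* Sketch: map a packet to a worker using the --pos-lb byte.
 * Assumes the worker count is a power of two (4 in the run above),
 * so the mask yields worker IDs 0-3. */
static inline uint32_t
pick_worker(struct rte_mbuf *pkt, uint32_t pos_lb, uint32_t n_workers)
{
    uint8_t *data = rte_pktmbuf_mtod(pkt, uint8_t *);

    /* With --pos-lb 29: 14-byte Ethernet header + IPv4 header offset 15,
     * i.e. the low byte of the source IP address. */
    return data[pos_lb] & (n_workers - 1);
}
```

Since neighbouring source addresses differ in that low byte, this spreads flows fairly evenly across the four workers.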
Processing flow
The processing flow works as follows: the RX side uses queue 0 to pull packets from both NICs and distributes them across the four workers; each worker does the application work, which here is LPM-based forwarding; finally the TX side picks up those packets and sends them out of the corresponding ports. In other words, core 3 handles the I/O while the four workers share the application processing in a load-balanced fashion.
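Each hand-off between stages goes through an rte_ring. Below is a minimal sketch of the rx → worker leg, assuming one single-producer/single-consumer ring per (I/O lcore, worker) pair as in the sample; the names, ring size and burst size are illustrative:

```c
#include <stdio.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>
#include <rte_ring.h>

#define BURST_SIZE 32

/* Create one SP/SC ring connecting an I/O RX lcore to a worker.
 * The ring size (a power of two) is illustrative. */
static struct rte_ring *
make_rx_worker_ring(unsigned int worker_id)
{
    char name[32];
    snprintf(name, sizeof(name), "ring_rx_to_w%u", worker_id);
    return rte_ring_create(name, 1024, rte_socket_id(),
                           RING_F_SP_ENQ | RING_F_SC_DEQ);
}

/* I/O RX lcore: hand a burst of mbufs to one worker. */
static void
io_rx_push(struct rte_ring *r, struct rte_mbuf **mbufs, unsigned int n)
{
    unsigned int sent = rte_ring_sp_enqueue_burst(r, (void **)mbufs, n, NULL);
    /* The real sample drops packets when the ring is full; elided here. */
    (void)sent;
}

/* Worker lcore: take a burst of mbufs for processing. */
static unsigned int
worker_pull(struct rte_ring *r, struct rte_mbuf **mbufs)
{
    return rte_ring_sc_dequeue_burst(r, (void **)mbufs, BURST_SIZE, NULL);
}
```

The worker → tx leg is symmetrical, with one ring per (port, worker) pair, matching the tx.rings[port][worker] array in the structs below.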
The data-handling logic can be read off the data structure definitions:
```c
struct app_lcore_params_io {
    /* I/O RX */
    struct {
        /* NIC */
        struct {
            uint16_t port;
            uint8_t queue;
        } nic_queues[APP_MAX_NIC_RX_QUEUES_PER_IO_LCORE];
        uint32_t n_nic_queues;

        /* Rings */
        struct rte_ring *rings[APP_MAX_WORKER_LCORES];
        uint32_t n_rings;

        /* Internal buffers */
        struct app_mbuf_array mbuf_in;
        struct app_mbuf_array mbuf_out[APP_MAX_WORKER_LCORES];
        uint8_t mbuf_out_flush[APP_MAX_WORKER_LCORES];

        /* Stats */
        uint32_t nic_queues_count[APP_MAX_NIC_RX_QUEUES_PER_IO_LCORE];
        uint32_t nic_queues_iters[APP_MAX_NIC_RX_QUEUES_PER_IO_LCORE];
        uint32_t rings_count[APP_MAX_WORKER_LCORES];
        uint32_t rings_iters[APP_MAX_WORKER_LCORES];
    } rx;

    /* I/O TX */
    struct {
        /* Rings */
        struct rte_ring *rings[APP_MAX_NIC_PORTS][APP_MAX_WORKER_LCORES];

        /* NIC */
        uint16_t nic_ports[APP_MAX_NIC_TX_PORTS_PER_IO_LCORE];
        uint32_t n_nic_ports;

        /* Internal buffers */
        // [study] dequeue data from rings and put into mbuf_out
        struct app_mbuf_array mbuf_out[APP_MAX_NIC_TX_PORTS_PER_IO_LCORE];
        uint8_t mbuf_out_flush[APP_MAX_NIC_TX_PORTS_PER_IO_LCORE];

        /* Stats */
        uint32_t rings_count[APP_MAX_NIC_PORTS][APP_MAX_WORKER_LCORES];
        uint32_t rings_iters[APP_MAX_NIC_PORTS][APP_MAX_WORKER_LCORES];
        uint32_t nic_ports_count[APP_MAX_NIC_TX_PORTS_PER_IO_LCORE];
        uint32_t nic_ports_iters[APP_MAX_NIC_TX_PORTS_PER_IO_LCORE];
    } tx;
};

struct app_lcore_params_worker {
    /* Rings */
    // [study] point to rx.rings of app_lcore_params_io
    struct rte_ring *rings_in[APP_MAX_IO_LCORES];
    uint32_t n_rings_in;
    // [study] point to tx.rings of app_lcore_params_io
    struct rte_ring *rings_out[APP_MAX_NIC_PORTS];

    /* LPM table */
    struct rte_lpm *lpm_table;
    uint32_t worker_id;

    /* Internal buffers */
    struct app_mbuf_array mbuf_in;
    // [study] the worker reads packets from rings_in, splits them by LPM
    // into per-port mbuf_out buffers, then pushes them on to rings_out
    struct app_mbuf_array mbuf_out[APP_MAX_NIC_PORTS];
    uint8_t mbuf_out_flush[APP_MAX_NIC_PORTS];

    /* Stats */
    uint32_t rings_in_count[APP_MAX_IO_LCORES];
    uint32_t rings_in_iters[APP_MAX_IO_LCORES];
    uint32_t rings_out_count[APP_MAX_NIC_PORTS];
    uint32_t rings_out_iters[APP_MAX_NIC_PORTS];
};
```
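Putting the worker side together: it pulls a burst from rings_in, looks up each packet's IPv4 destination in lpm_table, and stages the packet into the mbuf_out buffer of the port returned by the lookup. A condensed sketch of that per-packet step, assuming app_mbuf_array is an array of mbuf pointers plus an n_mbufs count as in the sample's main.h, and using current DPDK type names (rte_ipv4_hdr/rte_ether_hdr; older releases spell them ipv4_hdr/ether_hdr); overflow and flush handling are elided:

```c
#include <rte_byteorder.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_lpm.h>
#include <rte_mbuf.h>

/* Sketch of one worker step for a single packet: the LPM lookup on the
 * IPv4 destination address picks the output port, i.e. the "=>PORT"
 * side of the --lpm rules. */
static void
worker_forward_one(struct app_lcore_params_worker *lp, struct rte_mbuf *pkt)
{
    struct rte_ipv4_hdr *ip_hdr = rte_pktmbuf_mtod_offset(pkt,
        struct rte_ipv4_hdr *, sizeof(struct rte_ether_hdr));
    uint32_t dst_ip = rte_be_to_cpu_32(ip_hdr->dst_addr);
    uint32_t port;

    /* rte_lpm_lookup() returns 0 on a hit and fills in the next hop,
     * which this application uses directly as the output port. */
    if (rte_lpm_lookup(lp->lpm_table, dst_ip, &port) != 0) {
        rte_pktmbuf_free(pkt); /* no matching route: drop */
        return;
    }

    /* Stage the packet for that port; a later flush step enqueues the
     * filled buffer onto rings_out[port]. */
    lp->mbuf_out[port].array[lp->mbuf_out[port].n_mbufs++] = pkt;
}
```

With the rules from the run above, traffic to 100.200.1.0/24 is staged for port 0 and traffic to 100.200.2.0/24 for port 1.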
From: https://www.cnblogs.com/janeysj/p/15219989.html