前言
- 之前使用了XILINX 提供的 DMA IP做了一个小实验,完成了PL和PS端通过DDR的方式进行数据交互,但是只能在XILINX的器件上使用,如果要在一些国产FPGA板卡上使用,还需要使用自己的IP。幸运的是,MLK团队的FDMA造福了无数人,代码质量比较优秀,于是在网上找到一个FDMA3.2版本进行测试,完成LOOP的功能
- 既然要实现LOOP的功能就需要有一个临时存储空间来存储数据
基本图示
-
- DMA先从PS端DDR中读数据,到内部的FIFO
- DMA从PL端的FIFO读数据,到PS端DDR
- 但是这个流程不是将TX_ADDR中的数据完全存入到FIFO内,再从FIFO中将数据搬到RX_ADDR中,而是根据FIFO已有数据量来决定什么时候从PL端的FIFO读数据到PS端DDR
结果
-
PS 串口打印接口
-
检查10次
-
内存查看
-
波形查看
BD设计
-
地址分配
-
- 1:是控制UI-FDMA-DBUF-LOOP IP内部寄存器的起始地址,这个地址比较重要,因为要在SDK设计中使用,即使导出硬件描述文件后会有对应的参数的宏定义
- 2:DDR起始地址,但不代表从地址0就可以给PL端使用,因为DDR中从地址0开始有一部分是存放启动应用程序的,一般从0x100000地址开始就可以使用了
-
UI-FDMA-DBUF-LOOP IP参数
-
这些参数都很重要!!
-
uiFDMA IP参数
-
这里的数据位宽最大支持128bit,是因为SOC的AXI接口只支持128bit, 手册上也有说明,但没说原因,我猜测是为了使用最大突发长度增加传输效率,而且又要保证4K边界对齐
- 4K边界对齐:(AXI_DATA_width / 8) x AXI_BURST_LEN <= 4096
IP封装
- 自定义IP的打包和封装参考[[5-1 创建和打包AXI Interface IP]]
- M_UFDMA和S_UFDMA接口总线的封装参考[[5-3 自定义Interface BUS]]
- 自定义接口总线正确自动连线参考[[5-2 User Inteface BUS MAP]]
IP逻辑介绍
- uiFDMA IP 使用的MLK的RTL 封装单独讲
uiFDMA_DBUF_LOOP IP
- 读DDR状态机
-
R0_FS:为读开始信号,由PS端应用程序通过写UI-FDMA-DBUF-LOOP IP内部寄存器进行控制
-
fdma_rbusy:读忙碌信号,表示正在进行一次突发读,突发长度为IP设置参数
-
如果没有传输指定个数,继续传输,否则进入空闲态
-
这里有个点,什么时候fdma_rbusy信号拉高?
- if(fdma_wbusy == 1'b0 && W0_REQ)的时候
- W0_REQ <= (W0_rcnt > X_BURST_LEN - 2)&&(~W0_rbusy)
- W0_rcnt:是FIFO中已有的数据量,这是一个异步的过程
- X_BURST_LEN:指定的一次突发长度
- 当FIFO中的数据量足够一次突发写(W0_rcnt > X_BURST_LEN - 2)的时候,就把FIFO中的数据写入DDR
- if(fdma_wbusy == 1'b0 && W0_REQ)的时候
- 写DDR状态机
-
W0_FS:为写开始信号,由PS端应用程序通过写UI-FDMA-DBUF-LOOP IP内部寄存器进行控制
-
fdma_wbusy:写忙碌信号,表示正在进行一次突发写,突发长度为IP设置参数
-
如果没有传输指定个数,继续传输,否则进入空闲态
-
同理,什么时候fdma_rbusy信号拉高?
- if(fdma_rbusy == 1'b0 && R0_REQ)的时候
- R0_REQ <= (R0_wcnt < X_BURST_LEN - 2)&&(~R0_wbusy)
- R0_wcnt:是FIFO中已有的数据量,这是一个异步的过程
- X_BURST_LEN:指定的一次突发长度
- 当FIFO中的数据量小于一次突发长度(R0_wcnt < X_BURST_LEN - 2)的时候,就从DDR中读数据到FIFO
- if(fdma_rbusy == 1'b0 && R0_REQ)的时候
- 中断信号
- 读完成中断
- 在S_DATA2转换到S_IDLE状态的时候触发中断,表示一次完整的读结束
- 写完成中断
- 在S_DATA2转换到S_IDLE状态的时候触发中断,表示一次完整的写结束
- 读完成中断
控制寄存器
SDK开发
开发流程
主要技术栈
- 中断的使用 , 参考1-2 SDK Interrupt 测试
- 用户IP寄存器读写, 参考[4 AXI USER IP]
相关IP 参数
程序设计
#include <sleep.h>

#include "xil_cache.h"
#include "xil_exception.h"
#include "xil_io.h"
#include "xil_printf.h"
#include "xil_types.h"
#include "xparameters.h"
#include "xscugic.h"
/* define GIC parameters */
#define INTR_DEVICE_ID XPAR_SCUGIC_SINGLE_DEVICE_ID
#define FDMAW_INT_ID 121U
#define FDMAR_INT_ID 122U
/* define UI_FDMA_DBUF_LOOP control interface parameters */
#define FDMA_BASE_ADDR XPAR_UIFDMA_DBUF_LOOP_0_BASEADDR // 分配给 UI_FDMA_DBUF_LOOP IP核控制接口的基地址,其实就是BD分配的地址0x00_A000_0000
#define FDMA_DBUF_REG0 FDMA_BASE_ADDR
#define FDMA_DBUF_REG1 (FDMA_BASE_ADDR+0x4)
/* define UI_FDMA_DBUF_LOOP M_FDMA interface parameters */
#define FDMA_ADDR_OFFSET 0x00000000 // 这个值与 UI_FDMA_DBUF_LOOP IP核 Addr Offset参数一致
#define FDMA_BUF_BASE_ADDR (XPAR_PSU_DDR_0_S_AXI_BASEADDR+FDMA_ADDR_OFFSET)
#define FDMA_RBUF0_ADDR (FDMA_BUF_BASE_ADDR + 0x1000000) // 偏移量最低位16M 这个和 UI_FDMA_DBUF_LOOP 内部设置有关
#define FDMA_RBUF1_ADDR (FDMA_BUF_BASE_ADDR + 0x2000000)
#define TEST_TIMES 10
#define TEST_SIZE 480 * 1080 * 4 // 这个值与UI_FDMA_DBUF_LOOP IP核参数有关( x_cnt*y_cnt*(axi_data_width/32) ) -- 除去32是因为axi_data_width是128,但是写入数据位宽是32位(u32)
u32 *PS_TxBufferPtr = (u32 *)FDMA_RBUF0_ADDR;
u32 *PS_RxBufferPtr = (u32 *)FDMA_RBUF1_ADDR;
XScuGic Intc; //GIC
int PS_RxDone = 0;
int main(void)
{
int Status;
Status = Init_Intr_System(&Intc, INTR_DEVICE_ID, FDMAW_INT_ID, FDMAR_INT_ID);
if (Status != XST_SUCCESS)
{
xil_printf("Init_Intr_System failed\r\n");
return XST_FAILURE;
}
xil_printf("Init_Intr_System success\r\n");
//reset fdma_dbufc
Xil_Out32(FDMA_DBUF_REG0,0); // 初始化 data reg 0
Xil_Out32(FDMA_DBUF_REG1,0); // 初始化 reset reg 1
usleep(100);
Xil_Out32(FDMA_DBUF_REG1,1); // enable fdma_dbufc reset
u32 Value = 0;
for (int i = 0; i < TEST_TIMES; i++)
{
Xil_Out32(FDMA_DBUF_REG0, 0x00000201); // 设置读写的地址
Value = i;
for (int Index = 0; Index < TEST_SIZE; Index++)
{
PS_TxBufferPtr[Index] = Value;
Value = Value + 1;
}
/* Flush the SrcBuffer before the DMA transfer, in case the Data Cache
* is enabled
*/
Xil_DCacheFlushRange((u32)PS_TxBufferPtr, TEST_SIZE * sizeof(u32)); //
// eable fdma_dbufc read and write
Xil_Out32(FDMA_DBUF_REG0, 0x00030201);
// Wait TX done and RX done
while (!PS_RxDone)
{
}
PS_RxDone = 0;
/*
* Test finished, check data
*/
Status = FDMA_CheckData(TEST_SIZE, i);
if (Status != XST_SUCCESS)
{
xil_printf("Data check failed = %d\r\n", i);
}
else
xil_printf("Data check success = %d\n\r", i);
}
return 0;
}
/*
 * Read back the receive buffer and compare it with the expected pattern.
 *
 * Length      number of u32 words to check
 * StartValue  expected value of word 0; each following word is expected to
 *             be one greater than the previous one
 *
 * Returns XST_SUCCESS when every word matches (or Length is not positive),
 * XST_FAILURE at the first mismatch, which is also printed.
 */
int FDMA_CheckData(int Length, u32 StartValue)
{
    u32 Expected = StartValue;

    /* Nothing to compare; also avoids passing a negative size to the
     * cache-maintenance call. */
    if (Length <= 0) {
        return XST_SUCCESS;
    }

    /* Drop stale cache lines so the CPU reads what the DMA wrote to DDR.
     * INTPTR keeps the full pointer width on 64-bit targets. */
    Xil_DCacheInvalidateRange((INTPTR)PS_RxBufferPtr, Length * sizeof(u32));

    for (int Index = 0; Index < Length; Index++) {
        u32 Received = PS_RxBufferPtr[Index];

        if (Received != Expected) {
            xil_printf("Data error %d: %x/%x\r\n", Index, Received, Expected);
            return XST_FAILURE;
        }
        Expected = Expected + 1;
    }
    return XST_SUCCESS;
}
/*
 * Initialize the GIC driver and install the two FDMA interrupt handlers.
 *
 * InstancePtr  GIC driver instance to initialize
 * DeviceId     GIC device ID used for the configuration lookup
 * IntrID_1     interrupt ID connected to PS_RX_intr_Handler
 * IntrID_2     interrupt ID connected to PS_TX_intr_Handler
 *
 * Returns XST_SUCCESS, or XST_FAILURE if the lookup, the driver
 * initialization or either handler connection fails.
 */
int Init_Intr_System(XScuGic *InstancePtr, u16 DeviceId, u16 IntrID_1, u16 IntrID_2)
{
    /* Look up the controller configuration and bring the driver up. */
    XScuGic_Config *GicCfg = XScuGic_LookupConfig(DeviceId);
    if (GicCfg == NULL) {
        return XST_FAILURE;
    }
    if (XScuGic_CfgInitialize(InstancePtr, GicCfg, GicCfg->CpuBaseAddress) != XST_SUCCESS) {
        return XST_FAILURE;
    }

    /* Set up the exception table, route the CPU IRQ exception to the GIC
     * driver's dispatcher, and enable interrupts in the processor. */
    Xil_ExceptionInit();
    Xil_ExceptionRegisterHandler(XIL_EXCEPTION_ID_IRQ_INT,
                                 (Xil_ExceptionHandler)XScuGic_InterruptHandler,
                                 InstancePtr);
    Xil_ExceptionEnable();

    /* Priority (0xA0) and trigger type (0x3) for both interrupt lines. */
    XScuGic_SetPriorityTriggerType(InstancePtr, IntrID_1, 0xA0, 0x3);
    XScuGic_SetPriorityTriggerType(InstancePtr, IntrID_2, 0xA0, 0x3);

    /* Attach the handlers; the last argument is handed to the handler as
     * its callback parameter. */
    if (XScuGic_Connect(InstancePtr, IntrID_1,
                        (Xil_ExceptionHandler)PS_RX_intr_Handler,
                        (void *)1) != XST_SUCCESS) {
        return XST_FAILURE;
    }
    if (XScuGic_Connect(InstancePtr, IntrID_2,
                        (Xil_ExceptionHandler)PS_TX_intr_Handler,
                        (void *)2) != XST_SUCCESS) {
        return XST_FAILURE;
    }

    /* Finally unmask both interrupt lines at the GIC. */
    XScuGic_Enable(InstancePtr, IntrID_1);
    XScuGic_Enable(InstancePtr, IntrID_2);

    return XST_SUCCESS;
}
/*
 * Interrupt handler connected to IntrID_1 by Init_Intr_System.
 * Raises the PS_RxDone flag that main() polls while waiting for a transfer.
 */
void PS_RX_intr_Handler(void *param)
{
    (void)param; /* callback parameter is not used */

    PS_RxDone = 1;
}
/*
 * Interrupt handler connected to IntrID_2 by Init_Intr_System.
 * Intentionally does nothing.
 */
void PS_TX_intr_Handler(void *param)
{
    (void)param; /* callback parameter is not used */
}
UI_FDMA_DBUF_LOOP RTL使用的是MLK的
`timescale 1ns / 1ps
/*
Company : Liyang Milian Electronic Technology Co., Ltd.
Brand: 米联客(milianke)
Technical forum:www.uisrc.com
taobao1: https://milianke.taobao.com
taobao2: https://osrc.taobao.com
jd:https://milianke.jd.com
Create Date: 2019/12/17
Module Name: uidbufc_loop
Description:
Copyright: Copyright (c) milianke
Revision: 1.0
Signal description:
1) _i input
2) _o output
3) _n activ low
4) _dg debug signal
5) _r delay or register
6) _s state mechine
*/
//
// uidbufc_loop: DDR loop-back buffer controller on top of an FDMA interface.
//
// Read side : after a frame start on R0_FS_i, issues Y_CNT*X_DIV read bursts
//             of X_CNT/X_DIV beats; returned data (fdma_rvalid/fdma_rdata) is
//             pushed into an xpm_fifo_async.
// Write side: after a frame start on W0_FS_i, issues the same number of write
//             bursts; data is popped from that FIFO (fdma_wvalid/fdma_wdata).
// A burst is only requested when the FIFO level allows it (see R0_REQ/W0_REQ).
//
// Burst addresses are {Fbuf[7:0], 24-bit offset} + ADDR_OFFSET, so each buffer
// index selects a 16 MiB region.
//
// NOTE(review): FDMA_WIDTH is not referenced anywhere below; the data ports,
// the FIFO widths and the "16" in X_ADDR_SIZE are fixed for 128-bit data.
// Confirm before instantiating with any other width.
module uidbufc_loop#(
    parameter integer ADDR_OFFSET = 0,    // added to both generated addresses
    parameter integer Y_CNT       = 1080,
    parameter integer X_CNT       = 480,
    parameter integer X_DIV       = 2,    // splits each X line into X_DIV bursts
    parameter integer FDMA_WIDTH  = 128,  // currently unused, see note above
    parameter integer FIFO_DEPTH  = 2048
)
(
    input           ui_clk,
    input           ui_rstn,              // active-low synchronous reset
    input  [7:0]    W0_Fbuf,              // upper address byte of the write buffer
    input  [7:0]    R0_Fbuf,              // upper address byte of the read buffer
    input           W0_FS_i,              // write frame start (captured by fs_cap)
    input           R0_FS_i,              // read frame start (captured by fs_cap)
    //----------fdma signals write-------
    output [31: 0]  fdma_waddr,
    output reg      fdma_wareq = 1'b0,
    output [19: 0]  fdma_wsize,
    input           fdma_wbusy,
    output [127:0]  fdma_wdata,
    input           fdma_wvalid,
    output reg      fdma_wready = 1'b0,
    output          fdma_wirq,
    //----------fdma signals read-------
    output [31: 0]  fdma_raddr,
    output reg      fdma_rareq = 1'b0,
    output [19: 0]  fdma_rsize,
    input           fdma_rbusy,
    input  [127:0]  fdma_rdata,
    input           fdma_rvalid,
    output reg      fdma_rready = 1'b0,
    output          fdma_rirq
);

// Number of right shifts needed to bring bit_depth down to zero.
function integer clogb2 (input integer bit_depth);
begin
    for(clogb2=0; bit_depth>0; clogb2=clogb2+1)
        bit_depth = bit_depth >> 1;
end
endfunction

localparam Y_BURST_CNT = Y_CNT*X_DIV;    // total number of FDMA bursts per frame
// Beats per burst. A larger X_DIV means shorter bursts (and more of them),
// which lowers the FIFO depth that is required.
localparam X_BURST_LEN = X_CNT/X_DIV;
localparam X_ADDR_SIZE = X_CNT*16/X_DIV; // address step per burst, in bytes

localparam FIFO_COUNT_WIDTH = clogb2(FIFO_DEPTH - 1) + 1;

// State encoding shared by the write and read state machines.
localparam S_IDLE  = 2'd0;
localparam S_DATA1 = 2'd2;
localparam S_DATA2 = 2'd3;

// FIFO status nets (write port = DDR read data in, read port = DDR write data out).
wire W0_rbusy, W0_wfull, W0_rempty;
wire W0_dvalid;
wire R0_wbusy;
wire R0_wfull;

//============================ write to DDR ============================
(*mark_debug = "true"*) (* KEEP = "TRUE" *) wire W0_FS;
(*mark_debug = "true"*) (* KEEP = "TRUE" *) reg [1 :0] W_MS=0;
reg [23:0] W0_addr=0;   // byte offset of the current burst inside the buffer
reg [15:0] W0_bcnt=0;   // index of the current burst
(*mark_debug = "true"*) (* KEEP = "TRUE" *) wire[FIFO_COUNT_WIDTH-1:0] W0_rcnt;
(*mark_debug = "true"*) (* KEEP = "TRUE" *) reg W0_REQ=0;
(*mark_debug = "true"*) (* KEEP = "TRUE" *) reg [3 :0] irq_dly_cnt =0;

assign fdma_wsize = X_BURST_LEN;
assign fdma_wirq  = (irq_dly_cnt>0);
assign fdma_waddr = {W0_Fbuf,W0_addr}+ ADDR_OFFSET;

// Write interrupt stretcher: the counter is held at 15 while W0_bcnt equals
// the last burst index and counts down to 0 afterwards; fdma_wirq is high
// whenever the counter is non-zero.
// NOTE(review): W0_bcnt already equals the last index while the final burst
// is still being requested/transferred, so fdma_wirq appears to rise at the
// start of the last burst rather than at its end - confirm this is intended.
always @(posedge ui_clk) begin
    if(ui_rstn == 1'b0)begin
        irq_dly_cnt <= 4'd0;
    end
    else if(W0_bcnt == Y_BURST_CNT - 1'b1)
        irq_dly_cnt <= 15;
    else if(irq_dly_cnt >0)
        irq_dly_cnt <= irq_dly_cnt - 1'b1;
end

// Capture the write frame-start input into the ui_clk domain.
fs_cap fs_cap_W0(
    .clk_i(ui_clk),
    .rstn_i(ui_rstn),
    .vs_i(W0_FS_i),
    .fs_cap_o(W0_FS)
);

//--------write one frame to DDR------------
// S_IDLE : clear address/burst counter, wait for the frame start.
// S_DATA1: when FDMA is idle and the FIFO holds enough data, raise the
//          request; once FDMA reports busy, drop the request.
// S_DATA2: wait for busy to fall, then either finish (last burst) or advance
//          the address and burst counter and request the next burst.
always @(posedge ui_clk) begin
    if(!ui_rstn)begin
        W_MS        <= S_IDLE;
        W0_addr     <= 24'd0;
        fdma_wareq  <= 1'd0;
        fdma_wready <= 1'b0;
        W0_bcnt     <= 0;
    end
    else begin
        case(W_MS)
            S_IDLE:begin
                W0_addr <= 24'd0;
                W0_bcnt <= 16'd0;
                if(W0_FS)
                    W_MS <= S_DATA1;
            end
            S_DATA1:begin
                if(fdma_wbusy == 1'b0 && W0_REQ )begin
                    fdma_wready <= 1'b1;
                    fdma_wareq  <= 1'b1;
                end
                else if(fdma_wbusy == 1'b1) begin
                    fdma_wareq  <= 1'b0;
                    W_MS        <= S_DATA2;
                end
            end
            S_DATA2:begin
                if(fdma_wbusy == 1'b0)begin
                    fdma_wready <= 1'b0;
                    if(W0_bcnt == Y_BURST_CNT - 1'b1)
                        W_MS <= S_IDLE;
                    else begin
                        W0_addr <= W0_addr + X_ADDR_SIZE;
                        W0_bcnt <= W0_bcnt + 1'b1;
                        W_MS    <= S_DATA1;
                    end
                end
            end
            default:W_MS <= S_IDLE;
        endcase
    end
end

// Request a write burst once the FIFO read-side count exceeds
// X_BURST_LEN - 2 and the FIFO read port is not busy with reset.
always@(posedge ui_clk)
    W0_REQ <= (W0_rcnt > X_BURST_LEN - 2)&&(~W0_rbusy);

//============================ read from DDR ============================
(*mark_debug = "true"*) (* KEEP = "TRUE" *) wire R0_FS;
(*mark_debug = "true"*) (* KEEP = "TRUE" *) reg [3 :0] rirq_dly_cnt =0;
(*mark_debug = "true"*) (* KEEP = "TRUE" *) reg [1 :0] R_MS =0;
reg [23:0] R0_addr =0;  // byte offset of the current burst inside the buffer
reg [15:0] R0_bcnt =0;  // index of the current burst
(*mark_debug = "true"*) (* KEEP = "TRUE" *) wire[FIFO_COUNT_WIDTH-1:0] R0_wcnt;
(*mark_debug = "true"*) (* KEEP = "TRUE" *) reg R0_REQ = 0;

assign fdma_rsize = X_BURST_LEN;
assign fdma_rirq  = (rirq_dly_cnt>0);
assign fdma_raddr = {R0_Fbuf,R0_addr}+ ADDR_OFFSET;

// Read interrupt stretcher; same scheme (and same review note) as the
// write-side counter, driven by R0_bcnt.
always @(posedge ui_clk) begin
    if(ui_rstn == 1'b0)begin
        rirq_dly_cnt <= 4'd0;
    end
    else if(R0_bcnt == Y_BURST_CNT - 1'b1)
        rirq_dly_cnt <= 15;
    else if(rirq_dly_cnt >0)
        rirq_dly_cnt <= rirq_dly_cnt - 1'b1;
end

// Capture the read frame-start input into the ui_clk domain.
fs_cap fs_cap_R0(
    .clk_i(ui_clk),
    .rstn_i(ui_rstn),
    .vs_i(R0_FS_i),
    .fs_cap_o(R0_FS)
);

//--------read one frame from DDR------------
// Mirrors the write state machine, using the read-side FDMA handshake.
always @(posedge ui_clk) begin
    if(!ui_rstn)begin
        R_MS        <= S_IDLE;
        R0_addr     <= 24'd0;
        fdma_rareq  <= 1'd0;
        fdma_rready <= 1'b0;
        R0_bcnt     <= 0;
    end
    else begin
        case(R_MS)
            S_IDLE:begin
                R0_addr <= 24'd0;
                R0_bcnt <= 16'd0;
                if(R0_FS)
                    R_MS <= S_DATA1;
            end
            S_DATA1:begin
                if(fdma_rbusy == 1'b0 && R0_REQ )begin
                    fdma_rready <= 1'b1;
                    fdma_rareq  <= 1'b1;
                end
                else if(fdma_rbusy == 1'b1) begin
                    fdma_rareq  <= 1'b0;
                    R_MS        <= S_DATA2;
                end
            end
            S_DATA2:begin
                if(fdma_rbusy == 1'b0)begin
                    fdma_rready <= 1'b0;
                    if(R0_bcnt == Y_BURST_CNT - 1'b1)
                        R_MS <= S_IDLE;
                    else begin
                        R0_addr <= R0_addr + X_ADDR_SIZE;
                        R0_bcnt <= R0_bcnt + 1'b1;
                        R_MS    <= S_DATA1;
                    end
                end
            end
            default:R_MS <= S_IDLE;
        endcase
    end
end

// Request a read burst while the FIFO write-side count is below
// X_BURST_LEN - 2 and the FIFO write port is not busy with reset.
always@(posedge ui_clk)
    R0_REQ <= (R0_wcnt < X_BURST_LEN - 2)&&(~R0_wbusy);

// Loop-back FIFO: written with data read from DDR, read by the DDR write
// channel. Both ports are clocked by ui_clk.
xpm_fifo_async # (
    .FIFO_MEMORY_TYPE          ("auto"),           //string; "auto", "block", or "distributed";
    .ECC_MODE                  ("no_ecc"),         //string; "no_ecc" or "en_ecc";
    .RELATED_CLOCKS            (0),                //positive integer; 0 or 1
    .FIFO_WRITE_DEPTH          (FIFO_DEPTH),       //positive integer
    .WRITE_DATA_WIDTH          (128),              //positive integer
    .WR_DATA_COUNT_WIDTH       (FIFO_COUNT_WIDTH), //positive integer
    .PROG_FULL_THRESH          (7),                //positive integer
    .FULL_RESET_VALUE          (0),                //positive integer; 0 or 1
    .USE_ADV_FEATURES          ("0707"),           //string; "0000" to "1F1F";
    .READ_MODE                 ("fwft"),           //string; "std" or "fwft";
    .FIFO_READ_LATENCY         (0),                //positive integer;
    .READ_DATA_WIDTH           (128),              //positive integer
    .RD_DATA_COUNT_WIDTH       (FIFO_COUNT_WIDTH), //positive integer
    .PROG_EMPTY_THRESH         (5),                //positive integer
    .DOUT_RESET_VALUE          ("0"),              //string
    .CDC_SYNC_STAGES           (2),                //positive integer
    .WAKEUP_TIME               (0)                 //positive integer; 0 or 2;
) xpm_fifo_R0_inst (
    .rst              (ui_rstn==1'b0),
    .wr_clk           (ui_clk),
    .wr_en            (fdma_rvalid),
    .din              (fdma_rdata),
    .full             (),
    .overflow         (),
    .prog_full        (),
    .wr_data_count    (R0_wcnt),
    .almost_full      (R0_wfull),
    .wr_ack           (),
    .wr_rst_busy      (R0_wbusy),
    .rd_clk           (ui_clk),
    .rd_en            (fdma_wvalid),
    .dout             (fdma_wdata),
    .empty            (W0_rempty),
    .underflow        (),
    .rd_rst_busy      (W0_rbusy),
    .prog_empty       (),
    .rd_data_count    (W0_rcnt),
    .almost_empty     (),
    .data_valid       (W0_dvalid),
    .sleep            (1'b0),
    .injectsbiterr    (1'b0),
    .injectdbiterr    (1'b0),
    .sbiterr          (),
    .dbiterr          ()
);

endmodule
标签:PS,ADDR,FDMA,IP,fdma,LOOP,DBUF
From: https://blog.csdn.net/ssh18581030544/article/details/145212912