根据百度百科的解释,PCIE(peripheral component interconnect express)是一种高速串行计算机扩展总线标准,它原来的名称为“3GIO”,是由英特尔在2001年提出的,旨在替代旧的PCI,PCI-X和AGP总线标准。PCIe属于高速串行点对点双通道高带宽传输,所连接的设备分配独享通道带宽,不共享总线带宽,主要支持主动电源管理,错误报告,端对端的可靠性传输,热插拔以及服务质量(QOS)等功能。PCIe闪存卡的供应商包括:INTEL、IBM、LSI、OCZ、三星(计划中)、SanDisk、STEC、SuperTalent和东芝(计划中)等,而针对海量的数据增长使得用户对规模更大、可扩展性更强的系统所应用,PCIe 3.0技术的加入最新的LSI MegaRAID控制器及HBA产品的出色性能,就可以实现更大的系统设计灵活性。
查看系统中的PCIE设备
执行lspci命令,查看系统中的PCIE设备信息
可以看到,系统中有PCIE USB EHCI控制器,以及PCIE网卡,现在我们进一步用命令分析它们。命令格式是: sudo lspci -vvv -s #bdf,其中bdf是BUS,DEVICE,FUNCTION 的缩写,要了解它需要具备一些PCIE的基础知识。简单来说,挂载在PCIE总线上的PCIE设备尽管拓扑结构非常复杂,但是可以通过bus:device.function唯一地定位。BUS很好理解,它代表设备所挂载的那条PCIE总线的ID;一条BUS上可以挂载多个设备,通过device区分;每个device又可以具备多个function,其中function 0是默认都支持的。这可以类比一个USB设备可以支持多个配置,而默认的端点0的配置都是支持的。
话说回来,我们可以通过上述命令来针对某个设备DUMP更多的信息出来。
根据LSPCI的输出可以知道,网卡的BDF为02:00.0,我们通过截图中的命令,分析得到网卡设备的中断号为19,支持三个地址空间REGION,HOST端(CPU端)可以通过主机的MMAP函数将REGION中的存储区域映射到系统中进行访问,这样可以直接操纵板卡上的存储资源。另外根据输出信息,网卡支持MSI-X中断,MSI/MSI-X是基于消息传递的中断机制,MSI-X中断允许网卡固件向BAR空间的一片RINGBUF写入16个字节的数据,触发MSI中断,通知HOST端进行响应,是一种设备端和主机端高效的通知机制。
再看一下PCIE USB控制器,它的BDF为00:14.0
USB控制器支持1个REGION,大小64K(很可能是EHCI寄存器空间,有待证实),中断号为127,同样支持MSI/MSI-X中断机制。
基本可以确定PCI USB Controller的EHCI寄存器空间即是对PCI USB Controller BAR0的映射。 如下图所示:
dmesg print
You can see that the EHCI length of 64K is exactly identical to the lspci output, and so is the base address 0xa4100000.
Modify the code to print the iomapped EHCI registers, according to the iomap function.
The iomapped address and its contents are:
Use mmap from user space to map BAR0 and display its contents:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <errno.h>
// BDF: bus, device, function. The bus number takes 8 bits, the device number takes 5 bits and the
// function number takes 3 bits, so a PCIe RC supports at most 256 buses, 32 devices per bus and
// 8 functions per device. BUS0 is RC(root complex).
/*
 * Hex-dump len bytes starting at buf to stdout, 16 bytes per row.
 *
 * Every row begins with the address of its first byte (formatted with %p)
 * and a colon, followed by each byte as a "0xNN " token.  A len of zero or
 * less prints nothing.
 */
void dump_memory(const unsigned char *buf, int len)
{
    int i;

    for (i = 0; i < len; i++) {
        if (i % 16 == 0)
            printf("%p:", (const void *)(buf + i));   /* %p expects a void pointer */
        printf("0x%02x ", buf[i]);
        if (i % 16 == 15)
            printf("\n");
    }

    /* Close a final partial row so later output starts on a fresh line. */
    if (len > 0 && len % 16 != 0)
        printf("\n");
}
/*
 * Map BAR0 of PCI function 0000:00:14.0 through its sysfs "resource0" file
 * and hex-dump the start of the mapping.
 *
 * Steps: open and fstat resource0 to learn the BAR size, mmap it, read the
 * raw BAR0 register from the sysfs "config" file (offset 0x10 + 4 * bar),
 * then dump up to 256 bytes starting at the BAR's offset inside its page.
 *
 * argc/argv are accepted but unused.  Returns 0 on success and -1 on any
 * failure; every descriptor and the mapping are released on all paths.
 */
int main(int argc, char **argv)
{
    enum { DUMP_BYTES = 256 };
    const char *filename;
    struct stat statbuf;
    unsigned char raw[4];
    unsigned char *addr;
    void *map = MAP_FAILED;
    size_t map_len = 0;
    size_t dump_len;
    unsigned int phys;
    unsigned long offset;
    long long size;
    long page;
    long got;
    const int bar = 0;
    int fd = -1;
    int fdcfg = -1;
    int rc = -1;

    (void)argc;
    (void)argv;

    // bdf address, domain:bus:slot.func
    filename = "/sys/bus/pci/devices/0000:00:14.0/resource0";
    printf("open file %s.\n", filename);
    fd = open(filename, O_RDWR | O_SYNC);
    if (fd < 0) {
        printf("%s line %d, fatal error, open file failure.\n", __func__, __LINE__);
        goto out;
    }

    if (fstat(fd, &statbuf) < 0) {
        printf("%s line %d, status file failure.\n", __func__, __LINE__);
        goto out;
    }

    /* off_t is not guaranteed to be long, so widen before printing. */
    size = (long long)statbuf.st_size;
    printf("%s line %d, bar zone size %lld bytes, %lld Kbytes, %lld Mbytes, %lld Gbytes.\n",
           __func__, __LINE__, size, size / 1024, size / 1024 / 1024, size / 1024 / 1024 / 1024);

    map_len = (size_t)statbuf.st_size;
    map = mmap(NULL, map_len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (map == MAP_FAILED) {
        printf("%s line %d, failure for mmap bar err %s.\n", __func__, __LINE__, strerror(errno));
        goto out;
    }
    /* %p already emits its own prefix on glibc, so no literal "0x" here. */
    printf("%s line %d, fw %p.\n", __func__, __LINE__, map);

    /* The config file is only read, so read-only access is sufficient. */
    filename = "/sys/bus/pci/devices/0000:00:14.0/config";
    fdcfg = open(filename, O_RDONLY);
    if (fdcfg < 0) {
        printf("%s line %d, fatal error, open file failure.\n", __func__, __LINE__);
        goto out;
    }

    /* BAR registers start at config offset 0x10, four bytes each. */
    if (lseek(fdcfg, (off_t)(0x10 + 4 * bar), SEEK_SET) == (off_t)-1) {
        printf("%s line %d, seek config failure, err %s.\n", __func__, __LINE__, strerror(errno));
        goto out;
    }

    /* A short read would leave the register value incomplete: require all 4 bytes. */
    got = (long)read(fdcfg, raw, sizeof raw);
    if (got != (long)sizeof raw) {
        printf("%s line %d, read config failure.\n", __func__, __LINE__);
        goto out;
    }
    /* PCI config space is little-endian; assemble the value byte by byte. */
    phys = (unsigned int)raw[0] | ((unsigned int)raw[1] << 8) |
           ((unsigned int)raw[2] << 16) | ((unsigned int)raw[3] << 24);
    printf("%s line %d phys 0x%x.\n", __func__, __LINE__, phys);

    /*
     * Drop the low four BAR flag bits and keep the offset of the BAR inside
     * its page.  NOTE(review): this presumes the kernel starts the mapping
     * at the page containing the BAR and that the bus address equals the
     * CPU physical address -- confirm for the target platform.
     */
    page = sysconf(_SC_PAGESIZE);
    if (page <= 0)
        page = 0x1000;
    offset = (unsigned long)(phys & 0xFFFFFFF0u) % (unsigned long)page;
    addr = (unsigned char *)map + offset;
    printf("%s line %d, addr = %p.\n", __func__, __LINE__, (void *)addr);

    /* Never dump more bytes than the BAR actually holds. */
    dump_len = map_len < (size_t)DUMP_BYTES ? map_len : (size_t)DUMP_BYTES;
    dump_memory(addr, (int)dump_len);
    rc = 0;

out:
    if (map != MAP_FAILED)
        munmap(map, map_len);
    if (fdcfg >= 0)
        close(fdcfg);
    if (fd >= 0)
        close(fd);
    return rc;
}
以上代码依据的是PCIE的配置空间分布
User-space output: you can see that it is exactly the same as the kernel printk output above.
Comparing with the Allwinner sunxi USB EHCI controller registers, you can be sure that we really did get the actual EHCI base address from the BAR space above.
The hcd->regs value originates from usb_vbase.
usb_vbase is initialized from the device tree,
which defines the EHCI base address.
The above is the PCI driver registration flow, but we know that devices and drivers come in pairs in the Linux driver model, so where is the device registered?
In the PCI subsystem, the device registration function is pci_device_add.
The device registration flow is:
pci_device_add
acpi_init
写代码MAP BAR空间:
下面代码将网卡设备的BAR4空间映射到系统,并且打印部分数据.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <errno.h>
// BDF: bus, device, function. The bus number takes 8 bits, the device number takes 5 bits and the
// function number takes 3 bits, so a PCIe RC supports at most 256 buses, 32 devices per bus and
// 8 functions per device. BUS0 is RC(root complex).
/*
 * Hex-dump len bytes starting at buf to stdout, 16 bytes per row.
 *
 * Every row begins with the address of its first byte (formatted with %p)
 * and a colon, followed by each byte as a "0xNN " token.  A len of zero or
 * less prints nothing.
 */
void dump_memory(const unsigned char *buf, int len)
{
    int i;

    for (i = 0; i < len; i++) {
        if (i % 16 == 0)
            printf("%p:", (const void *)(buf + i));   /* %p expects a void pointer */
        printf("0x%02x ", buf[i]);
        if (i % 16 == 15)
            printf("\n");
    }

    /* Close a final partial row so later output starts on a fresh line. */
    if (len > 0 && len % 16 != 0)
        printf("\n");
}
/*
 * Parse one command-line number into *out.
 *
 * Plain digits are read as decimal; a leading "0x"/"0X" selects hex.
 * Returns 0 on success, -1 when text is empty, has trailing characters,
 * or lies outside [0, max].
 */
static int parse_pci_field(const char *text, long max, int *out)
{
    char *end = NULL;
    long value;
    int base = 10;

    if (text[0] == '0' && (text[1] == 'x' || text[1] == 'X'))
        base = 16;

    errno = 0;
    value = strtol(text, &end, base);
    if (end == text || *end != '\0' || errno == ERANGE || value < 0 || value > max)
        return -1;

    *out = (int)value;
    return 0;
}

/*
 * Map one BAR of a PCI function through its sysfs "resourceN" file and
 * hex-dump the start of the mapping.
 *
 * Usage: ./program bus slot function bar  (domain is fixed to 0).
 *
 * Steps: build the sysfs path from the arguments, open and fstat the
 * resource file to learn the BAR size, mmap it, read the raw BAR register
 * from the sysfs "config" file (offset 0x10 + 4 * bar), then dump up to
 * 256 bytes starting at the BAR's offset inside its page.
 *
 * Returns 0 on success and -1 on bad arguments or any failure; every
 * descriptor and the mapping are released on all paths.
 */
int main(int argc, char **argv)
{
    enum { DUMP_BYTES = 256 };
    char filename[256];
    struct stat statbuf;
    unsigned char raw[4];
    unsigned char *addr;
    void *map = MAP_FAILED;
    size_t map_len = 0;
    size_t dump_len;
    unsigned int phys;
    unsigned long offset;
    long long size;
    long page;
    long got;
    int domain = 0;
    int bus = 0;
    int slot = 0;
    int func = 0;
    int bar = 0;
    int fd = -1;
    int fdcfg = -1;
    int rc = -1;
    int n;

    if (argc != 5) {
        printf("%s line %d, the command use like this: ./program bus slot function bar.\n", __func__, __LINE__);
        return -1;
    }

    /*
     * Limits follow the BDF field widths (8/5/3 bits).  Only six BAR
     * registers exist in a type-0 header (0x10..0x24), so bar is 0..5.
     */
    if (parse_pci_field(argv[1], 255, &bus) < 0 ||
        parse_pci_field(argv[2], 31, &slot) < 0 ||
        parse_pci_field(argv[3], 7, &func) < 0 ||
        parse_pci_field(argv[4], 5, &bar) < 0) {
        printf("%s line %d, invalid argument: bus 0-255, slot 0-31, function 0-7, bar 0-5.\n", __func__, __LINE__);
        return -1;
    }

    // bdf address, domain:bus:slot.func
    n = snprintf(filename, sizeof filename, "/sys/bus/pci/devices/%04x:%02x:%02x.%1x/resource%d",
                 domain, bus, slot, func, bar);
    if (n < 0 || (size_t)n >= sizeof filename) {
        printf("%s line %d, resource path too long.\n", __func__, __LINE__);
        return -1;
    }
    printf("open file %s.\n", filename);
    fd = open(filename, O_RDWR | O_SYNC);
    if (fd < 0) {
        printf("%s line %d, fatal error, open file failure.\n", __func__, __LINE__);
        goto out;
    }

    if (fstat(fd, &statbuf) < 0) {
        printf("%s line %d, status file failure.\n", __func__, __LINE__);
        goto out;
    }

    /* off_t is not guaranteed to be long, so widen before printing. */
    size = (long long)statbuf.st_size;
    printf("%s line %d, bar zone size %lld bytes, %lld Kbytes, %lld Mbytes, %lld Gbytes.\n",
           __func__, __LINE__, size, size / 1024, size / 1024 / 1024, size / 1024 / 1024 / 1024);

    map_len = (size_t)statbuf.st_size;
    map = mmap(NULL, map_len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (map == MAP_FAILED) {
        printf("%s line %d, failure for mmap bar err %s.\n", __func__, __LINE__, strerror(errno));
        goto out;
    }
    /* %p already emits its own prefix on glibc, so no literal "0x" here. */
    printf("%s line %d, fw %p.\n", __func__, __LINE__, map);

    n = snprintf(filename, sizeof filename, "/sys/bus/pci/devices/%04x:%02x:%02x.%1x/config",
                 domain, bus, slot, func);
    if (n < 0 || (size_t)n >= sizeof filename) {
        printf("%s line %d, config path too long.\n", __func__, __LINE__);
        goto out;
    }
    /* The config file is only read, so read-only access is sufficient. */
    fdcfg = open(filename, O_RDONLY);
    if (fdcfg < 0) {
        printf("%s line %d, fatal error, open file failure.\n", __func__, __LINE__);
        goto out;
    }

    /* BAR registers start at config offset 0x10, four bytes each. */
    if (lseek(fdcfg, (off_t)(0x10 + 4 * bar), SEEK_SET) == (off_t)-1) {
        printf("%s line %d, seek config failure, err %s.\n", __func__, __LINE__, strerror(errno));
        goto out;
    }

    /* A short read would leave the register value incomplete: require all 4 bytes. */
    got = (long)read(fdcfg, raw, sizeof raw);
    if (got != (long)sizeof raw) {
        printf("%s line %d, read config failure.\n", __func__, __LINE__);
        goto out;
    }
    /* PCI config space is little-endian; assemble the value byte by byte. */
    phys = (unsigned int)raw[0] | ((unsigned int)raw[1] << 8) |
           ((unsigned int)raw[2] << 16) | ((unsigned int)raw[3] << 24);
    printf("%s line %d phys 0x%x.\n", __func__, __LINE__, phys);

    /*
     * Drop the low four BAR flag bits and keep the offset of the BAR inside
     * its page.  NOTE(review): this presumes the kernel starts the mapping
     * at the page containing the BAR and that the bus address equals the
     * CPU physical address -- confirm for the target platform.
     */
    page = sysconf(_SC_PAGESIZE);
    if (page <= 0)
        page = 0x1000;
    offset = (unsigned long)(phys & 0xFFFFFFF0u) % (unsigned long)page;
    addr = (unsigned char *)map + offset;
    printf("%s line %d, addr = %p.\n", __func__, __LINE__, (void *)addr);

    /* Never dump more bytes than the BAR actually holds. */
    dump_len = map_len < (size_t)DUMP_BYTES ? map_len : (size_t)DUMP_BYTES;
    dump_memory(addr, (int)dump_len);
    rc = 0;

out:
    if (map != MAP_FAILED)
        munmap(map, map_len);
    if (fdcfg >= 0)
        close(fdcfg);
    if (fd >= 0)
        close(fd);
    return rc;
}
执行结果,可以看到正确的读出了BAR4空间的内存。
关于BAR空间映射:
代码中对BAR的映射基于resource节点
/sys/bus/pci/devices/%04x:%02x:%02x.%1x/resource%d
resource节点在内核代码中的创建是在如下位置pci_create_attr:
发起调用的地方在sysfs_kf_bin_mmap:
map调用路径为pci_mmap_resource:
可以看到,对BAR空间的映射也是基于通用的IOMAP函数。
configuration
The sysfs directory of a PCI device includes many different files:
config is a binary file from which you can read the original configuration space. vendor, device, subsystem_device, subsystem_vendor, and class each represent the corresponding value of the PCI device.
You can also get the IRQ number from the irq file.
install PCIE工具
sudo apt install pciutils-dev