PCIe在調試過程中,經常會出現掃描不到對端EP設備的問題,在問題定位過程中,瞭解內核中pcie枚舉流程至關重要。
PCIe枚舉過程一般分爲三步:
1.創建根節點
2.掃描根節點下設備
3.爲根節點下設備分配資源
那麼如何發現設備?
從總線掃描pcie設備的函數pci_scan_child_bus開始分析
unsigned int pci_scan_child_bus(struct pci_bus *bus)
{
unsigned int devfn, pass, max = bus->busn_res.start;
struct pci_dev *dev;
dev_dbg(&bus->dev, "scanning bus\n");
/* Go find them, Rover! */
for (devfn = 0; devfn < 0x100; devfn += 8)
pci_scan_slot(bus, devfn);
/* Reserve buses for SR-IOV capability. */
max += pci_iov_bus_range(bus);
/*
* After performing arch-dependent fixup of the bus, look behind
* all PCI-to-PCI bridges on this bus.
*/
if (!bus->is_added) {
dev_dbg(&bus->dev, "fixups for bus\n");
pcibios_fixup_bus(bus);
bus->is_added = 1;
}
for (pass = 0; pass < 2; pass++)
list_for_each_entry(dev, &bus->devices, bus_list) {
if (pci_is_bridge(dev))
max = pci_scan_bridge(bus, dev, max, pass);
}
/*
* Make sure a hotplug bridge has at least the minimum requested
* number of buses.
*/
if (bus->self && bus->self->is_hotplug_bridge && pci_hotplug_bus_size) {
if (max - bus->busn_res.start < pci_hotplug_bus_size - 1)
max = bus->busn_res.start + pci_hotplug_bus_size - 1;
}
/*
* We've scanned the bus and so we know all about what's on
* the other side of any bridges that may be on this bus plus
* any devices.
*
* Return how far we've got finding sub-buses.
*/
dev_dbg(&bus->dev, "bus scan returning with max=%02x\n", max);
return max;
}
EXPORT_SYMBOL_GPL(pci_scan_child_bus);
該函數的核心代碼爲
for (devfn = 0; devfn < 0x100; devfn += 8)
pci_scan_slot(bus, devfn);
這裏的bus變量來源於pci_create_root_bus,也就是創建的根總線的總線號
devfn : 設備和功能號。
這裏使用的是窮舉法,把所有的dev和function都嘗試一次。
pci_scan_slot函數:
int pci_scan_slot(struct pci_bus *bus, int devfn)
{
unsigned fn, nr = 0;
struct pci_dev *dev;
if (only_one_child(bus) && (devfn > 0))
return 0; /* Already scanned the entire slot */
dev = pci_scan_single_device(bus, devfn);
if (!dev)
return 0;
if (!dev->is_added)
nr++;
for (fn = next_fn(bus, dev, 0); fn > 0; fn = next_fn(bus, dev, fn)) {
dev = pci_scan_single_device(bus, devfn + fn);
if (dev) {
if (!dev->is_added)
nr++;
dev->multifunction = 1;
}
}
/* only one slot has pcie device */
if (bus->self && nr)
pcie_aspm_init_link_state(bus->self);
return nr;
}
pci_scan_single_device:
struct pci_dev *pci_scan_single_device(struct pci_bus *bus, int devfn)
{
struct pci_dev *dev;
dev = pci_get_slot(bus, devfn);
if (dev) {
pci_dev_put(dev);
return dev;
}
dev = pci_scan_device(bus, devfn);
if (!dev)
return NULL;
pci_device_add(dev, bus);
return dev;
}
核心函數爲pci_scan_device:
static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
{
struct pci_dev *dev;
u32 l;
if (!pci_bus_read_dev_vendor_id(bus, devfn, &l, 60*1000))
return NULL;
dev = pci_alloc_dev(bus);
if (!dev)
return NULL;
dev->devfn = devfn;
dev->vendor = l & 0xffff;
dev->device = (l >> 16) & 0xffff;
pci_set_of_node(dev);
if (pci_setup_device(dev)) {
pci_bus_put(dev->bus);
kfree(dev);
return NULL;
}
return dev;
}
pci_bus_read_dev_vendor_id會去讀設備的devid和venderid,如果讀不到,說明設備不存在了,如果讀到了,就會創建一個pdev。
所以能不能掃描對端設備,就看能不能讀到設備的vendorid。
bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l,
int crs_timeout)
{
int delay = 1;
if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l))
return false;
/* some broken boards return 0 or ~0 if a slot is empty: */
if (*l == 0xffffffff || *l == 0x00000000 ||
*l == 0x0000ffff || *l == 0xffff0000)
return false;
/*
* Configuration Request Retry Status. Some root ports return the
* actual device ID instead of the synthetic ID (0xFFFF) required
* by the PCIe spec. Ignore the device ID and only check for
* (vendor id == 1).
*/
while ((*l & 0xffff) == 0x0001) {
if (!crs_timeout)
return false;
msleep(delay);
delay *= 2;
if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l))
return false;
/* Card hasn't responded in 60 seconds? Must be stuck. */
if (delay > crs_timeout) {
printk(KERN_WARNING "pci %04x:%02x:%02x.%d: not responding\n",
pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
PCI_FUNC(devfn));
return false;
}
}
return true;
}
while循環裏每隔一段時間就會進行一次venderid的配置讀寫,從上述代碼中可以看出,退出循環的原因會有兩個
1. config配置讀寫失敗了 (說明鏈路不通)
2. 超時沒有得到響應(預留時間爲60s,已經非常長了,第一點失敗的可能性更大)
所以掃描不到對端設備時:
1.確認建鏈是否成功,建鏈失敗,肯定掃描不到對端
2.確認對端的配置空間是否可寫(對端的pcie模塊是否處於解復位狀態)
3.確認type0,type1,iatu等參數配置是否正確,如果正確了,確認配置訪問的地址空間大小是否足夠。