AER及linux內核驅動簡介：https://blog.csdn.net/u010443710/article/details/104649179

1. AER 中斷

首先AER驅動作爲錯誤上報和處理的機制，必須有一個錯誤上報的入口。

這個入口就是AER中斷。linux的AER驅動只針對RC，也就是說軟件需要處理RC的AER中斷請求。

並在中斷處理函數中通過AER 寄存器來判斷錯誤類型並作出相應處理。

1.1 AER中斷產生

在PCIe spec中定義了2種AER中斷產生方式，類似於ep設備，可以選擇legacy的INTx或者MSI/MSIx的方式來產生中斷。

但對於RC而言，無論是INTx還是MSI/MSIx，都不需要像ep那樣真的來觸發INTx邊帶信號或發送MSI tlp來告知RC。

因爲RC作爲根節點，內部的中斷就是報給自己，可以直接在chip內部處理，不需要在PCIe協議上走一圈。

這就涉及RC內部中斷上報的機制，由於RC內部中斷不僅限於AER中斷，所以這部分單獨開一篇進行闡述。

1.2 如何使能AER中斷?

AER Capability -> Root Error Command Register (Offset 2Ch)

打開相應的報告使能bit位，當錯誤發生後，就會有中斷產生。

2. AER驅動

AER驅動與PME、pciehp、pcie-dpc一樣是作爲pcie port的可選service。

service掛載在pcie port驅動上，由portdrv_core進行管理，service通過pcie_port_service_register進行註冊。

具體來說，service的數據結構如下：

/**
 * struct pcie_port_service_driver - descriptor of one PCIe port service
 * @name: name of the service
 * @probe: callback taking the service's pcie_device; returns an int status
 * @remove: callback taking the service's pcie_device
 * @suspend: callback taking the service's pcie_device; returns an int status
 * @resume: callback taking the service's pcie_device; returns an int status
 * @err_handler: service error recovery handlers
 * @reset_link: link reset callback, specific to the AER service driver
 * @port_type: type of the port this driver can handle
 * @service: port service this device represents
 * @driver: embedded generic device_driver
 *
 * NOTE(review): when each callback is invoked is decided by the port driver
 * core, which is not shown here - confirm against portdrv_core.
 */
struct pcie_port_service_driver {
	const char *name;
	int (*probe) (struct pcie_device *dev);
	void (*remove) (struct pcie_device *dev);
	int (*suspend) (struct pcie_device *dev);
	int (*resume) (struct pcie_device *dev);

	/* Service Error Recovery Handler */
	const struct pci_error_handlers *err_handler;

	/* Link Reset Capability - AER service driver specific */
	pci_ers_result_t (*reset_link) (struct pci_dev *dev);

	int port_type;  /* Type of the port this driver can handle */
	u32 service;    /* Port service this device represents */

	struct device_driver driver;
};

以下是AER的service結構

/* Port service descriptor for the AER service. */
static struct pcie_port_service_driver aerdriver = {
	.name = "aer",

	/* Which port type and which service this driver handles */
	.port_type = PCI_EXP_TYPE_ROOT_PORT,
	.service = PCIE_PORT_SERVICE_AER,

	/* Service callbacks */
	.probe = aer_probe,
	.remove = aer_remove,

	/* Error recovery hooks and the AER-specific link reset callback */
	.err_handler = &aer_error_handlers,
	.reset_link = aer_root_reset,
};

2.1 初始化（aer_probe）

AER初始化主要完成2件事情：

爲錯誤處理入口aer_irq，申請中斷；request_irq(dev->irq, aer_irq, IRQF_SHARED, "aerdrv", dev);
配置AER功能相關的cap寄存器，打開AER使能，中斷上報使能等；aer_enable_rootport(rpc);

/**
 * aer_probe - set up the AER service for a port service device
 * @dev: pointer to the pcie_dev data structure
 *
 * Invoked when PCI Express bus loads AER service driver. Allocates the
 * per-port rpc data, installs aer_irq() as a shared handler on dev->irq and
 * then enables error reporting through aer_enable_rootport().
 *
 * Return: 0 on success, -ENOMEM when the rpc data cannot be allocated, or
 * the error code returned by request_irq().
 */
static int aer_probe(struct pcie_device *dev)
{
	struct device *port_dev = &dev->device;
	struct aer_rpc *rpc = aer_alloc_rpc(dev);
	int err;

	if (!rpc) {
		dev_printk(KERN_DEBUG, port_dev, "alloc rpc failed\n");
		aer_remove(dev);
		return -ENOMEM;
	}

	/* Hook the AER handler onto the port's (shared) interrupt line. */
	err = request_irq(dev->irq, aer_irq, IRQF_SHARED, "aerdrv", dev);
	if (err) {
		dev_printk(KERN_DEBUG, port_dev, "request IRQ failed\n");
		aer_remove(dev);
		return err;
	}

	/* request_irq() succeeded: note that the handler is registered. */
	rpc->isr = 1;
	aer_enable_rootport(rpc);

	return 0;
}

這裏的中斷向量dev->irq，是pcie port驅動初始化時已經申請好的，可能是legacy的INTx，也可能是MSI/MSIx；AER只需要再註冊一個共享（IRQF_SHARED）中斷上去。

看一下aer_enable_rootport，先清除了所有device的狀態位，再把上下游設備的錯誤上報全部使能。

/**
 * aer_enable_rootport - enable Root Port's interrupts when receiving messages
 * @rpc: pointer to a Root Port data structure
 *
 * Invoked when PCIe bus loads AER service driver.
 *
 * Steps, in order: clear the port's Device Status, disable system error
 * generation on error messages, clear the AER root/correctable/uncorrectable
 * status registers, enable error reporting on the port and the devices below
 * it, and finally set the interrupt-on-message bits in the Root Error
 * Command register.
 */
static void aer_enable_rootport(struct aer_rpc *rpc)
{
	struct pci_dev *pdev = rpc->rpd->port;
	int aer_pos;
	u16 reg16;
	u32 reg32;

	/*
	 * Clear PCIe Capability's Device Status by writing back the value
	 * that was just read.
	 */
	pcie_capability_read_word(pdev, PCI_EXP_DEVSTA, &reg16);
	pcie_capability_write_word(pdev, PCI_EXP_DEVSTA, reg16);

	/* Disable system error generation in response to error messages */
	pcie_capability_clear_word(pdev, PCI_EXP_RTCTL,
				   SYSTEM_ERROR_INTR_ON_MESG_MASK);

	/* Offset of the AER capability in the port's config space */
	aer_pos = pdev->aer_cap;
	/*
	 * Clear error status: each of the three status registers is read and
	 * the same value is written back.
	 */
	pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, &reg32);
	pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32);
	pci_read_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, &reg32);
	pci_write_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, reg32);
	pci_read_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, &reg32);
	pci_write_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, reg32);

	/*
	 * Enable error reporting for the root port device and downstream port
	 * devices.
	 */
	set_downstream_devices_error_reporting(pdev, true);

	/*
	 * Enable Root Port's interrupt in response to error messages
	 * (read-modify-write so the other command bits are left as they are).
	 */
	pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, &reg32);
	reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
	pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, reg32);
}

2.2 中斷處理

AER中斷分爲上下半部，上半部aer_irq：

/**
 * aer_irq - interrupt handler (top half) for the Root Port's AER interrupt
 * @irq: IRQ number (not used in the body)
 * @context: the struct pcie_device that was passed as dev_id to request_irq()
 *
 * Reads the Root Error Status register and, when a correctable or
 * uncorrectable error has been received, records the status together with
 * the error source ID in rpc->e_sources and schedules rpc->dpc_handler to
 * process it later.
 *
 * Return: IRQ_NONE when neither PCI_ERR_ROOT_UNCOR_RCV nor
 * PCI_ERR_ROOT_COR_RCV is set in the status, IRQ_HANDLED otherwise
 * (including when the record is dropped because the queue is full).
 */
irqreturn_t aer_irq(int irq, void *context)
{
	unsigned int status, id;
	struct pcie_device *pdev = (struct pcie_device *)context;
	struct aer_rpc *rpc = get_service_data(pdev);
	int next_prod_idx;
	unsigned long flags;
	int pos;

	/* Offset of the AER capability in the port's config space */
	pos = pdev->port->aer_cap;
	/*
	 * Must lock access to Root Error Status Reg, Root Error ID Reg,
	 * and Root error producer/consumer index
	 */
	spin_lock_irqsave(&rpc->e_lock, flags);

	/* Read error status; bail out if no error was received by this port */
	pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, &status);
	if (!(status & (PCI_ERR_ROOT_UNCOR_RCV|PCI_ERR_ROOT_COR_RCV))) {
		spin_unlock_irqrestore(&rpc->e_lock, flags);
		return IRQ_NONE;
	}

	/*
	 * Read error source and clear error status (the status value just
	 * read is written back to the status register).
	 */
	pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_ERR_SRC, &id);
	pci_write_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, status);

	/*
	 * Store error source for later DPC handler. The producer index wraps
	 * to 0 at AER_ERROR_SOURCES_MAX; if advancing it would reach the
	 * consumer index, the queue is treated as full.
	 */
	next_prod_idx = rpc->prod_idx + 1;
	if (next_prod_idx == AER_ERROR_SOURCES_MAX)
		next_prod_idx = 0;
	if (next_prod_idx == rpc->cons_idx) {
		/*
		 * Error Storm Condition - possibly the same error occurred.
		 * Drop the error.
		 */
		spin_unlock_irqrestore(&rpc->e_lock, flags);
		return IRQ_HANDLED;
	}
	rpc->e_sources[rpc->prod_idx].status =  status;
	rpc->e_sources[rpc->prod_idx].id = id;
	rpc->prod_idx = next_prod_idx;
	spin_unlock_irqrestore(&rpc->e_lock, flags);

	/* Invoke DPC handler (done after the lock has been released) */
	schedule_work(&rpc->dpc_handler);

	return IRQ_HANDLED;
}

irq中首先讀取 Root Error Status Register，看看是不是有錯誤產生了。（PCI_ERR_ROOT_STATUS）

獲取錯誤源ID，Error Source Identification Register，PCI_ERR_ROOT_ERR_SRC

並保存在rpc->e_sources數組裏面。

中斷處理下半部：aer_isr，這是一個worker

/**
 * aer_isr - drain the error sources queued for a root port
 * @work: work item embedded in the port's struct aer_rpc (dpc_handler)
 *
 * Invoked, as DPC, when root port records new detected error. While holding
 * rpc_mutex, every source returned by get_e_source() is handed to
 * aer_isr_one_error().
 */
void aer_isr(struct work_struct *work)
{
	struct aer_err_source uninitialized_var(e_src);
	struct aer_rpc *rpc = container_of(work, struct aer_rpc, dpc_handler);
	struct pcie_device *port_dev = rpc->rpd;

	mutex_lock(&rpc->rpc_mutex);
	for (;;) {
		/* Stop as soon as no further error source is available. */
		if (!get_e_source(rpc, &e_src))
			break;
		aer_isr_one_error(port_dev, &e_src);
	}
	mutex_unlock(&rpc->rpc_mutex);
}

把剛纔上半部保存在rpc->e_sources裏面的錯誤源取出來，一個一個調用aer_isr_one_error進行處理

/**
 * aer_isr_one_error - handle one error source recorded by the root port
 * @p_device: pointer to error root port service device
 * @e_src: pointer to an error source (root error status and source id)
 *
 * One record may carry both a correctable and an uncorrectable error. The
 * correctable one is reported first, then the uncorrectable one. For each,
 * the port's shared e_info is filled in, the port info is printed, and the
 * affected devices are processed when find_source_device() locates them.
 */
static void aer_isr_one_error(struct pcie_device *p_device,
		struct aer_err_source *e_src)
{
	struct aer_rpc *rpc = get_service_data(p_device);
	struct aer_err_info *info = &rpc->e_info;

	/* Correctable error received */
	if (e_src->status & PCI_ERR_ROOT_COR_RCV) {
		info->id = ERR_COR_ID(e_src->id);
		info->severity = AER_CORRECTABLE;
		info->multi_error_valid =
			!!(e_src->status & PCI_ERR_ROOT_MULTI_COR_RCV);

		aer_print_port_info(p_device->port, info);

		if (find_source_device(p_device->port, info))
			aer_process_err_devices(p_device, info);
	}

	/* Uncorrectable error received: fatal or non-fatal */
	if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
		info->id = ERR_UNCOR_ID(e_src->id);
		info->severity = (e_src->status & PCI_ERR_ROOT_FATAL_RCV) ?
				 AER_FATAL : AER_NONFATAL;
		info->multi_error_valid =
			!!(e_src->status & PCI_ERR_ROOT_MULTI_UNCOR_RCV);

		aer_print_port_info(p_device->port, info);

		if (find_source_device(p_device->port, info))
			aer_process_err_devices(p_device, info);
	}
}

aer_isr_one_error就是處理錯誤的具體實現了，這裏按照AER的不同類型進行錯誤報告或者恢復處理。

【PCIe】AER linux 驅動淺析

1. AER 中斷

1.1 AER中斷產生

1.2 如何使能AER中斷?

2. AER驅動

2.1 初始化（aer_probe）

2.2 中斷處理

[SDIO] SD card 初始化及常用命令解析（附波形，uboot代碼）

[CAN BUS] USB-CAN adapter / USB轉CAN 開源項目推薦（CANable candlelight cangaroo）

[PCIe] SR-IOV （單根虛擬化）及linux驅動淺析（device的PF和VF及其驅動）

eMMC Sampling Tuning and linux mmc driver support

[linux] .gpg 格式加密文件解密操作

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結