Skip to content

Commit 665745f

Browse files
ij-intelbjorn-helgaas
authored andcommitted
PCI/bwctrl: Re-add BW notification portdrv as PCIe BW controller
This mostly reverts the commit b4c7d20 ("PCI/LINK: Remove bandwidth notification"). An upcoming commit extends this driver, building a PCIe bandwidth controller on top of it. PCIe bandwidth notifications were first added in the commit e8303bb ("PCI/LINK: Report degraded links via link bandwidth notification") but later had to be removed. The significant changes compared with the old bandwidth notification driver include: 1) Don't print the notifications into the kernel log, just keep the Link Speed cached in struct pci_bus updated. While somewhat unfortunate, the log spam was the source of complaints that eventually led to the removal of the bandwidth notifications driver (see the links below for further information). 2) Besides the Link Bandwidth Management Interrupt, also enable Link Autonomous Bandwidth Interrupt to cover the other source of bandwidth changes. 3) Handle Link Speed updates robustly. Refresh the cached Link Speed when enabling Bandwidth Notification Interrupts, and solve the race between Link Speed read and LBMS/LABS update in pcie_bwnotif_irq_thread(). 4) Use concurrency-safe LNKCTL RMW operations. 5) The driver is now called PCIe bwctrl (bandwidth controller) instead of just bandwidth notifications because of increased scope and functionality within the driver. 6) Coexist with the Target Link Speed quirk in pcie_failed_link_retrain(). Provide LBMS counting API for it. 7) Tweaks to variable/function names for consistency and length reasons. Bandwidth Notifications enable the cur_bus_speed in the struct pci_bus to keep track of PCIe Link Speed changes. 
[bhelgaas: This is based on previous work by Alexandru Gagniuc <mr.nuke.me@gmail.com>; see e8303bb ("PCI/LINK: Report degraded links via link bandwidth notification")] Link: https://lore.kernel.org/r/20241018144755.7875-7-ilpo.jarvinen@linux.intel.com Link: https://lore.kernel.org/all/20190429185611.121751-1-helgaas@kernel.org/ Link: https://lore.kernel.org/linux-pci/20190501142942.26972-1-keith.busch@intel.com/ Link: https://lore.kernel.org/linux-pci/20200115221008.GA191037@google.com/ Suggested-by: Lukas Wunner <lukas@wunner.de> # Building bwctrl on top of bwnotif Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> [bhelgaas: squash fix to drop IRQF_ONESHOT and convert to hardirq handler: https://lore.kernel.org/r/20241115165717.15233-1-ilpo.jarvinen@linux.intel.com] Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Tested-by: Stefan Wahren <wahrenst@gmx.net> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
1 parent 3491f50 commit 665745f

10 files changed

Lines changed: 229 additions & 9 deletions

File tree

MAINTAINERS

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17933,6 +17933,12 @@ F: include/linux/of_pci.h
1793317933
F: include/linux/pci*
1793417934
F: include/uapi/linux/pci*
1793517935

17936+
PCIE BANDWIDTH CONTROLLER
17937+
M: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
17938+
L: linux-pci@vger.kernel.org
17939+
S: Supported
17940+
F: drivers/pci/pcie/bwctrl.c
17941+
1793617942
PCIE DRIVER FOR AMAZON ANNAPURNA LABS
1793717943
M: Jonathan Chocron <jonnyc@amazon.com>
1793817944
L: linux-pci@vger.kernel.org

drivers/pci/hotplug/pciehp_ctrl.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
#include <linux/types.h>
2020
#include <linux/pm_runtime.h>
2121
#include <linux/pci.h>
22+
23+
#include "../pci.h"
2224
#include "pciehp.h"
2325

2426
/* The following routines constitute the bulk of the
@@ -127,6 +129,9 @@ static void remove_board(struct controller *ctrl, bool safe_removal)
127129

128130
pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
129131
INDICATOR_NOOP);
132+
133+
/* Don't carry LBMS indications across */
134+
pcie_reset_lbms_count(ctrl->pcie->port);
130135
}
131136

132137
static int pciehp_enable_slot(struct controller *ctrl);

drivers/pci/pci.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4740,7 +4740,7 @@ int pcie_retrain_link(struct pci_dev *pdev, bool use_lt)
47404740
* to track link speed or width changes made by hardware itself
47414741
* in attempt to correct unreliable link operation.
47424742
*/
4743-
pcie_capability_write_word(pdev, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS);
4743+
pcie_reset_lbms_count(pdev);
47444744
return rc;
47454745
}
47464746

drivers/pci/pci.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,17 @@ static inline void pcie_set_ecrc_checking(struct pci_dev *dev) { }
698698
static inline void pcie_ecrc_get_policy(char *str) { }
699699
#endif
700700

701+
#ifdef CONFIG_PCIEPORTBUS
702+
void pcie_reset_lbms_count(struct pci_dev *port);
703+
int pcie_lbms_count(struct pci_dev *port, unsigned long *val);
704+
#else
705+
static inline void pcie_reset_lbms_count(struct pci_dev *port) {}
706+
static inline int pcie_lbms_count(struct pci_dev *port, unsigned long *val)
707+
{
708+
return -EOPNOTSUPP;
709+
}
710+
#endif
711+
701712
struct pci_dev_reset_methods {
702713
u16 vendor;
703714
u16 device;

drivers/pci/pcie/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
pcieportdrv-y := portdrv.o rcec.o
66

7-
obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o
7+
obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o bwctrl.o
88

99
obj-y += aspm.o
1010
obj-$(CONFIG_PCIEAER) += aer.o err.o

drivers/pci/pcie/bwctrl.c

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
// SPDX-License-Identifier: GPL-2.0+
2+
/*
3+
* PCIe bandwidth controller
4+
*
5+
* Author: Alexandru Gagniuc <mr.nuke.me@gmail.com>
6+
*
7+
* Copyright (C) 2019 Dell Inc
8+
* Copyright (C) 2023-2024 Intel Corporation
9+
*
10+
* This service port driver hooks into the Bandwidth Notification interrupt
11+
* watching for changes or links becoming degraded in operation. It updates
12+
* the cached Current Link Speed that is exposed to user space through sysfs.
13+
*/
14+
15+
#define dev_fmt(fmt) "bwctrl: " fmt
16+
17+
#include <linux/atomic.h>
18+
#include <linux/cleanup.h>
19+
#include <linux/errno.h>
20+
#include <linux/interrupt.h>
21+
#include <linux/pci.h>
22+
#include <linux/rwsem.h>
23+
#include <linux/slab.h>
24+
#include <linux/types.h>
25+
26+
#include "../pci.h"
27+
#include "portdrv.h"
28+
29+
/**
30+
* struct pcie_bwctrl_data - PCIe bandwidth controller
31+
* @lbms_count: Count for LBMS (since last reset)
32+
*/
33+
struct pcie_bwctrl_data {
34+
atomic_t lbms_count;
35+
};
36+
37+
/*
 * Held for reading by the LBMS count accessors and for writing while
 * port->link_bwctrl is set or cleared, so that the port's bwctrl data
 * cannot be removed while an accessor is using it.
 */
static DECLARE_RWSEM(pcie_bwctrl_lbms_rwsem);
39+
40+
static void pcie_bwnotif_enable(struct pcie_device *srv)
41+
{
42+
struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
43+
struct pci_dev *port = srv->port;
44+
u16 link_status;
45+
int ret;
46+
47+
/* Count LBMS seen so far as one */
48+
ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status);
49+
if (ret == PCIBIOS_SUCCESSFUL && link_status & PCI_EXP_LNKSTA_LBMS)
50+
atomic_inc(&data->lbms_count);
51+
52+
pcie_capability_set_word(port, PCI_EXP_LNKCTL,
53+
PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE);
54+
pcie_capability_write_word(port, PCI_EXP_LNKSTA,
55+
PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS);
56+
57+
/*
58+
* Update after enabling notifications & clearing status bits ensures
59+
* link speed is up to date.
60+
*/
61+
pcie_update_link_speed(port->subordinate);
62+
}
63+
64+
static void pcie_bwnotif_disable(struct pci_dev *port)
65+
{
66+
pcie_capability_clear_word(port, PCI_EXP_LNKCTL,
67+
PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE);
68+
}
69+
70+
static irqreturn_t pcie_bwnotif_irq(int irq, void *context)
71+
{
72+
struct pcie_device *srv = context;
73+
struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
74+
struct pci_dev *port = srv->port;
75+
u16 link_status, events;
76+
int ret;
77+
78+
ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status);
79+
if (ret != PCIBIOS_SUCCESSFUL)
80+
return IRQ_NONE;
81+
82+
events = link_status & (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS);
83+
if (!events)
84+
return IRQ_NONE;
85+
86+
if (events & PCI_EXP_LNKSTA_LBMS)
87+
atomic_inc(&data->lbms_count);
88+
89+
pcie_capability_write_word(port, PCI_EXP_LNKSTA, events);
90+
91+
/*
92+
* Interrupts will not be triggered from any further Link Speed
93+
* change until LBMS is cleared by the write. Therefore, re-read the
94+
* speed (inside pcie_update_link_speed()) after LBMS has been
95+
* cleared to avoid missing link speed changes.
96+
*/
97+
pcie_update_link_speed(port->subordinate);
98+
99+
return IRQ_HANDLED;
100+
}
101+
102+
void pcie_reset_lbms_count(struct pci_dev *port)
103+
{
104+
struct pcie_bwctrl_data *data;
105+
106+
guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem);
107+
data = port->link_bwctrl;
108+
if (data)
109+
atomic_set(&data->lbms_count, 0);
110+
else
111+
pcie_capability_write_word(port, PCI_EXP_LNKSTA,
112+
PCI_EXP_LNKSTA_LBMS);
113+
}
114+
115+
int pcie_lbms_count(struct pci_dev *port, unsigned long *val)
116+
{
117+
struct pcie_bwctrl_data *data;
118+
119+
guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem);
120+
data = port->link_bwctrl;
121+
if (!data)
122+
return -ENOTTY;
123+
124+
*val = atomic_read(&data->lbms_count);
125+
126+
return 0;
127+
}
128+
129+
static int pcie_bwnotif_probe(struct pcie_device *srv)
130+
{
131+
struct pci_dev *port = srv->port;
132+
int ret;
133+
134+
struct pcie_bwctrl_data *data = devm_kzalloc(&srv->device,
135+
sizeof(*data), GFP_KERNEL);
136+
if (!data)
137+
return -ENOMEM;
138+
139+
ret = devm_request_irq(&srv->device, srv->irq, pcie_bwnotif_irq,
140+
IRQF_SHARED, "PCIe bwctrl", srv);
141+
if (ret)
142+
return ret;
143+
144+
scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) {
145+
port->link_bwctrl = no_free_ptr(data);
146+
pcie_bwnotif_enable(srv);
147+
}
148+
149+
pci_dbg(port, "enabled with IRQ %d\n", srv->irq);
150+
151+
return 0;
152+
}
153+
154+
static void pcie_bwnotif_remove(struct pcie_device *srv)
155+
{
156+
pcie_bwnotif_disable(srv->port);
157+
scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem)
158+
srv->port->link_bwctrl = NULL;
159+
}
160+
161+
static int pcie_bwnotif_suspend(struct pcie_device *srv)
162+
{
163+
pcie_bwnotif_disable(srv->port);
164+
return 0;
165+
}
166+
167+
/*
 * Resume callback: re-enable bandwidth notifications, which also refreshes
 * the cached link speed. Returns 0.
 */
static int pcie_bwnotif_resume(struct pcie_device *srv)
{
	pcie_bwnotif_enable(srv);

	return 0;
}
172+
173+
static struct pcie_port_service_driver pcie_bwctrl_driver = {
174+
.name = "pcie_bwctrl",
175+
.port_type = PCIE_ANY_PORT,
176+
.service = PCIE_PORT_SERVICE_BWCTRL,
177+
.probe = pcie_bwnotif_probe,
178+
.suspend = pcie_bwnotif_suspend,
179+
.resume = pcie_bwnotif_resume,
180+
.remove = pcie_bwnotif_remove,
181+
};
182+
183+
/*
 * Register the bandwidth controller as a PCIe port service driver.
 * Returns the result of pcie_port_service_register().
 */
int __init pcie_bwctrl_init(void)
{
	struct pcie_port_service_driver *drv = &pcie_bwctrl_driver;

	return pcie_port_service_register(drv);
}

drivers/pci/pcie/portdrv.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ static int pcie_message_numbers(struct pci_dev *dev, int mask,
6868
*/
6969

7070
if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP |
71-
PCIE_PORT_SERVICE_BWNOTIF)) {
71+
PCIE_PORT_SERVICE_BWCTRL)) {
7272
pcie_capability_read_word(dev, PCI_EXP_FLAGS, &reg16);
7373
*pme = FIELD_GET(PCI_EXP_FLAGS_IRQ, reg16);
7474
nvec = *pme + 1;
@@ -150,11 +150,11 @@ static int pcie_port_enable_irq_vec(struct pci_dev *dev, int *irqs, int mask)
150150

151151
/* PME, hotplug and bandwidth notification share an MSI/MSI-X vector */
152152
if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP |
153-
PCIE_PORT_SERVICE_BWNOTIF)) {
153+
PCIE_PORT_SERVICE_BWCTRL)) {
154154
pcie_irq = pci_irq_vector(dev, pme);
155155
irqs[PCIE_PORT_SERVICE_PME_SHIFT] = pcie_irq;
156156
irqs[PCIE_PORT_SERVICE_HP_SHIFT] = pcie_irq;
157-
irqs[PCIE_PORT_SERVICE_BWNOTIF_SHIFT] = pcie_irq;
157+
irqs[PCIE_PORT_SERVICE_BWCTRL_SHIFT] = pcie_irq;
158158
}
159159

160160
if (mask & PCIE_PORT_SERVICE_AER)
@@ -271,7 +271,7 @@ static int get_port_device_capability(struct pci_dev *dev)
271271

272272
pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &linkcap);
273273
if (linkcap & PCI_EXP_LNKCAP_LBNC)
274-
services |= PCIE_PORT_SERVICE_BWNOTIF;
274+
services |= PCIE_PORT_SERVICE_BWCTRL;
275275
}
276276

277277
return services;
@@ -828,6 +828,7 @@ static void __init pcie_init_services(void)
828828
pcie_aer_init();
829829
pcie_pme_init();
830830
pcie_dpc_init();
831+
pcie_bwctrl_init();
831832
pcie_hp_init();
832833
}
833834

drivers/pci/pcie/portdrv.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020
#define PCIE_PORT_SERVICE_HP (1 << PCIE_PORT_SERVICE_HP_SHIFT)
2121
#define PCIE_PORT_SERVICE_DPC_SHIFT 3 /* Downstream Port Containment */
2222
#define PCIE_PORT_SERVICE_DPC (1 << PCIE_PORT_SERVICE_DPC_SHIFT)
23-
#define PCIE_PORT_SERVICE_BWNOTIF_SHIFT 4 /* Bandwidth notification */
24-
#define PCIE_PORT_SERVICE_BWNOTIF (1 << PCIE_PORT_SERVICE_BWNOTIF_SHIFT)
23+
#define PCIE_PORT_SERVICE_BWCTRL_SHIFT 4 /* Bandwidth Controller (notifications) */
24+
#define PCIE_PORT_SERVICE_BWCTRL (1 << PCIE_PORT_SERVICE_BWCTRL_SHIFT)
2525

2626
#define PCIE_PORT_DEVICE_MAXSERVICES 5
2727

@@ -51,6 +51,8 @@ int pcie_dpc_init(void);
5151
static inline int pcie_dpc_init(void) { return 0; }
5252
#endif
5353

54+
int pcie_bwctrl_init(void);
55+
5456
/* Port Type */
5557
#define PCIE_ANY_PORT (~0)
5658

drivers/pci/quirks.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,14 @@
3535

3636
/*
 * Report whether LBMS has been seen for @dev.
 *
 * The count from pcie_lbms_count() is used when it is available; if that
 * call fails, fall back to the LBMS bit in the caller-supplied @lnksta.
 */
static bool pcie_lbms_seen(struct pci_dev *dev, u16 lnksta)
{
	unsigned long lbms_events;

	if (pcie_lbms_count(dev, &lbms_events) < 0)
		return lnksta & PCI_EXP_LNKSTA_LBMS;

	return lbms_events != 0;
}
4047

4148
/*

include/linux/pci.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,7 @@ struct pci_vpd {
313313
};
314314

315315
struct irq_affinity;
316+
struct pcie_bwctrl_data;
316317
struct pcie_link_state;
317318
struct pci_sriov;
318319
struct pci_p2pdma;
@@ -502,6 +503,7 @@ struct pci_dev {
502503
unsigned int dpc_rp_extensions:1;
503504
u8 dpc_rp_log_size;
504505
#endif
506+
struct pcie_bwctrl_data *link_bwctrl;
505507
#ifdef CONFIG_PCI_ATS
506508
union {
507509
struct pci_sriov *sriov; /* PF: SR-IOV info */

0 commit comments

Comments
 (0)