[PATCH v2] nvdimm: Avoid race between probe and reading device attributes
by Richard Palethorpe
It is possible to cause a division error and use-after-free by querying the
nmem device before the driver data is fully initialised in nvdimm_probe. E.g
by doing
(while true; do
cat /sys/bus/nd/devices/nmem*/available_slots 2>&1 > /dev/null
done) &
while true; do
for i in $(seq 0 4); do
echo nmem$i > /sys/bus/nd/drivers/nvdimm/bind
done
for i in $(seq 0 4); do
echo nmem$i > /sys/bus/nd/drivers/nvdimm/unbind
done
done
On 5.7-rc3 this causes:
[ 12.711578] divide error: 0000 [#1] SMP KASAN PTI
[ 12.712321] CPU: 0 PID: 231 Comm: cat Not tainted 5.7.0-rc3 #48
[ 12.713188] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba527-rebuilt.opensuse.org 04/01/2014
[ 12.714857] RIP: 0010:nd_label_nfree+0x134/0x1a0 [libnvdimm]
[ 12.715772] Code: ba 00 00 00 00 00 fc ff df 48 89 f9 48 c1 e9 03 0f b6 14 11 84 d2 74 05 80 fa 03 7e 52 8b 73 08 31 d2 89 c1 48 83 c4 08 5b 5d <f7> f6 31 d2 41 5c 83 c0 07 c1 e8 03 48 8d 84 00 8e 02 00 00 25 00
[ 12.718311] RSP: 0018:ffffc9000046fd08 EFLAGS: 00010282
[ 12.719030] RAX: 0000000000000000 RBX: ffffffffc0073aa0 RCX: 0000000000000000
[ 12.720005] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff888060931808
[ 12.720970] RBP: ffff88806609d018 R08: 0000000000000001 R09: ffffed100cc0a2b1
[ 12.721889] R10: ffff888066051587 R11: ffffed100cc0a2b0 R12: ffff888060931800
[ 12.722744] R13: ffff888064362000 R14: ffff88806609d018 R15: ffffffff8b1a2520
[ 12.723602] FS: 00007fd16f3d5580(0000) GS:ffff88806b400000(0000) knlGS:0000000000000000
[ 12.724600] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 12.725308] CR2: 00007fd16f1ec000 CR3: 0000000064322006 CR4: 0000000000160ef0
[ 12.726268] Call Trace:
[ 12.726633] available_slots_show+0x4e/0x120 [libnvdimm]
[ 12.727380] dev_attr_show+0x42/0x80
[ 12.727891] ? memset+0x20/0x40
[ 12.728341] sysfs_kf_seq_show+0x218/0x410
[ 12.728923] seq_read+0x389/0xe10
[ 12.729415] vfs_read+0x101/0x2d0
[ 12.729891] ksys_read+0xf9/0x1d0
[ 12.730361] ? kernel_write+0x120/0x120
[ 12.730915] do_syscall_64+0x95/0x4a0
[ 12.731435] entry_SYSCALL_64_after_hwframe+0x49/0xb3
[ 12.732163] RIP: 0033:0x7fd16f2fe4be
[ 12.732685] Code: c0 e9 c6 fe ff ff 50 48 8d 3d 2e 12 0a 00 e8 69 e9 01 00 66 0f 1f 84 00 00 00 00 00 64 8b 04 25 18 00 00 00 85 c0 75 14 0f 05 <48> 3d 00 f0 ff ff 77 5a c3 66 0f 1f 84 00 00 00 00 00 48 83 ec 28
[ 12.735207] RSP: 002b:00007ffd3177b838 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
[ 12.736261] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007fd16f2fe4be
[ 12.737233] RDX: 0000000000020000 RSI: 00007fd16f1ed000 RDI: 0000000000000003
[ 12.738203] RBP: 00007fd16f1ed000 R08: 00007fd16f1ec010 R09: 0000000000000000
[ 12.739172] R10: 00007fd16f3f4f70 R11: 0000000000000246 R12: 00007ffd3177ce23
[ 12.740144] R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000
[ 12.741139] Modules linked in: nfit libnvdimm
[ 12.741783] ---[ end trace 99532e4b82410044 ]---
[ 12.742452] RIP: 0010:nd_label_nfree+0x134/0x1a0 [libnvdimm]
[ 12.743167] Code: ba 00 00 00 00 00 fc ff df 48 89 f9 48 c1 e9 03 0f b6 14 11 84 d2 74 05 80 fa 03 7e 52 8b 73 08 31 d2 89 c1 48 83 c4 08 5b 5d <f7> f6 31 d2 41 5c 83 c0 07 c1 e8 03 48 8d 84 00 8e 02 00 00 25 00
[ 12.745709] RSP: 0018:ffffc9000046fd08 EFLAGS: 00010282
[ 12.746340] RAX: 0000000000000000 RBX: ffffffffc0073aa0 RCX: 0000000000000000
[ 12.747209] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff888060931808
[ 12.748081] RBP: ffff88806609d018 R08: 0000000000000001 R09: ffffed100cc0a2b1
[ 12.748977] R10: ffff888066051587 R11: ffffed100cc0a2b0 R12: ffff888060931800
[ 12.749849] R13: ffff888064362000 R14: ffff88806609d018 R15: ffffffff8b1a2520
[ 12.750729] FS: 00007fd16f3d5580(0000) GS:ffff88806b400000(0000) knlGS:0000000000000000
[ 12.751708] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 12.752441] CR2: 00007fd16f1ec000 CR3: 0000000064322006 CR4: 0000000000160ef0
[ 12.821357] ==================================================================
[ 12.822284] BUG: KASAN: use-after-free in __mutex_lock+0x111c/0x11a0
[ 12.823084] Read of size 4 at addr ffff888065c26238 by task reproducer/218
[ 12.823968]
[ 12.824183] CPU: 2 PID: 218 Comm: reproducer Tainted: G D 5.7.0-rc3 #48
[ 12.825167] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba527-rebuilt.opensuse.org 04/01/2014
[ 12.826595] Call Trace:
[ 12.826926] dump_stack+0x97/0xe0
[ 12.827362] print_address_description.constprop.0+0x1b/0x210
[ 12.828111] ? __mutex_lock+0x111c/0x11a0
[ 12.828645] __kasan_report.cold+0x37/0x92
[ 12.829179] ? __mutex_lock+0x111c/0x11a0
[ 12.829706] kasan_report+0x38/0x50
[ 12.830158] __mutex_lock+0x111c/0x11a0
[ 12.830666] ? ftrace_graph_stop+0x10/0x10
[ 12.831193] ? is_nvdimm_bus+0x40/0x40 [libnvdimm]
[ 12.831820] ? mutex_trylock+0x2b0/0x2b0
[ 12.832333] ? nvdimm_probe+0x259/0x420 [libnvdimm]
[ 12.832975] ? mutex_trylock+0x2b0/0x2b0
[ 12.833500] ? nvdimm_probe+0x259/0x420 [libnvdimm]
[ 12.834122] ? prepare_ftrace_return+0xa1/0xf0
[ 12.834724] ? ftrace_graph_caller+0x6b/0xa0
[ 12.835269] ? acpi_label_write+0x390/0x390 [nfit]
[ 12.835909] ? nvdimm_probe+0x259/0x420 [libnvdimm]
[ 12.836558] ? nvdimm_probe+0x259/0x420 [libnvdimm]
[ 12.837179] nvdimm_probe+0x259/0x420 [libnvdimm]
[ 12.837802] nvdimm_bus_probe+0x110/0x6b0 [libnvdimm]
[ 12.838470] really_probe+0x212/0x9a0
[ 12.838954] driver_probe_device+0x1cd/0x300
[ 12.839511] ? driver_probe_device+0x5/0x300
[ 12.840063] device_driver_attach+0xe7/0x120
[ 12.840623] bind_store+0x18d/0x230
[ 12.841075] kernfs_fop_write+0x200/0x420
[ 12.841606] vfs_write+0x154/0x450
[ 12.842047] ksys_write+0xf9/0x1d0
[ 12.842497] ? __ia32_sys_read+0xb0/0xb0
[ 12.843010] do_syscall_64+0x95/0x4a0
[ 12.843495] entry_SYSCALL_64_after_hwframe+0x49/0xb3
[ 12.844140] RIP: 0033:0x7f5b235d3563
[ 12.844607] Code: 0c 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 64 8b 04 25 18 00 00 00 85 c0 75 14 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 55 c3 0f 1f 40 00 48 83 ec 28 48 89 54 24 18
[ 12.846877] RSP: 002b:00007fff1c3bc578 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[ 12.847822] RAX: ffffffffffffffda RBX: 0000000000000006 RCX: 00007f5b235d3563
[ 12.848717] RDX: 0000000000000006 RSI: 000055f9576710d0 RDI: 0000000000000001
[ 12.849594] RBP: 000055f9576710d0 R08: 000000000000000a R09: 0000000000000000
[ 12.850470] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000006
[ 12.851333] R13: 00007f5b236a3500 R14: 0000000000000006 R15: 00007f5b236a3700
[ 12.852247]
[ 12.852466] Allocated by task 225:
[ 12.852893] save_stack+0x1b/0x40
[ 12.853310] __kasan_kmalloc.constprop.0+0xc2/0xd0
[ 12.853918] kmem_cache_alloc_node+0xef/0x270
[ 12.854475] copy_process+0x485/0x6130
[ 12.854945] _do_fork+0xf1/0xb40
[ 12.855353] __do_sys_clone+0xc3/0x100
[ 12.855843] do_syscall_64+0x95/0x4a0
[ 12.856302] entry_SYSCALL_64_after_hwframe+0x49/0xb3
[ 12.856939]
[ 12.857140] Freed by task 0:
[ 12.857522] save_stack+0x1b/0x40
[ 12.857940] __kasan_slab_free+0x12c/0x170
[ 12.858464] kmem_cache_free+0xb0/0x330
[ 12.858945] rcu_core+0x55f/0x19f0
[ 12.859385] __do_softirq+0x228/0x944
[ 12.859869]
[ 12.860075] The buggy address belongs to the object at ffff888065c26200
[ 12.860075] which belongs to the cache task_struct of size 6016
[ 12.861638] The buggy address is located 56 bytes inside of
[ 12.861638] 6016-byte region [ffff888065c26200, ffff888065c27980)
[ 12.863084] The buggy address belongs to the page:
[ 12.863702] page:ffffea0001970800 refcount:1 mapcount:0 mapping:0000000021ee3712 index:0x0 head:ffffea0001970800 order:3 compound_mapcount:0 compound_pincount:0
[ 12.865478] flags: 0x80000000010200(slab|head)
[ 12.866039] raw: 0080000000010200 0000000000000000 0000000100000001 ffff888066c0f980
[ 12.867010] raw: 0000000000000000 0000000080050005 00000001ffffffff 0000000000000000
[ 12.867986] page dumped because: kasan: bad access detected
[ 12.868696]
[ 12.868900] Memory state around the buggy address:
[ 12.869514] ffff888065c26100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 12.870414] ffff888065c26180: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 12.871318] >ffff888065c26200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 12.872238] ^
[ 12.872870] ffff888065c26280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 12.873754] ffff888065c26300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 12.874640]
==================================================================
This can be prevented by setting the driver data after initialisation is
complete.
Fixes: 4d88a97aa9e8 ("libnvdimm, nvdimm: dimm driver and base libnvdimm device-driver infrastructure")
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Vishal Verma <vishal.l.verma(a)intel.com>
Cc: Dave Jiang <dave.jiang(a)intel.com>
Cc: Ira Weiny <ira.weiny(a)intel.com>
Cc: linux-nvdimm(a)lists.01.org
Cc: linux-kernel(a)vger.kernel.org
Cc: Coly Li <colyli(a)suse.com>
Signed-off-by: Richard Palethorpe <rpalethorpe(a)suse.com>
---
V2:
+ Reviewed by Coly and removed unnecessary lock
drivers/nvdimm/dimm.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c
index 7d4ddc4d9322..3d3988e1d9a0 100644
--- a/drivers/nvdimm/dimm.c
+++ b/drivers/nvdimm/dimm.c
@@ -43,7 +43,6 @@ static int nvdimm_probe(struct device *dev)
if (!ndd)
return -ENOMEM;
- dev_set_drvdata(dev, ndd);
ndd->dpa.name = dev_name(dev);
ndd->ns_current = -1;
ndd->ns_next = -1;
@@ -106,6 +105,8 @@ static int nvdimm_probe(struct device *dev)
if (rc)
goto err;
+ dev_set_drvdata(dev, ndd);
+
return 0;
err:
--
2.26.2
3 weeks, 5 days
[PATCH 1/1] ndctl/namespace: Fix disable-namespace accounting relative to seed devices
by Redhairer Li
Seed namespaces are included in "ndctl disable-namespace all". However
since the user never "creates" them it is surprising to see
"disable-namespace" report 1 more namespace relative to the number that
have been created. Catch attempts to disable a zero-sized namespace:
Before:
{
"dev":"namespace1.0",
"size":"492.00 MiB (515.90 MB)",
"blockdev":"pmem1"
}
{
"dev":"namespace1.1",
"size":"492.00 MiB (515.90 MB)",
"blockdev":"pmem1.1"
}
{
"dev":"namespace1.2",
"size":"492.00 MiB (515.90 MB)",
"blockdev":"pmem1.2"
}
disabled 4 namespaces
After:
{
"dev":"namespace1.0",
"size":"492.00 MiB (515.90 MB)",
"blockdev":"pmem1"
}
{
"dev":"namespace1.3",
"size":"492.00 MiB (515.90 MB)",
"blockdev":"pmem1.3"
}
{
"dev":"namespace1.1",
"size":"492.00 MiB (515.90 MB)",
"blockdev":"pmem1.1"
}
disabled 3 namespaces
Signed-off-by: Redhairer Li <redhairer.li(a)intel.com>
---
ndctl/lib/libndctl.c | 11 ++++++++---
ndctl/region.c | 4 +++-
2 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/ndctl/lib/libndctl.c b/ndctl/lib/libndctl.c
index ee737cb..49f362b 100644
--- a/ndctl/lib/libndctl.c
+++ b/ndctl/lib/libndctl.c
@@ -4231,6 +4231,7 @@ NDCTL_EXPORT int ndctl_namespace_disable_safe(struct ndctl_namespace *ndns)
const char *bdev = NULL;
char path[50];
int fd;
+ unsigned long long size = ndctl_namespace_get_size(ndns);
if (pfn && ndctl_pfn_is_enabled(pfn))
bdev = ndctl_pfn_get_block_device(pfn);
@@ -4260,9 +4261,13 @@ NDCTL_EXPORT int ndctl_namespace_disable_safe(struct ndctl_namespace *ndns)
devname, bdev, strerror(errno));
return -errno;
}
- } else
- ndctl_namespace_disable_invalidate(ndns);
-
+ } else {
+ if (size == 0)
+ /* Don't try to disable idle namespace (no capacity allocated) */
+ return -ENXIO;
+ else
+ ndctl_namespace_disable_invalidate(ndns);
+ }
return 0;
}
diff --git a/ndctl/region.c b/ndctl/region.c
index 7945007..0014bb9 100644
--- a/ndctl/region.c
+++ b/ndctl/region.c
@@ -72,6 +72,7 @@ static int region_action(struct ndctl_region *region, enum device_action mode)
{
struct ndctl_namespace *ndns;
int rc = 0;
+ unsigned long long size;
switch (mode) {
case ACTION_ENABLE:
@@ -80,7 +81,8 @@ static int region_action(struct ndctl_region *region, enum device_action mode)
case ACTION_DISABLE:
ndctl_namespace_foreach(region, ndns) {
rc = ndctl_namespace_disable_safe(ndns);
- if (rc)
+ size = ndctl_namespace_get_size(ndns);
+ if (rc && size != 0)
return rc;
}
rc = ndctl_region_disable_invalidate(region);
--
2.20.1.windows.1
1 month
Feedback requested: Exposing NVDIMM performance statistics in a generic way
by Vaibhav Jain
Hello,
I am looking for some community feedback on these two Problem-statements:
1.How to expose NVDIMM performance statistics in an arch or nvdimm vendor
agnostic manner ?
2. Is there a common set of performance statistics for NVDIMMs that all
vendors should provide ?
Problem context
===============
While working on bring up of PAPR SCM based NVDIMMs[1] for arch/powerpc
we want to expose certain dimm performance statistics like "Media
Read/Write Counts", "Power-on Seconds" etc to user-space [2]. These
performance statistics are similar to what ipmctl[3] reports for Intel®
Optane™ persistent memory via the '-show performance' command line
arg. However the reported set of performance stats doesn't cover the
entirety of all performance stats supported by PAPR SCM based NVDimms.
For example, here is a subset of performance stats which are specific to
PAPR SCM NVDimms and that are not reported by ipmctl:
* Controller Reset Count
* Controller Reset Elapsed Time
* Power-on Seconds
* Cache Read Hit Count
* Cache Write Hit Count
The possibility of updating ipmctl to add support for these performance
statistics is greatly hampered by the lack of ACPI support on the Powerpc
arch. Secondly, vendors who don't support an ACPI/NFIT command set
similar to Intel® Optane™ (for example MSFT) are also left in the
lurch. Problem-statement#1 points to this specific problem.
Additionally in absence of any pre-agreed set of performance statistics
which all vendors should support, adding support for such a
functionality in ipmctl may not bode well for other nvdimm vendors. For
example if support for reporting "Controller Reset Count" is added to
ipmctl then it may not be applicable to other vendors such as Intel®
Optane™. This issue is what Problem-statement#2 refers to.
Possible Solution for Problem#1
===============================
One possible solution to Problem#1 can be to add support for reporting
NVDIMM performance statistics in 'ndctl'. 'libndctl' already has a layer
that abstracts underlying NVDIMM vendors (via struct ndctl_dimm_ops),
making supporting different NVDIMM vendors fairly easy. Also ndctl is
more widely used compared to 'ipmctl', hence adding such a functionality
to ndctl would make it more widely used.
Above solution was implemented as RFC patch-set[2] that exposes these
performance statistics through a generic abstraction in libndctl and
added a presentation layer for this data in ndctl[4]. It added a new
command line flags '--stat' to ndctl to report *all* nvdimm vendor
reported performance stats. The output is similar to one below:
# ndctl list -D --stats
[
{
"dev":"nmem0",
"stats":{
"Power-on Seconds":603931,
"Media Read Count":0,
"Media Write Count":6313,
}
}
]
This was done by adding two new dimm-ops callbacks that were
implemented by the papr_scm implementation within libndctl. These
callbacks are invoked by newly introduce code in 'util/json-smart.c'
that format the returned stats from these new dimm-ops and transform
them into a json-object to later presentation. I would request you to
look at RFC patch-set[2] to understand the implementation details.
Possible Solution for Problem#2
================================
Solution to Problem-statement#2 is what eludes me though. If there is a
minimal set of performance stats (similar to what ndctl enforces for
health-stats) then implementation of such a functionality in
ndctl/ipmctl would be easy to implement. But is it really possible to
have such a common set of performance stats that NVDIMM vendors can
expose.
Patch-set[2] though tries to bypass this problem by letting the vendor
decide which performance stats to expose. This opens up the possibility
of this functionality being abused by dimm vendors to report arbitrary
data through this flag that may not be performance-stats.
Summing-up
==========
In light of above, requesting your feedback as to how
problem-statements#{1, 2} can be addressed within ndctl subsystem. Also
are these problems even worth solving.
References
==========
[1] https://github.com/torvalds/linux/blob/master/Documentation/powerpc/papr_...
[2] "[ndctl RFC-PATCH 0/4] Add support for reporting PAPR NVDIMM
Statistics"
https://lore.kernel.org/linux-nvdimm/20200518110814.145644-1-vaibhav@linu...
[3] https://docs.pmem.io/ipmctl-user-guide/instrumentation/show-device-perfor...
[4] "[RFC-PATCH 1/4] ndctl,libndctl: Implement new dimm-ops 'new_stats'
and 'get_stat'"
https://lore.kernel.org/linux-nvdimm/20200514225258.508463-2-vaibhav@linu...
Thanks,
~ Vaibhav
3 months, 1 week
[RFC PATCH 0/4] powerpc/papr_scm: Add support for reporting NVDIMM performance statistics
by Vaibhav Jain
The patch-set proposes to add support for fetching and reporting
performance statistics for PAPR compliant NVDIMMs as described in
documentation for H_SCM_PERFORMANCE_STATS hcall Ref[1]. The patch-set
also implements mechanisms to expose NVDIMM performance stats via
sysfs and newly introduced PDSMs[2] for libndctl.
This patch-set combined with corresponding ndctl and libndctl changes
proposed at Ref[3] should enable user to fetch PAPR compliant NVDIMMs
using following command:
# ndctl list -D --stats
[
{
"dev":"nmem0",
"stats":{
"Controller Reset Count":2,
"Controller Reset Elapsed Time":603331,
"Power-on Seconds":603931,
"Life Remaining":"100%",
"Critical Resource Utilization":"0%",
"Host Load Count":5781028,
"Host Store Count":8966800,
"Host Load Duration":975895365,
"Host Store Duration":716230690,
"Media Read Count":0,
"Media Write Count":6313,
"Media Read Duration":0,
"Media Write Duration":9679615,
"Cache Read Hit Count":5781028,
"Cache Write Hit Count":8442479,
"Fast Write Count":8969912
}
}
]
The patchset is dependent on existing patch-set "[PATCH v7 0/5]
powerpc/papr_scm: Add support for reporting nvdimm health" available
at Ref[2] that adds support for reporting PAPR compliant NVDIMMs in
'papr_scm' kernel module.
Structure of the patch-set
==========================
The patch-set starts with implementing functionality in papr_scm
module to issue H_SCM_PERFORMANCE_STATS hcall, fetch & parse dimm
performance stats and exposing them as a PAPR specific libnvdimm
attribute named 'perf_stats'
Patch-2 introduces a new PDSM named FETCH_PERF_STATS that can be
issued by libndctl asking papr_scm to issue the
H_SCM_PERFORMANCE_STATS hcall using helpers introduced earlier and
storing the results in a dimm specific perf-stats-buffer.
Patch-3 introduces a new PDSM named READ_PERF_STATS that can be
issued by libndctl to read the perf-stats-buffer in an incremental
manner to workaround the 256-bytes envelop limitation of libnvdimm.
Finally Patch-4 introduces a new PDSM named GET_PERF_STAT that can be
issued by libndctl to read values of a specific NVDIMM performance
stat like "Life Remaining".
References
==========
[1] Documentation/powerpc/papr_hcals.rst
[2] https://lore.kernel.org/linux-nvdimm/20200508104922.72565-1-vaibhav@linux...
[3] https://github.com/vaibhav92/ndctl/tree/papr_scm_stats_v1
Vaibhav Jain (4):
powerpc/papr_scm: Fetch nvdimm performance stats from PHYP
powerpc/papr_scm: Add support for PAPR_SCM_PDSM_FETCH_PERF_STATS
powerpc/papr_scm: Implement support for PAPR_SCM_PDSM_READ_PERF_STATS
powerpc/papr_scm: Add support for PDSM GET_PERF_STAT
Documentation/ABI/testing/sysfs-bus-papr-scm | 27 ++
arch/powerpc/include/uapi/asm/papr_scm_pdsm.h | 60 +++
arch/powerpc/platforms/pseries/papr_scm.c | 391 ++++++++++++++++++
3 files changed, 478 insertions(+)
--
2.26.2
4 months, 1 week
[PATCH v2] ACPI: Drop rcu usage for MMIO mappings
by Dan Williams
Recently a performance problem was reported for a process invoking a
non-trivial ASL program. The method call in this case ends up
repetitively triggering a call path like:
acpi_ex_store
acpi_ex_store_object_to_node
acpi_ex_write_data_to_field
acpi_ex_insert_into_field
acpi_ex_write_with_update_rule
acpi_ex_field_datum_io
acpi_ex_access_region
acpi_ev_address_space_dispatch
acpi_ex_system_memory_space_handler
acpi_os_map_cleanup.part.14
_synchronize_rcu_expedited.constprop.89
schedule
The end result of frequent synchronize_rcu_expedited() invocation is
tiny sub-millisecond spurts of execution where the scheduler freely
migrates this apparently sleepy task. The overhead of frequent scheduler
invocation multiplies the execution time by a factor of 2-3X.
For example, performance improves from 16 minutes to 7 minutes for a
firmware update procedure across 24 devices.
Perhaps the rcu usage was intended to allow for not taking a sleeping
lock in the acpi_os_{read,write}_memory() path which ostensibly could be
called from an APEI NMI error interrupt? Neither rcu_read_lock() nor
ioremap() are interrupt safe, so add a WARN_ONCE() to validate that rcu
was not serving as a mechanism to avoid direct calls to ioremap(). Even
the original implementation had a spin_lock_irqsave(), but that is not
NMI safe.
APEI itself already has some concept of avoiding ioremap() from
interrupt context (see erst_exec_move_data()), if the new warning
triggers it means that APEI either needs more instrumentation like that
to pre-emptively fail, or more infrastructure to arrange for pre-mapping
the resources it needs in NMI context.
Cc: <stable(a)vger.kernel.org>
Fixes: 620242ae8c3d ("ACPI: Maintain a list of ACPI memory mapped I/O remappings")
Cc: Len Brown <lenb(a)kernel.org>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Ira Weiny <ira.weiny(a)intel.com>
Cc: James Morse <james.morse(a)arm.com>
Cc: Erik Kaneda <erik.kaneda(a)intel.com>
Cc: Myron Stowe <myron.stowe(a)redhat.com>
Cc: "Rafael J. Wysocki" <rjw(a)rjwysocki.net>
Cc: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
---
Changes since v1 [1]:
- Actually cc: the most important list for ACPI changes (Rafael)
- Cleanup unnecessary variable initialization (Andy)
Link: https://lore.kernel.org/linux-nvdimm/158880834905.2183490.156163294694202...
drivers/acpi/osl.c | 117 +++++++++++++++++++++++++---------------------------
1 file changed, 57 insertions(+), 60 deletions(-)
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 762c5d50b8fe..a44b75aac5d0 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -214,13 +214,13 @@ acpi_physical_address __init acpi_os_get_root_pointer(void)
return pa;
}
-/* Must be called with 'acpi_ioremap_lock' or RCU read lock held. */
static struct acpi_ioremap *
acpi_map_lookup(acpi_physical_address phys, acpi_size size)
{
struct acpi_ioremap *map;
- list_for_each_entry_rcu(map, &acpi_ioremaps, list, acpi_ioremap_lock_held())
+ lockdep_assert_held(&acpi_ioremap_lock);
+ list_for_each_entry(map, &acpi_ioremaps, list)
if (map->phys <= phys &&
phys + size <= map->phys + map->size)
return map;
@@ -228,7 +228,6 @@ acpi_map_lookup(acpi_physical_address phys, acpi_size size)
return NULL;
}
-/* Must be called with 'acpi_ioremap_lock' or RCU read lock held. */
static void __iomem *
acpi_map_vaddr_lookup(acpi_physical_address phys, unsigned int size)
{
@@ -263,7 +262,8 @@ acpi_map_lookup_virt(void __iomem *virt, acpi_size size)
{
struct acpi_ioremap *map;
- list_for_each_entry_rcu(map, &acpi_ioremaps, list, acpi_ioremap_lock_held())
+ lockdep_assert_held(&acpi_ioremap_lock);
+ list_for_each_entry(map, &acpi_ioremaps, list)
if (map->virt <= virt &&
virt + size <= map->virt + map->size)
return map;
@@ -360,7 +360,7 @@ void __iomem __ref
map->size = pg_sz;
map->refcount = 1;
- list_add_tail_rcu(&map->list, &acpi_ioremaps);
+ list_add_tail(&map->list, &acpi_ioremaps);
out:
mutex_unlock(&acpi_ioremap_lock);
@@ -374,20 +374,13 @@ void *__ref acpi_os_map_memory(acpi_physical_address phys, acpi_size size)
}
EXPORT_SYMBOL_GPL(acpi_os_map_memory);
-/* Must be called with mutex_lock(&acpi_ioremap_lock) */
-static unsigned long acpi_os_drop_map_ref(struct acpi_ioremap *map)
-{
- unsigned long refcount = --map->refcount;
-
- if (!refcount)
- list_del_rcu(&map->list);
- return refcount;
-}
-
-static void acpi_os_map_cleanup(struct acpi_ioremap *map)
+static void acpi_os_drop_map_ref(struct acpi_ioremap *map)
{
- synchronize_rcu_expedited();
+ lockdep_assert_held(&acpi_ioremap_lock);
+ if (--map->refcount > 0)
+ return;
acpi_unmap(map->phys, map->virt);
+ list_del(&map->list);
kfree(map);
}
@@ -408,7 +401,6 @@ static void acpi_os_map_cleanup(struct acpi_ioremap *map)
void __ref acpi_os_unmap_iomem(void __iomem *virt, acpi_size size)
{
struct acpi_ioremap *map;
- unsigned long refcount;
if (!acpi_permanent_mmap) {
__acpi_unmap_table(virt, size);
@@ -422,11 +414,8 @@ void __ref acpi_os_unmap_iomem(void __iomem *virt, acpi_size size)
WARN(true, PREFIX "%s: bad address %p\n", __func__, virt);
return;
}
- refcount = acpi_os_drop_map_ref(map);
+ acpi_os_drop_map_ref(map);
mutex_unlock(&acpi_ioremap_lock);
-
- if (!refcount)
- acpi_os_map_cleanup(map);
}
EXPORT_SYMBOL_GPL(acpi_os_unmap_iomem);
@@ -461,7 +450,6 @@ void acpi_os_unmap_generic_address(struct acpi_generic_address *gas)
{
u64 addr;
struct acpi_ioremap *map;
- unsigned long refcount;
if (gas->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY)
return;
@@ -477,11 +465,8 @@ void acpi_os_unmap_generic_address(struct acpi_generic_address *gas)
mutex_unlock(&acpi_ioremap_lock);
return;
}
- refcount = acpi_os_drop_map_ref(map);
+ acpi_os_drop_map_ref(map);
mutex_unlock(&acpi_ioremap_lock);
-
- if (!refcount)
- acpi_os_map_cleanup(map);
}
EXPORT_SYMBOL(acpi_os_unmap_generic_address);
@@ -700,55 +685,71 @@ int acpi_os_read_iomem(void __iomem *virt_addr, u64 *value, u32 width)
return 0;
}
+static void __iomem *acpi_os_rw_map(acpi_physical_address phys_addr,
+ unsigned int size, bool *did_fallback)
+{
+ void __iomem *virt_addr;
+
+ if (WARN_ONCE(in_interrupt(), "ioremap in interrupt context\n"))
+ return NULL;
+
+ /* Try to use a cached mapping and fallback otherwise */
+ *did_fallback = false;
+ mutex_lock(&acpi_ioremap_lock);
+ virt_addr = acpi_map_vaddr_lookup(phys_addr, size);
+ if (virt_addr)
+ return virt_addr;
+ mutex_unlock(&acpi_ioremap_lock);
+
+ virt_addr = acpi_os_ioremap(phys_addr, size);
+ *did_fallback = true;
+
+ return virt_addr;
+}
+
+static void acpi_os_rw_unmap(void __iomem *virt_addr, bool did_fallback)
+{
+ if (did_fallback) {
+ /* in the fallback case no lock is held */
+ iounmap(virt_addr);
+ return;
+ }
+
+ mutex_unlock(&acpi_ioremap_lock);
+}
+
acpi_status
acpi_os_read_memory(acpi_physical_address phys_addr, u64 *value, u32 width)
{
- void __iomem *virt_addr;
unsigned int size = width / 8;
- bool unmap = false;
+ bool did_fallback = false;
+ void __iomem *virt_addr;
u64 dummy;
int error;
- rcu_read_lock();
- virt_addr = acpi_map_vaddr_lookup(phys_addr, size);
- if (!virt_addr) {
- rcu_read_unlock();
- virt_addr = acpi_os_ioremap(phys_addr, size);
- if (!virt_addr)
- return AE_BAD_ADDRESS;
- unmap = true;
- }
-
+ virt_addr = acpi_os_rw_map(phys_addr, size, &did_fallback);
+ if (!virt_addr)
+ return AE_BAD_ADDRESS;
if (!value)
value = &dummy;
error = acpi_os_read_iomem(virt_addr, value, width);
BUG_ON(error);
- if (unmap)
- iounmap(virt_addr);
- else
- rcu_read_unlock();
-
+ acpi_os_rw_unmap(virt_addr, did_fallback);
return AE_OK;
}
acpi_status
acpi_os_write_memory(acpi_physical_address phys_addr, u64 value, u32 width)
{
- void __iomem *virt_addr;
unsigned int size = width / 8;
- bool unmap = false;
+ bool did_fallback = false;
+ void __iomem *virt_addr;
- rcu_read_lock();
- virt_addr = acpi_map_vaddr_lookup(phys_addr, size);
- if (!virt_addr) {
- rcu_read_unlock();
- virt_addr = acpi_os_ioremap(phys_addr, size);
- if (!virt_addr)
- return AE_BAD_ADDRESS;
- unmap = true;
- }
+ virt_addr = acpi_os_rw_map(phys_addr, size, &did_fallback);
+ if (!virt_addr)
+ return AE_BAD_ADDRESS;
switch (width) {
case 8:
@@ -767,11 +768,7 @@ acpi_os_write_memory(acpi_physical_address phys_addr, u64 value, u32 width)
BUG();
}
- if (unmap)
- iounmap(virt_addr);
- else
- rcu_read_unlock();
-
+ acpi_os_rw_unmap(virt_addr, did_fallback);
return AE_OK;
}
7 months, 2 weeks
[PATCH ndctl v1 00/10] daxctl: Support for sub-dividing soft-reserved regions
by Joao Martins
Hey,
This series introduces the daxctl support for sub-dividing soft-reserved
regions created by EFI/HMAT/efi_fake_mem. It's the userspace counterpart
of this recent patch series [0].
These new 'dynamic' regions can be partitioned into multiple different devices
whose subdivisions can consist of one or more ranges. This
is in contrast to static dax regions -- created with ndctl-create-namespace
-m devdax -- which can neither be subdivided nor be discontiguous.
See also cover-letter of [0].
The daxctl changes in these patches are depicted as:
* {create,destroy,disable,enable}-device:
These orchestrate/manage the sub-division devices.
It mimmics the same as namespaces equivalent commands.
* Allow reconfigure-device to change the size of an existing *dynamic* dax
device.
* Add test coverage (so far tried to cover all range allocation code paths,
but I am still fishing for bugs). Additionally, there are bugs so applying
[0] may not make it pass the added functional test yet.
I am sending the series earlier (i.e. before the kernel patches get merged)
mainly to share a common unit tests, and also letting others try it out.
The only TODOs left are documentation, and perhaps listing of the mappingX sysfs
entries.
Thoughts, comments appreciated. :)
Thanks!
Joao
[0] "device-dax: Support sub-dividing soft-reserved ranges",
https://lore.kernel.org/linux-nvdimm/158500767138.2088294.171316462598039...
Dan Williams (1):
daxctl: Cleanup whitespace
Joao Martins (9):
libdaxctl: add daxctl_dev_set_size()
daxctl: add resize support in reconfigure-device
daxctl: add command to disable devdax device
daxctl: add command to enable devdax device
libdaxctl: add daxctl_region_create_dev()
daxctl: add command to create device
libdaxctl: add daxctl_region_destroy_dev()
daxctl: add command to destroy device
daxctl/test: Add tests for dynamic dax regions
daxctl/builtin.h | 4 +
daxctl/daxctl.c | 4 +
daxctl/device.c | 310 ++++++++++++++++++++++++++++++++++++++-
daxctl/lib/libdaxctl.c | 67 +++++++++
daxctl/lib/libdaxctl.sym | 7 +
daxctl/libdaxctl.h | 3 +
test/Makefile.am | 1 +
test/daxctl-create.sh | 293 ++++++++++++++++++++++++++++++++++++
util/filter.c | 2 +-
9 files changed, 686 insertions(+), 5 deletions(-)
create mode 100755 test/daxctl-create.sh
--
2.17.1
7 months, 2 weeks
[PATCH 00/12] device-dax: Support sub-dividing soft-reserved ranges
by Dan Williams
The device-dax facility allows an address range to be directly mapped
through a chardev, or turned around and hotplugged to the core kernel
page allocator as System-RAM. It is the baseline mechanism for
converting persistent memory (pmem) to be used as another volatile
memory pool i.e. the current Memory Tiering hot topic on linux-mm.
In the case of pmem the nvdimm-namespace-label mechanism can sub-divide
it, but that labeling mechanism is not available / applicable to
soft-reserved ("EFI specific purpose") memory [1]. This series provides
a sysfs-mechanism for the daxctl utility to enable provisioning of
volatile-soft-reserved memory ranges.
The motivations for this facility are:
1/ Allow performance differentiated memory ranges to be split between
kernel-managed and directly-accessed use cases.
2/ Allow physical memory to be provisioned along performance relevant
address boundaries. For example, divide a memory-side cache [2] along
cache-color boundaries.
3/ Parcel out soft-reserved memory to VMs using device-dax as a security
/ permissions boundary [3]. Specifically I have seen people (ab)using
memmap=nn!ss (mark System-RAM as Persistent Memory) just to get the
device-dax interface on custom address ranges.
The baseline for this series is today's next/master + "[PATCH v2 0/6]
Manual definition of Soft Reserved memory devices" [4].
Big thanks to Joao for the early testing and feedback on this series!
Given the dependencies on the memremap_pages() reworks in Andrew's tree
and the proximity to v5.7 this is clearly v5.8 material. The patches in
most need of a second opinion are the memremap_pages() reworks to switch
from 'struct resource' to 'struct range' and allow for an array of
ranges to be mapped at once.
[1]: https://lore.kernel.org/r/157309097008.1579826.12818463304589384434.stgit...
[2]: https://lore.kernel.org/r/154899811738.3165233.12325692939590944259.stgit...
[3]: https://lore.kernel.org/r/20200110190313.17144-1-joao.m.martins@oracle.com/
[4]: http://lore.kernel.org/r/158489354353.1457606.8327903161927980740.stgit@d...
---
Dan Williams (12):
device-dax: Drop the dax_region.pfn_flags attribute
device-dax: Move instance creation parameters to 'struct dev_dax_data'
device-dax: Make pgmap optional for instance creation
device-dax: Kill dax_kmem_res
device-dax: Add an allocation interface for device-dax instances
device-dax: Introduce seed devices
drivers/base: Make device_find_child_by_name() compatible with sysfs inputs
device-dax: Add resize support
mm/memremap_pages: Convert to 'struct range'
mm/memremap_pages: Support multiple ranges per invocation
device-dax: Add dis-contiguous resource support
device-dax: Introduce 'mapping' devices
arch/powerpc/kvm/book3s_hv_uvmem.c | 14 -
drivers/base/core.c | 2
drivers/dax/bus.c | 877 ++++++++++++++++++++++++++++++--
drivers/dax/bus.h | 28 +
drivers/dax/dax-private.h | 36 +
drivers/dax/device.c | 97 ++--
drivers/dax/hmem/hmem.c | 18 -
drivers/dax/kmem.c | 170 +++---
drivers/dax/pmem/compat.c | 2
drivers/dax/pmem/core.c | 22 +
drivers/gpu/drm/nouveau/nouveau_dmem.c | 4
drivers/nvdimm/badrange.c | 26 -
drivers/nvdimm/claim.c | 13
drivers/nvdimm/nd.h | 3
drivers/nvdimm/pfn_devs.c | 13
drivers/nvdimm/pmem.c | 27 +
drivers/nvdimm/region.c | 21 -
drivers/pci/p2pdma.c | 12
include/linux/memremap.h | 9
include/linux/range.h | 6
mm/memremap.c | 297 ++++++-----
tools/testing/nvdimm/dax-dev.c | 22 +
tools/testing/nvdimm/test/iomap.c | 2
23 files changed, 1318 insertions(+), 403 deletions(-)
7 months, 3 weeks
[PATCH 00/12] ACPI/NVDIMM: Runtime Firmware Activation
by Dan Williams
Quoting the documentation:
Some persistent memory devices run a firmware locally on the device /
"DIMM" to perform tasks like media management, capacity provisioning,
and health monitoring. The process of updating that firmware typically
involves a reboot because it has implications for in-flight memory
transactions. However, reboots are disruptive and at least the Intel
persistent memory platform implementation, described by the Intel ACPI
DSM specification [1], has added support for activating firmware at
runtime.
[1]: https://docs.pmem.io/persistent-memory/
The approach taken is to abstract the Intel platform specific mechanism
behind a libnvdimm-generic sysfs interface. The interface could support
runtime-firmware-activation on another architecture without need to
change userspace tooling.
The ACPI NFIT implementation involves a set of device-specific-methods
(DSMs) to 'arm' individual devices for activation and bus-level
'trigger' method to execute the activation. Informational / enumeration
methods are also provided at the bus and device level.
One complicating aspect of the memory device firmware activation is that
the memory controller may need to be quiesced, no memory cycles, during
the activation. While the platform has mechanisms to support holding off
in-flight DMA during the activation, the device response to that delay
is potentially undefined. The platform may reject a runtime firmware
update if, for example, a PCI-E device does not support its completion
timeout value being increased to meet the activation time. Outside of
device timeouts the quiesce period may also violate application
timeouts.
Given the above device and application timeout considerations the
implementation defaults to hooking into the suspend path to trigger the
activation, i.e. that a suspend-resume cycle (at least up to the syscore
suspend point) is required. That default policy ensures that the system
is in a quiescent state before ceasing memory controller responses for
the activate. However, if desired, runtime activation without suspend
can be forced as an override.
The ndctl utility grows the following extensions / commands to drive
this mechanism:
1/ The existing update-firmware command will 'arm' devices where the
firmware image is staged by default.
ndctl update-firmware all -f firmware_image.bin
2/ The existing ability to enumerate firmware-update capabilities now
includes firmware activate capabilities at the 'bus' and 'dimm/device'
level:
ndctl list -BDF -b nfit_test.0
[
{
"provider":"nfit_test.0",
"dev":"ndbus2",
"scrub_state":"idle",
"firmware":{
"activate_method":"suspend",
"activate_state":"idle"
},
"dimms":[
{
"dev":"nmem1",
"id":"cdab-0a-07e0-ffffffff",
"handle":0,
"phys_id":0,
"security":"disabled",
"firmware":{
"current_version":0,
"can_update":true
}
},
...
3/ When the system can support activation without quiesce, or when the
suspend-resume requirement is going to be suppressed, the new
activate-firmware command wraps that functionality:
ndctl activate-firmware nfit_test.0 --force
One major open question for review is how users can trigger
firmware-activation via suspend without doing a full trip through the
BIOS. The activation currently requires CONFIG_PM_DEBUG to enable that
flow. This seems an awkward dependency for something that is expected to
be a production capability.
---
Dan Williams (12):
libnvdimm: Validate command family indices
ACPI: NFIT: Move bus_dsm_mask out of generic nvdimm_bus_descriptor
ACPI: NFIT: Define runtime firmware activation commands
tools/testing/nvdimm: Cleanup dimm index passing
tools/testing/nvdimm: Add command debug messages
tools/testing/nvdimm: Prepare nfit_ctl_test() for ND_CMD_CALL emulation
tools/testing/nvdimm: Emulate firmware activation commands
driver-core: Introduce DEVICE_ATTR_ADMIN_{RO,RW}
libnvdimm: Convert to DEVICE_ATTR_ADMIN_RO()
libnvdimm: Add runtime firmware activation sysfs interface
PM, libnvdimm: Add syscore_quiesced() callback for firmware activation
ACPI: NFIT: Add runtime firmware activate support
Documentation/ABI/testing/sysfs-bus-nfit | 35 ++
Documentation/ABI/testing/sysfs-bus-nvdimm | 2
.../driver-api/nvdimm/firmware-activate.rst | 74 +++
drivers/acpi/nfit/core.c | 146 +++++--
drivers/acpi/nfit/intel.c | 426 ++++++++++++++++++++
drivers/acpi/nfit/intel.h | 61 +++
drivers/acpi/nfit/nfit.h | 39 ++
drivers/base/syscore.c | 18 +
drivers/nvdimm/bus.c | 46 ++
drivers/nvdimm/core.c | 103 +++++
drivers/nvdimm/dimm_devs.c | 99 +++++
drivers/nvdimm/namespace_devs.c | 2
drivers/nvdimm/nd-core.h | 1
drivers/nvdimm/pfn_devs.c | 2
drivers/nvdimm/region_devs.c | 2
include/linux/device.h | 4
include/linux/libnvdimm.h | 53 ++
include/linux/syscore_ops.h | 2
include/linux/sysfs.h | 7
include/uapi/linux/ndctl.h | 5
kernel/power/suspend.c | 2
tools/testing/nvdimm/test/nfit.c | 367 ++++++++++++++---
22 files changed, 1382 insertions(+), 114 deletions(-)
create mode 100644 Documentation/ABI/testing/sysfs-bus-nvdimm
create mode 100644 Documentation/driver-api/nvdimm/firmware-activate.rst
base-commit: 48778464bb7d346b47157d21ffde2af6b2d39110
7 months, 3 weeks
[PATCH] libnvdimm/security: Fix key lookup permissions
by Dan Williams
As of commit 8c0637e950d6 ("keys: Make the KEY_NEED_* perms an enum rather
than a mask") lookup_user_key() needs an explicit declaration of what it
wants to do with the key. Add KEY_NEED_SEARCH to fix a warning with the
below signature, and fixes the inability to retrieve a key.
WARNING: CPU: 15 PID: 6276 at security/keys/permission.c:35 key_task_permission+0xd3/0x140
[..]
RIP: 0010:key_task_permission+0xd3/0x140
[..]
Call Trace:
lookup_user_key+0xeb/0x6b0
? vsscanf+0x3df/0x840
? key_validate+0x50/0x50
? key_default_cmp+0x20/0x20
nvdimm_get_user_key_payload.part.0+0x21/0x110 [libnvdimm]
nvdimm_security_store+0x67d/0xb20 [libnvdimm]
security_store+0x67/0x1a0 [libnvdimm]
kernfs_fop_write+0xcf/0x1c0
vfs_write+0xde/0x1d0
ksys_write+0x68/0xe0
do_syscall_64+0x5c/0xa0
entry_SYSCALL_64_after_hwframe+0x49/0xb3
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Vishal Verma <vishal.l.verma(a)intel.com>
Cc: Dave Jiang <dave.jiang(a)intel.com>
Cc: Ira Weiny <ira.weiny(a)intel.com>
Suggested-by: David Howells <dhowells(a)redhat.com>
Fixes: 8c0637e950d6 ("keys: Make the KEY_NEED_* perms an enum rather than a mask")
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
---
drivers/nvdimm/security.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c
index 89b85970912d..4cef69bd3c1b 100644
--- a/drivers/nvdimm/security.c
+++ b/drivers/nvdimm/security.c
@@ -95,7 +95,7 @@ static struct key *nvdimm_lookup_user_key(struct nvdimm *nvdimm,
struct encrypted_key_payload *epayload;
struct device *dev = &nvdimm->dev;
- keyref = lookup_user_key(id, 0, 0);
+ keyref = lookup_user_key(id, 0, KEY_NEED_SEARCH);
if (IS_ERR(keyref))
return NULL;
7 months, 3 weeks
rename ->make_request_fn and move it to the block_device_operations
by Christoph Hellwig
Hi Jens,
this series moves the make_request_fn method into block_device_operations
with the much more descriptive ->submit_bio name. It then also gives
generic_make_request a more descriptive name, and further optimize the
path to issue to blk-mq, removing the need for the direct_make_request
bypass.
Diffstat:
Documentation/block/biodoc.rst | 2
Documentation/block/writeback_cache_control.rst | 2
Documentation/fault-injection/fault-injection.rst | 2
Documentation/trace/ftrace.rst | 4
arch/m68k/emu/nfblock.c | 8
arch/xtensa/platforms/iss/simdisk.c | 9
block/bio.c | 14 -
block/blk-cgroup.c | 2
block/blk-core.c | 255 +++++++++-------------
block/blk-crypto-fallback.c | 2
block/blk-crypto.c | 2
block/blk-merge.c | 23 -
block/blk-mq.c | 12 -
block/blk-throttle.c | 4
block/blk.h | 5
block/bounce.c | 2
drivers/block/brd.c | 5
drivers/block/drbd/drbd_int.h | 8
drivers/block/drbd/drbd_main.c | 12 -
drivers/block/drbd/drbd_receiver.c | 2
drivers/block/drbd/drbd_req.c | 8
drivers/block/drbd/drbd_worker.c | 2
drivers/block/null_blk_main.c | 19 +
drivers/block/pktcdvd.c | 15 -
drivers/block/ps3vram.c | 20 -
drivers/block/rsxx/dev.c | 14 -
drivers/block/umem.c | 11
drivers/block/zram/zram_drv.c | 14 -
drivers/lightnvm/core.c | 8
drivers/lightnvm/pblk-init.c | 16 -
drivers/lightnvm/pblk-read.c | 2
drivers/md/bcache/bcache.h | 2
drivers/md/bcache/btree.c | 2
drivers/md/bcache/request.c | 11
drivers/md/bcache/request.h | 4
drivers/md/bcache/super.c | 24 +-
drivers/md/dm-cache-target.c | 6
drivers/md/dm-clone-target.c | 10
drivers/md/dm-crypt.c | 6
drivers/md/dm-delay.c | 2
drivers/md/dm-era-target.c | 2
drivers/md/dm-integrity.c | 4
drivers/md/dm-mpath.c | 2
drivers/md/dm-raid1.c | 2
drivers/md/dm-snap-persistent.c | 2
drivers/md/dm-snap.c | 6
drivers/md/dm-thin.c | 4
drivers/md/dm-verity-target.c | 2
drivers/md/dm-writecache.c | 2
drivers/md/dm-zoned-target.c | 2
drivers/md/dm.c | 41 +--
drivers/md/md-faulty.c | 4
drivers/md/md-linear.c | 4
drivers/md/md-multipath.c | 4
drivers/md/md.c | 7
drivers/md/raid0.c | 8
drivers/md/raid1.c | 14 -
drivers/md/raid10.c | 28 +-
drivers/md/raid5.c | 10
drivers/nvdimm/blk.c | 5
drivers/nvdimm/btt.c | 5
drivers/nvdimm/pmem.c | 5
drivers/nvme/host/core.c | 1
drivers/nvme/host/multipath.c | 18 -
drivers/nvme/host/nvme.h | 1
drivers/s390/block/dcssblk.c | 11
drivers/s390/block/xpram.c | 8
fs/buffer.c | 5
include/linux/blk-mq.h | 2
include/linux/blkdev.h | 12 -
include/linux/lightnvm.h | 3
71 files changed, 387 insertions(+), 408 deletions(-)
8 months