aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c19
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c20
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h11
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.c25
5 files changed, 72 insertions, 7 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 828a9ceef1e7..8535a52a62ca 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1070,7 +1070,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
svm_range_list_lock_and_flush_work(&p->svms, current->mm);
mutex_lock(&p->svms.lock);
mmap_write_unlock(current->mm);
- if (interval_tree_iter_first(&p->svms.objects,
+
+ /* Skip a special case that allocates VRAM without VA,
+ * VA will be invalid of 0.
+ */
+ if (!(!args->va_addr && (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) &&
+ interval_tree_iter_first(&p->svms.objects,
args->va_addr >> PAGE_SHIFT,
(args->va_addr + args->size - 1) >> PAGE_SHIFT)) {
pr_err("Address: 0x%llx already allocated by SVM\n",
@@ -2566,8 +2571,8 @@ static int criu_restore(struct file *filep,
pr_debug("CRIU restore (num_devices:%u num_bos:%u num_objects:%u priv_data_size:%llu)\n",
args->num_devices, args->num_bos, args->num_objects, args->priv_data_size);
- if (!args->bos || !args->devices || !args->priv_data || !args->priv_data_size ||
- !args->num_devices || !args->num_bos)
+ if ((args->num_bos > 0 && !args->bos) || !args->devices || !args->priv_data ||
+ !args->priv_data_size || !args->num_devices)
return -EINVAL;
mutex_lock(&p->mutex);
@@ -3252,8 +3257,10 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
int retcode = -EINVAL;
bool ptrace_attached = false;
- if (nr >= AMDKFD_CORE_IOCTL_COUNT)
+ if (nr >= AMDKFD_CORE_IOCTL_COUNT) {
+ retcode = -ENOTTY;
goto err_i1;
+ }
if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
u32 amdkfd_size;
@@ -3266,8 +3273,10 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
asize = amdkfd_size;
cmd = ioctl->cmd;
- } else
+ } else {
+ retcode = -ENOTTY;
goto err_i1;
+ }
dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 349c351e242b..e9cfb80bd436 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -495,6 +495,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
mutex_init(&kfd->doorbell_mutex);
ida_init(&kfd->doorbell_ida);
+ atomic_set(&kfd->kfd_processes_count, 0);
return kfd;
}
@@ -1133,7 +1134,15 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
}
for (i = 0; i < kfd->num_nodes; i++) {
- node = kfd->nodes[i];
+ /* Race if another thread in b/w
+ * kfd_cleanup_nodes and kfree(kfd),
+ * when kfd->nodes[i] = NULL
+ */
+ if (kfd->nodes[i])
+ node = kfd->nodes[i];
+ else
+ return;
+
spin_lock_irqsave(&node->interrupt_lock, flags);
if (node->interrupts_active
@@ -1485,6 +1494,15 @@ int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd)
mutex_lock(&kfd_processes_mutex);
+ /* kfd_processes_count is per kfd_dev, return -EBUSY without
+ * further check
+ */
+ if (!!atomic_read(&kfd->kfd_processes_count)) {
+ pr_debug("process_wq_release not finished\n");
+ r = -EBUSY;
+ goto out;
+ }
+
if (hash_empty(kfd_processes_table) && !kfd_is_locked(kfd))
goto out;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 67694bcd9464..70ef051511bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -111,7 +111,14 @@
#define KFD_KERNEL_QUEUE_SIZE 2048
-#define KFD_UNMAP_LATENCY_MS (4000)
+/* KFD_UNMAP_LATENCY_MS is the timeout CP waiting for SDMA preemption. One XCC
+ * can be associated to 2 SDMA engines. queue_preemption_timeout_ms is the time
+ * driver waiting for CP returning the UNMAP_QUEUE fence. Thus the math is
+ * queue_preemption_timeout_ms = sdma_preemption_time * 2 + cp workload
+ * The format here makes CP workload 10% of total timeout
+ */
+#define KFD_UNMAP_LATENCY_MS \
+ ((queue_preemption_timeout_ms - queue_preemption_timeout_ms / 10) >> 1)
#define KFD_MAX_SDMA_QUEUES 128
@@ -375,6 +382,8 @@ struct kfd_dev {
/* for dynamic partitioning */
int kfd_dev_lock;
+
+ atomic_t kfd_processes_count;
};
enum kfd_mempool {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 5be28c6c4f6a..ddfe30c13e9d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1088,6 +1088,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
pdd->runtime_inuse = false;
}
+ atomic_dec(&pdd->dev->kfd->kfd_processes_count);
+
kfree(pdd);
p->pdds[i] = NULL;
}
@@ -1649,6 +1651,8 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
/* Init idr used for memory handle translation */
idr_init(&pdd->alloc_idr);
+ atomic_inc(&dev->kfd->kfd_processes_count);
+
return pdd;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 3d8b20828c06..827507cfed7a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1714,6 +1714,29 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
next = min(vma->vm_end, end);
npages = (next - addr) >> PAGE_SHIFT;
+ /* HMM requires at least READ permissions. If provided with PROT_NONE,
+ * unmap the memory. If it's not already mapped, this is a no-op
+ * If PROT_WRITE is provided without READ, warn first then unmap
+ */
+ if (!(vma->vm_flags & VM_READ)) {
+ unsigned long e, s;
+
+ svm_range_lock(prange);
+ if (vma->vm_flags & VM_WRITE)
+ pr_debug("VM_WRITE without VM_READ is not supported");
+ s = max(start, prange->start);
+ e = min(end, prange->last);
+ if (e >= s)
+ r = svm_range_unmap_from_gpus(prange, s, e,
+ KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU);
+ svm_range_unlock(prange);
+ /* If unmap returns non-zero, we'll bail on the next for loop
+ * iteration, so just leave r and continue
+ */
+ addr = next;
+ continue;
+ }
+
WRITE_ONCE(p->svms.faulting_task, current);
r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
readonly, owner, NULL,
@@ -3023,6 +3046,8 @@ retry_write_locked:
if (svms->checkpoint_ts[gpuidx] != 0) {
if (amdgpu_ih_ts_after_or_equal(ts, svms->checkpoint_ts[gpuidx])) {
pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
+ if (write_locked)
+ mmap_write_downgrade(mm);
r = -EAGAIN;
goto out_unlock_svms;
} else {