31 files changed, 9318 insertions, 0 deletions
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/Kconfig b/kernel/drivers/gpu/drm/amd/amdkfd/Kconfig
new file mode 100644
index 000000000..8dfac37ff
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -0,0 +1,9 @@
+#
+# Heterogenous system architecture configuration
+#
+
+config HSA_AMD
+	tristate "HSA kernel driver for AMD GPU devices"
+	depends on DRM_RADEON && AMD_IOMMU_V2 && X86_64
+	help
+	  Enable this if you want to use HSA features on AMD GPU devices.
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/Makefile b/kernel/drivers/gpu/drm/amd/amdkfd/Makefile
new file mode 100644
index 000000000..0f4960148
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -0,0 +1,16 @@
+#
+# Makefile for Heterogenous System Architecture support for AMD GPU devices
+#
+
+ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/
+
+amdkfd-y	:= kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
+		kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
+		kfd_process.o kfd_queue.o kfd_mqd_manager.o \
+		kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \
+		kfd_kernel_queue.o kfd_kernel_queue_cik.o \
+		kfd_kernel_queue_vi.o kfd_packet_manager.o \
+		kfd_process_queue_manager.o kfd_device_queue_manager.o \
+		kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
+
+obj-$(CONFIG_HSA_AMD)	+= amdkfd.o
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/cik_regs.h b/kernel/drivers/gpu/drm/amd/amdkfd/cik_regs.h
new file mode 100644
index 000000000..01ff332fa
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/cik_regs.h
@@ -0,0 +1,234 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef CIK_REGS_H
+#define CIK_REGS_H
+
+#define IH_VMID_0_LUT					0x3D40u
+
+#define BIF_DOORBELL_CNTL				0x530Cu
+
+#define	SRBM_GFX_CNTL					0xE44
+#define	PIPEID(x)					((x) << 0)
+#define	MEID(x)						((x) << 2)
+#define	VMID(x)						((x) << 4)
+#define	QUEUEID(x)					((x) << 8)
+
+#define	SQ_CONFIG					0x8C00
+
+#define	SH_MEM_BASES					0x8C28
+/* if PTR32, these are the bases for scratch and lds */
+#define	PRIVATE_BASE(x)					((x) << 0) /* scratch */
+#define	SHARED_BASE(x)					((x) << 16) /* LDS */
+#define	SH_MEM_APE1_BASE				0x8C2C
+/* if PTR32, this is the base location of GPUVM */
+#define	SH_MEM_APE1_LIMIT				0x8C30
+/* if PTR32, this is the upper limit of GPUVM */
+#define	SH_MEM_CONFIG					0x8C34
+#define	PTR32						(1 << 0)
+#define PRIVATE_ATC					(1 << 1)
+#define	ALIGNMENT_MODE(x)				((x) << 2)
+#define	SH_MEM_ALIGNMENT_MODE_DWORD			0
+#define	SH_MEM_ALIGNMENT_MODE_DWORD_STRICT		1
+#define	SH_MEM_ALIGNMENT_MODE_STRICT			2
+#define	SH_MEM_ALIGNMENT_MODE_UNALIGNED			3
+#define	DEFAULT_MTYPE(x)				((x) << 4)
+#define	APE1_MTYPE(x)					((x) << 7)
+
+/* valid for both DEFAULT_MTYPE and APE1_MTYPE */
+#define	MTYPE_CACHED					0
+#define	MTYPE_NONCACHED					3
+
+
+#define SH_STATIC_MEM_CONFIG				0x9604u
+
+#define	TC_CFG_L1_LOAD_POLICY0				0xAC68
+#define	TC_CFG_L1_LOAD_POLICY1				0xAC6C
+#define	TC_CFG_L1_STORE_POLICY				0xAC70
+#define	TC_CFG_L2_LOAD_POLICY0				0xAC74
+#define	TC_CFG_L2_LOAD_POLICY1				0xAC78
+#define	TC_CFG_L2_STORE_POLICY0				0xAC7C
+#define	TC_CFG_L2_STORE_POLICY1				0xAC80
+#define	TC_CFG_L2_ATOMIC_POLICY				0xAC84
+#define	TC_CFG_L1_VOLATILE				0xAC88
+#define	TC_CFG_L2_VOLATILE				0xAC8C
+
+#define CP_PQ_WPTR_POLL_CNTL				0xC20C
+#define	WPTR_POLL_EN					(1 << 31)
+
+#define CPC_INT_CNTL					0xC2D0
+#define CP_ME1_PIPE0_INT_CNTL				0xC214
+#define CP_ME1_PIPE1_INT_CNTL				0xC218
+#define CP_ME1_PIPE2_INT_CNTL				0xC21C
+#define CP_ME1_PIPE3_INT_CNTL				0xC220
+#define CP_ME2_PIPE0_INT_CNTL				0xC224
+#define CP_ME2_PIPE1_INT_CNTL				0xC228
+#define CP_ME2_PIPE2_INT_CNTL				0xC22C
+#define CP_ME2_PIPE3_INT_CNTL				0xC230
+#define DEQUEUE_REQUEST_INT_ENABLE			(1 << 13)
+#define WRM_POLL_TIMEOUT_INT_ENABLE			(1 << 17)
+#define PRIV_REG_INT_ENABLE				(1 << 23)
+#define TIME_STAMP_INT_ENABLE				(1 << 26)
+#define GENERIC2_INT_ENABLE				(1 << 29)
+#define GENERIC1_INT_ENABLE				(1 << 30)
+#define GENERIC0_INT_ENABLE				(1 << 31)
+#define CP_ME1_PIPE0_INT_STATUS				0xC214
+#define CP_ME1_PIPE1_INT_STATUS				0xC218
+#define CP_ME1_PIPE2_INT_STATUS				0xC21C
+#define CP_ME1_PIPE3_INT_STATUS				0xC220
+#define CP_ME2_PIPE0_INT_STATUS				0xC224
+#define CP_ME2_PIPE1_INT_STATUS				0xC228
+#define CP_ME2_PIPE2_INT_STATUS				0xC22C
+#define CP_ME2_PIPE3_INT_STATUS				0xC230
+#define DEQUEUE_REQUEST_INT_STATUS			(1 << 13)
+#define WRM_POLL_TIMEOUT_INT_STATUS			(1 << 17)
+#define PRIV_REG_INT_STATUS				(1 << 23)
+#define TIME_STAMP_INT_STATUS				(1 << 26)
+#define GENERIC2_INT_STATUS				(1 << 29)
+#define GENERIC1_INT_STATUS				(1 << 30)
+#define GENERIC0_INT_STATUS				(1 << 31)
+
+#define CP_HPD_EOP_BASE_ADDR				0xC904
+#define CP_HPD_EOP_BASE_ADDR_HI				0xC908
+#define CP_HPD_EOP_VMID					0xC90C
+#define CP_HPD_EOP_CONTROL				0xC910
+#define	EOP_SIZE(x)					((x) << 0)
+#define	EOP_SIZE_MASK					(0x3f << 0)
+#define CP_MQD_BASE_ADDR				0xC914
+#define CP_MQD_BASE_ADDR_HI				0xC918
+#define CP_HQD_ACTIVE					0xC91C
+#define CP_HQD_VMID					0xC920
+
+#define CP_HQD_PERSISTENT_STATE				0xC924u
+#define	DEFAULT_CP_HQD_PERSISTENT_STATE			(0x33U << 8)
+#define	PRELOAD_REQ					(1 << 0)
+
+#define CP_HQD_PIPE_PRIORITY				0xC928u
+#define CP_HQD_QUEUE_PRIORITY				0xC92Cu
+#define CP_HQD_QUANTUM					0xC930u
+#define	QUANTUM_EN					1U
+#define	QUANTUM_SCALE_1MS				(1U << 4)
+#define	QUANTUM_DURATION(x)				((x) << 8)
+
+#define CP_HQD_PQ_BASE					0xC934
+#define CP_HQD_PQ_BASE_HI				0xC938
+#define CP_HQD_PQ_RPTR					0xC93C
+#define CP_HQD_PQ_RPTR_REPORT_ADDR			0xC940
+#define CP_HQD_PQ_RPTR_REPORT_ADDR_HI			0xC944
+#define CP_HQD_PQ_WPTR_POLL_ADDR			0xC948
+#define CP_HQD_PQ_WPTR_POLL_ADDR_HI			0xC94C
+#define CP_HQD_PQ_DOORBELL_CONTROL			0xC950
+#define	DOORBELL_OFFSET(x)				((x) << 2)
+#define	DOORBELL_OFFSET_MASK				(0x1fffff << 2)
+#define	DOORBELL_SOURCE					(1 << 28)
+#define	DOORBELL_SCHD_HIT				(1 << 29)
+#define	DOORBELL_EN					(1 << 30)
+#define	DOORBELL_HIT					(1 << 31)
+#define CP_HQD_PQ_WPTR					0xC954
+#define CP_HQD_PQ_CONTROL				0xC958
+#define	QUEUE_SIZE(x)					((x) << 0)
+#define	QUEUE_SIZE_MASK					(0x3f << 0)
+#define	RPTR_BLOCK_SIZE(x)				((x) << 8)
+#define	RPTR_BLOCK_SIZE_MASK				(0x3f << 8)
+#define	MIN_AVAIL_SIZE(x)				((x) << 20)
+#define	PQ_ATC_EN					(1 << 23)
+#define	PQ_VOLATILE					(1 << 26)
+#define	NO_UPDATE_RPTR					(1 << 27)
+#define	UNORD_DISPATCH					(1 << 28)
+#define	ROQ_PQ_IB_FLIP					(1 << 29)
+#define	PRIV_STATE					(1 << 30)
+#define	KMD_QUEUE					(1 << 31)
+
+#define	DEFAULT_RPTR_BLOCK_SIZE				RPTR_BLOCK_SIZE(5)
+#define	DEFAULT_MIN_AVAIL_SIZE				MIN_AVAIL_SIZE(3)
+
+#define CP_HQD_IB_BASE_ADDR				0xC95Cu
+#define CP_HQD_IB_BASE_ADDR_HI				0xC960u
+#define CP_HQD_IB_RPTR					0xC964u
+#define CP_HQD_IB_CONTROL				0xC968u
+#define	IB_ATC_EN					(1U << 23)
+#define	DEFAULT_MIN_IB_AVAIL_SIZE			(3U << 20)
+
+#define	AQL_ENABLE					1
+
+#define CP_HQD_DEQUEUE_REQUEST				0xC974
+#define	DEQUEUE_REQUEST_DRAIN				1
+#define DEQUEUE_REQUEST_RESET				2
+#define		DEQUEUE_INT					(1U << 8)
+
+#define CP_HQD_SEMA_CMD					0xC97Cu
+#define CP_HQD_MSG_TYPE					0xC980u
+#define CP_HQD_ATOMIC0_PREOP_LO				0xC984u
+#define CP_HQD_ATOMIC0_PREOP_HI				0xC988u
+#define CP_HQD_ATOMIC1_PREOP_LO				0xC98Cu
+#define CP_HQD_ATOMIC1_PREOP_HI				0xC990u
+#define CP_HQD_HQ_SCHEDULER0				0xC994u
+#define CP_HQD_HQ_SCHEDULER1				0xC998u
+
+
+#define CP_MQD_CONTROL					0xC99C
+#define	MQD_VMID(x)					((x) << 0)
+#define	MQD_VMID_MASK					(0xf << 0)
+#define	MQD_CONTROL_PRIV_STATE_EN			(1U << 8)
+
+#define	SDMA_RB_VMID(x)					(x << 24)
+#define	SDMA_RB_ENABLE					(1 << 0)
+#define	SDMA_RB_SIZE(x)					((x) << 1) /* log2 */
+#define	SDMA_RPTR_WRITEBACK_ENABLE			(1 << 12)
+#define	SDMA_RPTR_WRITEBACK_TIMER(x)			((x) << 16) /* log2 */
+#define	SDMA_OFFSET(x)					(x << 0)
+#define	SDMA_DB_ENABLE					(1 << 28)
+#define	SDMA_ATC					(1 << 0)
+#define	SDMA_VA_PTR32					(1 << 4)
+#define	SDMA_VA_SHARED_BASE(x)				(x << 8)
+
+#define GRBM_GFX_INDEX					0x30800
+#define	INSTANCE_INDEX(x)				((x) << 0)
+#define	SH_INDEX(x)					((x) << 8)
+#define	SE_INDEX(x)					((x) << 16)
+#define	SH_BROADCAST_WRITES				(1 << 29)
+#define	INSTANCE_BROADCAST_WRITES			(1 << 30)
+#define	SE_BROADCAST_WRITES				(1 << 31)
+
+#define SQC_CACHES					0x30d20
+#define SQC_POLICY					0x8C38u
+#define SQC_VOLATILE					0x8C3Cu
+
+#define CP_PERFMON_CNTL					0x36020
+
+#define ATC_VMID0_PASID_MAPPING				0x339Cu
+#define	ATC_VMID_PASID_MAPPING_UPDATE_STATUS		0x3398u
+#define	ATC_VMID_PASID_MAPPING_VALID			(1U << 31)
+
+#define ATC_VM_APERTURE0_CNTL				0x3310u
+#define	ATS_ACCESS_MODE_NEVER				0
+#define	ATS_ACCESS_MODE_ALWAYS				1
+
+#define ATC_VM_APERTURE0_CNTL2				0x3318u
+#define ATC_VM_APERTURE0_HIGH_ADDR			0x3308u
+#define ATC_VM_APERTURE0_LOW_ADDR			0x3300u
+#define ATC_VM_APERTURE1_CNTL				0x3314u
+#define ATC_VM_APERTURE1_CNTL2				0x331Cu
+#define ATC_VM_APERTURE1_HIGH_ADDR			0x330Cu
+#define ATC_VM_APERTURE1_LOW_ADDR			0x3304u
+
+#endif
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
new file mode 100644
index 000000000..19a4fba46
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -0,0 +1,643 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/compat.h>
+#include <uapi/linux/kfd_ioctl.h>
+#include <linux/time.h>
+#include <linux/mm.h>
+#include <uapi/asm-generic/mman-common.h>
+#include <asm/processor.h>
+#include "kfd_priv.h"
+#include "kfd_device_queue_manager.h"
+
+static long kfd_ioctl(struct file *, unsigned int, unsigned long);
+static int kfd_open(struct inode *, struct file *);
+static int kfd_mmap(struct file *, struct vm_area_struct *);
+
+static const char kfd_dev_name[] = "kfd";
+
+static const struct file_operations kfd_fops = {
+	.owner = THIS_MODULE,
+	.unlocked_ioctl = kfd_ioctl,
+	.compat_ioctl = kfd_ioctl,
+	.open = kfd_open,
+	.mmap = kfd_mmap,
+};
+
+static int kfd_char_dev_major = -1;
+static struct class *kfd_class;
+struct device *kfd_device;
+
+int kfd_chardev_init(void)
+{
+	int err = 0;
+
+	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
+	err = kfd_char_dev_major;
+	if (err < 0)
+		goto err_register_chrdev;
+
+	kfd_class = class_create(THIS_MODULE, kfd_dev_name);
+	err = PTR_ERR(kfd_class);
+	if (IS_ERR(kfd_class))
+		goto err_class_create;
+
+	kfd_device = device_create(kfd_class, NULL,
+					MKDEV(kfd_char_dev_major, 0),
+					NULL, kfd_dev_name);
+	err = PTR_ERR(kfd_device);
+	if (IS_ERR(kfd_device))
+		goto err_device_create;
+
+	return 0;
+
+err_device_create:
+	class_destroy(kfd_class);
+err_class_create:
+	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
+err_register_chrdev:
+	return err;
+}
+
+void kfd_chardev_exit(void)
+{
+	device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
+	class_destroy(kfd_class);
+	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
+}
+
+struct device *kfd_chardev(void)
+{
+	return kfd_device;
+}
+
+
+static int kfd_open(struct inode *inode, struct file *filep)
+{
+	struct kfd_process *process;
+	bool is_32bit_user_mode;
+
+	if (iminor(inode) != 0)
+		return -ENODEV;
+
+	is_32bit_user_mode = is_compat_task();
+
+	if (is_32bit_user_mode == true) {
+		dev_warn(kfd_device,
+			"Process %d (32-bit) failed to open /dev/kfd\n"
+			"32-bit processes are not supported by amdkfd\n",
+			current->pid);
+		return -EPERM;
+	}
+
+	process = kfd_create_process(current);
+	if (IS_ERR(process))
+		return PTR_ERR(process);
+
+	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
+		process->pasid, process->is_32bit_user_mode);
+
+	return 0;
+}
+
+static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
+					void *data)
+{
+	struct kfd_ioctl_get_version_args *args = data;
+	int err = 0;
+
+	args->major_version = KFD_IOCTL_MAJOR_VERSION;
+	args->minor_version = KFD_IOCTL_MINOR_VERSION;
+
+	return err;
+}
+
+static int set_queue_properties_from_user(struct queue_properties *q_properties,
+				struct kfd_ioctl_create_queue_args *args)
+{
+	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
+		pr_err("kfd: queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
+		return -EINVAL;
+	}
+
+	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
+		pr_err("kfd: queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
+		return -EINVAL;
+	}
+
+	if ((args->ring_base_address) &&
+		(!access_ok(VERIFY_WRITE,
+			(const void __user *) args->ring_base_address,
+			sizeof(uint64_t)))) {
+		pr_err("kfd: can't access ring base address\n");
+		return -EFAULT;
+	}
+
+	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
+		pr_err("kfd: ring size must be a power of 2 or 0\n");
+		return -EINVAL;
+	}
+
+	if (!access_ok(VERIFY_WRITE,
+			(const void __user *) args->read_pointer_address,
+			sizeof(uint32_t))) {
+		pr_err("kfd: can't access read pointer\n");
+		return -EFAULT;
+	}
+
+	if (!access_ok(VERIFY_WRITE,
+			(const void __user *) args->write_pointer_address,
+			sizeof(uint32_t))) {
+		pr_err("kfd: can't access write pointer\n");
+		return -EFAULT;
+	}
+
+	if (args->eop_buffer_address &&
+		!access_ok(VERIFY_WRITE,
+			(const void __user *) args->eop_buffer_address,
+			sizeof(uint32_t))) {
+		pr_debug("kfd: can't access eop buffer");
+		return -EFAULT;
+	}
+
+	if (args->ctx_save_restore_address &&
+		!access_ok(VERIFY_WRITE,
+			(const void __user *) args->ctx_save_restore_address,
+			sizeof(uint32_t))) {
+		pr_debug("kfd: can't access ctx save restore buffer");
+		return -EFAULT;
+	}
+
+	q_properties->is_interop = false;
+	q_properties->queue_percent = args->queue_percentage;
+	q_properties->priority = args->queue_priority;
+	q_properties->queue_address = args->ring_base_address;
+	q_properties->queue_size = args->ring_size;
+	q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
+	q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
+	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
+	q_properties->eop_ring_buffer_size = args->eop_buffer_size;
+	q_properties->ctx_save_restore_area_address =
+			args->ctx_save_restore_address;
+	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
+	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
+		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
+		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
+	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
+		q_properties->type = KFD_QUEUE_TYPE_SDMA;
+	else
+		return -ENOTSUPP;
+
+	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
+		q_properties->format = KFD_QUEUE_FORMAT_AQL;
+	else
+		q_properties->format = KFD_QUEUE_FORMAT_PM4;
+
+	pr_debug("Queue Percentage (%d, %d)\n",
+			q_properties->queue_percent, args->queue_percentage);
+
+	pr_debug("Queue Priority (%d, %d)\n",
+			q_properties->priority, args->queue_priority);
+
+	pr_debug("Queue Address (0x%llX, 0x%llX)\n",
+			q_properties->queue_address, args->ring_base_address);
+
+	pr_debug("Queue Size (0x%llX, %u)\n",
+			q_properties->queue_size, args->ring_size);
+
+	pr_debug("Queue r/w Pointers (0x%llX, 0x%llX)\n",
+			(uint64_t) q_properties->read_ptr,
+			(uint64_t) q_properties->write_ptr);
+
+	pr_debug("Queue Format (%d)\n", q_properties->format);
+
+	pr_debug("Queue EOP (0x%llX)\n", q_properties->eop_ring_buffer_address);
+
+	pr_debug("Queue CTX save arex (0x%llX)\n",
+			q_properties->ctx_save_restore_area_address);
+
+	return 0;
+}
+
+static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
+					void *data)
+{
+	struct kfd_ioctl_create_queue_args *args = data;
+	struct kfd_dev *dev;
+	int err = 0;
+	unsigned int queue_id;
+	struct kfd_process_device *pdd;
+	struct queue_properties q_properties;
+
+	memset(&q_properties, 0, sizeof(struct queue_properties));
+
+	pr_debug("kfd: creating queue ioctl\n");
+
+	err = set_queue_properties_from_user(&q_properties, args);
+	if (err)
+		return err;
+
+	pr_debug("kfd: looking for gpu id 0x%x\n", args->gpu_id);
+	dev = kfd_device_by_id(args->gpu_id);
+	if (dev == NULL) {
+		pr_debug("kfd: gpu id 0x%x was not found\n", args->gpu_id);
+		return -EINVAL;
+	}
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		err = -ESRCH;
+		goto err_bind_process;
+	}
+
+	pr_debug("kfd: creating queue for PASID %d on GPU 0x%x\n",
+			p->pasid,
+			dev->id);
+
+	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties,
+				0, q_properties.type, &queue_id);
+	if (err != 0)
+		goto err_create_queue;
+
+	args->queue_id = queue_id;
+
+	/* Return gpu_id as doorbell offset for mmap usage */
+	args->doorbell_offset = args->gpu_id << PAGE_SHIFT;
+
+	mutex_unlock(&p->mutex);
+
+	pr_debug("kfd: queue id %d was created successfully\n", args->queue_id);
+
+	pr_debug("ring buffer address == 0x%016llX\n",
+			args->ring_base_address);
+
+	pr_debug("read ptr address    == 0x%016llX\n",
+			args->read_pointer_address);
+
+	pr_debug("write ptr address   == 0x%016llX\n",
+			args->write_pointer_address);
+
+	return 0;
+
+err_create_queue:
+err_bind_process:
+	mutex_unlock(&p->mutex);
+	return err;
+}
+
+static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
+					void *data)
+{
+	int retval;
+	struct kfd_ioctl_destroy_queue_args *args = data;
+
+	pr_debug("kfd: destroying queue id %d for PASID %d\n",
+				args->queue_id,
+				p->pasid);
+
+	mutex_lock(&p->mutex);
+
+	retval = pqm_destroy_queue(&p->pqm, args->queue_id);
+
+	mutex_unlock(&p->mutex);
+	return retval;
+}
+
+static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
+					void *data)
+{
+	int retval;
+	struct kfd_ioctl_update_queue_args *args = data;
+	struct queue_properties properties;
+
+	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
+		pr_err("kfd: queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
+		return -EINVAL;
+	}
+
+	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
+		pr_err("kfd: queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
+		return -EINVAL;
+	}
+
+	if ((args->ring_base_address) &&
+		(!access_ok(VERIFY_WRITE,
+			(const void __user *) args->ring_base_address,
+			sizeof(uint64_t)))) {
+		pr_err("kfd: can't access ring base address\n");
+		return -EFAULT;
+	}
+
+	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
+		pr_err("kfd: ring size must be a power of 2 or 0\n");
+		return -EINVAL;
+	}
+
+	properties.queue_address = args->ring_base_address;
+	properties.queue_size = args->ring_size;
+	properties.queue_percent = args->queue_percentage;
+	properties.priority = args->queue_priority;
+
+	pr_debug("kfd: updating queue id %d for PASID %d\n",
+			args->queue_id, p->pasid);
+
+	mutex_lock(&p->mutex);
+
+	retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);
+
+	mutex_unlock(&p->mutex);
+
+	return retval;
+}
+
+static int kfd_ioctl_set_memory_policy(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_set_memory_policy_args *args = data;
+	struct kfd_dev *dev;
+	int err = 0;
+	struct kfd_process_device *pdd;
+	enum cache_policy default_policy, alternate_policy;
+
+	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
+	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
+		return -EINVAL;
+	}
+
+	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
+	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
+		return -EINVAL;
+	}
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (dev == NULL)
+		return -EINVAL;
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		err = -ESRCH;
+		goto out;
+	}
+
+	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
+			 ? cache_policy_coherent : cache_policy_noncoherent;
+
+	alternate_policy =
+		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
+		   ? cache_policy_coherent : cache_policy_noncoherent;
+
+	if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
+				&pdd->qpd,
+				default_policy,
+				alternate_policy,
+				(void __user *)args->alternate_aperture_base,
+				args->alternate_aperture_size))
+		err = -EINVAL;
+
+out:
+	mutex_unlock(&p->mutex);
+
+	return err;
+}
+
+static int kfd_ioctl_get_clock_counters(struct file *filep,
+				struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_get_clock_counters_args *args = data;
+	struct kfd_dev *dev;
+	struct timespec64 time;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (dev == NULL)
+		return -EINVAL;
+
+	/* Reading GPU clock counter from KGD */
+	args->gpu_clock_counter =
+		dev->kfd2kgd->get_gpu_clock_counter(dev->kgd);
+
+	/* No access to rdtsc. Using raw monotonic time */
+	getrawmonotonic64(&time);
+	args->cpu_clock_counter = (uint64_t)timespec64_to_ns(&time);
+
+	get_monotonic_boottime64(&time);
+	args->system_clock_counter = (uint64_t)timespec64_to_ns(&time);
+
+	/* Since the counter is in nano-seconds we use 1GHz frequency */
+	args->system_clock_freq = 1000000000;
+
+	return 0;
+}
+
+
+static int kfd_ioctl_get_process_apertures(struct file *filp,
+				struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_get_process_apertures_args *args = data;
+	struct kfd_process_device_apertures *pAperture;
+	struct kfd_process_device *pdd;
+
+	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
+
+	args->num_of_nodes = 0;
+
+	mutex_lock(&p->mutex);
+
+	/*if the process-device list isn't empty*/
+	if (kfd_has_process_device_data(p)) {
+		/* Run over all pdd of the process */
+		pdd = kfd_get_first_process_device_data(p);
+		do {
+			pAperture =
+				&args->process_apertures[args->num_of_nodes];
+			pAperture->gpu_id = pdd->dev->id;
+			pAperture->lds_base = pdd->lds_base;
+			pAperture->lds_limit = pdd->lds_limit;
+			pAperture->gpuvm_base = pdd->gpuvm_base;
+			pAperture->gpuvm_limit = pdd->gpuvm_limit;
+			pAperture->scratch_base = pdd->scratch_base;
+			pAperture->scratch_limit = pdd->scratch_limit;
+
+			dev_dbg(kfd_device,
+				"node id %u\n", args->num_of_nodes);
+			dev_dbg(kfd_device,
+				"gpu id %u\n", pdd->dev->id);
+			dev_dbg(kfd_device,
+				"lds_base %llX\n", pdd->lds_base);
+			dev_dbg(kfd_device,
+				"lds_limit %llX\n", pdd->lds_limit);
+			dev_dbg(kfd_device,
+				"gpuvm_base %llX\n", pdd->gpuvm_base);
+			dev_dbg(kfd_device,
+				"gpuvm_limit %llX\n", pdd->gpuvm_limit);
+			dev_dbg(kfd_device,
+				"scratch_base %llX\n", pdd->scratch_base);
+			dev_dbg(kfd_device,
+				"scratch_limit %llX\n", pdd->scratch_limit);
+
+			args->num_of_nodes++;
+		} while ((pdd = kfd_get_next_process_device_data(p, pdd)) != NULL &&
+				(args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
+	}
+
+	mutex_unlock(&p->mutex);
+
+	return 0;
+}
+
+#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
+	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl}
+
+/** Ioctl table */
+static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
+			kfd_ioctl_get_version, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
+			kfd_ioctl_create_queue, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
+			kfd_ioctl_destroy_queue, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
+			kfd_ioctl_set_memory_policy, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
+			kfd_ioctl_get_clock_counters, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
+			kfd_ioctl_get_process_apertures, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
+			kfd_ioctl_update_queue, 0),
+};
+
+#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
+
+static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
+{
+	struct kfd_process *process;
+	amdkfd_ioctl_t *func;
+	const struct amdkfd_ioctl_desc *ioctl = NULL;
+	unsigned int nr = _IOC_NR(cmd);
+	char stack_kdata[128];
+	char *kdata = NULL;
+	unsigned int usize, asize;
+	int retcode = -EINVAL;
+
+	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
+		goto err_i1;
+
+	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
+		u32 amdkfd_size;
+
+		ioctl = &amdkfd_ioctls[nr];
+
+		amdkfd_size = _IOC_SIZE(ioctl->cmd);
+		usize = asize = _IOC_SIZE(cmd);
+		if (amdkfd_size > asize)
+			asize = amdkfd_size;
+
+		cmd = ioctl->cmd;
+	} else
+		goto err_i1;
+
+	dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);
+
+	process = kfd_get_process(current);
+	if (IS_ERR(process)) {
+		dev_dbg(kfd_device, "no process\n");
+		goto err_i1;
+	}
+
+	/* Do not trust userspace, use our own definition */
+	func = ioctl->func;
+
+	if (unlikely(!func)) {
+		dev_dbg(kfd_device, "no function\n");
+		retcode = -EINVAL;
+		goto err_i1;
+	}
+
+	if (cmd & (IOC_IN | IOC_OUT)) {
+		if (asize <= sizeof(stack_kdata)) {
+			kdata = stack_kdata;
+		} else {
+			kdata = kmalloc(asize, GFP_KERNEL);
+			if (!kdata) {
+				retcode = -ENOMEM;
+				goto err_i1;
+			}
+		}
+		if (asize > usize)
+			memset(kdata + usize, 0, asize - usize);
+	}
+
+	if (cmd & IOC_IN) {
+		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
+			retcode = -EFAULT;
+			goto err_i1;
+		}
+	} else if (cmd & IOC_OUT) {
+		memset(kdata, 0, usize);
+	}
+
+	retcode = func(filep, process, kdata);
+
+	if (cmd & IOC_OUT)
+		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
+			retcode = -EFAULT;
+
+err_i1:
+	if (!ioctl)
+		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
+			  task_pid_nr(current), cmd, nr);
+
+	if (kdata != stack_kdata)
+		kfree(kdata);
+
+	if (retcode)
+		dev_dbg(kfd_device, "ret = %d\n", retcode);
+
+	return retcode;
+}
+
+static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct kfd_process *process;
+
+	process = kfd_get_process(current);
+	if (IS_ERR(process))
+		return PTR_ERR(process);
+
+	return kfd_doorbell_mmap(process, vma);
+}
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
new file mode 100644
index 000000000..a374fa3d3
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -0,0 +1,294 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef KFD_CRAT_H_INCLUDED
+#define KFD_CRAT_H_INCLUDED
+
+#include <linux/types.h>
+
+#pragma pack(1)
+
+/*
+ * 4CC signature values for the CRAT and CDIT ACPI tables
+ */
+
+#define CRAT_SIGNATURE	"CRAT"
+#define CDIT_SIGNATURE	"CDIT"
+
+/*
+ * Component Resource Association Table (CRAT)
+ */
+
+#define CRAT_OEMID_LENGTH	6
+#define CRAT_OEMTABLEID_LENGTH	8
+#define CRAT_RESERVED_LENGTH	6
+
+#define CRAT_OEMID_64BIT_MASK ((1ULL << (CRAT_OEMID_LENGTH * 8)) - 1)
+
+struct crat_header {
+	uint32_t	signature;
+	uint32_t	length;
+	uint8_t		revision;
+	uint8_t		checksum;
+	uint8_t		oem_id[CRAT_OEMID_LENGTH];
+	uint8_t		oem_table_id[CRAT_OEMTABLEID_LENGTH];
+	uint32_t	oem_revision;
+	uint32_t	creator_id;
+	uint32_t	creator_revision;
+	uint32_t	total_entries;
+	uint16_t	num_domains;
+	uint8_t		reserved[CRAT_RESERVED_LENGTH];
+};
+
+/*
+ * The header structure is immediately followed by total_entries of the
+ * data definitions
+ */
+
+/*
+ * The currently defined subtype entries in the CRAT
+ */
+#define CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY	0
+#define CRAT_SUBTYPE_MEMORY_AFFINITY		1
+#define CRAT_SUBTYPE_CACHE_AFFINITY		2
+#define CRAT_SUBTYPE_TLB_AFFINITY		3
+#define CRAT_SUBTYPE_CCOMPUTE_AFFINITY		4
+#define CRAT_SUBTYPE_IOLINK_AFFINITY		5
+#define CRAT_SUBTYPE_MAX			6
+
+#define CRAT_SIBLINGMAP_SIZE	32
+
+/*
+ * ComputeUnit Affinity structure and definitions
+ */
+#define CRAT_CU_FLAGS_ENABLED		0x00000001
+#define CRAT_CU_FLAGS_HOT_PLUGGABLE	0x00000002
+#define CRAT_CU_FLAGS_CPU_PRESENT	0x00000004
+#define CRAT_CU_FLAGS_GPU_PRESENT	0x00000008
+#define CRAT_CU_FLAGS_IOMMU_PRESENT	0x00000010
+#define CRAT_CU_FLAGS_RESERVED		0xffffffe0
+
+#define CRAT_COMPUTEUNIT_RESERVED_LENGTH 4
+
+struct crat_subtype_computeunit {
+	uint8_t		type;
+	uint8_t		length;
+	uint16_t	reserved;
+	uint32_t	flags;
+	uint32_t	proximity_domain;
+	uint32_t	processor_id_low;
+	uint16_t	num_cpu_cores;
+	uint16_t	num_simd_cores;
+	uint16_t	max_waves_simd;
+	uint16_t	io_count;
+	uint16_t	hsa_capability;
+	uint16_t	lds_size_in_kb;
+	uint8_t		wave_front_size;
+	uint8_t		num_banks;
+	uint16_t	micro_engine_id;
+	uint8_t		num_arrays;
+	uint8_t		num_cu_per_array;
+	uint8_t		num_simd_per_cu;
+	uint8_t		max_slots_scatch_cu;
+	uint8_t		reserved2[CRAT_COMPUTEUNIT_RESERVED_LENGTH];
+};
+
+/*
+ * HSA Memory Affinity structure and definitions
+ */
+#define CRAT_MEM_FLAGS_ENABLED		0x00000001
+#define CRAT_MEM_FLAGS_HOT_PLUGGABLE	0x00000002
+#define CRAT_MEM_FLAGS_NON_VOLATILE	0x00000004
+#define CRAT_MEM_FLAGS_RESERVED		0xfffffff8
+
+#define CRAT_MEMORY_RESERVED_LENGTH 8
+
+struct crat_subtype_memory {
+	uint8_t		type;
+	uint8_t		length;
+	uint16_t	reserved;
+	uint32_t	flags;
+	uint32_t	promixity_domain;
+	uint32_t	base_addr_low;
+	uint32_t	base_addr_high;
+	uint32_t	length_low;
+	uint32_t	length_high;
+	uint32_t	width;
+	uint8_t		reserved2[CRAT_MEMORY_RESERVED_LENGTH];
+};
+
+/*
+ * HSA Cache Affinity structure and definitions
+ */
+#define CRAT_CACHE_FLAGS_ENABLED	0x00000001
+#define CRAT_CACHE_FLAGS_DATA_CACHE	0x00000002
+#define CRAT_CACHE_FLAGS_INST_CACHE	0x00000004
+#define CRAT_CACHE_FLAGS_CPU_CACHE	0x00000008
+#define CRAT_CACHE_FLAGS_SIMD_CACHE	0x00000010
+#define CRAT_CACHE_FLAGS_RESERVED	0xffffffe0
+
+#define CRAT_CACHE_RESERVED_LENGTH 8
+
+struct crat_subtype_cache {
+	uint8_t		type;
+	uint8_t		length;
+	uint16_t	reserved;
+	uint32_t	flags;
+	uint32_t	processor_id_low;
+	uint8_t		sibling_map[CRAT_SIBLINGMAP_SIZE];
+	uint32_t	cache_size;
+	uint8_t		cache_level;
+	uint8_t		lines_per_tag;
+	uint16_t	cache_line_size;
+	uint8_t		associativity;
+	uint8_t		cache_properties;
+	uint16_t	cache_latency;
+	uint8_t		reserved2[CRAT_CACHE_RESERVED_LENGTH];
+};
+
+/*
+ * HSA TLB Affinity structure and definitions
+ */
+#define CRAT_TLB_FLAGS_ENABLED	0x00000001
+#define CRAT_TLB_FLAGS_DATA_TLB	0x00000002
+#define CRAT_TLB_FLAGS_INST_TLB	0x00000004
+#define CRAT_TLB_FLAGS_CPU_TLB	0x00000008
+#define CRAT_TLB_FLAGS_SIMD_TLB	0x00000010
+#define CRAT_TLB_FLAGS_RESERVED	0xffffffe0
+
+#define CRAT_TLB_RESERVED_LENGTH 4
+
+struct crat_subtype_tlb {
+	uint8_t		type;
+	uint8_t		length;
+	uint16_t	reserved;
+	uint32_t	flags;
+	uint32_t	processor_id_low;
+	uint8_t		sibling_map[CRAT_SIBLINGMAP_SIZE];
+	uint32_t	tlb_level;
+	uint8_t		data_tlb_associativity_2mb;
+	uint8_t		data_tlb_size_2mb;
+	uint8_t		instruction_tlb_associativity_2mb;
+	uint8_t		instruction_tlb_size_2mb;
+	uint8_t		data_tlb_associativity_4k;
+	uint8_t		data_tlb_size_4k;
+	uint8_t		instruction_tlb_associativity_4k;
+	uint8_t		instruction_tlb_size_4k;
+	uint8_t		data_tlb_associativity_1gb;
+	uint8_t		data_tlb_size_1gb;
+	uint8_t		instruction_tlb_associativity_1gb;
+	uint8_t		instruction_tlb_size_1gb;
+	uint8_t		reserved2[CRAT_TLB_RESERVED_LENGTH];
+};
+
+/*
+ * HSA CCompute/APU Affinity structure and definitions
+ */
+#define CRAT_CCOMPUTE_FLAGS_ENABLED	0x00000001
+#define CRAT_CCOMPUTE_FLAGS_RESERVED	0xfffffffe
+
+#define CRAT_CCOMPUTE_RESERVED_LENGTH 16
+
+struct crat_subtype_ccompute {
+	uint8_t		type;
+	uint8_t		length;
+	uint16_t	reserved;
+	uint32_t	flags;
+	uint32_t	processor_id_low;
+	uint8_t		sibling_map[CRAT_SIBLINGMAP_SIZE];
+	uint32_t	apu_size;
+	uint8_t		reserved2[CRAT_CCOMPUTE_RESERVED_LENGTH];
+};
+
+/*
+ * HSA IO Link Affinity structure and definitions
+ */
+#define CRAT_IOLINK_FLAGS_ENABLED	0x00000001
+#define CRAT_IOLINK_FLAGS_COHERENCY	0x00000002
+#define CRAT_IOLINK_FLAGS_RESERVED	0xfffffffc
+
+/*
+ * IO interface types
+ */
+#define CRAT_IOLINK_TYPE_UNDEFINED	0
+#define CRAT_IOLINK_TYPE_HYPERTRANSPORT	1
+#define CRAT_IOLINK_TYPE_PCIEXPRESS	2
+#define CRAT_IOLINK_TYPE_OTHER		3
+#define CRAT_IOLINK_TYPE_MAX		255
+
+#define CRAT_IOLINK_RESERVED_LENGTH 24
+
+struct crat_subtype_iolink {
+	uint8_t		type;
+	uint8_t		length;
+	uint16_t	reserved;
+	uint32_t	flags;
+	uint32_t	proximity_domain_from;
+	uint32_t	proximity_domain_to;
+	uint8_t		io_interface_type;
+	uint8_t		version_major;
+	uint16_t	version_minor;
+	uint32_t	minimum_latency;
+	uint32_t	maximum_latency;
+	uint32_t	minimum_bandwidth_mbs;
+	uint32_t	maximum_bandwidth_mbs;
+	uint32_t	recommended_transfer_size;
+	uint8_t		reserved2[CRAT_IOLINK_RESERVED_LENGTH];
+};
+
+/*
+ * HSA generic sub-type header
+ */
+
+#define CRAT_SUBTYPE_FLAGS_ENABLED 0x00000001
+
+struct crat_subtype_generic {
+	uint8_t		type;
+	uint8_t		length;
+	uint16_t	reserved;
+	uint32_t	flags;
+};
+
+/*
+ * Component Locality Distance Information Table (CDIT)
+ */
+#define CDIT_OEMID_LENGTH	6
+#define CDIT_OEMTABLEID_LENGTH	8
+
+struct cdit_header {
+	uint32_t	signature;
+	uint32_t	length;
+	uint8_t		revision;
+	uint8_t		checksum;
+	uint8_t		oem_id[CDIT_OEMID_LENGTH];
+	uint8_t		oem_table_id[CDIT_OEMTABLEID_LENGTH];
+	uint32_t	oem_revision;
+	uint32_t	creator_id;
+	uint32_t	creator_revision;
+	uint32_t	total_entries;
+	uint16_t	num_domains;
+	uint8_t		entry[1];
+};
+
+#pragma pack()
+
+#endif /* KFD_CRAT_H_INCLUDED */
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_device.c
new file mode 100644
index 000000000..ca7f2d3af
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -0,0 +1,522 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/amd-iommu.h>
+#include <linux/bsearch.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include "kfd_priv.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_pm4_headers.h"
+
+#define MQD_SIZE_ALIGNED 768
+
+static const struct kfd_device_info kaveri_device_info = {
+	.asic_family = CHIP_KAVERI,
+	.max_pasid_bits = 16,
+	.ih_ring_entry_size = 4 * sizeof(uint32_t),
+	.mqd_size_aligned = MQD_SIZE_ALIGNED
+};
+
+static const struct kfd_device_info carrizo_device_info = {
+	.asic_family = CHIP_CARRIZO,
+	.max_pasid_bits = 16,
+	.ih_ring_entry_size = 4 * sizeof(uint32_t),
+	.num_of_watch_points = 4,
+	.mqd_size_aligned = MQD_SIZE_ALIGNED
+};
+
+struct kfd_deviceid {
+	unsigned short did;
+	const struct kfd_device_info *device_info;
+};
+
+/* Please keep this sorted by increasing device id. */
+static const struct kfd_deviceid supported_devices[] = {
+	{ 0x1304, &kaveri_device_info },	/* Kaveri */
+	{ 0x1305, &kaveri_device_info },	/* Kaveri */
+	{ 0x1306, &kaveri_device_info },	/* Kaveri */
+	{ 0x1307, &kaveri_device_info },	/* Kaveri */
+	{ 0x1309, &kaveri_device_info },	/* Kaveri */
+	{ 0x130A, &kaveri_device_info },	/* Kaveri */
+	{ 0x130B, &kaveri_device_info },	/* Kaveri */
+	{ 0x130C, &kaveri_device_info },	/* Kaveri */
+	{ 0x130D, &kaveri_device_info },	/* Kaveri */
+	{ 0x130E, &kaveri_device_info },	/* Kaveri */
+	{ 0x130F, &kaveri_device_info },	/* Kaveri */
+	{ 0x1310, &kaveri_device_info },	/* Kaveri */
+	{ 0x1311, &kaveri_device_info },	/* Kaveri */
+	{ 0x1312, &kaveri_device_info },	/* Kaveri */
+	{ 0x1313, &kaveri_device_info },	/* Kaveri */
+	{ 0x1315, &kaveri_device_info },	/* Kaveri */
+	{ 0x1316, &kaveri_device_info },	/* Kaveri */
+	{ 0x1317, &kaveri_device_info },	/* Kaveri */
+	{ 0x1318, &kaveri_device_info },	/* Kaveri */
+	{ 0x131B, &kaveri_device_info },	/* Kaveri */
+	{ 0x131C, &kaveri_device_info },	/* Kaveri */
+	{ 0x131D, &kaveri_device_info }		/* Kaveri */
+};
+
+static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
+				unsigned int chunk_size);
+static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
+
+static const struct kfd_device_info *lookup_device_info(unsigned short did)
+{
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(supported_devices); i++) {
+		if (supported_devices[i].did == did) {
+			BUG_ON(supported_devices[i].device_info == NULL);
+			return supported_devices[i].device_info;
+		}
+	}
+
+	return NULL;
+}
+
+struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
+	struct pci_dev *pdev, const struct kfd2kgd_calls *f2g)
+{
+	struct kfd_dev *kfd;
+
+	const struct kfd_device_info *device_info =
+					lookup_device_info(pdev->device);
+
+	if (!device_info)
+		return NULL;
+
+	kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
+	if (!kfd)
+		return NULL;
+
+	kfd->kgd = kgd;
+	kfd->device_info = device_info;
+	kfd->pdev = pdev;
+	kfd->init_complete = false;
+	kfd->kfd2kgd = f2g;
+
+	mutex_init(&kfd->doorbell_mutex);
+	memset(&kfd->doorbell_available_index, 0,
+		sizeof(kfd->doorbell_available_index));
+
+	return kfd;
+}
+
+static bool device_iommu_pasid_init(struct kfd_dev *kfd)
+{
+	const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
+					AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
+					AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
+
+	struct amd_iommu_device_info iommu_info;
+	unsigned int pasid_limit;
+	int err;
+
+	err = amd_iommu_device_info(kfd->pdev, &iommu_info);
+	if (err < 0) {
+		dev_err(kfd_device,
+			"error getting iommu info. is the iommu enabled?\n");
+		return false;
+	}
+
+	if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) {
+		dev_err(kfd_device, "error required iommu flags ats(%i), pri(%i), pasid(%i)\n",
+		       (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0,
+		       (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0,
+		       (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) != 0);
+		return false;
+	}
+
+	pasid_limit = min_t(unsigned int,
+			(unsigned int)1 << kfd->device_info->max_pasid_bits,
+			iommu_info.max_pasids);
+	/*
+	 * last pasid is used for kernel queues doorbells
+	 * in the future the last pasid might be used for a kernel thread.
+	 */
+	pasid_limit = min_t(unsigned int,
+				pasid_limit,
+				kfd->doorbell_process_limit - 1);
+
+	err = amd_iommu_init_device(kfd->pdev, pasid_limit);
+	if (err < 0) {
+		dev_err(kfd_device, "error initializing iommu device\n");
+		return false;
+	}
+
+	if (!kfd_set_pasid_limit(pasid_limit)) {
+		dev_err(kfd_device, "error setting pasid limit\n");
+		amd_iommu_free_device(kfd->pdev);
+		return false;
+	}
+
+	return true;
+}
+
+static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
+{
+	struct kfd_dev *dev = kfd_device_by_pci_dev(pdev);
+
+	if (dev)
+		kfd_unbind_process_from_device(dev, pasid);
+}
+
+bool kgd2kfd_device_init(struct kfd_dev *kfd,
+			 const struct kgd2kfd_shared_resources *gpu_resources)
+{
+	unsigned int size;
+
+	kfd->shared_resources = *gpu_resources;
+
+	/* calculate max size of mqds needed for queues */
+	size = max_num_of_queues_per_device *
+			kfd->device_info->mqd_size_aligned;
+
+	/*
+	 * calculate max size of runlist packet.
+	 * There can be only 2 packets at once
+	 */
+	size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_map_process) +
+		max_num_of_queues_per_device *
+		sizeof(struct pm4_map_queues) + sizeof(struct pm4_runlist)) * 2;
+
+	/* Add size of HIQ & DIQ */
+	size += KFD_KERNEL_QUEUE_SIZE * 2;
+
+	/* add another 512KB for all other allocations on gart (HPD, fences) */
+	size += 512 * 1024;
+
+	if (kfd->kfd2kgd->init_gtt_mem_allocation(
+			kfd->kgd, size, &kfd->gtt_mem,
+			&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)){
+		dev_err(kfd_device,
+			"Could not allocate %d bytes for device (%x:%x)\n",
+			size, kfd->pdev->vendor, kfd->pdev->device);
+		goto out;
+	}
+
+	dev_info(kfd_device,
+		"Allocated %d bytes on gart for device(%x:%x)\n",
+		size, kfd->pdev->vendor, kfd->pdev->device);
+
+	/* Initialize GTT sa with 512 byte chunk size */
+	if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
+		dev_err(kfd_device,
+			"Error initializing gtt sub-allocator\n");
+		goto kfd_gtt_sa_init_error;
+	}
+
+	kfd_doorbell_init(kfd);
+
+	if (kfd_topology_add_device(kfd) != 0) {
+		dev_err(kfd_device,
+			"Error adding device (%x:%x) to topology\n",
+			kfd->pdev->vendor, kfd->pdev->device);
+		goto kfd_topology_add_device_error;
+	}
+
+	if (!device_iommu_pasid_init(kfd)) {
+		dev_err(kfd_device,
+			"Error initializing iommuv2 for device (%x:%x)\n",
+			kfd->pdev->vendor, kfd->pdev->device);
+		goto device_iommu_pasid_error;
+	}
+	amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
+						iommu_pasid_shutdown_callback);
+
+	kfd->dqm = device_queue_manager_init(kfd);
+	if (!kfd->dqm) {
+		dev_err(kfd_device,
+			"Error initializing queue manager for device (%x:%x)\n",
+			kfd->pdev->vendor, kfd->pdev->device);
+		goto device_queue_manager_error;
+	}
+
+	if (kfd->dqm->ops.start(kfd->dqm) != 0) {
+		dev_err(kfd_device,
+			"Error starting queuen manager for device (%x:%x)\n",
+			kfd->pdev->vendor, kfd->pdev->device);
+		goto dqm_start_error;
+	}
+
+	kfd->init_complete = true;
+	dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor,
+		 kfd->pdev->device);
+
+	pr_debug("kfd: Starting kfd with the following scheduling policy %d\n",
+		sched_policy);
+
+	goto out;
+
+dqm_start_error:
+	device_queue_manager_uninit(kfd->dqm);
+device_queue_manager_error:
+	amd_iommu_free_device(kfd->pdev);
+device_iommu_pasid_error:
+	kfd_topology_remove_device(kfd);
+kfd_topology_add_device_error:
+	kfd_gtt_sa_fini(kfd);
+kfd_gtt_sa_init_error:
+	kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
+	dev_err(kfd_device,
+		"device (%x:%x) NOT added due to errors\n",
+		kfd->pdev->vendor, kfd->pdev->device);
+out:
+	return kfd->init_complete;
+}
+
+void kgd2kfd_device_exit(struct kfd_dev *kfd)
+{
+	if (kfd->init_complete) {
+		device_queue_manager_uninit(kfd->dqm);
+		amd_iommu_free_device(kfd->pdev);
+		kfd_topology_remove_device(kfd);
+		kfd_gtt_sa_fini(kfd);
+		kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
+	}
+
+	kfree(kfd);
+}
+
+void kgd2kfd_suspend(struct kfd_dev *kfd)
+{
+	BUG_ON(kfd == NULL);
+
+	if (kfd->init_complete) {
+		kfd->dqm->ops.stop(kfd->dqm);
+		amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
+		amd_iommu_free_device(kfd->pdev);
+	}
+}
+
+int kgd2kfd_resume(struct kfd_dev *kfd)
+{
+	unsigned int pasid_limit;
+	int err;
+
+	BUG_ON(kfd == NULL);
+
+	pasid_limit = kfd_get_pasid_limit();
+
+	if (kfd->init_complete) {
+		err = amd_iommu_init_device(kfd->pdev, pasid_limit);
+		if (err < 0)
+			return -ENXIO;
+		amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
+						iommu_pasid_shutdown_callback);
+		kfd->dqm->ops.start(kfd->dqm);
+	}
+
+	return 0;
+}
+
+/* This is called directly from KGD at ISR. */
+void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
+{
+	/* Process interrupts / schedule work as necessary */
+}
+
+static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
+				unsigned int chunk_size)
+{
+	unsigned int num_of_bits;
+
+	BUG_ON(!kfd);
+	BUG_ON(!kfd->gtt_mem);
+	BUG_ON(buf_size < chunk_size);
+	BUG_ON(buf_size == 0);
+	BUG_ON(chunk_size == 0);
+
+	kfd->gtt_sa_chunk_size = chunk_size;
+	kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;
+
+	num_of_bits = kfd->gtt_sa_num_of_chunks / BITS_PER_BYTE;
+	BUG_ON(num_of_bits == 0);
+
+	kfd->gtt_sa_bitmap = kzalloc(num_of_bits, GFP_KERNEL);
+
+	if (!kfd->gtt_sa_bitmap)
+		return -ENOMEM;
+
+	pr_debug("kfd: gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
+			kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);
+
+	mutex_init(&kfd->gtt_sa_lock);
+
+	return 0;
+
+}
+
+static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
+{
+	mutex_destroy(&kfd->gtt_sa_lock);
+	kfree(kfd->gtt_sa_bitmap);
+}
+
+static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
+						unsigned int bit_num,
+						unsigned int chunk_size)
+{
+	return start_addr + bit_num * chunk_size;
+}
+
+static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr,
+						unsigned int bit_num,
+						unsigned int chunk_size)
+{
+	return (uint32_t *) ((uint64_t) start_addr + bit_num * chunk_size);
+}
+
+int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
+			struct kfd_mem_obj **mem_obj)
+{
+	unsigned int found, start_search, cur_size;
+
+	BUG_ON(!kfd);
+
+	if (size == 0)
+		return -EINVAL;
+
+	if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
+		return -ENOMEM;
+
+	*mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
+	if ((*mem_obj) == NULL)
+		return -ENOMEM;
+
+	pr_debug("kfd: allocated mem_obj = %p for size = %d\n", *mem_obj, size);
+
+	start_search = 0;
+
+	mutex_lock(&kfd->gtt_sa_lock);
+
+kfd_gtt_restart_search:
+	/* Find the first chunk that is free */
+	found = find_next_zero_bit(kfd->gtt_sa_bitmap,
+					kfd->gtt_sa_num_of_chunks,
+					start_search);
+
+	pr_debug("kfd: found = %d\n", found);
+
+	/* If there wasn't any free chunk, bail out */
+	if (found == kfd->gtt_sa_num_of_chunks)
+		goto kfd_gtt_no_free_chunk;
+
+	/* Update fields of mem_obj */
+	(*mem_obj)->range_start = found;
+	(*mem_obj)->range_end = found;
+	(*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr(
+					kfd->gtt_start_gpu_addr,
+					found,
+					kfd->gtt_sa_chunk_size);
+	(*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr(
+					kfd->gtt_start_cpu_ptr,
+					found,
+					kfd->gtt_sa_chunk_size);
+
+	pr_debug("kfd: gpu_addr = %p, cpu_addr = %p\n",
+			(uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr);
+
+	/* If we need only one chunk, mark it as allocated and get out */
+	if (size <= kfd->gtt_sa_chunk_size) {
+		pr_debug("kfd: single bit\n");
+		set_bit(found, kfd->gtt_sa_bitmap);
+		goto kfd_gtt_out;
+	}
+
+	/* Otherwise, try to see if we have enough contiguous chunks */
+	cur_size = size - kfd->gtt_sa_chunk_size;
+	do {
+		(*mem_obj)->range_end =
+			find_next_zero_bit(kfd->gtt_sa_bitmap,
+					kfd->gtt_sa_num_of_chunks, ++found);
+		/*
+		 * If next free chunk is not contiguous than we need to
+		 * restart our search from the last free chunk we found (which
+		 * wasn't contiguous to the previous ones
+		 */
+		if ((*mem_obj)->range_end != found) {
+			start_search = found;
+			goto kfd_gtt_restart_search;
+		}
+
+		/*
+		 * If we reached end of buffer, bail out with error
+		 */
+		if (found == kfd->gtt_sa_num_of_chunks)
+			goto kfd_gtt_no_free_chunk;
+
+		/* Check if we don't need another chunk */
+		if (cur_size <= kfd->gtt_sa_chunk_size)
+			cur_size = 0;
+		else
+			cur_size -= kfd->gtt_sa_chunk_size;
+
+	} while (cur_size > 0);
+
+	pr_debug("kfd: range_start = %d, range_end = %d\n",
+		(*mem_obj)->range_start, (*mem_obj)->range_end);
+
+	/* Mark the chunks as allocated */
+	for (found = (*mem_obj)->range_start;
+		found <= (*mem_obj)->range_end;
+		found++)
+		set_bit(found, kfd->gtt_sa_bitmap);
+
+kfd_gtt_out:
+	mutex_unlock(&kfd->gtt_sa_lock);
+	return 0;
+
+kfd_gtt_no_free_chunk:
+	pr_debug("kfd: allocation failed with mem_obj = %p\n", mem_obj);
+	mutex_unlock(&kfd->gtt_sa_lock);
+	kfree(mem_obj);
+	return -ENOMEM;
+}
+
+int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
+{
+	unsigned int bit;
+
+	BUG_ON(!kfd);
+
+	/* Act like kfree when trying to free a NULL object */
+	if (!mem_obj)
+		return 0;
+
+	pr_debug("kfd: free mem_obj = %p, range_start = %d, range_end = %d\n",
+			mem_obj, mem_obj->range_start, mem_obj->range_end);
+
+	mutex_lock(&kfd->gtt_sa_lock);
+
+	/* Mark the chunks as free */
+	for (bit = mem_obj->range_start;
+		bit <= mem_obj->range_end;
+		bit++)
+		clear_bit(bit, kfd->gtt_sa_bitmap);
+
+	mutex_unlock(&kfd->gtt_sa_lock);
+
+	kfree(mem_obj);
+	return 0;
+}
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
new file mode 100644
index 000000000..596ee5cd3
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -0,0 +1,1217 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/printk.h>
+#include <linux/bitops.h>
+#include <linux/sched.h>
+#include "kfd_priv.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_mqd_manager.h"
+#include "cik_regs.h"
+#include "kfd_kernel_queue.h"
+
+/* Size of the per-pipe EOP queue */
+#define CIK_HPD_EOP_BYTES_LOG2 11
+#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
+
+static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
+					unsigned int pasid, unsigned int vmid);
+
+static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
+					struct queue *q,
+					struct qcm_process_device *qpd);
+
+static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
+static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock);
+
+static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
+					struct queue *q,
+					struct qcm_process_device *qpd);
+
+static void deallocate_sdma_queue(struct device_queue_manager *dqm,
+				unsigned int sdma_queue_id);
+
+static inline
+enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
+{
+	if (type == KFD_QUEUE_TYPE_SDMA)
+		return KFD_MQD_TYPE_SDMA;
+	return KFD_MQD_TYPE_CP;
+}
+
+unsigned int get_first_pipe(struct device_queue_manager *dqm)
+{
+	BUG_ON(!dqm || !dqm->dev);
+	return dqm->dev->shared_resources.first_compute_pipe;
+}
+
+unsigned int get_pipes_num(struct device_queue_manager *dqm)
+{
+	BUG_ON(!dqm || !dqm->dev);
+	return dqm->dev->shared_resources.compute_pipe_count;
+}
+
+static inline unsigned int get_pipes_num_cpsch(void)
+{
+	return PIPE_PER_ME_CP_SCHEDULING;
+}
+
+void program_sh_mem_settings(struct device_queue_manager *dqm,
+					struct qcm_process_device *qpd)
+{
+	return dqm->dev->kfd2kgd->program_sh_mem_settings(
+						dqm->dev->kgd, qpd->vmid,
+						qpd->sh_mem_config,
+						qpd->sh_mem_ape1_base,
+						qpd->sh_mem_ape1_limit,
+						qpd->sh_mem_bases);
+}
+
+static int allocate_vmid(struct device_queue_manager *dqm,
+			struct qcm_process_device *qpd,
+			struct queue *q)
+{
+	int bit, allocated_vmid;
+
+	if (dqm->vmid_bitmap == 0)
+		return -ENOMEM;
+
+	bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, CIK_VMID_NUM);
+	clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
+
+	/* Kaveri kfd vmid's starts from vmid 8 */
+	allocated_vmid = bit + KFD_VMID_START_OFFSET;
+	pr_debug("kfd: vmid allocation %d\n", allocated_vmid);
+	qpd->vmid = allocated_vmid;
+	q->properties.vmid = allocated_vmid;
+
+	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
+	program_sh_mem_settings(dqm, qpd);
+
+	return 0;
+}
+
+static void deallocate_vmid(struct device_queue_manager *dqm,
+				struct qcm_process_device *qpd,
+				struct queue *q)
+{
+	int bit = qpd->vmid - KFD_VMID_START_OFFSET;
+
+	/* Release the vmid mapping */
+	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
+
+	set_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
+	qpd->vmid = 0;
+	q->properties.vmid = 0;
+}
+
+static int create_queue_nocpsch(struct device_queue_manager *dqm,
+				struct queue *q,
+				struct qcm_process_device *qpd,
+				int *allocated_vmid)
+{
+	int retval;
+
+	BUG_ON(!dqm || !q || !qpd || !allocated_vmid);
+
+	pr_debug("kfd: In func %s\n", __func__);
+	print_queue(q);
+
+	mutex_lock(&dqm->lock);
+
+	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
+				dqm->total_queue_count);
+		mutex_unlock(&dqm->lock);
+		return -EPERM;
+	}
+
+	if (list_empty(&qpd->queues_list)) {
+		retval = allocate_vmid(dqm, qpd, q);
+		if (retval != 0) {
+			mutex_unlock(&dqm->lock);
+			return retval;
+		}
+	}
+	*allocated_vmid = qpd->vmid;
+	q->properties.vmid = qpd->vmid;
+
+	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
+		retval = create_compute_queue_nocpsch(dqm, q, qpd);
+	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+		retval = create_sdma_queue_nocpsch(dqm, q, qpd);
+
+	if (retval != 0) {
+		if (list_empty(&qpd->queues_list)) {
+			deallocate_vmid(dqm, qpd, q);
+			*allocated_vmid = 0;
+		}
+		mutex_unlock(&dqm->lock);
+		return retval;
+	}
+
+	list_add(&q->list, &qpd->queues_list);
+	if (q->properties.is_active)
+		dqm->queue_count++;
+
+	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+		dqm->sdma_queue_count++;
+
+	/*
+	 * Unconditionally increment this counter, regardless of the queue's
+	 * type or whether the queue is active.
+	 */
+	dqm->total_queue_count++;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+
+	mutex_unlock(&dqm->lock);
+	return 0;
+}
+
+static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
+{
+	bool set;
+	int pipe, bit, i;
+
+	set = false;
+
+	for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_num(dqm);
+			pipe = ((pipe + 1) % get_pipes_num(dqm)), ++i) {
+		if (dqm->allocated_queues[pipe] != 0) {
+			bit = find_first_bit(
+				(unsigned long *)&dqm->allocated_queues[pipe],
+				QUEUES_PER_PIPE);
+
+			clear_bit(bit,
+				(unsigned long *)&dqm->allocated_queues[pipe]);
+			q->pipe = pipe;
+			q->queue = bit;
+			set = true;
+			break;
+		}
+	}
+
+	if (set == false)
+		return -EBUSY;
+
+	pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n",
+				__func__, q->pipe, q->queue);
+	/* horizontal hqd allocation */
+	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_num(dqm);
+
+	return 0;
+}
+
+static inline void deallocate_hqd(struct device_queue_manager *dqm,
+				struct queue *q)
+{
+	set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
+}
+
+static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
+					struct queue *q,
+					struct qcm_process_device *qpd)
+{
+	int retval;
+	struct mqd_manager *mqd;
+
+	BUG_ON(!dqm || !q || !qpd);
+
+	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
+	if (mqd == NULL)
+		return -ENOMEM;
+
+	retval = allocate_hqd(dqm, q);
+	if (retval != 0)
+		return retval;
+
+	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
+				&q->gart_mqd_addr, &q->properties);
+	if (retval != 0) {
+		deallocate_hqd(dqm, q);
+		return retval;
+	}
+
+	pr_debug("kfd: loading mqd to hqd on pipe (%d) queue (%d)\n",
+			q->pipe,
+			q->queue);
+
+	retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
+			q->queue, (uint32_t __user *) q->properties.write_ptr);
+	if (retval != 0) {
+		deallocate_hqd(dqm, q);
+		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+		return retval;
+	}
+
+	return 0;
+}
+
+static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
+				struct qcm_process_device *qpd,
+				struct queue *q)
+{
+	int retval;
+	struct mqd_manager *mqd;
+
+	BUG_ON(!dqm || !q || !q->mqd || !qpd);
+
+	retval = 0;
+
+	pr_debug("kfd: In Func %s\n", __func__);
+
+	mutex_lock(&dqm->lock);
+
+	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
+		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
+		if (mqd == NULL) {
+			retval = -ENOMEM;
+			goto out;
+		}
+		deallocate_hqd(dqm, q);
+	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
+		if (mqd == NULL) {
+			retval = -ENOMEM;
+			goto out;
+		}
+		dqm->sdma_queue_count--;
+		deallocate_sdma_queue(dqm, q->sdma_id);
+	} else {
+		pr_debug("q->properties.type is invalid (%d)\n",
+				q->properties.type);
+		retval = -EINVAL;
+		goto out;
+	}
+
+	retval = mqd->destroy_mqd(mqd, q->mqd,
+				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
+				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
+				q->pipe, q->queue);
+
+	if (retval != 0)
+		goto out;
+
+	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+
+	list_del(&q->list);
+	if (list_empty(&qpd->queues_list))
+		deallocate_vmid(dqm, qpd, q);
+	if (q->properties.is_active)
+		dqm->queue_count--;
+
+	/*
+	 * Unconditionally decrement this counter, regardless of the queue's
+	 * type
+	 */
+	dqm->total_queue_count--;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+
+out:
+	mutex_unlock(&dqm->lock);
+	return retval;
+}
+
+static int update_queue(struct device_queue_manager *dqm, struct queue *q)
+{
+	int retval;
+	struct mqd_manager *mqd;
+	bool prev_active = false;
+
+	BUG_ON(!dqm || !q || !q->mqd);
+
+	mutex_lock(&dqm->lock);
+	mqd = dqm->ops.get_mqd_manager(dqm,
+			get_mqd_type_from_queue_type(q->properties.type));
+	if (mqd == NULL) {
+		mutex_unlock(&dqm->lock);
+		return -ENOMEM;
+	}
+
+	if (q->properties.is_active == true)
+		prev_active = true;
+
+	/*
+	 *
+	 * check active state vs. the previous state
+	 * and modify counter accordingly
+	 */
+	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
+	if ((q->properties.is_active == true) && (prev_active == false))
+		dqm->queue_count++;
+	else if ((q->properties.is_active == false) && (prev_active == true))
+		dqm->queue_count--;
+
+	if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
+		retval = execute_queues_cpsch(dqm, false);
+
+	mutex_unlock(&dqm->lock);
+	return retval;
+}
+
+static struct mqd_manager *get_mqd_manager_nocpsch(
+		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
+{
+	struct mqd_manager *mqd;
+
+	BUG_ON(!dqm || type >= KFD_MQD_TYPE_MAX);
+
+	pr_debug("kfd: In func %s mqd type %d\n", __func__, type);
+
+	mqd = dqm->mqds[type];
+	if (!mqd) {
+		mqd = mqd_manager_init(type, dqm->dev);
+		if (mqd == NULL)
+			pr_err("kfd: mqd manager is NULL");
+		dqm->mqds[type] = mqd;
+	}
+
+	return mqd;
+}
+
+static int register_process_nocpsch(struct device_queue_manager *dqm,
+					struct qcm_process_device *qpd)
+{
+	struct device_process_node *n;
+	int retval;
+
+	BUG_ON(!dqm || !qpd);
+
+	pr_debug("kfd: In func %s\n", __func__);
+
+	n = kzalloc(sizeof(struct device_process_node), GFP_KERNEL);
+	if (!n)
+		return -ENOMEM;
+
+	n->qpd = qpd;
+
+	mutex_lock(&dqm->lock);
+	list_add(&n->list, &dqm->queues);
+
+	retval = dqm->ops_asic_specific.register_process(dqm, qpd);
+
+	dqm->processes_count++;
+
+	mutex_unlock(&dqm->lock);
+
+	return retval;
+}
+
+static int unregister_process_nocpsch(struct device_queue_manager *dqm,
+					struct qcm_process_device *qpd)
+{
+	int retval;
+	struct device_process_node *cur, *next;
+
+	BUG_ON(!dqm || !qpd);
+
+	pr_debug("In func %s\n", __func__);
+
+	pr_debug("qpd->queues_list is %s\n",
+			list_empty(&qpd->queues_list) ? "empty" : "not empty");
+
+	retval = 0;
+	mutex_lock(&dqm->lock);
+
+	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
+		if (qpd == cur->qpd) {
+			list_del(&cur->list);
+			kfree(cur);
+			dqm->processes_count--;
+			goto out;
+		}
+	}
+	/* qpd not found in dqm list */
+	retval = 1;
+out:
+	mutex_unlock(&dqm->lock);
+	return retval;
+}
+
+static int
+set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
+			unsigned int vmid)
+{
+	uint32_t pasid_mapping;
+
+	pasid_mapping = (pasid == 0) ? 0 :
+		(uint32_t)pasid |
+		ATC_VMID_PASID_MAPPING_VALID;
+
+	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
+						dqm->dev->kgd, pasid_mapping,
+						vmid);
+}
+
+int init_pipelines(struct device_queue_manager *dqm,
+			unsigned int pipes_num, unsigned int first_pipe)
+{
+	void *hpdptr;
+	struct mqd_manager *mqd;
+	unsigned int i, err, inx;
+	uint64_t pipe_hpd_addr;
+
+	BUG_ON(!dqm || !dqm->dev);
+
+	pr_debug("kfd: In func %s\n", __func__);
+
+	/*
+	 * Allocate memory for the HPDs. This is hardware-owned per-pipe data.
+	 * The driver never accesses this memory after zeroing it.
+	 * It doesn't even have to be saved/restored on suspend/resume
+	 * because it contains no data when there are no active queues.
+	 */
+
+	err = kfd_gtt_sa_allocate(dqm->dev, CIK_HPD_EOP_BYTES * pipes_num,
+					&dqm->pipeline_mem);
+
+	if (err) {
+		pr_err("kfd: error allocate vidmem num pipes: %d\n",
+			pipes_num);
+		return -ENOMEM;
+	}
+
+	hpdptr = dqm->pipeline_mem->cpu_ptr;
+	dqm->pipelines_addr = dqm->pipeline_mem->gpu_addr;
+
+	memset(hpdptr, 0, CIK_HPD_EOP_BYTES * pipes_num);
+
+	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
+	if (mqd == NULL) {
+		kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < pipes_num; i++) {
+		inx = i + first_pipe;
+		/*
+		 * HPD buffer on GTT is allocated by amdkfd, no need to waste
+		 * space in GTT for pipelines we don't initialize
+		 */
+		pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_EOP_BYTES;
+		pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr);
+		/* = log2(bytes/4)-1 */
+		dqm->dev->kfd2kgd->init_pipeline(dqm->dev->kgd, inx,
+				CIK_HPD_EOP_BYTES_LOG2 - 3, pipe_hpd_addr);
+	}
+
+	return 0;
+}
+
+static int init_scheduler(struct device_queue_manager *dqm)
+{
+	int retval;
+
+	BUG_ON(!dqm);
+
+	pr_debug("kfd: In %s\n", __func__);
+
+	retval = init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));
+	return retval;
+}
+
+static int initialize_nocpsch(struct device_queue_manager *dqm)
+{
+	int i;
+
+	BUG_ON(!dqm);
+
+	pr_debug("kfd: In func %s num of pipes: %d\n",
+			__func__, get_pipes_num(dqm));
+
+	mutex_init(&dqm->lock);
+	INIT_LIST_HEAD(&dqm->queues);
+	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
+	dqm->sdma_queue_count = 0;
+	dqm->allocated_queues = kcalloc(get_pipes_num(dqm),
+					sizeof(unsigned int), GFP_KERNEL);
+	if (!dqm->allocated_queues) {
+		mutex_destroy(&dqm->lock);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < get_pipes_num(dqm); i++)
+		dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1;
+
+	dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
+	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;
+
+	init_scheduler(dqm);
+	return 0;
+}
+
+static void uninitialize_nocpsch(struct device_queue_manager *dqm)
+{
+	int i;
+
+	BUG_ON(!dqm);
+
+	BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
+
+	kfree(dqm->allocated_queues);
+	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
+		kfree(dqm->mqds[i]);
+	mutex_destroy(&dqm->lock);
+	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
+}
+
+static int start_nocpsch(struct device_queue_manager *dqm)
+{
+	return 0;
+}
+
+static int stop_nocpsch(struct device_queue_manager *dqm)
+{
+	return 0;
+}
+
+static int allocate_sdma_queue(struct device_queue_manager *dqm,
+				unsigned int *sdma_queue_id)
+{
+	int bit;
+
+	if (dqm->sdma_bitmap == 0)
+		return -ENOMEM;
+
+	bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap,
+				CIK_SDMA_QUEUES);
+
+	clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap);
+	*sdma_queue_id = bit;
+
+	return 0;
+}
+
+static void deallocate_sdma_queue(struct device_queue_manager *dqm,
+				unsigned int sdma_queue_id)
+{
+	if (sdma_queue_id >= CIK_SDMA_QUEUES)
+		return;
+	set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap);
+}
+
+static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
+				struct qcm_process_device *qpd)
+{
+	uint32_t value = SDMA_ATC;
+
+	if (q->process->is_32bit_user_mode)
+		value |= SDMA_VA_PTR32 | get_sh_mem_bases_32(qpd_to_pdd(qpd));
+	else
+		value |= SDMA_VA_SHARED_BASE(get_sh_mem_bases_nybble_64(
+							qpd_to_pdd(qpd)));
+	q->properties.sdma_vm_addr = value;
+}
+
+static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
+					struct queue *q,
+					struct qcm_process_device *qpd)
+{
+	struct mqd_manager *mqd;
+	int retval;
+
+	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
+	if (!mqd)
+		return -ENOMEM;
+
+	retval = allocate_sdma_queue(dqm, &q->sdma_id);
+	if (retval != 0)
+		return retval;
+
+	q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
+	q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM;
+
+	pr_debug("kfd: sdma id is:    %d\n", q->sdma_id);
+	pr_debug("     sdma queue id: %d\n", q->properties.sdma_queue_id);
+	pr_debug("     sdma engine id: %d\n", q->properties.sdma_engine_id);
+
+	init_sdma_vm(dqm, q, qpd);
+	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
+				&q->gart_mqd_addr, &q->properties);
+	if (retval != 0) {
+		deallocate_sdma_queue(dqm, q->sdma_id);
+		return retval;
+	}
+
+	retval = mqd->load_mqd(mqd, q->mqd, 0,
+				0, NULL);
+	if (retval != 0) {
+		deallocate_sdma_queue(dqm, q->sdma_id);
+		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+		return retval;
+	}
+
+	return 0;
+}
+
+/*
+ * Device Queue Manager implementation for cp scheduler
+ */
+
+static int set_sched_resources(struct device_queue_manager *dqm)
+{
+	struct scheduling_resources res;
+	unsigned int queue_num, queue_mask;
+
+	BUG_ON(!dqm);
+
+	pr_debug("kfd: In func %s\n", __func__);
+
+	queue_num = get_pipes_num_cpsch() * QUEUES_PER_PIPE;
+	queue_mask = (1 << queue_num) - 1;
+	res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
+	res.vmid_mask <<= KFD_VMID_START_OFFSET;
+	res.queue_mask = queue_mask << (get_first_pipe(dqm) * QUEUES_PER_PIPE);
+	res.gws_mask = res.oac_mask = res.gds_heap_base =
+						res.gds_heap_size = 0;
+
+	pr_debug("kfd: scheduling resources:\n"
+			"      vmid mask: 0x%8X\n"
+			"      queue mask: 0x%8llX\n",
+			res.vmid_mask, res.queue_mask);
+
+	return pm_send_set_resources(&dqm->packets, &res);
+}
+
+static int initialize_cpsch(struct device_queue_manager *dqm)
+{
+	int retval;
+
+	BUG_ON(!dqm);
+
+	pr_debug("kfd: In func %s num of pipes: %d\n",
+			__func__, get_pipes_num_cpsch());
+
+	mutex_init(&dqm->lock);
+	INIT_LIST_HEAD(&dqm->queues);
+	dqm->queue_count = dqm->processes_count = 0;
+	dqm->sdma_queue_count = 0;
+	dqm->active_runlist = false;
+	retval = dqm->ops_asic_specific.initialize(dqm);
+	if (retval != 0)
+		goto fail_init_pipelines;
+
+	return 0;
+
+fail_init_pipelines:
+	mutex_destroy(&dqm->lock);
+	return retval;
+}
+
+static int start_cpsch(struct device_queue_manager *dqm)
+{
+	struct device_process_node *node;
+	int retval;
+
+	BUG_ON(!dqm);
+
+	retval = 0;
+
+	retval = pm_init(&dqm->packets, dqm);
+	if (retval != 0)
+		goto fail_packet_manager_init;
+
+	retval = set_sched_resources(dqm);
+	if (retval != 0)
+		goto fail_set_sched_resources;
+
+	pr_debug("kfd: allocating fence memory\n");
+
+	/* allocate fence memory on the gart */
+	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
+					&dqm->fence_mem);
+
+	if (retval != 0)
+		goto fail_allocate_vidmem;
+
+	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
+	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
+	list_for_each_entry(node, &dqm->queues, list)
+		if (node->qpd->pqm->process && dqm->dev)
+			kfd_bind_process_to_device(dqm->dev,
+						node->qpd->pqm->process);
+
+	execute_queues_cpsch(dqm, true);
+
+	return 0;
+fail_allocate_vidmem:
+fail_set_sched_resources:
+	pm_uninit(&dqm->packets);
+fail_packet_manager_init:
+	return retval;
+}
+
+static int stop_cpsch(struct device_queue_manager *dqm)
+{
+	struct device_process_node *node;
+	struct kfd_process_device *pdd;
+
+	BUG_ON(!dqm);
+
+	destroy_queues_cpsch(dqm, true);
+
+	list_for_each_entry(node, &dqm->queues, list) {
+		pdd = qpd_to_pdd(node->qpd);
+		pdd->bound = false;
+	}
+	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
+	pm_uninit(&dqm->packets);
+
+	return 0;
+}
+
+static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
+					struct kernel_queue *kq,
+					struct qcm_process_device *qpd)
+{
+	BUG_ON(!dqm || !kq || !qpd);
+
+	pr_debug("kfd: In func %s\n", __func__);
+
+	mutex_lock(&dqm->lock);
+	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+		pr_warn("amdkfd: Can't create new kernel queue because %d queues were already created\n",
+				dqm->total_queue_count);
+		mutex_unlock(&dqm->lock);
+		return -EPERM;
+	}
+
+	/*
+	 * Unconditionally increment this counter, regardless of the queue's
+	 * type or whether the queue is active.
+	 */
+	dqm->total_queue_count++;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+
+	list_add(&kq->list, &qpd->priv_queue_list);
+	dqm->queue_count++;
+	qpd->is_debug = true;
+	execute_queues_cpsch(dqm, false);
+	mutex_unlock(&dqm->lock);
+
+	return 0;
+}
+
+static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
+					struct kernel_queue *kq,
+					struct qcm_process_device *qpd)
+{
+	BUG_ON(!dqm || !kq);
+
+	pr_debug("kfd: In %s\n", __func__);
+
+	mutex_lock(&dqm->lock);
+	destroy_queues_cpsch(dqm, false);
+	list_del(&kq->list);
+	dqm->queue_count--;
+	qpd->is_debug = false;
+	execute_queues_cpsch(dqm, false);
+	/*
+	 * Unconditionally decrement this counter, regardless of the queue's
+	 * type.
+	 */
+	dqm->total_queue_count--;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+	mutex_unlock(&dqm->lock);
+}
+
+static void select_sdma_engine_id(struct queue *q)
+{
+	static int sdma_id;
+
+	q->sdma_id = sdma_id;
+	sdma_id = (sdma_id + 1) % 2;
+}
+
+static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
+			struct qcm_process_device *qpd, int *allocate_vmid)
+{
+	int retval;
+	struct mqd_manager *mqd;
+
+	BUG_ON(!dqm || !q || !qpd);
+
+	retval = 0;
+
+	if (allocate_vmid)
+		*allocate_vmid = 0;
+
+	mutex_lock(&dqm->lock);
+
+	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
+				dqm->total_queue_count);
+		retval = -EPERM;
+		goto out;
+	}
+
+	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+		select_sdma_engine_id(q);
+
+	mqd = dqm->ops.get_mqd_manager(dqm,
+			get_mqd_type_from_queue_type(q->properties.type));
+
+	if (mqd == NULL) {
+		mutex_unlock(&dqm->lock);
+		return -ENOMEM;
+	}
+
+	init_sdma_vm(dqm, q, qpd);
+
+	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
+				&q->gart_mqd_addr, &q->properties);
+	if (retval != 0)
+		goto out;
+
+	list_add(&q->list, &qpd->queues_list);
+	if (q->properties.is_active) {
+		dqm->queue_count++;
+		retval = execute_queues_cpsch(dqm, false);
+	}
+
+	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+			dqm->sdma_queue_count++;
+	/*
+	 * Unconditionally increment this counter, regardless of the queue's
+	 * type or whether the queue is active.
+	 */
+	dqm->total_queue_count++;
+
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+
+out:
+	mutex_unlock(&dqm->lock);
+	return retval;
+}
+
+static int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
+				unsigned int fence_value,
+				unsigned long timeout)
+{
+	BUG_ON(!fence_addr);
+	timeout += jiffies;
+
+	while (*fence_addr != fence_value) {
+		if (time_after(jiffies, timeout)) {
+			pr_err("kfd: qcm fence wait loop timeout expired\n");
+			return -ETIME;
+		}
+		schedule();
+	}
+
+	return 0;
+}
+
+static int destroy_sdma_queues(struct device_queue_manager *dqm,
+				unsigned int sdma_engine)
+{
+	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
+			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false,
+			sdma_engine);
+}
+
+static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
+{
+	int retval;
+
+	BUG_ON(!dqm);
+
+	retval = 0;
+
+	if (lock)
+		mutex_lock(&dqm->lock);
+	if (dqm->active_runlist == false)
+		goto out;
+
+	pr_debug("kfd: Before destroying queues, sdma queue count is : %u\n",
+		dqm->sdma_queue_count);
+
+	if (dqm->sdma_queue_count > 0) {
+		destroy_sdma_queues(dqm, 0);
+		destroy_sdma_queues(dqm, 1);
+	}
+
+	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
+			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0);
+	if (retval != 0)
+		goto out;
+
+	*dqm->fence_addr = KFD_FENCE_INIT;
+	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
+				KFD_FENCE_COMPLETED);
+	/* should be timed out */
+	amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
+				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
+	pm_release_ib(&dqm->packets);
+	dqm->active_runlist = false;
+
+out:
+	if (lock)
+		mutex_unlock(&dqm->lock);
+	return retval;
+}
+
+static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
+{
+	int retval;
+
+	BUG_ON(!dqm);
+
+	if (lock)
+		mutex_lock(&dqm->lock);
+
+	retval = destroy_queues_cpsch(dqm, false);
+	if (retval != 0) {
+		pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption");
+		goto out;
+	}
+
+	if (dqm->queue_count <= 0 || dqm->processes_count <= 0) {
+		retval = 0;
+		goto out;
+	}
+
+	if (dqm->active_runlist) {
+		retval = 0;
+		goto out;
+	}
+
+	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
+	if (retval != 0) {
+		pr_err("kfd: failed to execute runlist");
+		goto out;
+	}
+	dqm->active_runlist = true;
+
+out:
+	if (lock)
+		mutex_unlock(&dqm->lock);
+	return retval;
+}
+
+static int destroy_queue_cpsch(struct device_queue_manager *dqm,
+				struct qcm_process_device *qpd,
+				struct queue *q)
+{
+	int retval;
+	struct mqd_manager *mqd;
+
+	BUG_ON(!dqm || !qpd || !q);
+
+	retval = 0;
+
+	/* remove queue from list to prevent rescheduling after preemption */
+	mutex_lock(&dqm->lock);
+	mqd = dqm->ops.get_mqd_manager(dqm,
+			get_mqd_type_from_queue_type(q->properties.type));
+	if (!mqd) {
+		retval = -ENOMEM;
+		goto failed;
+	}
+
+	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+		dqm->sdma_queue_count--;
+
+	list_del(&q->list);
+	if (q->properties.is_active)
+		dqm->queue_count--;
+
+	execute_queues_cpsch(dqm, false);
+
+	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+
+	/*
+	 * Unconditionally decrement this counter, regardless of the queue's
+	 * type
+	 */
+	dqm->total_queue_count--;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+
+	mutex_unlock(&dqm->lock);
+
+	return 0;
+
+failed:
+	mutex_unlock(&dqm->lock);
+	return retval;
+}
+
+/*
+ * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
+ * stay in user mode.
+ */
+#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
+/* APE1 limit is inclusive and 64K aligned. */
+#define APE1_LIMIT_ALIGNMENT 0xFFFF
+
+static bool set_cache_memory_policy(struct device_queue_manager *dqm,
+				   struct qcm_process_device *qpd,
+				   enum cache_policy default_policy,
+				   enum cache_policy alternate_policy,
+				   void __user *alternate_aperture_base,
+				   uint64_t alternate_aperture_size)
+{
+	bool retval;
+
+	pr_debug("kfd: In func %s\n", __func__);
+
+	mutex_lock(&dqm->lock);
+
+	if (alternate_aperture_size == 0) {
+		/* base > limit disables APE1 */
+		qpd->sh_mem_ape1_base = 1;
+		qpd->sh_mem_ape1_limit = 0;
+	} else {
+		/*
+		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
+		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
+		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
+		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
+		 * Verify that the base and size parameters can be
+		 * represented in this format and convert them.
+		 * Additionally restrict APE1 to user-mode addresses.
+		 */
+
+		uint64_t base = (uintptr_t)alternate_aperture_base;
+		uint64_t limit = base + alternate_aperture_size - 1;
+
+		if (limit <= base)
+			goto out;
+
+		if ((base & APE1_FIXED_BITS_MASK) != 0)
+			goto out;
+
+		if ((limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT)
+			goto out;
+
+		qpd->sh_mem_ape1_base = base >> 16;
+		qpd->sh_mem_ape1_limit = limit >> 16;
+	}
+
+	retval = dqm->ops_asic_specific.set_cache_memory_policy(
+			dqm,
+			qpd,
+			default_policy,
+			alternate_policy,
+			alternate_aperture_base,
+			alternate_aperture_size);
+
+	if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
+		program_sh_mem_settings(dqm, qpd);
+
+	pr_debug("kfd: sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
+		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
+		qpd->sh_mem_ape1_limit);
+
+	mutex_unlock(&dqm->lock);
+	return retval;
+
+out:
+	mutex_unlock(&dqm->lock);
+	return false;
+}
+
+struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
+{
+	struct device_queue_manager *dqm;
+
+	BUG_ON(!dev);
+
+	pr_debug("kfd: loading device queue manager\n");
+
+	dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL);
+	if (!dqm)
+		return NULL;
+
+	dqm->dev = dev;
+	switch (sched_policy) {
+	case KFD_SCHED_POLICY_HWS:
+	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
+		/* initialize dqm for cp scheduling */
+		dqm->ops.create_queue = create_queue_cpsch;
+		dqm->ops.initialize = initialize_cpsch;
+		dqm->ops.start = start_cpsch;
+		dqm->ops.stop = stop_cpsch;
+		dqm->ops.destroy_queue = destroy_queue_cpsch;
+		dqm->ops.update_queue = update_queue;
+		dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
+		dqm->ops.register_process = register_process_nocpsch;
+		dqm->ops.unregister_process = unregister_process_nocpsch;
+		dqm->ops.uninitialize = uninitialize_nocpsch;
+		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
+		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
+		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
+		break;
+	case KFD_SCHED_POLICY_NO_HWS:
+		/* initialize dqm for no cp scheduling */
+		dqm->ops.start = start_nocpsch;
+		dqm->ops.stop = stop_nocpsch;
+		dqm->ops.create_queue = create_queue_nocpsch;
+		dqm->ops.destroy_queue = destroy_queue_nocpsch;
+		dqm->ops.update_queue = update_queue;
+		dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
+		dqm->ops.register_process = register_process_nocpsch;
+		dqm->ops.unregister_process = unregister_process_nocpsch;
+		dqm->ops.initialize = initialize_nocpsch;
+		dqm->ops.uninitialize = uninitialize_nocpsch;
+		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	switch (dev->device_info->asic_family) {
+	case CHIP_CARRIZO:
+		device_queue_manager_init_vi(&dqm->ops_asic_specific);
+		break;
+
+	case CHIP_KAVERI:
+		device_queue_manager_init_cik(&dqm->ops_asic_specific);
+		break;
+	}
+
+	if (dqm->ops.initialize(dqm) != 0) {
+		kfree(dqm);
+		return NULL;
+	}
+
+	return dqm;
+}
+
+void device_queue_manager_uninit(struct device_queue_manager *dqm)
+{
+	BUG_ON(!dqm);
+
+	dqm->ops.uninitialize(dqm);
+	kfree(dqm);
+}
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
new file mode 100644
index 000000000..488f51d19
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -0,0 +1,180 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef KFD_DEVICE_QUEUE_MANAGER_H_
+#define KFD_DEVICE_QUEUE_MANAGER_H_
+
+#include <linux/rwsem.h>
+#include <linux/list.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+
+#define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS	(500)
+#define QUEUES_PER_PIPE				(8)
+#define PIPE_PER_ME_CP_SCHEDULING		(3)
+#define CIK_VMID_NUM				(8)
+#define KFD_VMID_START_OFFSET			(8)
+#define VMID_PER_DEVICE				CIK_VMID_NUM
+#define KFD_DQM_FIRST_PIPE			(0)
+#define CIK_SDMA_QUEUES				(4)
+#define CIK_SDMA_QUEUES_PER_ENGINE		(2)
+#define CIK_SDMA_ENGINE_NUM			(2)
+
+struct device_process_node {
+	struct qcm_process_device *qpd;
+	struct list_head list;
+};
+
+/**
+ * struct device_queue_manager_ops
+ *
+ * @create_queue: Queue creation routine.
+ *
+ * @destroy_queue: Queue destruction routine.
+ *
+ * @update_queue: Queue update routine.
+ *
+ * @get_mqd_manager: Returns the mqd manager according to the mqd type.
+ *
+ * @exeute_queues: Dispatches the queues list to the H/W.
+ *
+ * @register_process: This routine associates a specific process with device.
+ *
+ * @unregister_process: destroys the associations between process to device.
+ *
+ * @initialize: Initializes the pipelines and memory module for that device.
+ *
+ * @start: Initializes the resources/modules the the device needs for queues
+ * execution. This function is called on device initialization and after the
+ * system woke up after suspension.
+ *
+ * @stop: This routine stops execution of all the active queue running on the
+ * H/W and basically this function called on system suspend.
+ *
+ * @uninitialize: Destroys all the device queue manager resources allocated in
+ * initialize routine.
+ *
+ * @create_kernel_queue: Creates kernel queue. Used for debug queue.
+ *
+ * @destroy_kernel_queue: Destroys kernel queue. Used for debug queue.
+ *
+ * @set_cache_memory_policy: Sets memory policy (cached/ non cached) for the
+ * memory apertures.
+ *
+ */
+
+struct device_queue_manager_ops {
+	int	(*create_queue)(struct device_queue_manager *dqm,
+				struct queue *q,
+				struct qcm_process_device *qpd,
+				int *allocate_vmid);
+	int	(*destroy_queue)(struct device_queue_manager *dqm,
+				struct qcm_process_device *qpd,
+				struct queue *q);
+	int	(*update_queue)(struct device_queue_manager *dqm,
+				struct queue *q);
+
+	struct mqd_manager * (*get_mqd_manager)
+					(struct device_queue_manager *dqm,
+					enum KFD_MQD_TYPE type);
+
+	int	(*register_process)(struct device_queue_manager *dqm,
+					struct qcm_process_device *qpd);
+	int	(*unregister_process)(struct device_queue_manager *dqm,
+					struct qcm_process_device *qpd);
+	int	(*initialize)(struct device_queue_manager *dqm);
+	int	(*start)(struct device_queue_manager *dqm);
+	int	(*stop)(struct device_queue_manager *dqm);
+	void	(*uninitialize)(struct device_queue_manager *dqm);
+	int	(*create_kernel_queue)(struct device_queue_manager *dqm,
+					struct kernel_queue *kq,
+					struct qcm_process_device *qpd);
+	void	(*destroy_kernel_queue)(struct device_queue_manager *dqm,
+					struct kernel_queue *kq,
+					struct qcm_process_device *qpd);
+	bool	(*set_cache_memory_policy)(struct device_queue_manager *dqm,
+					   struct qcm_process_device *qpd,
+					   enum cache_policy default_policy,
+					   enum cache_policy alternate_policy,
+					   void __user *alternate_aperture_base,
+					   uint64_t alternate_aperture_size);
+};
+
+/**
+ * struct device_queue_manager
+ *
+ * This struct is a base class for the kfd queues scheduler in the
+ * device level. The device base class should expose the basic operations
+ * for queue creation and queue destruction. This base class hides the
+ * scheduling mode of the driver and the specific implementation of the
+ * concrete device. This class is the only class in the queues scheduler
+ * that configures the H/W.
+ *
+ */
+
+struct device_queue_manager {
+	struct device_queue_manager_ops ops;
+	struct device_queue_manager_ops ops_asic_specific;
+
+	struct mqd_manager	*mqds[KFD_MQD_TYPE_MAX];
+	struct packet_manager	packets;
+	struct kfd_dev		*dev;
+	struct mutex		lock;
+	struct list_head	queues;
+	unsigned int		processes_count;
+	unsigned int		queue_count;
+	unsigned int		sdma_queue_count;
+	unsigned int		total_queue_count;
+	unsigned int		next_pipe_to_allocate;
+	unsigned int		*allocated_queues;
+	unsigned int		sdma_bitmap;
+	unsigned int		vmid_bitmap;
+	uint64_t		pipelines_addr;
+	struct kfd_mem_obj	*pipeline_mem;
+	uint64_t		fence_gpu_addr;
+	unsigned int		*fence_addr;
+	struct kfd_mem_obj	*fence_mem;
+	bool			active_runlist;
+};
+
+void device_queue_manager_init_cik(struct device_queue_manager_ops *ops);
+void device_queue_manager_init_vi(struct device_queue_manager_ops *ops);
+void program_sh_mem_settings(struct device_queue_manager *dqm,
+					struct qcm_process_device *qpd);
+int init_pipelines(struct device_queue_manager *dqm,
+		unsigned int pipes_num, unsigned int first_pipe);
+unsigned int get_first_pipe(struct device_queue_manager *dqm);
+unsigned int get_pipes_num(struct device_queue_manager *dqm);
+
+extern inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
+{
+	return (pdd->lds_base >> 16) & 0xFF;
+}
+
+extern inline unsigned int
+get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd)
+{
+	return (pdd->lds_base >> 60) & 0x0E;
+}
+
+#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
new file mode 100644
index 000000000..5469efe05
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_device_queue_manager.h"
+#include "cik_regs.h"
+
+static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
+				   struct qcm_process_device *qpd,
+				   enum cache_policy default_policy,
+				   enum cache_policy alternate_policy,
+				   void __user *alternate_aperture_base,
+				   uint64_t alternate_aperture_size);
+static int register_process_cik(struct device_queue_manager *dqm,
+					struct qcm_process_device *qpd);
+static int initialize_cpsch_cik(struct device_queue_manager *dqm);
+
+void device_queue_manager_init_cik(struct device_queue_manager_ops *ops)
+{
+	ops->set_cache_memory_policy = set_cache_memory_policy_cik;
+	ops->register_process = register_process_cik;
+	ops->initialize = initialize_cpsch_cik;
+}
+
+static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
+{
+	/* In 64-bit mode, we can only control the top 3 bits of the LDS,
+	 * scratch and GPUVM apertures.
+	 * The hardware fills in the remaining 59 bits according to the
+	 * following pattern:
+	 * LDS:		X0000000'00000000 - X0000001'00000000 (4GB)
+	 * Scratch:	X0000001'00000000 - X0000002'00000000 (4GB)
+	 * GPUVM:	Y0010000'00000000 - Y0020000'00000000 (1TB)
+	 *
+	 * (where X/Y is the configurable nybble with the low-bit 0)
+	 *
+	 * LDS and scratch will have the same top nybble programmed in the
+	 * top 3 bits of SH_MEM_BASES.PRIVATE_BASE.
+	 * GPUVM can have a different top nybble programmed in the
+	 * top 3 bits of SH_MEM_BASES.SHARED_BASE.
+	 * We don't bother to support different top nybbles
+	 * for LDS/Scratch and GPUVM.
+	 */
+
+	BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
+		top_address_nybble == 0);
+
+	return PRIVATE_BASE(top_address_nybble << 12) |
+			SHARED_BASE(top_address_nybble << 12);
+}
+
+static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
+				   struct qcm_process_device *qpd,
+				   enum cache_policy default_policy,
+				   enum cache_policy alternate_policy,
+				   void __user *alternate_aperture_base,
+				   uint64_t alternate_aperture_size)
+{
+	uint32_t default_mtype;
+	uint32_t ape1_mtype;
+
+	default_mtype = (default_policy == cache_policy_coherent) ?
+			MTYPE_NONCACHED :
+			MTYPE_CACHED;
+
+	ape1_mtype = (alternate_policy == cache_policy_coherent) ?
+			MTYPE_NONCACHED :
+			MTYPE_CACHED;
+
+	qpd->sh_mem_config = (qpd->sh_mem_config & PTR32)
+			| ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
+			| DEFAULT_MTYPE(default_mtype)
+			| APE1_MTYPE(ape1_mtype);
+
+	return true;
+}
+
+static int register_process_cik(struct device_queue_manager *dqm,
+		struct qcm_process_device *qpd)
+{
+	struct kfd_process_device *pdd;
+	unsigned int temp;
+
+	BUG_ON(!dqm || !qpd);
+
+	pdd = qpd_to_pdd(qpd);
+
+	/* check if sh_mem_config register already configured */
+	if (qpd->sh_mem_config == 0) {
+		qpd->sh_mem_config =
+			ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
+			DEFAULT_MTYPE(MTYPE_NONCACHED) |
+			APE1_MTYPE(MTYPE_NONCACHED);
+		qpd->sh_mem_ape1_limit = 0;
+		qpd->sh_mem_ape1_base = 0;
+	}
+
+	if (qpd->pqm->process->is_32bit_user_mode) {
+		temp = get_sh_mem_bases_32(pdd);
+		qpd->sh_mem_bases = SHARED_BASE(temp);
+		qpd->sh_mem_config |= PTR32;
+	} else {
+		temp = get_sh_mem_bases_nybble_64(pdd);
+		qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
+	}
+
+	pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
+		qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
+
+	return 0;
+}
+
+static int initialize_cpsch_cik(struct device_queue_manager *dqm)
+{
+	return init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));
+}
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
new file mode 100644
index 000000000..20553dcd2
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_device_queue_manager.h"
+
+static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
+				   struct qcm_process_device *qpd,
+				   enum cache_policy default_policy,
+				   enum cache_policy alternate_policy,
+				   void __user *alternate_aperture_base,
+				   uint64_t alternate_aperture_size);
+static int register_process_vi(struct device_queue_manager *dqm,
+					struct qcm_process_device *qpd);
+static int initialize_cpsch_vi(struct device_queue_manager *dqm);
+
+void device_queue_manager_init_vi(struct device_queue_manager_ops *ops)
+{
+	pr_warn("amdkfd: VI DQM is not currently supported\n");
+
+	ops->set_cache_memory_policy = set_cache_memory_policy_vi;
+	ops->register_process = register_process_vi;
+	ops->initialize = initialize_cpsch_vi;
+}
+
+static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
+				   struct qcm_process_device *qpd,
+				   enum cache_policy default_policy,
+				   enum cache_policy alternate_policy,
+				   void __user *alternate_aperture_base,
+				   uint64_t alternate_aperture_size)
+{
+	return false;
+}
+
+static int register_process_vi(struct device_queue_manager *dqm,
+					struct qcm_process_device *qpd)
+{
+	return -1;
+}
+
+static int initialize_cpsch_vi(struct device_queue_manager *dqm)
+{
+	return 0;
+}
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
new file mode 100644
index 000000000..17e56dcc8
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "kfd_priv.h"
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+
+/*
+ * This extension supports a kernel level doorbells management for
+ * the kernel queues.
+ * Basically the last doorbells page is devoted to kernel queues
+ * and that's assures that any user process won't get access to the
+ * kernel doorbells page
+ */
+
+#define KERNEL_DOORBELL_PASID 1
+#define KFD_SIZE_OF_DOORBELL_IN_BYTES 4
+
+/*
+ * Each device exposes a doorbell aperture, a PCI MMIO aperture that
+ * receives 32-bit writes that are passed to queues as wptr values.
+ * The doorbells are intended to be written by applications as part
+ * of queueing work on user-mode queues.
+ * We assign doorbells to applications in PAGE_SIZE-sized and aligned chunks.
+ * We map the doorbell address space into user-mode when a process creates
+ * its first queue on each device.
+ * Although the mapping is done by KFD, it is equivalent to an mmap of
+ * the /dev/kfd with the particular device encoded in the mmap offset.
+ * There will be other uses for mmap of /dev/kfd, so only a range of
+ * offsets (KFD_MMAP_DOORBELL_START-END) is used for doorbells.
+ */
+
+/* # of doorbell bytes allocated for each process. */
+static inline size_t doorbell_process_allocation(void)
+{
+	return roundup(KFD_SIZE_OF_DOORBELL_IN_BYTES *
+			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+			PAGE_SIZE);
+}
+
+/* Doorbell calculations for device init. */
+void kfd_doorbell_init(struct kfd_dev *kfd)
+{
+	size_t doorbell_start_offset;
+	size_t doorbell_aperture_size;
+	size_t doorbell_process_limit;
+
+	/*
+	 * We start with calculations in bytes because the input data might
+	 * only be byte-aligned.
+	 * Only after we have done the rounding can we assume any alignment.
+	 */
+
+	doorbell_start_offset =
+			roundup(kfd->shared_resources.doorbell_start_offset,
+					doorbell_process_allocation());
+
+	doorbell_aperture_size =
+			rounddown(kfd->shared_resources.doorbell_aperture_size,
+					doorbell_process_allocation());
+
+	if (doorbell_aperture_size > doorbell_start_offset)
+		doorbell_process_limit =
+			(doorbell_aperture_size - doorbell_start_offset) /
+						doorbell_process_allocation();
+	else
+		doorbell_process_limit = 0;
+
+	kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
+				doorbell_start_offset;
+
+	kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32);
+	kfd->doorbell_process_limit = doorbell_process_limit - 1;
+
+	kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
+						doorbell_process_allocation());
+
+	BUG_ON(!kfd->doorbell_kernel_ptr);
+
+	pr_debug("kfd: doorbell initialization:\n");
+	pr_debug("kfd: doorbell base           == 0x%08lX\n",
+			(uintptr_t)kfd->doorbell_base);
+
+	pr_debug("kfd: doorbell_id_offset      == 0x%08lX\n",
+			kfd->doorbell_id_offset);
+
+	pr_debug("kfd: doorbell_process_limit  == 0x%08lX\n",
+			doorbell_process_limit);
+
+	pr_debug("kfd: doorbell_kernel_offset  == 0x%08lX\n",
+			(uintptr_t)kfd->doorbell_base);
+
+	pr_debug("kfd: doorbell aperture size  == 0x%08lX\n",
+			kfd->shared_resources.doorbell_aperture_size);
+
+	pr_debug("kfd: doorbell kernel address == 0x%08lX\n",
+			(uintptr_t)kfd->doorbell_kernel_ptr);
+}
+
+int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
+{
+	phys_addr_t address;
+	struct kfd_dev *dev;
+
+	/*
+	 * For simplicitly we only allow mapping of the entire doorbell
+	 * allocation of a single device & process.
+	 */
+	if (vma->vm_end - vma->vm_start != doorbell_process_allocation())
+		return -EINVAL;
+
+	/* Find kfd device according to gpu id */
+	dev = kfd_device_by_id(vma->vm_pgoff);
+	if (dev == NULL)
+		return -EINVAL;
+
+	/* Calculate physical address of doorbell */
+	address = kfd_get_process_doorbells(dev, process);
+
+	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
+				VM_DONTDUMP | VM_PFNMAP;
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	pr_debug("kfd: mapping doorbell page in kfd_doorbell_mmap\n"
+		 "     target user address == 0x%08llX\n"
+		 "     physical address    == 0x%08llX\n"
+		 "     vm_flags            == 0x%04lX\n"
+		 "     size                == 0x%04lX\n",
+		 (unsigned long long) vma->vm_start, address, vma->vm_flags,
+		 doorbell_process_allocation());
+
+
+	return io_remap_pfn_range(vma,
+				vma->vm_start,
+				address >> PAGE_SHIFT,
+				doorbell_process_allocation(),
+				vma->vm_page_prot);
+}
+
+
+/* get kernel iomem pointer for a doorbell */
+u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
+					unsigned int *doorbell_off)
+{
+	u32 inx;
+
+	BUG_ON(!kfd || !doorbell_off);
+
+	mutex_lock(&kfd->doorbell_mutex);
+	inx = find_first_zero_bit(kfd->doorbell_available_index,
+					KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
+
+	__set_bit(inx, kfd->doorbell_available_index);
+	mutex_unlock(&kfd->doorbell_mutex);
+
+	if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
+		return NULL;
+
+	/*
+	 * Calculating the kernel doorbell offset using "faked" kernel
+	 * pasid that allocated for kernel queues only
+	 */
+	*doorbell_off = KERNEL_DOORBELL_PASID * (doorbell_process_allocation() /
+							sizeof(u32)) + inx;
+
+	pr_debug("kfd: get kernel queue doorbell\n"
+			 "     doorbell offset   == 0x%08d\n"
+			 "     kernel address    == 0x%08lX\n",
+		*doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx));
+
+	return kfd->doorbell_kernel_ptr + inx;
+}
+
+void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
+{
+	unsigned int inx;
+
+	BUG_ON(!kfd || !db_addr);
+
+	inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
+
+	mutex_lock(&kfd->doorbell_mutex);
+	__clear_bit(inx, kfd->doorbell_available_index);
+	mutex_unlock(&kfd->doorbell_mutex);
+}
+
+inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
+{
+	if (db) {
+		writel(value, db);
+		pr_debug("writing %d to doorbell address 0x%p\n", value, db);
+	}
+}
+
+/*
+ * queue_ids are in the range [0,MAX_PROCESS_QUEUES) and are mapped 1:1
+ * to doorbells with the process's doorbell page
+ */
+unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
+					struct kfd_process *process,
+					unsigned int queue_id)
+{
+	/*
+	 * doorbell_id_offset accounts for doorbells taken by KGD.
+	 * pasid * doorbell_process_allocation/sizeof(u32) adjusts
+	 * to the process's doorbells
+	 */
+	return kfd->doorbell_id_offset +
+		process->pasid * (doorbell_process_allocation()/sizeof(u32)) +
+		queue_id;
+}
+
+uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
+{
+	uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size -
+				kfd->shared_resources.doorbell_start_offset) /
+					doorbell_process_allocation() + 1;
+
+	return num_of_elems;
+
+}
+
+phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
+					struct kfd_process *process)
+{
+	return dev->doorbell_base +
+		process->pasid * doorbell_process_allocation();
+}
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
new file mode 100644
index 000000000..35b987574
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -0,0 +1,355 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/compat.h>
+#include <uapi/linux/kfd_ioctl.h>
+#include <linux/time.h>
+#include "kfd_priv.h"
+#include <linux/mm.h>
+#include <uapi/asm-generic/mman-common.h>
+#include <asm/processor.h>
+
+/*
+ * The primary memory I/O features being added for revisions of gfxip
+ * beyond 7.0 (Kaveri) are:
+ *
+ * Access to ATC/IOMMU mapped memory w/ associated extension of VA to 48b
+ *
+ * “Flat” shader memory access – These are new shader vector memory
+ * operations that do not reference a T#/V# so a “pointer” is what is
+ * sourced from the vector gprs for direct access to memory.
+ * This pointer space has the Shared(LDS) and Private(Scratch) memory
+ * mapped into this pointer space as apertures.
+ * The hardware then determines how to direct the memory request
+ * based on what apertures the request falls in.
+ *
+ * Unaligned support and alignment check
+ *
+ *
+ * System Unified Address - SUA
+ *
+ * The standard usage for GPU virtual addresses are that they are mapped by
+ * a set of page tables we call GPUVM and these page tables are managed by
+ * a combination of vidMM/driver software components.  The current virtual
+ * address (VA) range for GPUVM is 40b.
+ *
+ * As of gfxip7.1 and beyond we’re adding the ability for compute memory
+ * clients (CP/RLC, DMA, SHADER(ifetch, scalar, and vector ops)) to access
+ * the same page tables used by host x86 processors and that are managed by
+ * the operating system. This is via a technique and hardware called ATC/IOMMU.
+ * The GPU has the capability of accessing both the GPUVM and ATC address
+ * spaces for a given VMID (process) simultaneously and we call this feature
+ * system unified address (SUA).
+ *
+ * There are three fundamental address modes of operation for a given VMID
+ * (process) on the GPU:
+ *
+ *	HSA64 – 64b pointers and the default address space is ATC
+ *	HSA32 – 32b pointers and the default address space is ATC
+ *	GPUVM – 64b pointers and the default address space is GPUVM (driver
+ *		model mode)
+ *
+ *
+ * HSA64 - ATC/IOMMU 64b
+ *
+ * A 64b pointer in the AMD64/IA64 CPU architecture is not fully utilized
+ * by the CPU so an AMD CPU can only access the high area
+ * (VA[63:47] == 0x1FFFF) and low area (VA[63:47 == 0) of the address space
+ * so the actual VA carried to translation is 48b.  There is a “hole” in
+ * the middle of the 64b VA space.
+ *
+ * The GPU not only has access to all of the CPU accessible address space via
+ * ATC/IOMMU, but it also has access to the GPUVM address space.  The “system
+ * unified address” feature (SUA) is the mapping of GPUVM and ATC address
+ * spaces into a unified pointer space.  The method we take for 64b mode is
+ * to map the full 40b GPUVM address space into the hole of the 64b address
+ * space.
+
+ * The GPUVM_Base/GPUVM_Limit defines the aperture in the 64b space where we
+ * direct requests to be translated via GPUVM page tables instead of the
+ * IOMMU path.
+ *
+ *
+ * 64b to 49b Address conversion
+ *
+ * Note that there are still significant portions of unused regions (holes)
+ * in the 64b address space even for the GPU.  There are several places in
+ * the pipeline (sw and hw), we wish to compress the 64b virtual address
+ * to a 49b address.  This 49b address is constituted of an “ATC” bit
+ * plus a 48b virtual address.  This 49b address is what is passed to the
+ * translation hardware.  ATC==0 means the 48b address is a GPUVM address
+ * (max of 2^40 – 1) intended to be translated via GPUVM page tables.
+ * ATC==1 means the 48b address is intended to be translated via IOMMU
+ * page tables.
+ *
+ * A 64b pointer is compared to the apertures that are defined (Base/Limit), in
+ * this case the GPUVM aperture (red) is defined and if a pointer falls in this
+ * aperture, we subtract the GPUVM_Base address and set the ATC bit to zero
+ * as part of the 64b to 49b conversion.
+ *
+ * Where this 64b to 49b conversion is done is a function of the usage.
+ * Most GPU memory access is via memory objects where the driver builds
+ * a descriptor which consists of a base address and a memory access by
+ * the GPU usually consists of some kind of an offset or Cartesian coordinate
+ * that references this memory descriptor.  This is the case for shader
+ * instructions that reference the T# or V# constants, or for specified
+ * locations of assets (ex. the shader program location).  In these cases
+ * the driver is what handles the 64b to 49b conversion and the base
+ * address in the descriptor (ex. V# or T# or shader program location)
+ * is defined as a 48b address w/ an ATC bit.  For this usage a given
+ * memory object cannot straddle multiple apertures in the 64b address
+ * space. For example a shader program cannot jump in/out between ATC
+ * and GPUVM space.
+ *
+ * In some cases we wish to pass a 64b pointer to the GPU hardware and
+ * the GPU hw does the 64b to 49b conversion before passing memory
+ * requests to the cache/memory system.  This is the case for the
+ * S_LOAD and FLAT_* shader memory instructions where we have 64b pointers
+ * in scalar and vector GPRs respectively.
+ *
+ * In all cases (no matter where the 64b -> 49b conversion is done), the gfxip
+ * hardware sends a 48b address along w/ an ATC bit, to the memory controller
+ * on the memory request interfaces.
+ *
+ *	<client>_MC_rdreq_atc   // read request ATC bit
+ *
+ *		0 : <client>_MC_rdreq_addr is a GPUVM VA
+ *
+ *		1 : <client>_MC_rdreq_addr is a ATC VA
+ *
+ *
+ * “Spare” aperture (APE1)
+ *
+ * We use the GPUVM aperture to differentiate ATC vs. GPUVM, but we also use
+ * apertures to set the Mtype field for S_LOAD/FLAT_* ops which is input to the
+ * config tables for setting cache policies. The “spare” (APE1) aperture is
+ * motivated by getting a different Mtype from the default.
+ * The default aperture isn’t an actual base/limit aperture; it is just the
+ * address space that doesn’t hit any defined base/limit apertures.
+ * The following diagram is a complete picture of the gfxip7.x SUA apertures.
+ * The APE1 can be placed either below or above
+ * the hole (cannot be in the hole).
+ *
+ *
+ * General Aperture definitions and rules
+ *
+ * An aperture register definition consists of a Base, Limit, Mtype, and
+ * usually an ATC bit indicating which translation tables that aperture uses.
+ * In all cases (for SUA and DUA apertures discussed later), aperture base
+ * and limit definitions are 64KB aligned.
+ *
+ *	<ape>_Base[63:0] = { <ape>_Base_register[63:16], 0x0000 }
+ *
+ *	<ape>_Limit[63:0] = { <ape>_Limit_register[63:16], 0xFFFF }
+ *
+ * The base and limit are considered inclusive to an aperture so being
+ * inside an aperture means (address >= Base) AND (address <= Limit).
+ *
+ * In no case is a payload that straddles multiple apertures expected to work.
+ * For example a load_dword_x4 that starts in one aperture and ends in another,
+ * does not work.  For the vector FLAT_* ops we have detection capability in
+ * the shader for reporting a “memory violation” back to the
+ * SQ block for use in traps.
+ * A memory violation results when an op falls into the hole,
+ * or a payload straddles multiple apertures.  The S_LOAD instruction
+ * does not have this detection.
+ *
+ * Apertures cannot overlap.
+ *
+ *
+ *
+ * HSA32 - ATC/IOMMU 32b
+ *
+ * For HSA32 mode, the pointers are interpreted as 32 bits and use a single GPR
+ * instead of two for the S_LOAD and FLAT_* ops. The entire GPUVM space of 40b
+ * will not fit so there is only partial visibility to the GPUVM
+ * space (defined by the aperture) for S_LOAD and FLAT_* ops.
+ * There is no spare (APE1) aperture for HSA32 mode.
+ *
+ *
+ * GPUVM 64b mode (driver model)
+ *
+ * This mode is related to HSA64 in that the difference really is that
+ * the default aperture is GPUVM (ATC==0) and not ATC space.
+ * We have gfxip7.x hardware that has FLAT_* and S_LOAD support for
+ * SUA GPUVM mode, but does not support HSA32/HSA64.
+ *
+ *
+ * Device Unified Address - DUA
+ *
+ * Device unified address (DUA) is the name of the feature that maps the
+ * Shared(LDS) memory and Private(Scratch) memory into the overall address
+ * space for use by the new FLAT_* vector memory ops.  The Shared and
+ * Private memories are mapped as apertures into the address space,
+ * and the hardware detects when a FLAT_* memory request is to be redirected
+ * to the LDS or Scratch memory when it falls into one of these apertures.
+ * Like the SUA apertures, the Shared/Private apertures are 64KB aligned and
+ * the base/limit is “in” the aperture. For both HSA64 and GPUVM SUA modes,
+ * the Shared/Private apertures are always placed in a limited selection of
+ * options in the hole of the 64b address space. For HSA32 mode, the
+ * Shared/Private apertures can be placed anywhere in the 32b space
+ * except at 0.
+ *
+ *
+ * HSA64 Apertures for FLAT_* vector ops
+ *
+ * For HSA64 SUA mode, the Shared and Private apertures are always placed
+ * in the hole w/ a limited selection of possible locations. The requests
+ * that fall in the private aperture are expanded as a function of the
+ * work-item id (tid) and redirected to the location of the
+ * “hidden private memory”. The hidden private can be placed in either GPUVM
+ * or ATC space. The addresses that fall in the shared aperture are
+ * re-directed to the on-chip LDS memory hardware.
+ *
+ *
+ * HSA32 Apertures for FLAT_* vector ops
+ *
+ * In HSA32 mode, the Private and Shared apertures can be placed anywhere
+ * in the 32b space except at 0 (Private or Shared Base at zero disables
+ * the apertures). If the base address of the apertures are non-zero
+ * (ie apertures exists), the size is always 64KB.
+ *
+ *
+ * GPUVM Apertures for FLAT_* vector ops
+ *
+ * In GPUVM mode, the Shared/Private apertures are specified identically
+ * to HSA64 mode where they are always in the hole at a limited selection
+ * of locations.
+ *
+ *
+ * Aperture Definitions for SUA and DUA
+ *
+ * The interpretation of the aperture register definitions for a given
+ * VMID is a function of the “SUA Mode” which is one of HSA64, HSA32, or
+ * GPUVM64 discussed in previous sections. The mode is first decoded, and
+ * then the remaining register decode is a function of the mode.
+ *
+ *
+ * SUA Mode Decode
+ *
+ * For the S_LOAD and FLAT_* shader operations, the SUA mode is decoded from
+ * the COMPUTE_DISPATCH_INITIATOR:DATA_ATC bit and
+ * the SH_MEM_CONFIG:PTR32 bits.
+ *
+ * COMPUTE_DISPATCH_INITIATOR:DATA_ATC    SH_MEM_CONFIG:PTR32        Mode
+ *
+ * 1                                              0                  HSA64
+ *
+ * 1                                              1                  HSA32
+ *
+ * 0                                              X                 GPUVM64
+ *
+ * In general the hardware will ignore the PTR32 bit and treat
+ * as “0” whenever DATA_ATC = “0”, but sw should set PTR32=0
+ * when DATA_ATC=0.
+ *
+ * The DATA_ATC bit is only set for compute dispatches.
+ * All “Draw” dispatches are hardcoded to GPUVM64 mode
+ * for FLAT_* / S_LOAD operations.
+ */
+
+#define MAKE_GPUVM_APP_BASE(gpu_num) \
+	(((uint64_t)(gpu_num) << 61) + 0x1000000000000L)
+
+#define MAKE_GPUVM_APP_LIMIT(base) \
+	(((uint64_t)(base) & \
+		0xFFFFFF0000000000UL) | 0xFFFFFFFFFFL)
+
+#define MAKE_SCRATCH_APP_BASE(gpu_num) \
+	(((uint64_t)(gpu_num) << 61) + 0x100000000L)
+
+#define MAKE_SCRATCH_APP_LIMIT(base) \
+	(((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
+
+#define MAKE_LDS_APP_BASE(gpu_num) \
+	(((uint64_t)(gpu_num) << 61) + 0x0)
+#define MAKE_LDS_APP_LIMIT(base) \
+	(((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
+
+int kfd_init_apertures(struct kfd_process *process)
+{
+	uint8_t id  = 0;
+	struct kfd_dev *dev;
+	struct kfd_process_device *pdd;
+
+	/*Iterating over all devices*/
+	while ((dev = kfd_topology_enum_kfd_devices(id)) != NULL &&
+		id < NUM_OF_SUPPORTED_GPUS) {
+
+		pdd = kfd_create_process_device_data(dev, process);
+		if (pdd == NULL) {
+			pr_err("Failed to create process device data\n");
+			return -1;
+		}
+		/*
+		 * For 64 bit process aperture will be statically reserved in
+		 * the x86_64 non canonical process address space
+		 * amdkfd doesn't currently support apertures for 32 bit process
+		 */
+		if (process->is_32bit_user_mode) {
+			pdd->lds_base = pdd->lds_limit = 0;
+			pdd->gpuvm_base = pdd->gpuvm_limit = 0;
+			pdd->scratch_base = pdd->scratch_limit = 0;
+		} else {
+			/*
+			 * node id couldn't be 0 - the three MSB bits of
+			 * aperture shoudn't be 0
+			 */
+			pdd->lds_base = MAKE_LDS_APP_BASE(id + 1);
+
+			pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
+
+			pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1);
+
+			pdd->gpuvm_limit =
+					MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base);
+
+			pdd->scratch_base = MAKE_SCRATCH_APP_BASE(id + 1);
+
+			pdd->scratch_limit =
+				MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
+		}
+
+		dev_dbg(kfd_device, "node id %u\n", id);
+		dev_dbg(kfd_device, "gpu id %u\n", pdd->dev->id);
+		dev_dbg(kfd_device, "lds_base %llX\n", pdd->lds_base);
+		dev_dbg(kfd_device, "lds_limit %llX\n", pdd->lds_limit);
+		dev_dbg(kfd_device, "gpuvm_base %llX\n", pdd->gpuvm_base);
+		dev_dbg(kfd_device, "gpuvm_limit %llX\n", pdd->gpuvm_limit);
+		dev_dbg(kfd_device, "scratch_base %llX\n", pdd->scratch_base);
+		dev_dbg(kfd_device, "scratch_limit %llX\n", pdd->scratch_limit);
+
+		id++;
+	}
+
+	return 0;
+}
+
+
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
new file mode 100644
index 000000000..c7d298e62
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -0,0 +1,340 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/printk.h>
+#include <linux/sched.h>
+#include "kfd_kernel_queue.h"
+#include "kfd_priv.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_pm4_headers.h"
+#include "kfd_pm4_opcodes.h"
+
+#define PM4_COUNT_ZERO (((1 << 15) - 1) << 16)
+
+static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
+		enum kfd_queue_type type, unsigned int queue_size)
+{
+	struct queue_properties prop;
+	int retval;
+	union PM4_MES_TYPE_3_HEADER nop;
+
+	BUG_ON(!kq || !dev);
+	BUG_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ);
+
+	pr_debug("amdkfd: In func %s initializing queue type %d size %d\n",
+			__func__, KFD_QUEUE_TYPE_HIQ, queue_size);
+
+	nop.opcode = IT_NOP;
+	nop.type = PM4_TYPE_3;
+	nop.u32all |= PM4_COUNT_ZERO;
+
+	kq->dev = dev;
+	kq->nop_packet = nop.u32all;
+	switch (type) {
+	case KFD_QUEUE_TYPE_DIQ:
+	case KFD_QUEUE_TYPE_HIQ:
+		kq->mqd = dev->dqm->ops.get_mqd_manager(dev->dqm,
+						KFD_MQD_TYPE_HIQ);
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	if (kq->mqd == NULL)
+		return false;
+
+	prop.doorbell_ptr = kfd_get_kernel_doorbell(dev, &prop.doorbell_off);
+
+	if (prop.doorbell_ptr == NULL) {
+		pr_err("amdkfd: error init doorbell");
+		goto err_get_kernel_doorbell;
+	}
+
+	retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq);
+	if (retval != 0) {
+		pr_err("amdkfd: error init pq queues size (%d)\n", queue_size);
+		goto err_pq_allocate_vidmem;
+	}
+
+	kq->pq_kernel_addr = kq->pq->cpu_ptr;
+	kq->pq_gpu_addr = kq->pq->gpu_addr;
+
+	retval = kq->ops_asic_specific.initialize(kq, dev, type, queue_size);
+	if (retval == false)
+		goto err_eop_allocate_vidmem;
+
+	retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel),
+					&kq->rptr_mem);
+
+	if (retval != 0)
+		goto err_rptr_allocate_vidmem;
+
+	kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
+	kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;
+
+	retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel),
+					&kq->wptr_mem);
+
+	if (retval != 0)
+		goto err_wptr_allocate_vidmem;
+
+	kq->wptr_kernel = kq->wptr_mem->cpu_ptr;
+	kq->wptr_gpu_addr = kq->wptr_mem->gpu_addr;
+
+	memset(kq->pq_kernel_addr, 0, queue_size);
+	memset(kq->rptr_kernel, 0, sizeof(*kq->rptr_kernel));
+	memset(kq->wptr_kernel, 0, sizeof(*kq->wptr_kernel));
+
+	prop.queue_size = queue_size;
+	prop.is_interop = false;
+	prop.priority = 1;
+	prop.queue_percent = 100;
+	prop.type = type;
+	prop.vmid = 0;
+	prop.queue_address = kq->pq_gpu_addr;
+	prop.read_ptr = (uint32_t *) kq->rptr_gpu_addr;
+	prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr;
+	prop.eop_ring_buffer_address = kq->eop_gpu_addr;
+	prop.eop_ring_buffer_size = PAGE_SIZE;
+
+	if (init_queue(&kq->queue, prop) != 0)
+		goto err_init_queue;
+
+	kq->queue->device = dev;
+	kq->queue->process = kfd_get_process(current);
+
+	retval = kq->mqd->init_mqd(kq->mqd, &kq->queue->mqd,
+					&kq->queue->mqd_mem_obj,
+					&kq->queue->gart_mqd_addr,
+					&kq->queue->properties);
+	if (retval != 0)
+		goto err_init_mqd;
+
+	/* assign HIQ to HQD */
+	if (type == KFD_QUEUE_TYPE_HIQ) {
+		pr_debug("assigning hiq to hqd\n");
+		kq->queue->pipe = KFD_CIK_HIQ_PIPE;
+		kq->queue->queue = KFD_CIK_HIQ_QUEUE;
+		kq->mqd->load_mqd(kq->mqd, kq->queue->mqd, kq->queue->pipe,
+					kq->queue->queue, NULL);
+	} else {
+		/* allocate fence for DIQ */
+
+		retval = kfd_gtt_sa_allocate(dev, sizeof(uint32_t),
+						&kq->fence_mem_obj);
+
+		if (retval != 0)
+			goto err_alloc_fence;
+
+		kq->fence_kernel_address = kq->fence_mem_obj->cpu_ptr;
+		kq->fence_gpu_addr = kq->fence_mem_obj->gpu_addr;
+	}
+
+	print_queue(kq->queue);
+
+	return true;
+err_alloc_fence:
+err_init_mqd:
+	uninit_queue(kq->queue);
+err_init_queue:
+	kfd_gtt_sa_free(dev, kq->wptr_mem);
+err_wptr_allocate_vidmem:
+	kfd_gtt_sa_free(dev, kq->rptr_mem);
+err_rptr_allocate_vidmem:
+	kfd_gtt_sa_free(dev, kq->eop_mem);
+err_eop_allocate_vidmem:
+	kfd_gtt_sa_free(dev, kq->pq);
+err_pq_allocate_vidmem:
+	kfd_release_kernel_doorbell(dev, prop.doorbell_ptr);
+err_get_kernel_doorbell:
+	return false;
+
+}
+
+static void uninitialize(struct kernel_queue *kq)
+{
+	BUG_ON(!kq);
+
+	if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ)
+		kq->mqd->destroy_mqd(kq->mqd,
+					NULL,
+					false,
+					QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
+					kq->queue->pipe,
+					kq->queue->queue);
+	else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ)
+		kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj);
+
+	kq->mqd->uninit_mqd(kq->mqd, kq->queue->mqd, kq->queue->mqd_mem_obj);
+
+	kfd_gtt_sa_free(kq->dev, kq->rptr_mem);
+	kfd_gtt_sa_free(kq->dev, kq->wptr_mem);
+	kq->ops_asic_specific.uninitialize(kq);
+	kfd_gtt_sa_free(kq->dev, kq->pq);
+	kfd_release_kernel_doorbell(kq->dev,
+					kq->queue->properties.doorbell_ptr);
+	uninit_queue(kq->queue);
+}
+
+static int acquire_packet_buffer(struct kernel_queue *kq,
+		size_t packet_size_in_dwords, unsigned int **buffer_ptr)
+{
+	size_t available_size;
+	size_t queue_size_dwords;
+	uint32_t wptr, rptr;
+	unsigned int *queue_address;
+
+	BUG_ON(!kq || !buffer_ptr);
+
+	rptr = *kq->rptr_kernel;
+	wptr = *kq->wptr_kernel;
+	queue_address = (unsigned int *)kq->pq_kernel_addr;
+	queue_size_dwords = kq->queue->properties.queue_size / sizeof(uint32_t);
+
+	pr_debug("amdkfd: In func %s\nrptr: %d\nwptr: %d\nqueue_address 0x%p\n",
+			__func__, rptr, wptr, queue_address);
+
+	available_size = (rptr - 1 - wptr + queue_size_dwords) %
+							queue_size_dwords;
+
+	if (packet_size_in_dwords >= queue_size_dwords ||
+			packet_size_in_dwords >= available_size) {
+		/*
+		 * make sure calling functions know
+		 * acquire_packet_buffer() failed
+		 */
+		*buffer_ptr = NULL;
+		return -ENOMEM;
+	}
+
+	if (wptr + packet_size_in_dwords >= queue_size_dwords) {
+		while (wptr > 0) {
+			queue_address[wptr] = kq->nop_packet;
+			wptr = (wptr + 1) % queue_size_dwords;
+		}
+	}
+
+	*buffer_ptr = &queue_address[wptr];
+	kq->pending_wptr = wptr + packet_size_in_dwords;
+
+	return 0;
+}
+
+static void submit_packet(struct kernel_queue *kq)
+{
+#ifdef DEBUG
+	int i;
+#endif
+
+	BUG_ON(!kq);
+
+#ifdef DEBUG
+	for (i = *kq->wptr_kernel; i < kq->pending_wptr; i++) {
+		pr_debug("0x%2X ", kq->pq_kernel_addr[i]);
+		if (i % 15 == 0)
+			pr_debug("\n");
+	}
+	pr_debug("\n");
+#endif
+
+	*kq->wptr_kernel = kq->pending_wptr;
+	write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
+				kq->pending_wptr);
+}
+
+static void rollback_packet(struct kernel_queue *kq)
+{
+	BUG_ON(!kq);
+	kq->pending_wptr = *kq->queue->properties.write_ptr;
+}
+
+struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
+					enum kfd_queue_type type)
+{
+	struct kernel_queue *kq;
+
+	BUG_ON(!dev);
+
+	kq = kzalloc(sizeof(struct kernel_queue), GFP_KERNEL);
+	if (!kq)
+		return NULL;
+
+	kq->ops.initialize = initialize;
+	kq->ops.uninitialize = uninitialize;
+	kq->ops.acquire_packet_buffer = acquire_packet_buffer;
+	kq->ops.submit_packet = submit_packet;
+	kq->ops.rollback_packet = rollback_packet;
+
+	switch (dev->device_info->asic_family) {
+	case CHIP_CARRIZO:
+		kernel_queue_init_vi(&kq->ops_asic_specific);
+		break;
+
+	case CHIP_KAVERI:
+		kernel_queue_init_cik(&kq->ops_asic_specific);
+		break;
+	}
+
+	if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE) == false) {
+		pr_err("amdkfd: failed to init kernel queue\n");
+		kfree(kq);
+		return NULL;
+	}
+	return kq;
+}
+
+void kernel_queue_uninit(struct kernel_queue *kq)
+{
+	BUG_ON(!kq);
+
+	kq->ops.uninitialize(kq);
+	kfree(kq);
+}
+
+static __attribute__((unused)) void test_kq(struct kfd_dev *dev)
+{
+	struct kernel_queue *kq;
+	uint32_t *buffer, i;
+	int retval;
+
+	BUG_ON(!dev);
+
+	pr_err("amdkfd: starting kernel queue test\n");
+
+	kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ);
+	BUG_ON(!kq);
+
+	retval = kq->ops.acquire_packet_buffer(kq, 5, &buffer);
+	BUG_ON(retval != 0);
+	for (i = 0; i < 5; i++)
+		buffer[i] = kq->nop_packet;
+	kq->ops.submit_packet(kq);
+
+	pr_err("amdkfd: ending kernel queue test\n");
+}
+
+
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
new file mode 100644
index 000000000..594053136
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef KFD_KERNEL_QUEUE_H_
+#define KFD_KERNEL_QUEUE_H_
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include "kfd_priv.h"
+
+/**
+ * struct kernel_queue_ops
+ *
+ * @initialize: Initialize a kernel queue, including allocations of GART memory
+ * needed for the queue.
+ *
+ * @uninitialize: Uninitialize a kernel queue and free all its memory usages.
+ *
+ * @acquire_packet_buffer: Returns a pointer to the location in the kernel
+ * queue ring buffer where the calling function can write its packet. It is
+ * Guaranteed that there is enough space for that packet. It also updates the
+ * pending write pointer to that location so subsequent calls to
+ * acquire_packet_buffer will get a correct write pointer
+ *
+ * @submit_packet: Update the write pointer and doorbell of a kernel queue.
+ *
+ * @sync_with_hw: Wait until the write pointer and the read pointer of a kernel
+ * queue are equal, which means the CP has read all the submitted packets.
+ *
+ * @rollback_packet: This routine is called if we failed to build an acquired
+ * packet for some reason. It just overwrites the pending wptr with the current
+ * one
+ *
+ */
+struct kernel_queue_ops {
+	bool	(*initialize)(struct kernel_queue *kq, struct kfd_dev *dev,
+			enum kfd_queue_type type, unsigned int queue_size);
+	void	(*uninitialize)(struct kernel_queue *kq);
+	int	(*acquire_packet_buffer)(struct kernel_queue *kq,
+					size_t packet_size_in_dwords,
+					unsigned int **buffer_ptr);
+
+	void	(*submit_packet)(struct kernel_queue *kq);
+	void	(*rollback_packet)(struct kernel_queue *kq);
+};
+
+struct kernel_queue {
+	struct kernel_queue_ops ops;
+	struct kernel_queue_ops ops_asic_specific;
+
+	/* data */
+	struct kfd_dev		*dev;
+	struct mqd_manager	*mqd;
+	struct queue		*queue;
+	uint32_t		pending_wptr;
+	unsigned int		nop_packet;
+
+	struct kfd_mem_obj	*rptr_mem;
+	uint32_t		*rptr_kernel;
+	uint64_t		rptr_gpu_addr;
+	struct kfd_mem_obj	*wptr_mem;
+	uint32_t		*wptr_kernel;
+	uint64_t		wptr_gpu_addr;
+	struct kfd_mem_obj	*pq;
+	uint64_t		pq_gpu_addr;
+	uint32_t		*pq_kernel_addr;
+	struct kfd_mem_obj	*eop_mem;
+	uint64_t		eop_gpu_addr;
+	uint32_t		*eop_kernel_addr;
+
+	struct kfd_mem_obj	*fence_mem_obj;
+	uint64_t		fence_gpu_addr;
+	void			*fence_kernel_address;
+
+	struct list_head	list;
+};
+
+void kernel_queue_init_cik(struct kernel_queue_ops *ops);
+void kernel_queue_init_vi(struct kernel_queue_ops *ops);
+
+#endif /* KFD_KERNEL_QUEUE_H_ */
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
new file mode 100644
index 000000000..a90eb440b
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_kernel_queue.h"
+
+static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
+			enum kfd_queue_type type, unsigned int queue_size);
+static void uninitialize_cik(struct kernel_queue *kq);
+
+void kernel_queue_init_cik(struct kernel_queue_ops *ops)
+{
+	ops->initialize = initialize_cik;
+	ops->uninitialize = uninitialize_cik;
+}
+
+static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
+			enum kfd_queue_type type, unsigned int queue_size)
+{
+	return true;
+}
+
+static void uninitialize_cik(struct kernel_queue *kq)
+{
+}
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
new file mode 100644
index 000000000..f1d48281e
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_kernel_queue.h"
+
+static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
+			enum kfd_queue_type type, unsigned int queue_size);
+static void uninitialize_vi(struct kernel_queue *kq);
+
+void kernel_queue_init_vi(struct kernel_queue_ops *ops)
+{
+	ops->initialize = initialize_vi;
+	ops->uninitialize = uninitialize_vi;
+}
+
+static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
+			enum kfd_queue_type type, unsigned int queue_size)
+{
+	int retval;
+
+	retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
+	if (retval != 0)
+		return false;
+
+	kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
+	kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
+
+	memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
+
+	return true;
+}
+
+static void uninitialize_vi(struct kernel_queue *kq)
+{
+	kfd_gtt_sa_free(kq->dev, kq->eop_mem);
+}
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_module.c
new file mode 100644
index 000000000..4e0a68f13
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/moduleparam.h>
+#include <linux/device.h>
+#include "kfd_priv.h"
+
+#define KFD_DRIVER_AUTHOR	"AMD Inc. and others"
+
+#define KFD_DRIVER_DESC		"Standalone HSA driver for AMD's GPUs"
+#define KFD_DRIVER_DATE		"20150122"
+#define KFD_DRIVER_MAJOR	0
+#define KFD_DRIVER_MINOR	7
+#define KFD_DRIVER_PATCHLEVEL	1
+
+static const struct kgd2kfd_calls kgd2kfd = {
+	.exit		= kgd2kfd_exit,
+	.probe		= kgd2kfd_probe,
+	.device_init	= kgd2kfd_device_init,
+	.device_exit	= kgd2kfd_device_exit,
+	.interrupt	= kgd2kfd_interrupt,
+	.suspend	= kgd2kfd_suspend,
+	.resume		= kgd2kfd_resume,
+};
+
+int sched_policy = KFD_SCHED_POLICY_HWS;
+module_param(sched_policy, int, 0444);
+MODULE_PARM_DESC(sched_policy,
+	"Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
+
+int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
+module_param(max_num_of_queues_per_device, int, 0444);
+MODULE_PARM_DESC(max_num_of_queues_per_device,
+	"Maximum number of supported queues per device (1 = Minimum, 4096 = default)");
+
+bool kgd2kfd_init(unsigned interface_version, const struct kgd2kfd_calls **g2f)
+{
+	/*
+	 * Only one interface version is supported,
+	 * no kfd/kgd version skew allowed.
+	 */
+	if (interface_version != KFD_INTERFACE_VERSION)
+		return false;
+
+	*g2f = &kgd2kfd;
+
+	return true;
+}
+EXPORT_SYMBOL(kgd2kfd_init);
+
+void kgd2kfd_exit(void)
+{
+}
+
+static int __init kfd_module_init(void)
+{
+	int err;
+
+	/* Verify module parameters */
+	if ((sched_policy < KFD_SCHED_POLICY_HWS) ||
+		(sched_policy > KFD_SCHED_POLICY_NO_HWS)) {
+		pr_err("kfd: sched_policy has invalid value\n");
+		return -1;
+	}
+
+	/* Verify module parameters */
+	if ((max_num_of_queues_per_device < 1) ||
+		(max_num_of_queues_per_device >
+			KFD_MAX_NUM_OF_QUEUES_PER_DEVICE)) {
+		pr_err("kfd: max_num_of_queues_per_device must be between 1 to KFD_MAX_NUM_OF_QUEUES_PER_DEVICE\n");
+		return -1;
+	}
+
+	err = kfd_pasid_init();
+	if (err < 0)
+		goto err_pasid;
+
+	err = kfd_chardev_init();
+	if (err < 0)
+		goto err_ioctl;
+
+	err = kfd_topology_init();
+	if (err < 0)
+		goto err_topology;
+
+	kfd_process_create_wq();
+
+	dev_info(kfd_device, "Initialized module\n");
+
+	return 0;
+
+err_topology:
+	kfd_chardev_exit();
+err_ioctl:
+	kfd_pasid_exit();
+err_pasid:
+	return err;
+}
+
+static void __exit kfd_module_exit(void)
+{
+	kfd_process_destroy_wq();
+	kfd_topology_shutdown();
+	kfd_chardev_exit();
+	kfd_pasid_exit();
+	dev_info(kfd_device, "Removed module\n");
+}
+
+module_init(kfd_module_init);
+module_exit(kfd_module_exit);
+
+MODULE_AUTHOR(KFD_DRIVER_AUTHOR);
+MODULE_DESCRIPTION(KFD_DRIVER_DESC);
+MODULE_LICENSE("GPL and additional rights");
+MODULE_VERSION(__stringify(KFD_DRIVER_MAJOR) "."
+	       __stringify(KFD_DRIVER_MINOR) "."
+	       __stringify(KFD_DRIVER_PATCHLEVEL));
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
new file mode 100644
index 000000000..b1ef1368c
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_priv.h"
+
+struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
+					struct kfd_dev *dev)
+{
+	switch (dev->device_info->asic_family) {
+	case CHIP_KAVERI:
+		return mqd_manager_init_cik(type, dev);
+	case CHIP_CARRIZO:
+		return mqd_manager_init_vi(type, dev);
+	}
+
+	return NULL;
+}
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
new file mode 100644
index 000000000..213a71e0b
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef KFD_MQD_MANAGER_H_
+#define KFD_MQD_MANAGER_H_
+
+#include "kfd_priv.h"
+
+/**
+ * struct mqd_manager
+ *
+ * @init_mqd: Allocates the mqd buffer on local gpu memory and initialize it.
+ *
+ * @load_mqd: Loads the mqd to a concrete hqd slot. Used only for no cp
+ * scheduling mode.
+ *
+ * @update_mqd: Handles a update call for the MQD
+ *
+ * @destroy_mqd: Destroys the HQD slot and by that preempt the relevant queue.
+ * Used only for no cp scheduling.
+ *
+ * @uninit_mqd: Releases the mqd buffer from local gpu memory.
+ *
+ * @is_occupied: Checks if the relevant HQD slot is occupied.
+ *
+ * @mqd_mutex: Mqd manager mutex.
+ *
+ * @dev: The kfd device structure coupled with this module.
+ *
+ * MQD stands for Memory Queue Descriptor which represents the current queue
+ * state in the memory and initiate the HQD (Hardware Queue Descriptor) state.
+ * This structure is actually a base class for the different types of MQDs
+ * structures for the variant ASICs that should be supported in the future.
+ * This base class is also contains all the MQD specific operations.
+ * Another important thing to mention is that each queue has a MQD that keeps
+ * his state (or context) after each preemption or reassignment.
+ * Basically there are a instances of the mqd manager class per MQD type per
+ * ASIC. Currently the kfd driver supports only Kaveri so there are instances
+ * per KFD_MQD_TYPE for each device.
+ *
+ */
+
+struct mqd_manager {
+	int	(*init_mqd)(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q);
+
+	int	(*load_mqd)(struct mqd_manager *mm, void *mqd,
+				uint32_t pipe_id, uint32_t queue_id,
+				uint32_t __user *wptr);
+
+	int	(*update_mqd)(struct mqd_manager *mm, void *mqd,
+				struct queue_properties *q);
+
+	int	(*destroy_mqd)(struct mqd_manager *mm, void *mqd,
+				enum kfd_preempt_type type,
+				unsigned int timeout, uint32_t pipe_id,
+				uint32_t queue_id);
+
+	void	(*uninit_mqd)(struct mqd_manager *mm, void *mqd,
+				struct kfd_mem_obj *mqd_mem_obj);
+
+	bool	(*is_occupied)(struct mqd_manager *mm, void *mqd,
+				uint64_t queue_address,	uint32_t pipe_id,
+				uint32_t queue_id);
+
+	struct mutex	mqd_mutex;
+	struct kfd_dev	*dev;
+};
+
+#endif /* KFD_MQD_MANAGER_H_ */
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
new file mode 100644
index 000000000..434979428
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -0,0 +1,451 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+#include "cik_regs.h"
+#include "cik_structs.h"
+
+static inline struct cik_mqd *get_mqd(void *mqd)
+{
+	return (struct cik_mqd *)mqd;
+}
+
+static int init_mqd(struct mqd_manager *mm, void **mqd,
+		struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+		struct queue_properties *q)
+{
+	uint64_t addr;
+	struct cik_mqd *m;
+	int retval;
+
+	BUG_ON(!mm || !q || !mqd);
+
+	pr_debug("kfd: In func %s\n", __func__);
+
+	retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd),
+					mqd_mem_obj);
+
+	if (retval != 0)
+		return -ENOMEM;
+
+	m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr;
+	addr = (*mqd_mem_obj)->gpu_addr;
+
+	memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256));
+
+	m->header = 0xC0310800;
+	m->compute_pipelinestat_enable = 1;
+	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+
+	/*
+	 * Make sure to use the last queue state saved on mqd when the cp
+	 * reassigns the queue, so when queue is switched on/off (e.g over
+	 * subscription or quantum timeout) the context will be consistent
+	 */
+	m->cp_hqd_persistent_state =
+				DEFAULT_CP_HQD_PERSISTENT_STATE | PRELOAD_REQ;
+
+	m->cp_mqd_control             = MQD_CONTROL_PRIV_STATE_EN;
+	m->cp_mqd_base_addr_lo        = lower_32_bits(addr);
+	m->cp_mqd_base_addr_hi        = upper_32_bits(addr);
+
+	m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE | IB_ATC_EN;
+	/* Although WinKFD writes this, I suspect it should not be necessary */
+	m->cp_hqd_ib_control = IB_ATC_EN | DEFAULT_MIN_IB_AVAIL_SIZE;
+
+	m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS |
+				QUANTUM_DURATION(10);
+
+	/*
+	 * Pipe Priority
+	 * Identifies the pipe relative priority when this queue is connected
+	 * to the pipeline. The pipe priority is against the GFX pipe and HP3D.
+	 * In KFD we are using a fixed pipe priority set to CS_MEDIUM.
+	 * 0 = CS_LOW (typically below GFX)
+	 * 1 = CS_MEDIUM (typically between HP3D and GFX
+	 * 2 = CS_HIGH (typically above HP3D)
+	 */
+	m->cp_hqd_pipe_priority = 1;
+	m->cp_hqd_queue_priority = 15;
+
+	if (q->format == KFD_QUEUE_FORMAT_AQL)
+		m->cp_hqd_iq_rptr = AQL_ENABLE;
+
+	*mqd = m;
+	if (gart_addr != NULL)
+		*gart_addr = addr;
+	retval = mm->update_mqd(mm, m, q);
+
+	return retval;
+}
+
+static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	int retval;
+	struct cik_sdma_rlc_registers *m;
+
+	BUG_ON(!mm || !mqd || !mqd_mem_obj);
+
+	retval = kfd_gtt_sa_allocate(mm->dev,
+					sizeof(struct cik_sdma_rlc_registers),
+					mqd_mem_obj);
+
+	if (retval != 0)
+		return -ENOMEM;
+
+	m = (struct cik_sdma_rlc_registers *) (*mqd_mem_obj)->cpu_ptr;
+
+	memset(m, 0, sizeof(struct cik_sdma_rlc_registers));
+
+	*mqd = m;
+	if (gart_addr != NULL)
+		*gart_addr = (*mqd_mem_obj)->gpu_addr;
+
+	retval = mm->update_mqd(mm, m, q);
+
+	return retval;
+}
+
+static void uninit_mqd(struct mqd_manager *mm, void *mqd,
+			struct kfd_mem_obj *mqd_mem_obj)
+{
+	BUG_ON(!mm || !mqd);
+	kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+}
+
+static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
+				struct kfd_mem_obj *mqd_mem_obj)
+{
+	BUG_ON(!mm || !mqd);
+	kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+}
+
+static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
+			uint32_t queue_id, uint32_t __user *wptr)
+{
+	return mm->dev->kfd2kgd->hqd_load
+		(mm->dev->kgd, mqd, pipe_id, queue_id, wptr);
+}
+
+static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+			uint32_t pipe_id, uint32_t queue_id,
+			uint32_t __user *wptr)
+{
+	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd);
+}
+
+static int update_mqd(struct mqd_manager *mm, void *mqd,
+			struct queue_properties *q)
+{
+	struct cik_mqd *m;
+
+	BUG_ON(!mm || !q || !mqd);
+
+	pr_debug("kfd: In func %s\n", __func__);
+
+	m = get_mqd(mqd);
+	m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE |
+				DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN;
+
+	/*
+	 * Calculating queue size which is log base 2 of actual queue size -1
+	 * dwords and another -1 for ffs
+	 */
+	m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int))
+								- 1 - 1;
+	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
+	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
+	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+	m->cp_hqd_pq_doorbell_control = DOORBELL_EN |
+					DOORBELL_OFFSET(q->doorbell_off);
+
+	m->cp_hqd_vmid = q->vmid;
+
+	if (q->format == KFD_QUEUE_FORMAT_AQL) {
+		m->cp_hqd_pq_control |= NO_UPDATE_RPTR;
+	}
+
+	m->cp_hqd_active = 0;
+	q->is_active = false;
+	if (q->queue_size > 0 &&
+			q->queue_address != 0 &&
+			q->queue_percent > 0) {
+		m->cp_hqd_active = 1;
+		q->is_active = true;
+	}
+
+	return 0;
+}
+
+static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+				struct queue_properties *q)
+{
+	struct cik_sdma_rlc_registers *m;
+
+	BUG_ON(!mm || !mqd || !q);
+
+	m = get_sdma_mqd(mqd);
+	m->sdma_rlc_rb_cntl =
+		SDMA_RB_SIZE((ffs(q->queue_size / sizeof(unsigned int)))) |
+		SDMA_RB_VMID(q->vmid) |
+		SDMA_RPTR_WRITEBACK_ENABLE |
+		SDMA_RPTR_WRITEBACK_TIMER(6);
+
+	m->sdma_rlc_rb_base = lower_32_bits(q->queue_address >> 8);
+	m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8);
+	m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+	m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+	m->sdma_rlc_doorbell = SDMA_OFFSET(q->doorbell_off) | SDMA_DB_ENABLE;
+	m->sdma_rlc_virtual_addr = q->sdma_vm_addr;
+
+	m->sdma_engine_id = q->sdma_engine_id;
+	m->sdma_queue_id = q->sdma_queue_id;
+
+	q->is_active = false;
+	if (q->queue_size > 0 &&
+			q->queue_address != 0 &&
+			q->queue_percent > 0) {
+		m->sdma_rlc_rb_cntl |= SDMA_RB_ENABLE;
+		q->is_active = true;
+	}
+
+	return 0;
+}
+
+static int destroy_mqd(struct mqd_manager *mm, void *mqd,
+			enum kfd_preempt_type type,
+			unsigned int timeout, uint32_t pipe_id,
+			uint32_t queue_id)
+{
+	return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, type, timeout,
+					pipe_id, queue_id);
+}
+
+/*
+ * preempt type here is ignored because there is only one way
+ * to preempt sdma queue
+ */
+static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+				enum kfd_preempt_type type,
+				unsigned int timeout, uint32_t pipe_id,
+				uint32_t queue_id)
+{
+	return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+}
+
+static bool is_occupied(struct mqd_manager *mm, void *mqd,
+			uint64_t queue_address,	uint32_t pipe_id,
+			uint32_t queue_id)
+{
+
+	return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->kgd, queue_address,
+					pipe_id, queue_id);
+
+}
+
+static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+			uint64_t queue_address,	uint32_t pipe_id,
+			uint32_t queue_id)
+{
+	return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+}
+
+/*
+ * HIQ MQD Implementation, concrete implementation for HIQ MQD implementation.
+ * The HIQ queue in Kaveri is using the same MQD structure as all the user mode
+ * queues but with different initial values.
+ */
+
+static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+		struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+		struct queue_properties *q)
+{
+	uint64_t addr;
+	struct cik_mqd *m;
+	int retval;
+
+	BUG_ON(!mm || !q || !mqd || !mqd_mem_obj);
+
+	pr_debug("kfd: In func %s\n", __func__);
+
+	retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd),
+					mqd_mem_obj);
+
+	if (retval != 0)
+		return -ENOMEM;
+
+	m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr;
+	addr = (*mqd_mem_obj)->gpu_addr;
+
+	memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256));
+
+	m->header = 0xC0310800;
+	m->compute_pipelinestat_enable = 1;
+	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+
+	m->cp_hqd_persistent_state = DEFAULT_CP_HQD_PERSISTENT_STATE |
+					PRELOAD_REQ;
+	m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS |
+				QUANTUM_DURATION(10);
+
+	m->cp_mqd_control             = MQD_CONTROL_PRIV_STATE_EN;
+	m->cp_mqd_base_addr_lo        = lower_32_bits(addr);
+	m->cp_mqd_base_addr_hi        = upper_32_bits(addr);
+
+	m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE;
+
+	/*
+	 * Pipe Priority
+	 * Identifies the pipe relative priority when this queue is connected
+	 * to the pipeline. The pipe priority is against the GFX pipe and HP3D.
+	 * In KFD we are using a fixed pipe priority set to CS_MEDIUM.
+	 * 0 = CS_LOW (typically below GFX)
+	 * 1 = CS_MEDIUM (typically between HP3D and GFX
+	 * 2 = CS_HIGH (typically above HP3D)
+	 */
+	m->cp_hqd_pipe_priority = 1;
+	m->cp_hqd_queue_priority = 15;
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = addr;
+	retval = mm->update_mqd(mm, m, q);
+
+	return retval;
+}
+
+static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
+				struct queue_properties *q)
+{
+	struct cik_mqd *m;
+
+	BUG_ON(!mm || !q || !mqd);
+
+	pr_debug("kfd: In func %s\n", __func__);
+
+	m = get_mqd(mqd);
+	m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE |
+				DEFAULT_MIN_AVAIL_SIZE |
+				PRIV_STATE |
+				KMD_QUEUE;
+
+	/*
+	 * Calculating queue size which is log base 2 of actual queue
+	 * size -1 dwords
+	 */
+	m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int))
+								- 1 - 1;
+	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
+	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
+	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+	m->cp_hqd_pq_doorbell_control = DOORBELL_EN |
+					DOORBELL_OFFSET(q->doorbell_off);
+
+	m->cp_hqd_vmid = q->vmid;
+
+	m->cp_hqd_active = 0;
+	q->is_active = false;
+	if (q->queue_size > 0 &&
+			q->queue_address != 0 &&
+			q->queue_percent > 0) {
+		m->cp_hqd_active = 1;
+		q->is_active = true;
+	}
+
+	return 0;
+}
+
+struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
+{
+	struct cik_sdma_rlc_registers *m;
+
+	BUG_ON(!mqd);
+
+	m = (struct cik_sdma_rlc_registers *)mqd;
+
+	return m;
+}
+
+struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
+		struct kfd_dev *dev)
+{
+	struct mqd_manager *mqd;
+
+	BUG_ON(!dev);
+	BUG_ON(type >= KFD_MQD_TYPE_MAX);
+
+	pr_debug("kfd: In func %s\n", __func__);
+
+	mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL);
+	if (!mqd)
+		return NULL;
+
+	mqd->dev = dev;
+
+	switch (type) {
+	case KFD_MQD_TYPE_CP:
+	case KFD_MQD_TYPE_COMPUTE:
+		mqd->init_mqd = init_mqd;
+		mqd->uninit_mqd = uninit_mqd;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd;
+		mqd->destroy_mqd = destroy_mqd;
+		mqd->is_occupied = is_occupied;
+		break;
+	case KFD_MQD_TYPE_HIQ:
+		mqd->init_mqd = init_mqd_hiq;
+		mqd->uninit_mqd = uninit_mqd;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd_hiq;
+		mqd->destroy_mqd = destroy_mqd;
+		mqd->is_occupied = is_occupied;
+		break;
+	case KFD_MQD_TYPE_SDMA:
+		mqd->init_mqd = init_mqd_sdma;
+		mqd->uninit_mqd = uninit_mqd_sdma;
+		mqd->load_mqd = load_mqd_sdma;
+		mqd->update_mqd = update_mqd_sdma;
+		mqd->destroy_mqd = destroy_mqd_sdma;
+		mqd->is_occupied = is_occupied_sdma;
+		break;
+	default:
+		kfree(mqd);
+		return NULL;
+	}
+
+	return mqd;
+}
+
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
new file mode 100644
index 000000000..b3a7e3ba1
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/printk.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+
+struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
+					struct kfd_dev *dev)
+{
+	pr_warn("amdkfd: VI MQD is not currently supported\n");
+	return NULL;
+}
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
new file mode 100644
index 000000000..e2533d875
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -0,0 +1,557 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include "kfd_device_queue_manager.h"
+#include "kfd_kernel_queue.h"
+#include "kfd_priv.h"
+#include "kfd_pm4_headers.h"
+#include "kfd_pm4_opcodes.h"
+
+static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
+				unsigned int buffer_size_bytes)
+{
+	unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);
+
+	BUG_ON((temp * sizeof(uint32_t)) > buffer_size_bytes);
+	*wptr = temp;
+}
+
+static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size)
+{
+	union PM4_MES_TYPE_3_HEADER header;
+
+	header.u32all = 0;
+	header.opcode = opcode;
+	header.count = packet_size/sizeof(uint32_t) - 2;
+	header.type = PM4_TYPE_3;
+
+	return header.u32all;
+}
+
+static void pm_calc_rlib_size(struct packet_manager *pm,
+				unsigned int *rlib_size,
+				bool *over_subscription)
+{
+	unsigned int process_count, queue_count;
+
+	BUG_ON(!pm || !rlib_size || !over_subscription);
+
+	process_count = pm->dqm->processes_count;
+	queue_count = pm->dqm->queue_count;
+
+	/* check if there is over subscription*/
+	*over_subscription = false;
+	if ((process_count > 1) ||
+		queue_count > PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE) {
+		*over_subscription = true;
+		pr_debug("kfd: over subscribed runlist\n");
+	}
+
+	/* calculate run list ib allocation size */
+	*rlib_size = process_count * sizeof(struct pm4_map_process) +
+		     queue_count * sizeof(struct pm4_map_queues);
+
+	/*
+	 * Increase the allocation size in case we need a chained run list
+	 * when over subscription
+	 */
+	if (*over_subscription)
+		*rlib_size += sizeof(struct pm4_runlist);
+
+	pr_debug("kfd: runlist ib size %d\n", *rlib_size);
+}
+
+static int pm_allocate_runlist_ib(struct packet_manager *pm,
+				unsigned int **rl_buffer,
+				uint64_t *rl_gpu_buffer,
+				unsigned int *rl_buffer_size,
+				bool *is_over_subscription)
+{
+	int retval;
+
+	BUG_ON(!pm);
+	BUG_ON(pm->allocated == true);
+	BUG_ON(is_over_subscription == NULL);
+
+	pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);
+
+	retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
+					&pm->ib_buffer_obj);
+
+	if (retval != 0) {
+		pr_err("kfd: failed to allocate runlist IB\n");
+		return retval;
+	}
+
+	*(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr;
+	*rl_gpu_buffer = pm->ib_buffer_obj->gpu_addr;
+
+	memset(*rl_buffer, 0, *rl_buffer_size);
+	pm->allocated = true;
+	return retval;
+}
+
+static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
+			uint64_t ib, size_t ib_size_in_dwords, bool chain)
+{
+	struct pm4_runlist *packet;
+
+	BUG_ON(!pm || !buffer || !ib);
+
+	packet = (struct pm4_runlist *)buffer;
+
+	memset(buffer, 0, sizeof(struct pm4_runlist));
+	packet->header.u32all = build_pm4_header(IT_RUN_LIST,
+						sizeof(struct pm4_runlist));
+
+	packet->bitfields4.ib_size = ib_size_in_dwords;
+	packet->bitfields4.chain = chain ? 1 : 0;
+	packet->bitfields4.offload_polling = 0;
+	packet->bitfields4.valid = 1;
+	packet->ordinal2 = lower_32_bits(ib);
+	packet->bitfields3.ib_base_hi = upper_32_bits(ib);
+
+	return 0;
+}
+
+static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
+				struct qcm_process_device *qpd)
+{
+	struct pm4_map_process *packet;
+	struct queue *cur;
+	uint32_t num_queues;
+
+	BUG_ON(!pm || !buffer || !qpd);
+
+	packet = (struct pm4_map_process *)buffer;
+
+	pr_debug("kfd: In func %s\n", __func__);
+
+	memset(buffer, 0, sizeof(struct pm4_map_process));
+
+	packet->header.u32all = build_pm4_header(IT_MAP_PROCESS,
+					sizeof(struct pm4_map_process));
+	packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
+	packet->bitfields2.process_quantum = 1;
+	packet->bitfields2.pasid = qpd->pqm->process->pasid;
+	packet->bitfields3.page_table_base = qpd->page_table_base;
+	packet->bitfields10.gds_size = qpd->gds_size;
+	packet->bitfields10.num_gws = qpd->num_gws;
+	packet->bitfields10.num_oac = qpd->num_oac;
+	num_queues = 0;
+	list_for_each_entry(cur, &qpd->queues_list, list)
+		num_queues++;
+	packet->bitfields10.num_queues = num_queues;
+
+	packet->sh_mem_config = qpd->sh_mem_config;
+	packet->sh_mem_bases = qpd->sh_mem_bases;
+	packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base;
+	packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit;
+
+	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
+	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
+
+	return 0;
+}
+
+static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
+				struct queue *q)
+{
+	struct pm4_map_queues *packet;
+
+	BUG_ON(!pm || !buffer || !q);
+
+	pr_debug("kfd: In func %s\n", __func__);
+
+	packet = (struct pm4_map_queues *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_map_queues));
+
+	packet->header.u32all = build_pm4_header(IT_MAP_QUEUES,
+						sizeof(struct pm4_map_queues));
+	packet->bitfields2.alloc_format =
+				alloc_format__mes_map_queues__one_per_pipe;
+	packet->bitfields2.num_queues = 1;
+	packet->bitfields2.queue_sel =
+		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots;
+
+	packet->bitfields2.vidmem = (q->properties.is_interop) ?
+			vidmem__mes_map_queues__uses_video_memory :
+			vidmem__mes_map_queues__uses_no_video_memory;
+
+	switch (q->properties.type) {
+	case KFD_QUEUE_TYPE_COMPUTE:
+	case KFD_QUEUE_TYPE_DIQ:
+		packet->bitfields2.engine_sel =
+				engine_sel__mes_map_queues__compute;
+		break;
+	case KFD_QUEUE_TYPE_SDMA:
+		packet->bitfields2.engine_sel =
+				engine_sel__mes_map_queues__sdma0;
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	packet->mes_map_queues_ordinals[0].bitfields3.doorbell_offset =
+			q->properties.doorbell_off;
+
+	packet->mes_map_queues_ordinals[0].mqd_addr_lo =
+			lower_32_bits(q->gart_mqd_addr);
+
+	packet->mes_map_queues_ordinals[0].mqd_addr_hi =
+			upper_32_bits(q->gart_mqd_addr);
+
+	packet->mes_map_queues_ordinals[0].wptr_addr_lo =
+			lower_32_bits((uint64_t)q->properties.write_ptr);
+
+	packet->mes_map_queues_ordinals[0].wptr_addr_hi =
+			upper_32_bits((uint64_t)q->properties.write_ptr);
+
+	return 0;
+}
+
+static int pm_create_runlist_ib(struct packet_manager *pm,
+				struct list_head *queues,
+				uint64_t *rl_gpu_addr,
+				size_t *rl_size_bytes)
+{
+	unsigned int alloc_size_bytes;
+	unsigned int *rl_buffer, rl_wptr, i;
+	int retval, proccesses_mapped;
+	struct device_process_node *cur;
+	struct qcm_process_device *qpd;
+	struct queue *q;
+	struct kernel_queue *kq;
+	bool is_over_subscription;
+
+	BUG_ON(!pm || !queues || !rl_size_bytes || !rl_gpu_addr);
+
+	rl_wptr = retval = proccesses_mapped = 0;
+
+	retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
+				&alloc_size_bytes, &is_over_subscription);
+	if (retval != 0)
+		return retval;
+
+	*rl_size_bytes = alloc_size_bytes;
+
+	pr_debug("kfd: In func %s\n", __func__);
+	pr_debug("kfd: building runlist ib process count: %d queues count %d\n",
+		pm->dqm->processes_count, pm->dqm->queue_count);
+
+	/* build the run list ib packet */
+	list_for_each_entry(cur, queues, list) {
+		qpd = cur->qpd;
+		/* build map process packet */
+		if (proccesses_mapped >= pm->dqm->processes_count) {
+			pr_debug("kfd: not enough space left in runlist IB\n");
+			pm_release_ib(pm);
+			return -ENOMEM;
+		}
+		retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd);
+		if (retval != 0)
+			return retval;
+		proccesses_mapped++;
+		inc_wptr(&rl_wptr, sizeof(struct pm4_map_process),
+				alloc_size_bytes);
+
+		list_for_each_entry(kq, &qpd->priv_queue_list, list) {
+			if (kq->queue->properties.is_active != true)
+				continue;
+			retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr],
+							kq->queue);
+			if (retval != 0)
+				return retval;
+			inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues),
+					alloc_size_bytes);
+		}
+
+		list_for_each_entry(q, &qpd->queues_list, list) {
+			if (q->properties.is_active != true)
+				continue;
+			retval = pm_create_map_queue(pm,
+						&rl_buffer[rl_wptr], q);
+			if (retval != 0)
+				return retval;
+			inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues),
+					alloc_size_bytes);
+		}
+	}
+
+	pr_debug("kfd: finished map process and queues to runlist\n");
+
+	if (is_over_subscription)
+		pm_create_runlist(pm, &rl_buffer[rl_wptr], *rl_gpu_addr,
+				alloc_size_bytes / sizeof(uint32_t), true);
+
+	for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
+		pr_debug("0x%2X ", rl_buffer[i]);
+	pr_debug("\n");
+
+	return 0;
+}
+
+int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
+{
+	BUG_ON(!dqm);
+
+	pm->dqm = dqm;
+	mutex_init(&pm->lock);
+	pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ);
+	if (pm->priv_queue == NULL) {
+		mutex_destroy(&pm->lock);
+		return -ENOMEM;
+	}
+	pm->allocated = false;
+
+	return 0;
+}
+
+void pm_uninit(struct packet_manager *pm)
+{
+	BUG_ON(!pm);
+
+	mutex_destroy(&pm->lock);
+	kernel_queue_uninit(pm->priv_queue);
+}
+
+int pm_send_set_resources(struct packet_manager *pm,
+				struct scheduling_resources *res)
+{
+	struct pm4_set_resources *packet;
+
+	BUG_ON(!pm || !res);
+
+	pr_debug("kfd: In func %s\n", __func__);
+
+	mutex_lock(&pm->lock);
+	pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
+					sizeof(*packet) / sizeof(uint32_t),
+			(unsigned int **)&packet);
+	if (packet == NULL) {
+		mutex_unlock(&pm->lock);
+		pr_err("kfd: failed to allocate buffer on kernel queue\n");
+		return -ENOMEM;
+	}
+
+	memset(packet, 0, sizeof(struct pm4_set_resources));
+	packet->header.u32all = build_pm4_header(IT_SET_RESOURCES,
+					sizeof(struct pm4_set_resources));
+
+	packet->bitfields2.queue_type =
+			queue_type__mes_set_resources__hsa_interface_queue_hiq;
+	packet->bitfields2.vmid_mask = res->vmid_mask;
+	packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY;
+	packet->bitfields7.oac_mask = res->oac_mask;
+	packet->bitfields8.gds_heap_base = res->gds_heap_base;
+	packet->bitfields8.gds_heap_size = res->gds_heap_size;
+
+	packet->gws_mask_lo = lower_32_bits(res->gws_mask);
+	packet->gws_mask_hi = upper_32_bits(res->gws_mask);
+
+	packet->queue_mask_lo = lower_32_bits(res->queue_mask);
+	packet->queue_mask_hi = upper_32_bits(res->queue_mask);
+
+	pm->priv_queue->ops.submit_packet(pm->priv_queue);
+
+	mutex_unlock(&pm->lock);
+
+	return 0;
+}
+
+int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
+{
+	uint64_t rl_gpu_ib_addr;
+	uint32_t *rl_buffer;
+	size_t rl_ib_size, packet_size_dwords;
+	int retval;
+
+	BUG_ON(!pm || !dqm_queues);
+
+	retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
+					&rl_ib_size);
+	if (retval != 0)
+		goto fail_create_runlist_ib;
+
+	pr_debug("kfd: runlist IB address: 0x%llX\n", rl_gpu_ib_addr);
+
+	packet_size_dwords = sizeof(struct pm4_runlist) / sizeof(uint32_t);
+	mutex_lock(&pm->lock);
+
+	retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
+					packet_size_dwords, &rl_buffer);
+	if (retval != 0)
+		goto fail_acquire_packet_buffer;
+
+	retval = pm_create_runlist(pm, rl_buffer, rl_gpu_ib_addr,
+					rl_ib_size / sizeof(uint32_t), false);
+	if (retval != 0)
+		goto fail_create_runlist;
+
+	pm->priv_queue->ops.submit_packet(pm->priv_queue);
+
+	mutex_unlock(&pm->lock);
+
+	return retval;
+
+fail_create_runlist:
+	pm->priv_queue->ops.rollback_packet(pm->priv_queue);
+fail_acquire_packet_buffer:
+	mutex_unlock(&pm->lock);
+fail_create_runlist_ib:
+	if (pm->allocated == true)
+		pm_release_ib(pm);
+	return retval;
+}
+
+int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
+			uint32_t fence_value)
+{
+	int retval;
+	struct pm4_query_status *packet;
+
+	BUG_ON(!pm || !fence_address);
+
+	mutex_lock(&pm->lock);
+	retval = pm->priv_queue->ops.acquire_packet_buffer(
+			pm->priv_queue,
+			sizeof(struct pm4_query_status) / sizeof(uint32_t),
+			(unsigned int **)&packet);
+	if (retval != 0)
+		goto fail_acquire_packet_buffer;
+
+	packet->header.u32all = build_pm4_header(IT_QUERY_STATUS,
+					sizeof(struct pm4_query_status));
+
+	packet->bitfields2.context_id = 0;
+	packet->bitfields2.interrupt_sel =
+			interrupt_sel__mes_query_status__completion_status;
+	packet->bitfields2.command =
+			command__mes_query_status__fence_only_after_write_ack;
+
+	packet->addr_hi = upper_32_bits((uint64_t)fence_address);
+	packet->addr_lo = lower_32_bits((uint64_t)fence_address);
+	packet->data_hi = upper_32_bits((uint64_t)fence_value);
+	packet->data_lo = lower_32_bits((uint64_t)fence_value);
+
+	pm->priv_queue->ops.submit_packet(pm->priv_queue);
+	mutex_unlock(&pm->lock);
+
+	return 0;
+
+fail_acquire_packet_buffer:
+	mutex_unlock(&pm->lock);
+	return retval;
+}
+
+int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
+			enum kfd_preempt_type_filter mode,
+			uint32_t filter_param, bool reset,
+			unsigned int sdma_engine)
+{
+	int retval;
+	uint32_t *buffer;
+	struct pm4_unmap_queues *packet;
+
+	BUG_ON(!pm);
+
+	mutex_lock(&pm->lock);
+	retval = pm->priv_queue->ops.acquire_packet_buffer(
+			pm->priv_queue,
+			sizeof(struct pm4_unmap_queues) / sizeof(uint32_t),
+			&buffer);
+	if (retval != 0)
+		goto err_acquire_packet_buffer;
+
+	packet = (struct pm4_unmap_queues *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_unmap_queues));
+
+	packet->header.u32all = build_pm4_header(IT_UNMAP_QUEUES,
+					sizeof(struct pm4_unmap_queues));
+	switch (type) {
+	case KFD_QUEUE_TYPE_COMPUTE:
+	case KFD_QUEUE_TYPE_DIQ:
+		packet->bitfields2.engine_sel =
+			engine_sel__mes_unmap_queues__compute;
+		break;
+	case KFD_QUEUE_TYPE_SDMA:
+		packet->bitfields2.engine_sel =
+			engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	if (reset)
+		packet->bitfields2.action =
+				action__mes_unmap_queues__reset_queues;
+	else
+		packet->bitfields2.action =
+				action__mes_unmap_queues__preempt_queues;
+
+	switch (mode) {
+	case KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE:
+		packet->bitfields2.queue_sel =
+				queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
+		packet->bitfields2.num_queues = 1;
+		packet->bitfields3b.doorbell_offset0 = filter_param;
+		break;
+	case KFD_PREEMPT_TYPE_FILTER_BY_PASID:
+		packet->bitfields2.queue_sel =
+				queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
+		packet->bitfields3a.pasid = filter_param;
+		break;
+	case KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES:
+		packet->bitfields2.queue_sel =
+				queue_sel__mes_unmap_queues__perform_request_on_all_active_queues;
+		break;
+	default:
+		BUG();
+		break;
+	};
+
+	pm->priv_queue->ops.submit_packet(pm->priv_queue);
+
+	mutex_unlock(&pm->lock);
+	return 0;
+
+err_acquire_packet_buffer:
+	mutex_unlock(&pm->lock);
+	return retval;
+}
+
+void pm_release_ib(struct packet_manager *pm)
+{
+	BUG_ON(!pm);
+
+	mutex_lock(&pm->lock);
+	if (pm->allocated) {
+		kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj);
+		pm->allocated = false;
+	}
+	mutex_unlock(&pm->lock);
+}
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
new file mode 100644
index 000000000..6cfe7f1f1
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/slab.h>
+#include <linux/types.h>
+#include "kfd_priv.h"
+
+static unsigned long *pasid_bitmap;
+static unsigned int pasid_limit;
+static DEFINE_MUTEX(pasid_mutex);
+
+int kfd_pasid_init(void)
+{
+	pasid_limit = KFD_MAX_NUM_OF_PROCESSES;
+
+	pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), GFP_KERNEL);
+	if (!pasid_bitmap)
+		return -ENOMEM;
+
+	set_bit(0, pasid_bitmap); /* PASID 0 is reserved. */
+
+	return 0;
+}
+
+void kfd_pasid_exit(void)
+{
+	kfree(pasid_bitmap);
+}
+
+bool kfd_set_pasid_limit(unsigned int new_limit)
+{
+	if (new_limit < pasid_limit) {
+		bool ok;
+
+		mutex_lock(&pasid_mutex);
+
+		/* ensure that no pasids >= new_limit are in-use */
+		ok = (find_next_bit(pasid_bitmap, pasid_limit, new_limit) ==
+								pasid_limit);
+		if (ok)
+			pasid_limit = new_limit;
+
+		mutex_unlock(&pasid_mutex);
+
+		return ok;
+	}
+
+	return true;
+}
+
+inline unsigned int kfd_get_pasid_limit(void)
+{
+	return pasid_limit;
+}
+
+unsigned int kfd_pasid_alloc(void)
+{
+	unsigned int found;
+
+	mutex_lock(&pasid_mutex);
+
+	found = find_first_zero_bit(pasid_bitmap, pasid_limit);
+	if (found == pasid_limit)
+		found = 0;
+	else
+		set_bit(found, pasid_bitmap);
+
+	mutex_unlock(&pasid_mutex);
+
+	return found;
+}
+
+void kfd_pasid_free(unsigned int pasid)
+{
+	BUG_ON(pasid == 0 || pasid >= pasid_limit);
+	clear_bit(pasid, pasid_bitmap);
+}
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
new file mode 100644
index 000000000..071ad5724
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
@@ -0,0 +1,405 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef KFD_PM4_HEADERS_H_
+#define KFD_PM4_HEADERS_H_
+
+#ifndef PM4_MES_HEADER_DEFINED
+#define PM4_MES_HEADER_DEFINED
+union PM4_MES_TYPE_3_HEADER {
+	struct {
+		uint32_t reserved1:8;	/* < reserved */
+		uint32_t opcode:8;	/* < IT opcode */
+		uint32_t count:14;	/* < number of DWORDs - 1
+					 * in the information body.
+					 */
+		uint32_t type:2;	/* < packet identifier.
+					 * It should be 3 for type 3 packets
+					 */
+	};
+	uint32_t u32all;
+};
+#endif /* PM4_MES_HEADER_DEFINED */
+
+/* --------------------MES_SET_RESOURCES-------------------- */
+
+#ifndef PM4_MES_SET_RESOURCES_DEFINED
+#define PM4_MES_SET_RESOURCES_DEFINED
+enum set_resources_queue_type_enum {
+	queue_type__mes_set_resources__kernel_interface_queue_kiq = 0,
+	queue_type__mes_set_resources__hsa_interface_queue_hiq = 1,
+	queue_type__mes_set_resources__hsa_debug_interface_queue = 4
+};
+
+struct pm4_set_resources {
+	union {
+		union PM4_MES_TYPE_3_HEADER header;	/* header */
+		uint32_t ordinal1;
+	};
+
+	union {
+		struct {
+			uint32_t vmid_mask:16;
+			uint32_t unmap_latency:8;
+			uint32_t reserved1:5;
+			enum set_resources_queue_type_enum queue_type:3;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	uint32_t queue_mask_lo;
+	uint32_t queue_mask_hi;
+	uint32_t gws_mask_lo;
+	uint32_t gws_mask_hi;
+
+	union {
+		struct {
+			uint32_t oac_mask:16;
+			uint32_t reserved2:16;
+		} bitfields7;
+		uint32_t ordinal7;
+	};
+
+	union {
+		struct {
+			uint32_t gds_heap_base:6;
+			uint32_t reserved3:5;
+			uint32_t gds_heap_size:6;
+			uint32_t reserved4:15;
+		} bitfields8;
+		uint32_t ordinal8;
+	};
+
+};
+#endif
+
+/*--------------------MES_RUN_LIST-------------------- */
+
+#ifndef PM4_MES_RUN_LIST_DEFINED
+#define PM4_MES_RUN_LIST_DEFINED
+
+struct pm4_runlist {
+	union {
+		union PM4_MES_TYPE_3_HEADER header;	/* header */
+		uint32_t ordinal1;
+	};
+
+	union {
+		struct {
+			uint32_t reserved1:2;
+			uint32_t ib_base_lo:30;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	union {
+		struct {
+			uint32_t ib_base_hi:16;
+			uint32_t reserved2:16;
+		} bitfields3;
+		uint32_t ordinal3;
+	};
+
+	union {
+		struct {
+			uint32_t ib_size:20;
+			uint32_t chain:1;
+			uint32_t offload_polling:1;
+			uint32_t reserved3:1;
+			uint32_t valid:1;
+			uint32_t reserved4:8;
+		} bitfields4;
+		uint32_t ordinal4;
+	};
+
+};
+#endif
+
+/*--------------------MES_MAP_PROCESS-------------------- */
+
+#ifndef PM4_MES_MAP_PROCESS_DEFINED
+#define PM4_MES_MAP_PROCESS_DEFINED
+
+struct pm4_map_process {
+	union {
+		union PM4_MES_TYPE_3_HEADER header;	/* header */
+		uint32_t ordinal1;
+	};
+
+	union {
+		struct {
+			uint32_t pasid:16;
+			uint32_t reserved1:8;
+			uint32_t diq_enable:1;
+			uint32_t process_quantum:7;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	union {
+		struct {
+			uint32_t page_table_base:28;
+			uint32_t reserved3:4;
+		} bitfields3;
+		uint32_t ordinal3;
+	};
+
+	uint32_t sh_mem_bases;
+	uint32_t sh_mem_ape1_base;
+	uint32_t sh_mem_ape1_limit;
+	uint32_t sh_mem_config;
+	uint32_t gds_addr_lo;
+	uint32_t gds_addr_hi;
+
+	union {
+		struct {
+			uint32_t num_gws:6;
+			uint32_t reserved4:2;
+			uint32_t num_oac:4;
+			uint32_t reserved5:4;
+			uint32_t gds_size:6;
+			uint32_t num_queues:10;
+		} bitfields10;
+		uint32_t ordinal10;
+	};
+
+};
+#endif
+
+/*--------------------MES_MAP_QUEUES--------------------*/
+
+#ifndef PM4_MES_MAP_QUEUES_DEFINED
+#define PM4_MES_MAP_QUEUES_DEFINED
+enum map_queues_queue_sel_enum {
+	queue_sel__mes_map_queues__map_to_specified_queue_slots = 0,
+	queue_sel__mes_map_queues__map_to_hws_determined_queue_slots = 1,
+	queue_sel__mes_map_queues__enable_process_queues = 2
+};
+
+enum map_queues_vidmem_enum {
+	vidmem__mes_map_queues__uses_no_video_memory = 0,
+	vidmem__mes_map_queues__uses_video_memory = 1
+};
+
+enum map_queues_alloc_format_enum {
+	alloc_format__mes_map_queues__one_per_pipe = 0,
+	alloc_format__mes_map_queues__all_on_one_pipe = 1
+};
+
+enum map_queues_engine_sel_enum {
+	engine_sel__mes_map_queues__compute = 0,
+	engine_sel__mes_map_queues__sdma0 = 2,
+	engine_sel__mes_map_queues__sdma1 = 3
+};
+
+struct pm4_map_queues {
+	union {
+		union PM4_MES_TYPE_3_HEADER header;	/* header */
+		uint32_t ordinal1;
+	};
+
+	union {
+		struct {
+			uint32_t reserved1:4;
+			enum map_queues_queue_sel_enum queue_sel:2;
+			uint32_t reserved2:2;
+			uint32_t vmid:4;
+			uint32_t reserved3:4;
+			enum map_queues_vidmem_enum vidmem:2;
+			uint32_t reserved4:6;
+			enum map_queues_alloc_format_enum alloc_format:2;
+			enum map_queues_engine_sel_enum engine_sel:3;
+			uint32_t num_queues:3;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	struct {
+		union {
+			struct {
+				uint32_t reserved5:2;
+				uint32_t doorbell_offset:21;
+				uint32_t reserved6:3;
+				uint32_t queue:6;
+			} bitfields3;
+			uint32_t ordinal3;
+		};
+
+		uint32_t mqd_addr_lo;
+		uint32_t mqd_addr_hi;
+		uint32_t wptr_addr_lo;
+		uint32_t wptr_addr_hi;
+
+	} mes_map_queues_ordinals[1];	/* 1..N of these ordinal groups */
+
+};
+#endif
+
+/*--------------------MES_QUERY_STATUS--------------------*/
+
+#ifndef PM4_MES_QUERY_STATUS_DEFINED
+#define PM4_MES_QUERY_STATUS_DEFINED
+enum query_status_interrupt_sel_enum {
+	interrupt_sel__mes_query_status__completion_status = 0,
+	interrupt_sel__mes_query_status__process_status = 1,
+	interrupt_sel__mes_query_status__queue_status = 2
+};
+
+enum query_status_command_enum {
+	command__mes_query_status__interrupt_only = 0,
+	command__mes_query_status__fence_only_immediate = 1,
+	command__mes_query_status__fence_only_after_write_ack = 2,
+	command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3
+};
+
+enum query_status_engine_sel_enum {
+	engine_sel__mes_query_status__compute = 0,
+	engine_sel__mes_query_status__sdma0_queue = 2,
+	engine_sel__mes_query_status__sdma1_queue = 3
+};
+
+struct pm4_query_status {
+	union {
+		union PM4_MES_TYPE_3_HEADER header;	/* header */
+		uint32_t ordinal1;
+	};
+
+	union {
+		struct {
+			uint32_t context_id:28;
+			enum query_status_interrupt_sel_enum interrupt_sel:2;
+			enum query_status_command_enum command:2;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	union {
+		struct {
+			uint32_t pasid:16;
+			uint32_t reserved1:16;
+		} bitfields3a;
+		struct {
+			uint32_t reserved2:2;
+			uint32_t doorbell_offset:21;
+			uint32_t reserved3:3;
+			enum query_status_engine_sel_enum engine_sel:3;
+			uint32_t reserved4:3;
+		} bitfields3b;
+		uint32_t ordinal3;
+	};
+
+	uint32_t addr_lo;
+	uint32_t addr_hi;
+	uint32_t data_lo;
+	uint32_t data_hi;
+};
+#endif
+
+/*--------------------MES_UNMAP_QUEUES--------------------*/
+
+#ifndef PM4_MES_UNMAP_QUEUES_DEFINED
+#define PM4_MES_UNMAP_QUEUES_DEFINED
+enum unmap_queues_action_enum {
+	action__mes_unmap_queues__preempt_queues = 0,
+	action__mes_unmap_queues__reset_queues = 1,
+	action__mes_unmap_queues__disable_process_queues = 2
+};
+
+enum unmap_queues_queue_sel_enum {
+	queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
+	queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
+	queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2
+};
+
+enum unmap_queues_engine_sel_enum {
+	engine_sel__mes_unmap_queues__compute = 0,
+	engine_sel__mes_unmap_queues__sdma0 = 2,
+	engine_sel__mes_unmap_queues__sdma1 = 3
+};
+
+struct pm4_unmap_queues {
+	union {
+		union PM4_MES_TYPE_3_HEADER header;	/* header */
+		uint32_t ordinal1;
+	};
+
+	union {
+		struct {
+			enum unmap_queues_action_enum action:2;
+			uint32_t reserved1:2;
+			enum unmap_queues_queue_sel_enum queue_sel:2;
+			uint32_t reserved2:20;
+			enum unmap_queues_engine_sel_enum engine_sel:3;
+			uint32_t num_queues:3;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	union {
+		struct {
+			uint32_t pasid:16;
+			uint32_t reserved3:16;
+		} bitfields3a;
+		struct {
+			uint32_t reserved4:2;
+			uint32_t doorbell_offset0:21;
+			uint32_t reserved5:9;
+		} bitfields3b;
+		uint32_t ordinal3;
+	};
+
+	union {
+		struct {
+			uint32_t reserved6:2;
+			uint32_t doorbell_offset1:21;
+			uint32_t reserved7:9;
+		} bitfields4;
+		uint32_t ordinal4;
+	};
+
+	union {
+		struct {
+			uint32_t reserved8:2;
+			uint32_t doorbell_offset2:21;
+			uint32_t reserved9:9;
+		} bitfields5;
+		uint32_t ordinal5;
+	};
+
+	union {
+		struct {
+			uint32_t reserved10:2;
+			uint32_t doorbell_offset3:21;
+			uint32_t reserved11:9;
+		} bitfields6;
+		uint32_t ordinal6;
+	};
+
+};
+#endif
+
+enum {
+	CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014
+};
+
+#endif /* KFD_PM4_HEADERS_H_ */
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h
new file mode 100644
index 000000000..b72fa3b8c
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+
+#ifndef KFD_PM4_OPCODES_H
+#define KFD_PM4_OPCODES_H
+
+enum it_opcode_type {
+	IT_NOP                               = 0x10,
+	IT_SET_BASE                          = 0x11,
+	IT_CLEAR_STATE                       = 0x12,
+	IT_INDEX_BUFFER_SIZE                 = 0x13,
+	IT_DISPATCH_DIRECT                   = 0x15,
+	IT_DISPATCH_INDIRECT                 = 0x16,
+	IT_ATOMIC_GDS                        = 0x1D,
+	IT_OCCLUSION_QUERY                   = 0x1F,
+	IT_SET_PREDICATION                   = 0x20,
+	IT_REG_RMW                           = 0x21,
+	IT_COND_EXEC                         = 0x22,
+	IT_PRED_EXEC                         = 0x23,
+	IT_DRAW_INDIRECT                     = 0x24,
+	IT_DRAW_INDEX_INDIRECT               = 0x25,
+	IT_INDEX_BASE                        = 0x26,
+	IT_DRAW_INDEX_2                      = 0x27,
+	IT_CONTEXT_CONTROL                   = 0x28,
+	IT_INDEX_TYPE                        = 0x2A,
+	IT_DRAW_INDIRECT_MULTI               = 0x2C,
+	IT_DRAW_INDEX_AUTO                   = 0x2D,
+	IT_NUM_INSTANCES                     = 0x2F,
+	IT_DRAW_INDEX_MULTI_AUTO             = 0x30,
+	IT_INDIRECT_BUFFER_CNST              = 0x33,
+	IT_STRMOUT_BUFFER_UPDATE             = 0x34,
+	IT_DRAW_INDEX_OFFSET_2               = 0x35,
+	IT_DRAW_PREAMBLE                     = 0x36,
+	IT_WRITE_DATA                        = 0x37,
+	IT_DRAW_INDEX_INDIRECT_MULTI         = 0x38,
+	IT_MEM_SEMAPHORE                     = 0x39,
+	IT_COPY_DW                           = 0x3B,
+	IT_WAIT_REG_MEM                      = 0x3C,
+	IT_INDIRECT_BUFFER                   = 0x3F,
+	IT_COPY_DATA                         = 0x40,
+	IT_PFP_SYNC_ME                       = 0x42,
+	IT_SURFACE_SYNC                      = 0x43,
+	IT_COND_WRITE                        = 0x45,
+	IT_EVENT_WRITE                       = 0x46,
+	IT_EVENT_WRITE_EOP                   = 0x47,
+	IT_EVENT_WRITE_EOS                   = 0x48,
+	IT_RELEASE_MEM                       = 0x49,
+	IT_PREAMBLE_CNTL                     = 0x4A,
+	IT_DMA_DATA                          = 0x50,
+	IT_ACQUIRE_MEM                       = 0x58,
+	IT_REWIND                            = 0x59,
+	IT_LOAD_UCONFIG_REG                  = 0x5E,
+	IT_LOAD_SH_REG                       = 0x5F,
+	IT_LOAD_CONFIG_REG                   = 0x60,
+	IT_LOAD_CONTEXT_REG                  = 0x61,
+	IT_SET_CONFIG_REG                    = 0x68,
+	IT_SET_CONTEXT_REG                   = 0x69,
+	IT_SET_CONTEXT_REG_INDIRECT          = 0x73,
+	IT_SET_SH_REG                        = 0x76,
+	IT_SET_SH_REG_OFFSET                 = 0x77,
+	IT_SET_QUEUE_REG                     = 0x78,
+	IT_SET_UCONFIG_REG                   = 0x79,
+	IT_SCRATCH_RAM_WRITE                 = 0x7D,
+	IT_SCRATCH_RAM_READ                  = 0x7E,
+	IT_LOAD_CONST_RAM                    = 0x80,
+	IT_WRITE_CONST_RAM                   = 0x81,
+	IT_DUMP_CONST_RAM                    = 0x83,
+	IT_INCREMENT_CE_COUNTER              = 0x84,
+	IT_INCREMENT_DE_COUNTER              = 0x85,
+	IT_WAIT_ON_CE_COUNTER                = 0x86,
+	IT_WAIT_ON_DE_COUNTER_DIFF           = 0x88,
+	IT_SWITCH_BUFFER                     = 0x8B,
+	IT_SET_RESOURCES                     = 0xA0,
+	IT_MAP_PROCESS                       = 0xA1,
+	IT_MAP_QUEUES                        = 0xA2,
+	IT_UNMAP_QUEUES                      = 0xA3,
+	IT_QUERY_STATUS                      = 0xA4,
+	IT_RUN_LIST                          = 0xA5,
+};
+
+#define PM4_TYPE_0 0
+#define PM4_TYPE_2 2
+#define PM4_TYPE_3 3
+
+#endif /* KFD_PM4_OPCODES_H */
+
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
new file mode 100644
index 000000000..f21fccebd
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -0,0 +1,645 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef KFD_PRIV_H_INCLUDED
+#define KFD_PRIV_H_INCLUDED
+
+#include <linux/hashtable.h>
+#include <linux/mmu_notifier.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/atomic.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include <linux/kfd_ioctl.h>
+#include <kgd_kfd_interface.h>
+
+#define KFD_SYSFS_FILE_MODE 0444
+
+/*
+ * When working with cp scheduler we should assign the HIQ manually or via
+ * the radeon driver to a fixed hqd slot, here are the fixed HIQ hqd slot
+ * definitions for Kaveri. In Kaveri only the first ME queues participates
+ * in the cp scheduling taking that in mind we set the HIQ slot in the
+ * second ME.
+ */
+#define KFD_CIK_HIQ_PIPE 4
+#define KFD_CIK_HIQ_QUEUE 0
+
+/* GPU ID hash width in bits */
+#define KFD_GPU_ID_HASH_WIDTH 16
+
+/* Macro for allocating structures */
+#define kfd_alloc_struct(ptr_to_struct)	\
+	((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL))
+
+#define KFD_MAX_NUM_OF_PROCESSES 512
+#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
+
+/*
+ * Kernel module parameter to specify maximum number of supported queues per
+ * device
+ */
+extern int max_num_of_queues_per_device;
+
+#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT 4096
+#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE		\
+	(KFD_MAX_NUM_OF_PROCESSES *			\
+			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
+
+#define KFD_KERNEL_QUEUE_SIZE 2048
+
+/* Kernel module parameter to specify the scheduling policy */
+extern int sched_policy;
+
+/**
+ * enum kfd_sched_policy
+ *
+ * @KFD_SCHED_POLICY_HWS: H/W scheduling policy known as command processor (cp)
+ * scheduling. In this scheduling mode we're using the firmware code to
+ * schedule the user mode queues and kernel queues such as HIQ and DIQ.
+ * the HIQ queue is used as a special queue that dispatches the configuration
+ * to the cp and the user mode queues list that are currently running.
+ * the DIQ queue is a debugging queue that dispatches debugging commands to the
+ * firmware.
+ * in this scheduling mode user mode queues over subscription feature is
+ * enabled.
+ *
+ * @KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: The same as above but the over
+ * subscription feature disabled.
+ *
+ * @KFD_SCHED_POLICY_NO_HWS: no H/W scheduling policy is a mode which directly
+ * set the command processor registers and sets the queues "manually". This
+ * mode is used *ONLY* for debugging proposes.
+ *
+ */
+enum kfd_sched_policy {
+	KFD_SCHED_POLICY_HWS = 0,
+	KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION,
+	KFD_SCHED_POLICY_NO_HWS
+};
+
+enum cache_policy {
+	cache_policy_coherent,
+	cache_policy_noncoherent
+};
+
+enum asic_family_type {
+	CHIP_KAVERI = 0,
+	CHIP_CARRIZO
+};
+
+struct kfd_device_info {
+	unsigned int asic_family;
+	unsigned int max_pasid_bits;
+	size_t ih_ring_entry_size;
+	uint8_t num_of_watch_points;
+	uint16_t mqd_size_aligned;
+};
+
+struct kfd_mem_obj {
+	uint32_t range_start;
+	uint32_t range_end;
+	uint64_t gpu_addr;
+	uint32_t *cpu_ptr;
+};
+
+struct kfd_dev {
+	struct kgd_dev *kgd;
+
+	const struct kfd_device_info *device_info;
+	struct pci_dev *pdev;
+
+	unsigned int id;		/* topology stub index */
+
+	phys_addr_t doorbell_base;	/* Start of actual doorbells used by
+					 * KFD. It is aligned for mapping
+					 * into user mode
+					 */
+	size_t doorbell_id_offset;	/* Doorbell offset (from KFD doorbell
+					 * to HW doorbell, GFX reserved some
+					 * at the start)
+					 */
+	size_t doorbell_process_limit;	/* Number of processes we have doorbell
+					 * space for.
+					 */
+	u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells
+					   * page used by kernel queue
+					   */
+
+	struct kgd2kfd_shared_resources shared_resources;
+
+	const struct kfd2kgd_calls *kfd2kgd;
+	struct mutex doorbell_mutex;
+	unsigned long doorbell_available_index[DIV_ROUND_UP(
+		KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)];
+
+	void *gtt_mem;
+	uint64_t gtt_start_gpu_addr;
+	void *gtt_start_cpu_ptr;
+	void *gtt_sa_bitmap;
+	struct mutex gtt_sa_lock;
+	unsigned int gtt_sa_chunk_size;
+	unsigned int gtt_sa_num_of_chunks;
+
+	/* QCM Device instance */
+	struct device_queue_manager *dqm;
+
+	bool init_complete;
+};
+
+/* KGD2KFD callbacks */
+void kgd2kfd_exit(void);
+struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
+			struct pci_dev *pdev, const struct kfd2kgd_calls *f2g);
+bool kgd2kfd_device_init(struct kfd_dev *kfd,
+			const struct kgd2kfd_shared_resources *gpu_resources);
+void kgd2kfd_device_exit(struct kfd_dev *kfd);
+
+enum kfd_mempool {
+	KFD_MEMPOOL_SYSTEM_CACHEABLE = 1,
+	KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2,
+	KFD_MEMPOOL_FRAMEBUFFER = 3,
+};
+
+/* Character device interface */
+int kfd_chardev_init(void);
+void kfd_chardev_exit(void);
+struct device *kfd_chardev(void);
+
+/**
+ * enum kfd_preempt_type_filter
+ *
+ * @KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE: Preempts single queue.
+ *
+ * @KFD_PRERMPT_TYPE_FILTER_ALL_QUEUES: Preempts all queues in the
+ *						running queues list.
+ *
+ * @KFD_PRERMPT_TYPE_FILTER_BY_PASID: Preempts queues that belongs to
+ *						specific process.
+ *
+ */
+enum kfd_preempt_type_filter {
+	KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE,
+	KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES,
+	KFD_PREEMPT_TYPE_FILTER_BY_PASID
+};
+
+enum kfd_preempt_type {
+	KFD_PREEMPT_TYPE_WAVEFRONT,
+	KFD_PREEMPT_TYPE_WAVEFRONT_RESET
+};
+
+/**
+ * enum kfd_queue_type
+ *
+ * @KFD_QUEUE_TYPE_COMPUTE: Regular user mode queue type.
+ *
+ * @KFD_QUEUE_TYPE_SDMA: Sdma user mode queue type.
+ *
+ * @KFD_QUEUE_TYPE_HIQ: HIQ queue type.
+ *
+ * @KFD_QUEUE_TYPE_DIQ: DIQ queue type.
+ */
+enum kfd_queue_type  {
+	KFD_QUEUE_TYPE_COMPUTE,
+	KFD_QUEUE_TYPE_SDMA,
+	KFD_QUEUE_TYPE_HIQ,
+	KFD_QUEUE_TYPE_DIQ
+};
+
+enum kfd_queue_format {
+	KFD_QUEUE_FORMAT_PM4,
+	KFD_QUEUE_FORMAT_AQL
+};
+
+/**
+ * struct queue_properties
+ *
+ * @type: The queue type.
+ *
+ * @queue_id: Queue identifier.
+ *
+ * @queue_address: Queue ring buffer address.
+ *
+ * @queue_size: Queue ring buffer size.
+ *
+ * @priority: Defines the queue priority relative to other queues in the
+ * process.
+ * This is just an indication and HW scheduling may override the priority as
+ * necessary while keeping the relative prioritization.
+ * the priority granularity is from 0 to f which f is the highest priority.
+ * currently all queues are initialized with the highest priority.
+ *
+ * @queue_percent: This field is partially implemented and currently a zero in
+ * this field defines that the queue is non active.
+ *
+ * @read_ptr: User space address which points to the number of dwords the
+ * cp read from the ring buffer. This field updates automatically by the H/W.
+ *
+ * @write_ptr: Defines the number of dwords written to the ring buffer.
+ *
+ * @doorbell_ptr: This field aim is to notify the H/W of new packet written to
+ * the queue ring buffer. This field should be similar to write_ptr and the user
+ * should update this field after he updated the write_ptr.
+ *
+ * @doorbell_off: The doorbell offset in the doorbell pci-bar.
+ *
+ * @is_interop: Defines if this is a interop queue. Interop queue means that the
+ * queue can access both graphics and compute resources.
+ *
+ * @is_active: Defines if the queue is active or not.
+ *
+ * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid
+ * of the queue.
+ *
+ * This structure represents the queue properties for each queue no matter if
+ * it's user mode or kernel mode queue.
+ *
+ */
+struct queue_properties {
+	enum kfd_queue_type type;
+	enum kfd_queue_format format;
+	unsigned int queue_id;
+	uint64_t queue_address;
+	uint64_t  queue_size;
+	uint32_t priority;
+	uint32_t queue_percent;
+	uint32_t *read_ptr;
+	uint32_t *write_ptr;
+	uint32_t __iomem *doorbell_ptr;
+	uint32_t doorbell_off;
+	bool is_interop;
+	bool is_active;
+	/* Not relevant for user mode queues in cp scheduling */
+	unsigned int vmid;
+	/* Relevant only for sdma queues*/
+	uint32_t sdma_engine_id;
+	uint32_t sdma_queue_id;
+	uint32_t sdma_vm_addr;
+	/* Relevant only for VI */
+	uint64_t eop_ring_buffer_address;
+	uint32_t eop_ring_buffer_size;
+	uint64_t ctx_save_restore_area_address;
+	uint32_t ctx_save_restore_area_size;
+};
+
+/**
+ * struct queue
+ *
+ * @list: Queue linked list.
+ *
+ * @mqd: The queue MQD.
+ *
+ * @mqd_mem_obj: The MQD local gpu memory object.
+ *
+ * @gart_mqd_addr: The MQD gart mc address.
+ *
+ * @properties: The queue properties.
+ *
+ * @mec: Used only in no cp scheduling mode and identifies to micro engine id
+ * that the queue should be execute on.
+ *
+ * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe id.
+ *
+ * @queue: Used only in no cp scheduliong mode and identifies the queue's slot.
+ *
+ * @process: The kfd process that created this queue.
+ *
+ * @device: The kfd device that created this queue.
+ *
+ * This structure represents user mode compute queues.
+ * It contains all the necessary data to handle such queues.
+ *
+ */
+
+struct queue {
+	struct list_head list;
+	void *mqd;
+	struct kfd_mem_obj *mqd_mem_obj;
+	uint64_t gart_mqd_addr;
+	struct queue_properties properties;
+
+	uint32_t mec;
+	uint32_t pipe;
+	uint32_t queue;
+
+	unsigned int sdma_id;
+
+	struct kfd_process	*process;
+	struct kfd_dev		*device;
+};
+
+/*
+ * Please read the kfd_mqd_manager.h description.
+ */
+enum KFD_MQD_TYPE {
+	KFD_MQD_TYPE_COMPUTE = 0,	/* for no cp scheduling */
+	KFD_MQD_TYPE_HIQ,		/* for hiq */
+	KFD_MQD_TYPE_CP,		/* for cp queues and diq */
+	KFD_MQD_TYPE_SDMA,		/* for sdma queues */
+	KFD_MQD_TYPE_MAX
+};
+
+struct scheduling_resources {
+	unsigned int vmid_mask;
+	enum kfd_queue_type type;
+	uint64_t queue_mask;
+	uint64_t gws_mask;
+	uint32_t oac_mask;
+	uint32_t gds_heap_base;
+	uint32_t gds_heap_size;
+};
+
+struct process_queue_manager {
+	/* data */
+	struct kfd_process	*process;
+	unsigned int		num_concurrent_processes;
+	struct list_head	queues;
+	unsigned long		*queue_slot_bitmap;
+};
+
+struct qcm_process_device {
+	/* The Device Queue Manager that owns this data */
+	struct device_queue_manager *dqm;
+	struct process_queue_manager *pqm;
+	/* Queues list */
+	struct list_head queues_list;
+	struct list_head priv_queue_list;
+
+	unsigned int queue_count;
+	unsigned int vmid;
+	bool is_debug;
+	/*
+	 * All the memory management data should be here too
+	 */
+	uint64_t gds_context_area;
+	uint32_t sh_mem_config;
+	uint32_t sh_mem_bases;
+	uint32_t sh_mem_ape1_base;
+	uint32_t sh_mem_ape1_limit;
+	uint32_t page_table_base;
+	uint32_t gds_size;
+	uint32_t num_gws;
+	uint32_t num_oac;
+};
+
+/* Data that is per-process-per device. */
+struct kfd_process_device {
+	/*
+	 * List of all per-device data for a process.
+	 * Starts from kfd_process.per_device_data.
+	 */
+	struct list_head per_device_list;
+
+	/* The device that owns this data. */
+	struct kfd_dev *dev;
+
+
+	/* per-process-per device QCM data structure */
+	struct qcm_process_device qpd;
+
+	/*Apertures*/
+	uint64_t lds_base;
+	uint64_t lds_limit;
+	uint64_t gpuvm_base;
+	uint64_t gpuvm_limit;
+	uint64_t scratch_base;
+	uint64_t scratch_limit;
+
+	/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
+	bool bound;
+};
+
+#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
+
+/* Process data */
+struct kfd_process {
+	/*
+	 * kfd_process are stored in an mm_struct*->kfd_process*
+	 * hash table (kfd_processes in kfd_process.c)
+	 */
+	struct hlist_node kfd_processes;
+
+	struct mm_struct *mm;
+
+	struct mutex mutex;
+
+	/*
+	 * In any process, the thread that started main() is the lead
+	 * thread and outlives the rest.
+	 * It is here because amd_iommu_bind_pasid wants a task_struct.
+	 */
+	struct task_struct *lead_thread;
+
+	/* We want to receive a notification when the mm_struct is destroyed */
+	struct mmu_notifier mmu_notifier;
+
+	/* Use for delayed freeing of kfd_process structure */
+	struct rcu_head	rcu;
+
+	unsigned int pasid;
+
+	/*
+	 * List of kfd_process_device structures,
+	 * one for each device the process is using.
+	 */
+	struct list_head per_device_data;
+
+	struct process_queue_manager pqm;
+
+	/* The process's queues. */
+	size_t queue_array_size;
+
+	/* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */
+	struct kfd_queue **queues;
+
+	unsigned long allocated_queue_bitmap[DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)];
+
+	/*Is the user space process 32 bit?*/
+	bool is_32bit_user_mode;
+};
+
+/**
+ * Ioctl function type.
+ *
+ * \param filep pointer to file structure.
+ * \param p amdkfd process pointer.
+ * \param data pointer to arg that was copied from user.
+ */
+typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p,
+				void *data);
+
+struct amdkfd_ioctl_desc {
+	unsigned int cmd;
+	int flags;
+	amdkfd_ioctl_t *func;
+	unsigned int cmd_drv;
+	const char *name;
+};
+
+void kfd_process_create_wq(void);
+void kfd_process_destroy_wq(void);
+struct kfd_process *kfd_create_process(const struct task_struct *);
+struct kfd_process *kfd_get_process(const struct task_struct *);
+
+struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
+							struct kfd_process *p);
+void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid);
+struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
+							struct kfd_process *p);
+struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
+							struct kfd_process *p);
+
+/* Process device data iterator */
+struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p);
+struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p,
+						struct kfd_process_device *pdd);
+bool kfd_has_process_device_data(struct kfd_process *p);
+
+/* PASIDs */
+int kfd_pasid_init(void);
+void kfd_pasid_exit(void);
+bool kfd_set_pasid_limit(unsigned int new_limit);
+unsigned int kfd_get_pasid_limit(void);
+unsigned int kfd_pasid_alloc(void);
+void kfd_pasid_free(unsigned int pasid);
+
+/* Doorbells */
+void kfd_doorbell_init(struct kfd_dev *kfd);
+int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma);
+u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
+					unsigned int *doorbell_off);
+void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
+u32 read_kernel_doorbell(u32 __iomem *db);
+void write_kernel_doorbell(u32 __iomem *db, u32 value);
+unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
+					struct kfd_process *process,
+					unsigned int queue_id);
+
+/* GTT Sub-Allocator */
+
+int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
+			struct kfd_mem_obj **mem_obj);
+
+int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj);
+
+extern struct device *kfd_device;
+
+/* Topology */
+int kfd_topology_init(void);
+void kfd_topology_shutdown(void);
+int kfd_topology_add_device(struct kfd_dev *gpu);
+int kfd_topology_remove_device(struct kfd_dev *gpu);
+struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);
+struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
+struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx);
+
+/* Interrupts */
+void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
+
+/* Power Management */
+void kgd2kfd_suspend(struct kfd_dev *kfd);
+int kgd2kfd_resume(struct kfd_dev *kfd);
+
+/* amdkfd Apertures */
+int kfd_init_apertures(struct kfd_process *process);
+
+/* Queue Context Management */
+inline uint32_t lower_32(uint64_t x);
+inline uint32_t upper_32(uint64_t x);
+struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd);
+inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m);
+
+int init_queue(struct queue **q, struct queue_properties properties);
+void uninit_queue(struct queue *q);
+void print_queue_properties(struct queue_properties *q);
+void print_queue(struct queue *q);
+
+struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
+					struct kfd_dev *dev);
+struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
+		struct kfd_dev *dev);
+struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
+		struct kfd_dev *dev);
+struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
+void device_queue_manager_uninit(struct device_queue_manager *dqm);
+struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
+					enum kfd_queue_type type);
+void kernel_queue_uninit(struct kernel_queue *kq);
+
+/* Process Queue Manager */
+struct process_queue_node {
+	struct queue *q;
+	struct kernel_queue *kq;
+	struct list_head process_queue_list;
+};
+
+int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p);
+void pqm_uninit(struct process_queue_manager *pqm);
+int pqm_create_queue(struct process_queue_manager *pqm,
+			    struct kfd_dev *dev,
+			    struct file *f,
+			    struct queue_properties *properties,
+			    unsigned int flags,
+			    enum kfd_queue_type type,
+			    unsigned int *qid);
+int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
+int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
+			struct queue_properties *p);
+
+/* Packet Manager */
+
+#define KFD_HIQ_TIMEOUT (500)
+
+#define KFD_FENCE_COMPLETED (100)
+#define KFD_FENCE_INIT   (10)
+#define KFD_UNMAP_LATENCY (150)
+
+struct packet_manager {
+	struct device_queue_manager *dqm;
+	struct kernel_queue *priv_queue;
+	struct mutex lock;
+	bool allocated;
+	struct kfd_mem_obj *ib_buffer_obj;
+};
+
+int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
+void pm_uninit(struct packet_manager *pm);
+int pm_send_set_resources(struct packet_manager *pm,
+				struct scheduling_resources *res);
+int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
+int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
+				uint32_t fence_value);
+
+int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
+			enum kfd_preempt_type_filter mode,
+			uint32_t filter_param, bool reset,
+			unsigned int sdma_engine);
+
+void pm_release_ib(struct packet_manager *pm);
+
+uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
+phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
+					struct kfd_process *process);
+
+#endif
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_process.c
new file mode 100644
index 000000000..945d6226d
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -0,0 +1,433 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/mutex.h>
+#include <linux/log2.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/amd-iommu.h>
+#include <linux/notifier.h>
+#include <linux/compat.h>
+
+struct mm_struct;
+
+#include "kfd_priv.h"
+
+/*
+ * Initial size for the array of queues.
+ * The allocated size is doubled each time
+ * it is exceeded up to MAX_PROCESS_QUEUES.
+ */
+#define INITIAL_QUEUE_ARRAY_SIZE 16
+
+/*
+ * List of struct kfd_process (field kfd_process).
+ * Unique/indexed by mm_struct*
+ */
+#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
+static DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
+static DEFINE_MUTEX(kfd_processes_mutex);
+
+DEFINE_STATIC_SRCU(kfd_processes_srcu);
+
+static struct workqueue_struct *kfd_process_wq;
+
+struct kfd_process_release_work {
+	struct work_struct kfd_work;
+	struct kfd_process *p;
+};
+
+static struct kfd_process *find_process(const struct task_struct *thread);
+static struct kfd_process *create_process(const struct task_struct *thread);
+
+void kfd_process_create_wq(void)
+{
+	if (!kfd_process_wq)
+		kfd_process_wq = create_workqueue("kfd_process_wq");
+}
+
+void kfd_process_destroy_wq(void)
+{
+	if (kfd_process_wq) {
+		flush_workqueue(kfd_process_wq);
+		destroy_workqueue(kfd_process_wq);
+		kfd_process_wq = NULL;
+	}
+}
+
+struct kfd_process *kfd_create_process(const struct task_struct *thread)
+{
+	struct kfd_process *process;
+
+	BUG_ON(!kfd_process_wq);
+
+	if (thread->mm == NULL)
+		return ERR_PTR(-EINVAL);
+
+	/* Only the pthreads threading model is supported. */
+	if (thread->group_leader->mm != thread->mm)
+		return ERR_PTR(-EINVAL);
+
+	/* Take mmap_sem because we call __mmu_notifier_register inside */
+	down_write(&thread->mm->mmap_sem);
+
+	/*
+	 * take kfd processes mutex before starting of process creation
+	 * so there won't be a case where two threads of the same process
+	 * create two kfd_process structures
+	 */
+	mutex_lock(&kfd_processes_mutex);
+
+	/* A prior open of /dev/kfd could have already created the process. */
+	process = find_process(thread);
+	if (process)
+		pr_debug("kfd: process already found\n");
+
+	if (!process)
+		process = create_process(thread);
+
+	mutex_unlock(&kfd_processes_mutex);
+
+	up_write(&thread->mm->mmap_sem);
+
+	return process;
+}
+
+struct kfd_process *kfd_get_process(const struct task_struct *thread)
+{
+	struct kfd_process *process;
+
+	if (thread->mm == NULL)
+		return ERR_PTR(-EINVAL);
+
+	/* Only the pthreads threading model is supported. */
+	if (thread->group_leader->mm != thread->mm)
+		return ERR_PTR(-EINVAL);
+
+	process = find_process(thread);
+
+	return process;
+}
+
+static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
+{
+	struct kfd_process *process;
+
+	hash_for_each_possible_rcu(kfd_processes_table, process,
+					kfd_processes, (uintptr_t)mm)
+		if (process->mm == mm)
+			return process;
+
+	return NULL;
+}
+
+static struct kfd_process *find_process(const struct task_struct *thread)
+{
+	struct kfd_process *p;
+	int idx;
+
+	idx = srcu_read_lock(&kfd_processes_srcu);
+	p = find_process_by_mm(thread->mm);
+	srcu_read_unlock(&kfd_processes_srcu, idx);
+
+	return p;
+}
+
+static void kfd_process_wq_release(struct work_struct *work)
+{
+	struct kfd_process_release_work *my_work;
+	struct kfd_process_device *pdd, *temp;
+	struct kfd_process *p;
+
+	my_work = (struct kfd_process_release_work *) work;
+
+	p = my_work->p;
+
+	pr_debug("Releasing process (pasid %d) in workqueue\n",
+			p->pasid);
+
+	mutex_lock(&p->mutex);
+
+	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
+							per_device_list) {
+		pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n",
+				pdd->dev->id, p->pasid);
+
+		amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
+		list_del(&pdd->per_device_list);
+
+		kfree(pdd);
+	}
+
+	kfd_pasid_free(p->pasid);
+
+	mutex_unlock(&p->mutex);
+
+	mutex_destroy(&p->mutex);
+
+	kfree(p->queues);
+
+	kfree(p);
+
+	kfree((void *)work);
+}
+
+static void kfd_process_destroy_delayed(struct rcu_head *rcu)
+{
+	struct kfd_process_release_work *work;
+	struct kfd_process *p;
+
+	BUG_ON(!kfd_process_wq);
+
+	p = container_of(rcu, struct kfd_process, rcu);
+	BUG_ON(atomic_read(&p->mm->mm_count) <= 0);
+
+	mmdrop(p->mm);
+
+	work = (struct kfd_process_release_work *)
+		kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC);
+
+	if (work) {
+		INIT_WORK((struct work_struct *) work, kfd_process_wq_release);
+		work->p = p;
+		queue_work(kfd_process_wq, (struct work_struct *) work);
+	}
+}
+
+static void kfd_process_notifier_release(struct mmu_notifier *mn,
+					struct mm_struct *mm)
+{
+	struct kfd_process *p;
+
+	/*
+	 * The kfd_process structure can not be free because the
+	 * mmu_notifier srcu is read locked
+	 */
+	p = container_of(mn, struct kfd_process, mmu_notifier);
+	BUG_ON(p->mm != mm);
+
+	mutex_lock(&kfd_processes_mutex);
+	hash_del_rcu(&p->kfd_processes);
+	mutex_unlock(&kfd_processes_mutex);
+	synchronize_srcu(&kfd_processes_srcu);
+
+	mutex_lock(&p->mutex);
+
+	/* In case our notifier is called before IOMMU notifier */
+	pqm_uninit(&p->pqm);
+
+	mutex_unlock(&p->mutex);
+
+	/*
+	 * Because we drop mm_count inside kfd_process_destroy_delayed
+	 * and because the mmu_notifier_unregister function also drop
+	 * mm_count we need to take an extra count here.
+	 */
+	atomic_inc(&p->mm->mm_count);
+	mmu_notifier_unregister_no_release(&p->mmu_notifier, p->mm);
+	mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
+}
+
+static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
+	.release = kfd_process_notifier_release,
+};
+
+static struct kfd_process *create_process(const struct task_struct *thread)
+{
+	struct kfd_process *process;
+	int err = -ENOMEM;
+
+	process = kzalloc(sizeof(*process), GFP_KERNEL);
+
+	if (!process)
+		goto err_alloc_process;
+
+	process->queues = kmalloc_array(INITIAL_QUEUE_ARRAY_SIZE,
+					sizeof(process->queues[0]), GFP_KERNEL);
+	if (!process->queues)
+		goto err_alloc_queues;
+
+	process->pasid = kfd_pasid_alloc();
+	if (process->pasid == 0)
+		goto err_alloc_pasid;
+
+	mutex_init(&process->mutex);
+
+	process->mm = thread->mm;
+
+	/* register notifier */
+	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
+	err = __mmu_notifier_register(&process->mmu_notifier, process->mm);
+	if (err)
+		goto err_mmu_notifier;
+
+	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
+			(uintptr_t)process->mm);
+
+	process->lead_thread = thread->group_leader;
+
+	process->queue_array_size = INITIAL_QUEUE_ARRAY_SIZE;
+
+	INIT_LIST_HEAD(&process->per_device_data);
+
+	err = pqm_init(&process->pqm, process);
+	if (err != 0)
+		goto err_process_pqm_init;
+
+	/* init process apertures*/
+	process->is_32bit_user_mode = is_compat_task();
+	if (kfd_init_apertures(process) != 0)
+		goto err_init_apretures;
+
+	return process;
+
+err_init_apretures:
+	pqm_uninit(&process->pqm);
+err_process_pqm_init:
+	hash_del_rcu(&process->kfd_processes);
+	synchronize_rcu();
+	mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
+err_mmu_notifier:
+	kfd_pasid_free(process->pasid);
+err_alloc_pasid:
+	kfree(process->queues);
+err_alloc_queues:
+	kfree(process);
+err_alloc_process:
+	return ERR_PTR(err);
+}
+
+struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
+							struct kfd_process *p)
+{
+	struct kfd_process_device *pdd = NULL;
+
+	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
+		if (pdd->dev == dev)
+			break;
+
+	return pdd;
+}
+
+struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
+							struct kfd_process *p)
+{
+	struct kfd_process_device *pdd = NULL;
+
+	pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
+	if (pdd != NULL) {
+		pdd->dev = dev;
+		INIT_LIST_HEAD(&pdd->qpd.queues_list);
+		INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
+		pdd->qpd.dqm = dev->dqm;
+		list_add(&pdd->per_device_list, &p->per_device_data);
+	}
+
+	return pdd;
+}
+
+/*
+ * Direct the IOMMU to bind the process (specifically the pasid->mm)
+ * to the device.
+ * Unbinding occurs when the process dies or the device is removed.
+ *
+ * Assumes that the process lock is held.
+ */
+struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
+							struct kfd_process *p)
+{
+	struct kfd_process_device *pdd;
+	int err;
+
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd) {
+		pr_err("Process device data doesn't exist\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	if (pdd->bound)
+		return pdd;
+
+	err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread);
+	if (err < 0)
+		return ERR_PTR(err);
+
+	pdd->bound = true;
+
+	return pdd;
+}
+
+void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
+{
+	struct kfd_process *p;
+	struct kfd_process_device *pdd;
+	int idx, i;
+
+	BUG_ON(dev == NULL);
+
+	idx = srcu_read_lock(&kfd_processes_srcu);
+
+	hash_for_each_rcu(kfd_processes_table, i, p, kfd_processes)
+		if (p->pasid == pasid)
+			break;
+
+	srcu_read_unlock(&kfd_processes_srcu, idx);
+
+	BUG_ON(p->pasid != pasid);
+
+	mutex_lock(&p->mutex);
+
+	pqm_uninit(&p->pqm);
+
+	pdd = kfd_get_process_device_data(dev, p);
+
+	/*
+	 * Just mark pdd as unbound, because we still need it to call
+	 * amd_iommu_unbind_pasid() in when the process exits.
+	 * We don't call amd_iommu_unbind_pasid() here
+	 * because the IOMMU called us.
+	 */
+	if (pdd)
+		pdd->bound = false;
+
+	mutex_unlock(&p->mutex);
+}
+
+struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p)
+{
+	return list_first_entry(&p->per_device_data,
+				struct kfd_process_device,
+				per_device_list);
+}
+
+struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p,
+						struct kfd_process_device *pdd)
+{
+	if (list_is_last(&pdd->per_device_list, &p->per_device_data))
+		return NULL;
+	return list_next_entry(pdd, per_device_list);
+}
+
+bool kfd_has_process_device_data(struct kfd_process *p)
+{
+	return !(list_empty(&p->per_device_data));
+}
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
new file mode 100644
index 000000000..530b82c4e
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -0,0 +1,359 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/list.h>
+#include "kfd_device_queue_manager.h"
+#include "kfd_priv.h"
+#include "kfd_kernel_queue.h"
+
+static inline struct process_queue_node *get_queue_by_qid(
+			struct process_queue_manager *pqm, unsigned int qid)
+{
+	struct process_queue_node *pqn;
+
+	BUG_ON(!pqm);
+
+	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
+		if (pqn->q && pqn->q->properties.queue_id == qid)
+			return pqn;
+		if (pqn->kq && pqn->kq->queue->properties.queue_id == qid)
+			return pqn;
+	}
+
+	return NULL;
+}
+
+static int find_available_queue_slot(struct process_queue_manager *pqm,
+					unsigned int *qid)
+{
+	unsigned long found;
+
+	BUG_ON(!pqm || !qid);
+
+	pr_debug("kfd: in %s\n", __func__);
+
+	found = find_first_zero_bit(pqm->queue_slot_bitmap,
+			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
+
+	pr_debug("kfd: the new slot id %lu\n", found);
+
+	if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
+		pr_info("amdkfd: Can not open more queues for process with pasid %d\n",
+				pqm->process->pasid);
+		return -ENOMEM;
+	}
+
+	set_bit(found, pqm->queue_slot_bitmap);
+	*qid = found;
+
+	return 0;
+}
+
+int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
+{
+	BUG_ON(!pqm);
+
+	INIT_LIST_HEAD(&pqm->queues);
+	pqm->queue_slot_bitmap =
+			kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+					BITS_PER_BYTE), GFP_KERNEL);
+	if (pqm->queue_slot_bitmap == NULL)
+		return -ENOMEM;
+	pqm->process = p;
+
+	return 0;
+}
+
+void pqm_uninit(struct process_queue_manager *pqm)
+{
+	int retval;
+	struct process_queue_node *pqn, *next;
+
+	BUG_ON(!pqm);
+
+	pr_debug("In func %s\n", __func__);
+
+	list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
+		retval = pqm_destroy_queue(
+				pqm,
+				(pqn->q != NULL) ?
+					pqn->q->properties.queue_id :
+					pqn->kq->queue->properties.queue_id);
+
+		if (retval != 0) {
+			pr_err("kfd: failed to destroy queue\n");
+			return;
+		}
+	}
+	kfree(pqm->queue_slot_bitmap);
+	pqm->queue_slot_bitmap = NULL;
+}
+
+static int create_cp_queue(struct process_queue_manager *pqm,
+				struct kfd_dev *dev, struct queue **q,
+				struct queue_properties *q_properties,
+				struct file *f, unsigned int qid)
+{
+	int retval;
+
+	retval = 0;
+
+	/* Doorbell initialized in user space*/
+	q_properties->doorbell_ptr = NULL;
+
+	q_properties->doorbell_off =
+			kfd_queue_id_to_doorbell(dev, pqm->process, qid);
+
+	/* let DQM handle it*/
+	q_properties->vmid = 0;
+	q_properties->queue_id = qid;
+
+	retval = init_queue(q, *q_properties);
+	if (retval != 0)
+		goto err_init_queue;
+
+	(*q)->device = dev;
+	(*q)->process = pqm->process;
+
+	pr_debug("kfd: PQM After init queue");
+
+	return retval;
+
+err_init_queue:
+	return retval;
+}
+
+int pqm_create_queue(struct process_queue_manager *pqm,
+			    struct kfd_dev *dev,
+			    struct file *f,
+			    struct queue_properties *properties,
+			    unsigned int flags,
+			    enum kfd_queue_type type,
+			    unsigned int *qid)
+{
+	int retval;
+	struct kfd_process_device *pdd;
+	struct queue_properties q_properties;
+	struct queue *q;
+	struct process_queue_node *pqn;
+	struct kernel_queue *kq;
+
+	BUG_ON(!pqm || !dev || !properties || !qid);
+
+	memset(&q_properties, 0, sizeof(struct queue_properties));
+	memcpy(&q_properties, properties, sizeof(struct queue_properties));
+	q = NULL;
+	kq = NULL;
+
+	pdd = kfd_get_process_device_data(dev, pqm->process);
+	if (!pdd) {
+		pr_err("Process device data doesn't exist\n");
+		return -1;
+	}
+
+	retval = find_available_queue_slot(pqm, qid);
+	if (retval != 0)
+		return retval;
+
+	if (list_empty(&pqm->queues)) {
+		pdd->qpd.pqm = pqm;
+		dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
+	}
+
+	pqn = kzalloc(sizeof(struct process_queue_node), GFP_KERNEL);
+	if (!pqn) {
+		retval = -ENOMEM;
+		goto err_allocate_pqn;
+	}
+
+	switch (type) {
+	case KFD_QUEUE_TYPE_SDMA:
+	case KFD_QUEUE_TYPE_COMPUTE:
+		/* check if there is over subscription */
+		if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
+		((dev->dqm->processes_count >= VMID_PER_DEVICE) ||
+		(dev->dqm->queue_count >= PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE))) {
+			pr_err("kfd: over-subscription is not allowed in radeon_kfd.sched_policy == 1\n");
+			retval = -EPERM;
+			goto err_create_queue;
+		}
+
+		retval = create_cp_queue(pqm, dev, &q, &q_properties, f, *qid);
+		if (retval != 0)
+			goto err_create_queue;
+		pqn->q = q;
+		pqn->kq = NULL;
+		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd,
+						&q->properties.vmid);
+		pr_debug("DQM returned %d for create_queue\n", retval);
+		print_queue(q);
+		break;
+	case KFD_QUEUE_TYPE_DIQ:
+		kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ);
+		if (kq == NULL) {
+			retval = -ENOMEM;
+			goto err_create_queue;
+		}
+		kq->queue->properties.queue_id = *qid;
+		pqn->kq = kq;
+		pqn->q = NULL;
+		retval = dev->dqm->ops.create_kernel_queue(dev->dqm,
+							kq, &pdd->qpd);
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	if (retval != 0) {
+		pr_debug("Error dqm create queue\n");
+		goto err_create_queue;
+	}
+
+	pr_debug("kfd: PQM After DQM create queue\n");
+
+	list_add(&pqn->process_queue_list, &pqm->queues);
+
+	if (q) {
+		*properties = q->properties;
+		pr_debug("kfd: PQM done creating queue\n");
+		print_queue_properties(properties);
+	}
+
+	return retval;
+
+err_create_queue:
+	kfree(pqn);
+err_allocate_pqn:
+	/* check if queues list is empty unregister process from device */
+	clear_bit(*qid, pqm->queue_slot_bitmap);
+	if (list_empty(&pqm->queues))
+		dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd);
+	return retval;
+}
+
+int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
+{
+	struct process_queue_node *pqn;
+	struct kfd_process_device *pdd;
+	struct device_queue_manager *dqm;
+	struct kfd_dev *dev;
+	int retval;
+
+	dqm = NULL;
+
+	BUG_ON(!pqm);
+	retval = 0;
+
+	pr_debug("kfd: In Func %s\n", __func__);
+
+	pqn = get_queue_by_qid(pqm, qid);
+	if (pqn == NULL) {
+		pr_err("kfd: queue id does not match any known queue\n");
+		return -EINVAL;
+	}
+
+	dev = NULL;
+	if (pqn->kq)
+		dev = pqn->kq->dev;
+	if (pqn->q)
+		dev = pqn->q->device;
+	BUG_ON(!dev);
+
+	pdd = kfd_get_process_device_data(dev, pqm->process);
+	if (!pdd) {
+		pr_err("Process device data doesn't exist\n");
+		return -1;
+	}
+
+	if (pqn->kq) {
+		/* destroy kernel queue (DIQ) */
+		dqm = pqn->kq->dev->dqm;
+		dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
+		kernel_queue_uninit(pqn->kq);
+	}
+
+	if (pqn->q) {
+		dqm = pqn->q->device->dqm;
+		retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
+		if (retval != 0)
+			return retval;
+
+		uninit_queue(pqn->q);
+	}
+
+	list_del(&pqn->process_queue_list);
+	kfree(pqn);
+	clear_bit(qid, pqm->queue_slot_bitmap);
+
+	if (list_empty(&pqm->queues))
+		dqm->ops.unregister_process(dqm, &pdd->qpd);
+
+	return retval;
+}
+
+int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
+			struct queue_properties *p)
+{
+	int retval;
+	struct process_queue_node *pqn;
+
+	BUG_ON(!pqm);
+
+	pqn = get_queue_by_qid(pqm, qid);
+	if (!pqn) {
+		pr_debug("amdkfd: No queue %d exists for update operation\n",
+				qid);
+		return -EFAULT;
+	}
+
+	pqn->q->properties.queue_address = p->queue_address;
+	pqn->q->properties.queue_size = p->queue_size;
+	pqn->q->properties.queue_percent = p->queue_percent;
+	pqn->q->properties.priority = p->priority;
+
+	retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
+							pqn->q);
+	if (retval != 0)
+		return retval;
+
+	return 0;
+}
+
+static __attribute__((unused)) struct kernel_queue *pqm_get_kernel_queue(
+					struct process_queue_manager *pqm,
+					unsigned int qid)
+{
+	struct process_queue_node *pqn;
+
+	BUG_ON(!pqm);
+
+	pqn = get_queue_by_qid(pqm, qid);
+	if (pqn && pqn->kq)
+		return pqn->kq;
+
+	return NULL;
+}
+
+
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
new file mode 100644
index 000000000..9a0c90b07
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/slab.h>
+#include "kfd_priv.h"
+
+void print_queue_properties(struct queue_properties *q)
+{
+	if (!q)
+		return;
+
+	pr_debug("Printing queue properties:\n");
+	pr_debug("Queue Type: %u\n", q->type);
+	pr_debug("Queue Size: %llu\n", q->queue_size);
+	pr_debug("Queue percent: %u\n", q->queue_percent);
+	pr_debug("Queue Address: 0x%llX\n", q->queue_address);
+	pr_debug("Queue Id: %u\n", q->queue_id);
+	pr_debug("Queue Process Vmid: %u\n", q->vmid);
+	pr_debug("Queue Read Pointer: 0x%p\n", q->read_ptr);
+	pr_debug("Queue Write Pointer: 0x%p\n", q->write_ptr);
+	pr_debug("Queue Doorbell Pointer: 0x%p\n", q->doorbell_ptr);
+	pr_debug("Queue Doorbell Offset: %u\n", q->doorbell_off);
+}
+
+void print_queue(struct queue *q)
+{
+	if (!q)
+		return;
+	pr_debug("Printing queue:\n");
+	pr_debug("Queue Type: %u\n", q->properties.type);
+	pr_debug("Queue Size: %llu\n", q->properties.queue_size);
+	pr_debug("Queue percent: %u\n", q->properties.queue_percent);
+	pr_debug("Queue Address: 0x%llX\n", q->properties.queue_address);
+	pr_debug("Queue Id: %u\n", q->properties.queue_id);
+	pr_debug("Queue Process Vmid: %u\n", q->properties.vmid);
+	pr_debug("Queue Read Pointer: 0x%p\n", q->properties.read_ptr);
+	pr_debug("Queue Write Pointer: 0x%p\n", q->properties.write_ptr);
+	pr_debug("Queue Doorbell Pointer: 0x%p\n", q->properties.doorbell_ptr);
+	pr_debug("Queue Doorbell Offset: %u\n", q->properties.doorbell_off);
+	pr_debug("Queue MQD Address: 0x%p\n", q->mqd);
+	pr_debug("Queue MQD Gart: 0x%llX\n", q->gart_mqd_addr);
+	pr_debug("Queue Process Address: 0x%p\n", q->process);
+	pr_debug("Queue Device Address: 0x%p\n", q->device);
+}
+
+int init_queue(struct queue **q, struct queue_properties properties)
+{
+	struct queue *tmp;
+
+	BUG_ON(!q);
+
+	tmp = kzalloc(sizeof(struct queue), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	memcpy(&tmp->properties, &properties, sizeof(struct queue_properties));
+
+	*q = tmp;
+	return 0;
+}
+
+void uninit_queue(struct queue *q)
+{
+	kfree(q);
+}
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
new file mode 100644
index 000000000..c25728bc3
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -0,0 +1,1254 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/acpi.h>
+#include <linux/hash.h>
+#include <linux/cpufreq.h>
+#include <linux/log2.h>
+
+#include "kfd_priv.h"
+#include "kfd_crat.h"
+#include "kfd_topology.h"
+
+static struct list_head topology_device_list;
+static int topology_crat_parsed;
+static struct kfd_system_properties sys_props;
+
+static DECLARE_RWSEM(topology_lock);
+
+struct kfd_dev *kfd_device_by_id(uint32_t gpu_id)
+{
+	struct kfd_topology_device *top_dev;
+	struct kfd_dev *device = NULL;
+
+	down_read(&topology_lock);
+
+	list_for_each_entry(top_dev, &topology_device_list, list)
+		if (top_dev->gpu_id == gpu_id) {
+			device = top_dev->gpu;
+			break;
+		}
+
+	up_read(&topology_lock);
+
+	return device;
+}
+
+struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
+{
+	struct kfd_topology_device *top_dev;
+	struct kfd_dev *device = NULL;
+
+	down_read(&topology_lock);
+
+	list_for_each_entry(top_dev, &topology_device_list, list)
+		if (top_dev->gpu->pdev == pdev) {
+			device = top_dev->gpu;
+			break;
+		}
+
+	up_read(&topology_lock);
+
+	return device;
+}
+
+static int kfd_topology_get_crat_acpi(void *crat_image, size_t *size)
+{
+	struct acpi_table_header *crat_table;
+	acpi_status status;
+
+	if (!size)
+		return -EINVAL;
+
+	/*
+	 * Fetch the CRAT table from ACPI
+	 */
+	status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
+	if (status == AE_NOT_FOUND) {
+		pr_warn("CRAT table not found\n");
+		return -ENODATA;
+	} else if (ACPI_FAILURE(status)) {
+		const char *err = acpi_format_exception(status);
+
+		pr_err("CRAT table error: %s\n", err);
+		return -EINVAL;
+	}
+
+	if (*size >= crat_table->length && crat_image != NULL)
+		memcpy(crat_image, crat_table, crat_table->length);
+
+	*size = crat_table->length;
+
+	return 0;
+}
+
+static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
+		struct crat_subtype_computeunit *cu)
+{
+	BUG_ON(!dev);
+	BUG_ON(!cu);
+
+	dev->node_props.cpu_cores_count = cu->num_cpu_cores;
+	dev->node_props.cpu_core_id_base = cu->processor_id_low;
+	if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT)
+		dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
+
+	pr_info("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores,
+			cu->processor_id_low);
+}
+
+static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev,
+		struct crat_subtype_computeunit *cu)
+{
+	BUG_ON(!dev);
+	BUG_ON(!cu);
+
+	dev->node_props.simd_id_base = cu->processor_id_low;
+	dev->node_props.simd_count = cu->num_simd_cores;
+	dev->node_props.lds_size_in_kb = cu->lds_size_in_kb;
+	dev->node_props.max_waves_per_simd = cu->max_waves_simd;
+	dev->node_props.wave_front_size = cu->wave_front_size;
+	dev->node_props.mem_banks_count = cu->num_banks;
+	dev->node_props.array_count = cu->num_arrays;
+	dev->node_props.cu_per_simd_array = cu->num_cu_per_array;
+	dev->node_props.simd_per_cu = cu->num_simd_per_cu;
+	dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu;
+	if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE)
+		dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE;
+	pr_info("CU GPU: simds=%d id_base=%d\n", cu->num_simd_cores,
+				cu->processor_id_low);
+}
+
+/* kfd_parse_subtype_cu is called when the topology mutex is already acquired */
+static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu)
+{
+	struct kfd_topology_device *dev;
+	int i = 0;
+
+	BUG_ON(!cu);
+
+	pr_info("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n",
+			cu->proximity_domain, cu->hsa_capability);
+	list_for_each_entry(dev, &topology_device_list, list) {
+		if (cu->proximity_domain == i) {
+			if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT)
+				kfd_populated_cu_info_cpu(dev, cu);
+
+			if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT)
+				kfd_populated_cu_info_gpu(dev, cu);
+			break;
+		}
+		i++;
+	}
+
+	return 0;
+}
+
+/*
+ * kfd_parse_subtype_mem is called when the topology mutex is
+ * already acquired
+ */
+static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem)
+{
+	struct kfd_mem_properties *props;
+	struct kfd_topology_device *dev;
+	int i = 0;
+
+	BUG_ON(!mem);
+
+	pr_info("Found memory entry in CRAT table with proximity_domain=%d\n",
+			mem->promixity_domain);
+	list_for_each_entry(dev, &topology_device_list, list) {
+		if (mem->promixity_domain == i) {
+			props = kfd_alloc_struct(props);
+			if (props == NULL)
+				return -ENOMEM;
+
+			if (dev->node_props.cpu_cores_count == 0)
+				props->heap_type = HSA_MEM_HEAP_TYPE_FB_PRIVATE;
+			else
+				props->heap_type = HSA_MEM_HEAP_TYPE_SYSTEM;
+
+			if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE)
+				props->flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE;
+			if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE)
+				props->flags |= HSA_MEM_FLAGS_NON_VOLATILE;
+
+			props->size_in_bytes =
+				((uint64_t)mem->length_high << 32) +
+							mem->length_low;
+			props->width = mem->width;
+
+			dev->mem_bank_count++;
+			list_add_tail(&props->list, &dev->mem_props);
+
+			break;
+		}
+		i++;
+	}
+
+	return 0;
+}
+
+/*
+ * kfd_parse_subtype_cache is called when the topology mutex
+ * is already acquired
+ */
+static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache)
+{
+	struct kfd_cache_properties *props;
+	struct kfd_topology_device *dev;
+	uint32_t id;
+
+	BUG_ON(!cache);
+
+	id = cache->processor_id_low;
+
+	pr_info("Found cache entry in CRAT table with processor_id=%d\n", id);
+	list_for_each_entry(dev, &topology_device_list, list)
+		if (id == dev->node_props.cpu_core_id_base ||
+		    id == dev->node_props.simd_id_base) {
+			props = kfd_alloc_struct(props);
+			if (props == NULL)
+				return -ENOMEM;
+
+			props->processor_id_low = id;
+			props->cache_level = cache->cache_level;
+			props->cache_size = cache->cache_size;
+			props->cacheline_size = cache->cache_line_size;
+			props->cachelines_per_tag = cache->lines_per_tag;
+			props->cache_assoc = cache->associativity;
+			props->cache_latency = cache->cache_latency;
+
+			if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
+				props->cache_type |= HSA_CACHE_TYPE_DATA;
+			if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE)
+				props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
+			if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE)
+				props->cache_type |= HSA_CACHE_TYPE_CPU;
+			if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
+				props->cache_type |= HSA_CACHE_TYPE_HSACU;
+
+			dev->cache_count++;
+			dev->node_props.caches_count++;
+			list_add_tail(&props->list, &dev->cache_props);
+
+			break;
+		}
+
+	return 0;
+}
+
+/*
+ * kfd_parse_subtype_iolink is called when the topology mutex
+ * is already acquired
+ */
+static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink)
+{
+	struct kfd_iolink_properties *props;
+	struct kfd_topology_device *dev;
+	uint32_t i = 0;
+	uint32_t id_from;
+	uint32_t id_to;
+
+	BUG_ON(!iolink);
+
+	id_from = iolink->proximity_domain_from;
+	id_to = iolink->proximity_domain_to;
+
+	pr_info("Found IO link entry in CRAT table with id_from=%d\n", id_from);
+	list_for_each_entry(dev, &topology_device_list, list) {
+		if (id_from == i) {
+			props = kfd_alloc_struct(props);
+			if (props == NULL)
+				return -ENOMEM;
+
+			props->node_from = id_from;
+			props->node_to = id_to;
+			props->ver_maj = iolink->version_major;
+			props->ver_min = iolink->version_minor;
+
+			/*
+			 * weight factor (derived from CDIR), currently always 1
+			 */
+			props->weight = 1;
+
+			props->min_latency = iolink->minimum_latency;
+			props->max_latency = iolink->maximum_latency;
+			props->min_bandwidth = iolink->minimum_bandwidth_mbs;
+			props->max_bandwidth = iolink->maximum_bandwidth_mbs;
+			props->rec_transfer_size =
+					iolink->recommended_transfer_size;
+
+			dev->io_link_count++;
+			dev->node_props.io_links_count++;
+			list_add_tail(&props->list, &dev->io_link_props);
+
+			break;
+		}
+		i++;
+	}
+
+	return 0;
+}
+
+static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr)
+{
+	struct crat_subtype_computeunit *cu;
+	struct crat_subtype_memory *mem;
+	struct crat_subtype_cache *cache;
+	struct crat_subtype_iolink *iolink;
+	int ret = 0;
+
+	BUG_ON(!sub_type_hdr);
+
+	switch (sub_type_hdr->type) {
+	case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY:
+		cu = (struct crat_subtype_computeunit *)sub_type_hdr;
+		ret = kfd_parse_subtype_cu(cu);
+		break;
+	case CRAT_SUBTYPE_MEMORY_AFFINITY:
+		mem = (struct crat_subtype_memory *)sub_type_hdr;
+		ret = kfd_parse_subtype_mem(mem);
+		break;
+	case CRAT_SUBTYPE_CACHE_AFFINITY:
+		cache = (struct crat_subtype_cache *)sub_type_hdr;
+		ret = kfd_parse_subtype_cache(cache);
+		break;
+	case CRAT_SUBTYPE_TLB_AFFINITY:
+		/*
+		 * For now, nothing to do here
+		 */
+		pr_info("Found TLB entry in CRAT table (not processing)\n");
+		break;
+	case CRAT_SUBTYPE_CCOMPUTE_AFFINITY:
+		/*
+		 * For now, nothing to do here
+		 */
+		pr_info("Found CCOMPUTE entry in CRAT table (not processing)\n");
+		break;
+	case CRAT_SUBTYPE_IOLINK_AFFINITY:
+		iolink = (struct crat_subtype_iolink *)sub_type_hdr;
+		ret = kfd_parse_subtype_iolink(iolink);
+		break;
+	default:
+		pr_warn("Unknown subtype (%d) in CRAT\n",
+				sub_type_hdr->type);
+	}
+
+	return ret;
+}
+
+static void kfd_release_topology_device(struct kfd_topology_device *dev)
+{
+	struct kfd_mem_properties *mem;
+	struct kfd_cache_properties *cache;
+	struct kfd_iolink_properties *iolink;
+
+	BUG_ON(!dev);
+
+	list_del(&dev->list);
+
+	while (dev->mem_props.next != &dev->mem_props) {
+		mem = container_of(dev->mem_props.next,
+				struct kfd_mem_properties, list);
+		list_del(&mem->list);
+		kfree(mem);
+	}
+
+	while (dev->cache_props.next != &dev->cache_props) {
+		cache = container_of(dev->cache_props.next,
+				struct kfd_cache_properties, list);
+		list_del(&cache->list);
+		kfree(cache);
+	}
+
+	while (dev->io_link_props.next != &dev->io_link_props) {
+		iolink = container_of(dev->io_link_props.next,
+				struct kfd_iolink_properties, list);
+		list_del(&iolink->list);
+		kfree(iolink);
+	}
+
+	kfree(dev);
+
+	sys_props.num_devices--;
+}
+
+static void kfd_release_live_view(void)
+{
+	struct kfd_topology_device *dev;
+
+	while (topology_device_list.next != &topology_device_list) {
+		dev = container_of(topology_device_list.next,
+				 struct kfd_topology_device, list);
+		kfd_release_topology_device(dev);
+}
+
+	memset(&sys_props, 0, sizeof(sys_props));
+}
+
+static struct kfd_topology_device *kfd_create_topology_device(void)
+{
+	struct kfd_topology_device *dev;
+
+	dev = kfd_alloc_struct(dev);
+	if (dev == NULL) {
+		pr_err("No memory to allocate a topology device");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&dev->mem_props);
+	INIT_LIST_HEAD(&dev->cache_props);
+	INIT_LIST_HEAD(&dev->io_link_props);
+
+	list_add_tail(&dev->list, &topology_device_list);
+	sys_props.num_devices++;
+
+	return dev;
+}
+
+static int kfd_parse_crat_table(void *crat_image)
+{
+	struct kfd_topology_device *top_dev;
+	struct crat_subtype_generic *sub_type_hdr;
+	uint16_t node_id;
+	int ret;
+	struct crat_header *crat_table = (struct crat_header *)crat_image;
+	uint16_t num_nodes;
+	uint32_t image_len;
+
+	if (!crat_image)
+		return -EINVAL;
+
+	num_nodes = crat_table->num_domains;
+	image_len = crat_table->length;
+
+	pr_info("Parsing CRAT table with %d nodes\n", num_nodes);
+
+	for (node_id = 0; node_id < num_nodes; node_id++) {
+		top_dev = kfd_create_topology_device();
+		if (!top_dev) {
+			kfd_release_live_view();
+			return -ENOMEM;
+		}
+	}
+
+	sys_props.platform_id =
+		(*((uint64_t *)crat_table->oem_id)) & CRAT_OEMID_64BIT_MASK;
+	sys_props.platform_oem = *((uint64_t *)crat_table->oem_table_id);
+	sys_props.platform_rev = crat_table->revision;
+
+	sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
+	while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) <
+			((char *)crat_image) + image_len) {
+		if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) {
+			ret = kfd_parse_subtype(sub_type_hdr);
+			if (ret != 0) {
+				kfd_release_live_view();
+				return ret;
+			}
+		}
+
+		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+				sub_type_hdr->length);
+	}
+
+	sys_props.generation_count++;
+	topology_crat_parsed = 1;
+
+	return 0;
+}
+
+
+#define sysfs_show_gen_prop(buffer, fmt, ...) \
+		snprintf(buffer, PAGE_SIZE, "%s"fmt, buffer, __VA_ARGS__)
+#define sysfs_show_32bit_prop(buffer, name, value) \
+		sysfs_show_gen_prop(buffer, "%s %u\n", name, value)
+#define sysfs_show_64bit_prop(buffer, name, value) \
+		sysfs_show_gen_prop(buffer, "%s %llu\n", name, value)
+#define sysfs_show_32bit_val(buffer, value) \
+		sysfs_show_gen_prop(buffer, "%u\n", value)
+#define sysfs_show_str_val(buffer, value) \
+		sysfs_show_gen_prop(buffer, "%s\n", value)
+
+static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr,
+		char *buffer)
+{
+	ssize_t ret;
+
+	/* Making sure that the buffer is an empty string */
+	buffer[0] = 0;
+
+	if (attr == &sys_props.attr_genid) {
+		ret = sysfs_show_32bit_val(buffer, sys_props.generation_count);
+	} else if (attr == &sys_props.attr_props) {
+		sysfs_show_64bit_prop(buffer, "platform_oem",
+				sys_props.platform_oem);
+		sysfs_show_64bit_prop(buffer, "platform_id",
+				sys_props.platform_id);
+		ret = sysfs_show_64bit_prop(buffer, "platform_rev",
+				sys_props.platform_rev);
+	} else {
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static const struct sysfs_ops sysprops_ops = {
+	.show = sysprops_show,
+};
+
+static struct kobj_type sysprops_type = {
+	.sysfs_ops = &sysprops_ops,
+};
+
+static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr,
+		char *buffer)
+{
+	ssize_t ret;
+	struct kfd_iolink_properties *iolink;
+
+	/* Making sure that the buffer is an empty string */
+	buffer[0] = 0;
+
+	iolink = container_of(attr, struct kfd_iolink_properties, attr);
+	sysfs_show_32bit_prop(buffer, "type", iolink->iolink_type);
+	sysfs_show_32bit_prop(buffer, "version_major", iolink->ver_maj);
+	sysfs_show_32bit_prop(buffer, "version_minor", iolink->ver_min);
+	sysfs_show_32bit_prop(buffer, "node_from", iolink->node_from);
+	sysfs_show_32bit_prop(buffer, "node_to", iolink->node_to);
+	sysfs_show_32bit_prop(buffer, "weight", iolink->weight);
+	sysfs_show_32bit_prop(buffer, "min_latency", iolink->min_latency);
+	sysfs_show_32bit_prop(buffer, "max_latency", iolink->max_latency);
+	sysfs_show_32bit_prop(buffer, "min_bandwidth", iolink->min_bandwidth);
+	sysfs_show_32bit_prop(buffer, "max_bandwidth", iolink->max_bandwidth);
+	sysfs_show_32bit_prop(buffer, "recommended_transfer_size",
+			iolink->rec_transfer_size);
+	ret = sysfs_show_32bit_prop(buffer, "flags", iolink->flags);
+
+	return ret;
+}
+
+static const struct sysfs_ops iolink_ops = {
+	.show = iolink_show,
+};
+
+static struct kobj_type iolink_type = {
+	.sysfs_ops = &iolink_ops,
+};
+
+static ssize_t mem_show(struct kobject *kobj, struct attribute *attr,
+		char *buffer)
+{
+	ssize_t ret;
+	struct kfd_mem_properties *mem;
+
+	/* Making sure that the buffer is an empty string */
+	buffer[0] = 0;
+
+	mem = container_of(attr, struct kfd_mem_properties, attr);
+	sysfs_show_32bit_prop(buffer, "heap_type", mem->heap_type);
+	sysfs_show_64bit_prop(buffer, "size_in_bytes", mem->size_in_bytes);
+	sysfs_show_32bit_prop(buffer, "flags", mem->flags);
+	sysfs_show_32bit_prop(buffer, "width", mem->width);
+	ret = sysfs_show_32bit_prop(buffer, "mem_clk_max", mem->mem_clk_max);
+
+	return ret;
+}
+
+static const struct sysfs_ops mem_ops = {
+	.show = mem_show,
+};
+
+static struct kobj_type mem_type = {
+	.sysfs_ops = &mem_ops,
+};
+
+static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
+		char *buffer)
+{
+	ssize_t ret;
+	uint32_t i;
+	struct kfd_cache_properties *cache;
+
+	/* Making sure that the buffer is an empty string */
+	buffer[0] = 0;
+
+	cache = container_of(attr, struct kfd_cache_properties, attr);
+	sysfs_show_32bit_prop(buffer, "processor_id_low",
+			cache->processor_id_low);
+	sysfs_show_32bit_prop(buffer, "level", cache->cache_level);
+	sysfs_show_32bit_prop(buffer, "size", cache->cache_size);
+	sysfs_show_32bit_prop(buffer, "cache_line_size", cache->cacheline_size);
+	sysfs_show_32bit_prop(buffer, "cache_lines_per_tag",
+			cache->cachelines_per_tag);
+	sysfs_show_32bit_prop(buffer, "association", cache->cache_assoc);
+	sysfs_show_32bit_prop(buffer, "latency", cache->cache_latency);
+	sysfs_show_32bit_prop(buffer, "type", cache->cache_type);
+	snprintf(buffer, PAGE_SIZE, "%ssibling_map ", buffer);
+	for (i = 0; i < KFD_TOPOLOGY_CPU_SIBLINGS; i++)
+		ret = snprintf(buffer, PAGE_SIZE, "%s%d%s",
+				buffer, cache->sibling_map[i],
+				(i == KFD_TOPOLOGY_CPU_SIBLINGS-1) ?
+						"\n" : ",");
+
+	return ret;
+}
+
+static const struct sysfs_ops cache_ops = {
+	.show = kfd_cache_show,
+};
+
+static struct kobj_type cache_type = {
+	.sysfs_ops = &cache_ops,
+};
+
+static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
+		char *buffer)
+{
+	struct kfd_topology_device *dev;
+	char public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
+	uint32_t i;
+	uint32_t log_max_watch_addr;
+
+	/* Making sure that the buffer is an empty string */
+	buffer[0] = 0;
+
+	if (strcmp(attr->name, "gpu_id") == 0) {
+		dev = container_of(attr, struct kfd_topology_device,
+				attr_gpuid);
+		return sysfs_show_32bit_val(buffer, dev->gpu_id);
+	}
+
+	if (strcmp(attr->name, "name") == 0) {
+		dev = container_of(attr, struct kfd_topology_device,
+				attr_name);
+		for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE; i++) {
+			public_name[i] =
+					(char)dev->node_props.marketing_name[i];
+			if (dev->node_props.marketing_name[i] == 0)
+				break;
+		}
+		public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1] = 0x0;
+		return sysfs_show_str_val(buffer, public_name);
+	}
+
+	dev = container_of(attr, struct kfd_topology_device,
+			attr_props);
+	sysfs_show_32bit_prop(buffer, "cpu_cores_count",
+			dev->node_props.cpu_cores_count);
+	sysfs_show_32bit_prop(buffer, "simd_count",
+			dev->node_props.simd_count);
+
+	if (dev->mem_bank_count < dev->node_props.mem_banks_count) {
+		pr_warn("kfd: mem_banks_count truncated from %d to %d\n",
+				dev->node_props.mem_banks_count,
+				dev->mem_bank_count);
+		sysfs_show_32bit_prop(buffer, "mem_banks_count",
+				dev->mem_bank_count);
+	} else {
+		sysfs_show_32bit_prop(buffer, "mem_banks_count",
+				dev->node_props.mem_banks_count);
+	}
+
+	sysfs_show_32bit_prop(buffer, "caches_count",
+			dev->node_props.caches_count);
+	sysfs_show_32bit_prop(buffer, "io_links_count",
+			dev->node_props.io_links_count);
+	sysfs_show_32bit_prop(buffer, "cpu_core_id_base",
+			dev->node_props.cpu_core_id_base);
+	sysfs_show_32bit_prop(buffer, "simd_id_base",
+			dev->node_props.simd_id_base);
+	sysfs_show_32bit_prop(buffer, "max_waves_per_simd",
+			dev->node_props.max_waves_per_simd);
+	sysfs_show_32bit_prop(buffer, "lds_size_in_kb",
+			dev->node_props.lds_size_in_kb);
+	sysfs_show_32bit_prop(buffer, "gds_size_in_kb",
+			dev->node_props.gds_size_in_kb);
+	sysfs_show_32bit_prop(buffer, "wave_front_size",
+			dev->node_props.wave_front_size);
+	sysfs_show_32bit_prop(buffer, "array_count",
+			dev->node_props.array_count);
+	sysfs_show_32bit_prop(buffer, "simd_arrays_per_engine",
+			dev->node_props.simd_arrays_per_engine);
+	sysfs_show_32bit_prop(buffer, "cu_per_simd_array",
+			dev->node_props.cu_per_simd_array);
+	sysfs_show_32bit_prop(buffer, "simd_per_cu",
+			dev->node_props.simd_per_cu);
+	sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu",
+			dev->node_props.max_slots_scratch_cu);
+	sysfs_show_32bit_prop(buffer, "vendor_id",
+			dev->node_props.vendor_id);
+	sysfs_show_32bit_prop(buffer, "device_id",
+			dev->node_props.device_id);
+	sysfs_show_32bit_prop(buffer, "location_id",
+			dev->node_props.location_id);
+
+	if (dev->gpu) {
+		log_max_watch_addr =
+			__ilog2_u32(dev->gpu->device_info->num_of_watch_points);
+
+		if (log_max_watch_addr) {
+			dev->node_props.capability |=
+					HSA_CAP_WATCH_POINTS_SUPPORTED;
+
+			dev->node_props.capability |=
+				((log_max_watch_addr <<
+					HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT) &
+				HSA_CAP_WATCH_POINTS_TOTALBITS_MASK);
+		}
+
+		sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute",
+			dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(
+					dev->gpu->kgd));
+
+		sysfs_show_64bit_prop(buffer, "local_mem_size",
+				(unsigned long long int) 0);
+
+		sysfs_show_32bit_prop(buffer, "fw_version",
+			dev->gpu->kfd2kgd->get_fw_version(
+						dev->gpu->kgd,
+						KGD_ENGINE_MEC1));
+		sysfs_show_32bit_prop(buffer, "capability",
+				dev->node_props.capability);
+	}
+
+	return sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute",
+					cpufreq_quick_get_max(0)/1000);
+}
+
+static const struct sysfs_ops node_ops = {
+	.show = node_show,
+};
+
+static struct kobj_type node_type = {
+	.sysfs_ops = &node_ops,
+};
+
+static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr)
+{
+	sysfs_remove_file(kobj, attr);
+	kobject_del(kobj);
+	kobject_put(kobj);
+}
+
+static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
+{
+	struct kfd_iolink_properties *iolink;
+	struct kfd_cache_properties *cache;
+	struct kfd_mem_properties *mem;
+
+	BUG_ON(!dev);
+
+	if (dev->kobj_iolink) {
+		list_for_each_entry(iolink, &dev->io_link_props, list)
+			if (iolink->kobj) {
+				kfd_remove_sysfs_file(iolink->kobj,
+							&iolink->attr);
+				iolink->kobj = NULL;
+			}
+		kobject_del(dev->kobj_iolink);
+		kobject_put(dev->kobj_iolink);
+		dev->kobj_iolink = NULL;
+	}
+
+	if (dev->kobj_cache) {
+		list_for_each_entry(cache, &dev->cache_props, list)
+			if (cache->kobj) {
+				kfd_remove_sysfs_file(cache->kobj,
+							&cache->attr);
+				cache->kobj = NULL;
+			}
+		kobject_del(dev->kobj_cache);
+		kobject_put(dev->kobj_cache);
+		dev->kobj_cache = NULL;
+	}
+
+	if (dev->kobj_mem) {
+		list_for_each_entry(mem, &dev->mem_props, list)
+			if (mem->kobj) {
+				kfd_remove_sysfs_file(mem->kobj, &mem->attr);
+				mem->kobj = NULL;
+			}
+		kobject_del(dev->kobj_mem);
+		kobject_put(dev->kobj_mem);
+		dev->kobj_mem = NULL;
+	}
+
+	if (dev->kobj_node) {
+		sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid);
+		sysfs_remove_file(dev->kobj_node, &dev->attr_name);
+		sysfs_remove_file(dev->kobj_node, &dev->attr_props);
+		kobject_del(dev->kobj_node);
+		kobject_put(dev->kobj_node);
+		dev->kobj_node = NULL;
+	}
+}
+
+static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
+		uint32_t id)
+{
+	struct kfd_iolink_properties *iolink;
+	struct kfd_cache_properties *cache;
+	struct kfd_mem_properties *mem;
+	int ret;
+	uint32_t i;
+
+	BUG_ON(!dev);
+
+	/*
+	 * Creating the sysfs folders
+	 */
+	BUG_ON(dev->kobj_node);
+	dev->kobj_node = kfd_alloc_struct(dev->kobj_node);
+	if (!dev->kobj_node)
+		return -ENOMEM;
+
+	ret = kobject_init_and_add(dev->kobj_node, &node_type,
+			sys_props.kobj_nodes, "%d", id);
+	if (ret < 0)
+		return ret;
+
+	dev->kobj_mem = kobject_create_and_add("mem_banks", dev->kobj_node);
+	if (!dev->kobj_mem)
+		return -ENOMEM;
+
+	dev->kobj_cache = kobject_create_and_add("caches", dev->kobj_node);
+	if (!dev->kobj_cache)
+		return -ENOMEM;
+
+	dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node);
+	if (!dev->kobj_iolink)
+		return -ENOMEM;
+
+	/*
+	 * Creating sysfs files for node properties
+	 */
+	dev->attr_gpuid.name = "gpu_id";
+	dev->attr_gpuid.mode = KFD_SYSFS_FILE_MODE;
+	sysfs_attr_init(&dev->attr_gpuid);
+	dev->attr_name.name = "name";
+	dev->attr_name.mode = KFD_SYSFS_FILE_MODE;
+	sysfs_attr_init(&dev->attr_name);
+	dev->attr_props.name = "properties";
+	dev->attr_props.mode = KFD_SYSFS_FILE_MODE;
+	sysfs_attr_init(&dev->attr_props);
+	ret = sysfs_create_file(dev->kobj_node, &dev->attr_gpuid);
+	if (ret < 0)
+		return ret;
+	ret = sysfs_create_file(dev->kobj_node, &dev->attr_name);
+	if (ret < 0)
+		return ret;
+	ret = sysfs_create_file(dev->kobj_node, &dev->attr_props);
+	if (ret < 0)
+		return ret;
+
+	i = 0;
+	list_for_each_entry(mem, &dev->mem_props, list) {
+		mem->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+		if (!mem->kobj)
+			return -ENOMEM;
+		ret = kobject_init_and_add(mem->kobj, &mem_type,
+				dev->kobj_mem, "%d", i);
+		if (ret < 0)
+			return ret;
+
+		mem->attr.name = "properties";
+		mem->attr.mode = KFD_SYSFS_FILE_MODE;
+		sysfs_attr_init(&mem->attr);
+		ret = sysfs_create_file(mem->kobj, &mem->attr);
+		if (ret < 0)
+			return ret;
+		i++;
+	}
+
+	i = 0;
+	list_for_each_entry(cache, &dev->cache_props, list) {
+		cache->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+		if (!cache->kobj)
+			return -ENOMEM;
+		ret = kobject_init_and_add(cache->kobj, &cache_type,
+				dev->kobj_cache, "%d", i);
+		if (ret < 0)
+			return ret;
+
+		cache->attr.name = "properties";
+		cache->attr.mode = KFD_SYSFS_FILE_MODE;
+		sysfs_attr_init(&cache->attr);
+		ret = sysfs_create_file(cache->kobj, &cache->attr);
+		if (ret < 0)
+			return ret;
+		i++;
+	}
+
+	i = 0;
+	list_for_each_entry(iolink, &dev->io_link_props, list) {
+		iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+		if (!iolink->kobj)
+			return -ENOMEM;
+		ret = kobject_init_and_add(iolink->kobj, &iolink_type,
+				dev->kobj_iolink, "%d", i);
+		if (ret < 0)
+			return ret;
+
+		iolink->attr.name = "properties";
+		iolink->attr.mode = KFD_SYSFS_FILE_MODE;
+		sysfs_attr_init(&iolink->attr);
+		ret = sysfs_create_file(iolink->kobj, &iolink->attr);
+		if (ret < 0)
+			return ret;
+		i++;
+}
+
+	return 0;
+}
+
+static int kfd_build_sysfs_node_tree(void)
+{
+	struct kfd_topology_device *dev;
+	int ret;
+	uint32_t i = 0;
+
+	list_for_each_entry(dev, &topology_device_list, list) {
+		ret = kfd_build_sysfs_node_entry(dev, i);
+		if (ret < 0)
+			return ret;
+		i++;
+	}
+
+	return 0;
+}
+
+static void kfd_remove_sysfs_node_tree(void)
+{
+	struct kfd_topology_device *dev;
+
+	list_for_each_entry(dev, &topology_device_list, list)
+		kfd_remove_sysfs_node_entry(dev);
+}
+
+static int kfd_topology_update_sysfs(void)
+{
+	int ret;
+
+	pr_info("Creating topology SYSFS entries\n");
+	if (sys_props.kobj_topology == NULL) {
+		sys_props.kobj_topology =
+				kfd_alloc_struct(sys_props.kobj_topology);
+		if (!sys_props.kobj_topology)
+			return -ENOMEM;
+
+		ret = kobject_init_and_add(sys_props.kobj_topology,
+				&sysprops_type,  &kfd_device->kobj,
+				"topology");
+		if (ret < 0)
+			return ret;
+
+		sys_props.kobj_nodes = kobject_create_and_add("nodes",
+				sys_props.kobj_topology);
+		if (!sys_props.kobj_nodes)
+			return -ENOMEM;
+
+		sys_props.attr_genid.name = "generation_id";
+		sys_props.attr_genid.mode = KFD_SYSFS_FILE_MODE;
+		sysfs_attr_init(&sys_props.attr_genid);
+		ret = sysfs_create_file(sys_props.kobj_topology,
+				&sys_props.attr_genid);
+		if (ret < 0)
+			return ret;
+
+		sys_props.attr_props.name = "system_properties";
+		sys_props.attr_props.mode = KFD_SYSFS_FILE_MODE;
+		sysfs_attr_init(&sys_props.attr_props);
+		ret = sysfs_create_file(sys_props.kobj_topology,
+				&sys_props.attr_props);
+		if (ret < 0)
+			return ret;
+	}
+
+	kfd_remove_sysfs_node_tree();
+
+	return kfd_build_sysfs_node_tree();
+}
+
+static void kfd_topology_release_sysfs(void)
+{
+	kfd_remove_sysfs_node_tree();
+	if (sys_props.kobj_topology) {
+		sysfs_remove_file(sys_props.kobj_topology,
+				&sys_props.attr_genid);
+		sysfs_remove_file(sys_props.kobj_topology,
+				&sys_props.attr_props);
+		if (sys_props.kobj_nodes) {
+			kobject_del(sys_props.kobj_nodes);
+			kobject_put(sys_props.kobj_nodes);
+			sys_props.kobj_nodes = NULL;
+		}
+		kobject_del(sys_props.kobj_topology);
+		kobject_put(sys_props.kobj_topology);
+		sys_props.kobj_topology = NULL;
+	}
+}
+
+int kfd_topology_init(void)
+{
+	void *crat_image = NULL;
+	size_t image_size = 0;
+	int ret;
+
+	/*
+	 * Initialize the head for the topology device list
+	 */
+	INIT_LIST_HEAD(&topology_device_list);
+	init_rwsem(&topology_lock);
+	topology_crat_parsed = 0;
+
+	memset(&sys_props, 0, sizeof(sys_props));
+
+	/*
+	 * Get the CRAT image from the ACPI
+	 */
+	ret = kfd_topology_get_crat_acpi(crat_image, &image_size);
+	if (ret == 0 && image_size > 0) {
+		pr_info("Found CRAT image with size=%zd\n", image_size);
+		crat_image = kmalloc(image_size, GFP_KERNEL);
+		if (!crat_image) {
+			ret = -ENOMEM;
+			pr_err("No memory for allocating CRAT image\n");
+			goto err;
+		}
+		ret = kfd_topology_get_crat_acpi(crat_image, &image_size);
+
+		if (ret == 0) {
+			down_write(&topology_lock);
+			ret = kfd_parse_crat_table(crat_image);
+			if (ret == 0)
+				ret = kfd_topology_update_sysfs();
+			up_write(&topology_lock);
+		} else {
+			pr_err("Couldn't get CRAT table size from ACPI\n");
+		}
+		kfree(crat_image);
+	} else if (ret == -ENODATA) {
+		ret = 0;
+	} else {
+		pr_err("Couldn't get CRAT table size from ACPI\n");
+	}
+
+err:
+	pr_info("Finished initializing topology ret=%d\n", ret);
+	return ret;
+}
+
+void kfd_topology_shutdown(void)
+{
+	kfd_topology_release_sysfs();
+	kfd_release_live_view();
+}
+
+static void kfd_debug_print_topology(void)
+{
+	struct kfd_topology_device *dev;
+	uint32_t i = 0;
+
+	pr_info("DEBUG PRINT OF TOPOLOGY:");
+	list_for_each_entry(dev, &topology_device_list, list) {
+		pr_info("Node: %d\n", i);
+		pr_info("\tGPU assigned: %s\n", (dev->gpu ? "yes" : "no"));
+		pr_info("\tCPU count: %d\n", dev->node_props.cpu_cores_count);
+		pr_info("\tSIMD count: %d", dev->node_props.simd_count);
+		i++;
+	}
+}
+
+static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
+{
+	uint32_t hashout;
+	uint32_t buf[7];
+	int i;
+
+	if (!gpu)
+		return 0;
+
+	buf[0] = gpu->pdev->devfn;
+	buf[1] = gpu->pdev->subsystem_vendor;
+	buf[2] = gpu->pdev->subsystem_device;
+	buf[3] = gpu->pdev->device;
+	buf[4] = gpu->pdev->bus->number;
+	buf[5] = (uint32_t)(gpu->kfd2kgd->get_vmem_size(gpu->kgd)
+			& 0xffffffff);
+	buf[6] = (uint32_t)(gpu->kfd2kgd->get_vmem_size(gpu->kgd) >> 32);
+
+	for (i = 0, hashout = 0; i < 7; i++)
+		hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH);
+
+	return hashout;
+}
+
+static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
+{
+	struct kfd_topology_device *dev;
+	struct kfd_topology_device *out_dev = NULL;
+
+	BUG_ON(!gpu);
+
+	list_for_each_entry(dev, &topology_device_list, list)
+		if (dev->gpu == NULL && dev->node_props.simd_count > 0) {
+			dev->gpu = gpu;
+			out_dev = dev;
+			break;
+		}
+
+	return out_dev;
+}
+
+static void kfd_notify_gpu_change(uint32_t gpu_id, int arrival)
+{
+	/*
+	 * TODO: Generate an event for thunk about the arrival/removal
+	 * of the GPU
+	 */
+}
+
+int kfd_topology_add_device(struct kfd_dev *gpu)
+{
+	uint32_t gpu_id;
+	struct kfd_topology_device *dev;
+	int res;
+
+	BUG_ON(!gpu);
+
+	gpu_id = kfd_generate_gpu_id(gpu);
+
+	pr_debug("kfd: Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
+
+	down_write(&topology_lock);
+	/*
+	 * Try to assign the GPU to existing topology device (generated from
+	 * CRAT table
+	 */
+	dev = kfd_assign_gpu(gpu);
+	if (!dev) {
+		pr_info("GPU was not found in the current topology. Extending.\n");
+		kfd_debug_print_topology();
+		dev = kfd_create_topology_device();
+		if (!dev) {
+			res = -ENOMEM;
+			goto err;
+		}
+		dev->gpu = gpu;
+
+		/*
+		 * TODO: Make a call to retrieve topology information from the
+		 * GPU vBIOS
+		 */
+
+		/*
+		 * Update the SYSFS tree, since we added another topology device
+		 */
+		if (kfd_topology_update_sysfs() < 0)
+			kfd_topology_release_sysfs();
+
+	}
+
+	dev->gpu_id = gpu_id;
+	gpu->id = gpu_id;
+	dev->node_props.vendor_id = gpu->pdev->vendor;
+	dev->node_props.device_id = gpu->pdev->device;
+	dev->node_props.location_id = (gpu->pdev->bus->number << 24) +
+			(gpu->pdev->devfn & 0xffffff);
+	/*
+	 * TODO: Retrieve max engine clock values from KGD
+	 */
+
+	res = 0;
+
+err:
+	up_write(&topology_lock);
+
+	if (res == 0)
+		kfd_notify_gpu_change(gpu_id, 1);
+
+	return res;
+}
+
+int kfd_topology_remove_device(struct kfd_dev *gpu)
+{
+	struct kfd_topology_device *dev;
+	uint32_t gpu_id;
+	int res = -ENODEV;
+
+	BUG_ON(!gpu);
+
+	down_write(&topology_lock);
+
+	list_for_each_entry(dev, &topology_device_list, list)
+		if (dev->gpu == gpu) {
+			gpu_id = dev->gpu_id;
+			kfd_remove_sysfs_node_entry(dev);
+			kfd_release_topology_device(dev);
+			res = 0;
+			if (kfd_topology_update_sysfs() < 0)
+				kfd_topology_release_sysfs();
+			break;
+		}
+
+	up_write(&topology_lock);
+
+	if (res == 0)
+		kfd_notify_gpu_change(gpu_id, 0);
+
+	return res;
+}
+
+/*
+ * When idx is out of bounds, the function will return NULL
+ */
+struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx)
+{
+
+	struct kfd_topology_device *top_dev;
+	struct kfd_dev *device = NULL;
+	uint8_t device_idx = 0;
+
+	down_read(&topology_lock);
+
+	list_for_each_entry(top_dev, &topology_device_list, list) {
+		if (device_idx == idx) {
+			device = top_dev->gpu;
+			break;
+		}
+
+		device_idx++;
+	}
+
+	up_read(&topology_lock);
+
+	return device;
+
+}
diff --git a/kernel/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
new file mode 100644
index 000000000..989624b3c
--- /dev/null
+++ b/kernel/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -0,0 +1,168 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __KFD_TOPOLOGY_H__
+#define __KFD_TOPOLOGY_H__
+
+#include <linux/types.h>
+#include <linux/list.h>
+#include "kfd_priv.h"
+
+#define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 128
+
+#define HSA_CAP_HOT_PLUGGABLE			0x00000001
+#define HSA_CAP_ATS_PRESENT			0x00000002
+#define HSA_CAP_SHARED_WITH_GRAPHICS		0x00000004
+#define HSA_CAP_QUEUE_SIZE_POW2			0x00000008
+#define HSA_CAP_QUEUE_SIZE_32BIT		0x00000010
+#define HSA_CAP_QUEUE_IDLE_EVENT		0x00000020
+#define HSA_CAP_VA_LIMIT			0x00000040
+#define HSA_CAP_WATCH_POINTS_SUPPORTED		0x00000080
+#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK	0x00000f00
+#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT	8
+#define HSA_CAP_RESERVED			0xfffff000
+
+struct kfd_node_properties {
+	uint32_t cpu_cores_count;
+	uint32_t simd_count;
+	uint32_t mem_banks_count;
+	uint32_t caches_count;
+	uint32_t io_links_count;
+	uint32_t cpu_core_id_base;
+	uint32_t simd_id_base;
+	uint32_t capability;
+	uint32_t max_waves_per_simd;
+	uint32_t lds_size_in_kb;
+	uint32_t gds_size_in_kb;
+	uint32_t wave_front_size;
+	uint32_t array_count;
+	uint32_t simd_arrays_per_engine;
+	uint32_t cu_per_simd_array;
+	uint32_t simd_per_cu;
+	uint32_t max_slots_scratch_cu;
+	uint32_t engine_id;
+	uint32_t vendor_id;
+	uint32_t device_id;
+	uint32_t location_id;
+	uint32_t max_engine_clk_fcompute;
+	uint32_t max_engine_clk_ccompute;
+	uint16_t marketing_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
+};
+
+#define HSA_MEM_HEAP_TYPE_SYSTEM	0
+#define HSA_MEM_HEAP_TYPE_FB_PUBLIC	1
+#define HSA_MEM_HEAP_TYPE_FB_PRIVATE	2
+#define HSA_MEM_HEAP_TYPE_GPU_GDS	3
+#define HSA_MEM_HEAP_TYPE_GPU_LDS	4
+#define HSA_MEM_HEAP_TYPE_GPU_SCRATCH	5
+
+#define HSA_MEM_FLAGS_HOT_PLUGGABLE	0x00000001
+#define HSA_MEM_FLAGS_NON_VOLATILE	0x00000002
+#define HSA_MEM_FLAGS_RESERVED		0xfffffffc
+
+struct kfd_mem_properties {
+	struct list_head	list;
+	uint32_t		heap_type;
+	uint64_t		size_in_bytes;
+	uint32_t		flags;
+	uint32_t		width;
+	uint32_t		mem_clk_max;
+	struct kobject		*kobj;
+	struct attribute	attr;
+};
+
+#define KFD_TOPOLOGY_CPU_SIBLINGS 256
+
+#define HSA_CACHE_TYPE_DATA		0x00000001
+#define HSA_CACHE_TYPE_INSTRUCTION	0x00000002
+#define HSA_CACHE_TYPE_CPU		0x00000004
+#define HSA_CACHE_TYPE_HSACU		0x00000008
+#define HSA_CACHE_TYPE_RESERVED		0xfffffff0
+
+struct kfd_cache_properties {
+	struct list_head	list;
+	uint32_t		processor_id_low;
+	uint32_t		cache_level;
+	uint32_t		cache_size;
+	uint32_t		cacheline_size;
+	uint32_t		cachelines_per_tag;
+	uint32_t		cache_assoc;
+	uint32_t		cache_latency;
+	uint32_t		cache_type;
+	uint8_t			sibling_map[KFD_TOPOLOGY_CPU_SIBLINGS];
+	struct kobject		*kobj;
+	struct attribute	attr;
+};
+
+struct kfd_iolink_properties {
+	struct list_head	list;
+	uint32_t		iolink_type;
+	uint32_t		ver_maj;
+	uint32_t		ver_min;
+	uint32_t		node_from;
+	uint32_t		node_to;
+	uint32_t		weight;
+	uint32_t		min_latency;
+	uint32_t		max_latency;
+	uint32_t		min_bandwidth;
+	uint32_t		max_bandwidth;
+	uint32_t		rec_transfer_size;
+	uint32_t		flags;
+	struct kobject		*kobj;
+	struct attribute	attr;
+};
+
+struct kfd_topology_device {
+	struct list_head		list;
+	uint32_t			gpu_id;
+	struct kfd_node_properties	node_props;
+	uint32_t			mem_bank_count;
+	struct list_head		mem_props;
+	uint32_t			cache_count;
+	struct list_head		cache_props;
+	uint32_t			io_link_count;
+	struct list_head		io_link_props;
+	struct kfd_dev			*gpu;
+	struct kobject			*kobj_node;
+	struct kobject			*kobj_mem;
+	struct kobject			*kobj_cache;
+	struct kobject			*kobj_iolink;
+	struct attribute		attr_gpuid;
+	struct attribute		attr_name;
+	struct attribute		attr_props;
+};
+
+struct kfd_system_properties {
+	uint32_t		num_devices;     /* Number of H-NUMA nodes */
+	uint32_t		generation_count;
+	uint64_t		platform_oem;
+	uint64_t		platform_id;
+	uint64_t		platform_rev;
+	struct kobject		*kobj_topology;
+	struct kobject		*kobj_nodes;
+	struct attribute	attr_genid;
+	struct attribute	attr_props;
+};
+
+
+
+#endif /* __KFD_TOPOLOGY_H__ */