Diffstat (limited to 'kernel/drivers/misc')
239 files changed, 98663 insertions, 0 deletions
diff --git a/kernel/drivers/misc/Kconfig b/kernel/drivers/misc/Kconfig
new file mode 100644
index 000000000..453a61660
--- /dev/null
+++ b/kernel/drivers/misc/Kconfig
@@ -0,0 +1,569 @@
+#
+# Misc strange devices
+#
+
+menu "Misc devices"
+
+config SENSORS_LIS3LV02D
+	tristate
+	depends on INPUT
+	select INPUT_POLLDEV
+	default n
+
+config AD525X_DPOT
+	tristate "Analog Devices Digital Potentiometers"
+	depends on (I2C || SPI) && SYSFS
+	help
+	  If you say yes here, you get support for the Analog Devices
+	  AD5258, AD5259, AD5251, AD5252, AD5253, AD5254, AD5255,
+	  AD5160, AD5161, AD5162, AD5165, AD5200, AD5201, AD5203,
+	  AD5204, AD5206, AD5207, AD5231, AD5232, AD5233, AD5235,
+	  AD5260, AD5262, AD5263, AD5290, AD5291, AD5292, AD5293,
+	  AD7376, AD8400, AD8402, AD8403, ADN2850, AD5241, AD5242,
+	  AD5243, AD5245, AD5246, AD5247, AD5248, AD5280, AD5282,
+	  ADN2860, AD5273, AD5171, AD5170, AD5172, AD5173, AD5270,
+	  AD5271, AD5272, AD5274
+	  digital potentiometer chips.
+
+	  See Documentation/misc-devices/ad525x_dpot.txt for the
+	  userspace interface.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called ad525x_dpot.
+
+config AD525X_DPOT_I2C
+	tristate "support I2C bus connection"
+	depends on AD525X_DPOT && I2C
+	help
+	  Say Y here if you have digital potentiometers hooked to an I2C bus.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called ad525x_dpot-i2c.
+
+config AD525X_DPOT_SPI
+	tristate "support SPI bus connection"
+	depends on AD525X_DPOT && SPI_MASTER
+	help
+	  Say Y here if you have digital potentiometers hooked to an SPI bus.
+
+	  If unsure, say N (but it's safe to say "Y").
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called ad525x_dpot-spi.
+
+config ATMEL_TCLIB
+	bool "Atmel AT32/AT91 Timer/Counter Library"
+	depends on (AVR32 || ARCH_AT91)
+	default y if PREEMPT_RT_FULL
+	help
+	  Select this if you want a library to allocate the Timer/Counter
+	  blocks found on many Atmel processors. This facilitates using
+	  these blocks by different drivers despite processor differences.
+
+config ATMEL_TCB_CLKSRC
+	bool "TC Block Clocksource"
+	depends on ATMEL_TCLIB
+	default y
+	help
+	  Select this to get a high precision clocksource based on a
+	  TC block with a 5+ MHz base clock rate. Two timer channels
+	  are combined to make a single 32-bit timer.
+
+	  When GENERIC_CLOCKEVENTS is defined, the third timer channel
+	  may be used as a clock event device supporting oneshot mode.
+
+config ATMEL_TCB_CLKSRC_BLOCK
+	int
+	depends on ATMEL_TCB_CLKSRC
+	prompt "TC Block" if CPU_AT32AP700X
+	default 0
+	range 0 1
+	help
+	  Some chips provide more than one TC block, so you have the
+	  choice of which one to use for the clock framework. The other
+	  TC can be used for other purposes, such as PWM generation and
+	  interval timing.
+
+config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
+	bool "TC Block use 32 kHz clock"
+	depends on ATMEL_TCB_CLKSRC
+	default y if !PREEMPT_RT_FULL
+	help
+	  Select this to use the 32 kHz base clock rate as the TC block
+	  clock source for clock events.
+
+
+config DUMMY_IRQ
+	tristate "Dummy IRQ handler"
+	default n
+	---help---
+	  This module accepts a single 'irq' parameter and registers a
+	  handler for that IRQ. Its sole purpose is to help with debugging
+	  systems on which spurious IRQs arrive on a disabled IRQ vector.
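The AD525X_DPOT entry above points to a sysfs userspace interface (see Documentation/misc-devices/ad525x_dpot.txt), and the driver code later in this diff registers per-wiper attributes such as rdac0, eeprom0 and otp0. The following is a minimal userspace sketch of setting one wiper; the I2C device directory 1-002f is a hypothetical example, so substitute the real bus/address directory for your board.

/*
 * dpot_set.c - hedged sketch: set wiper 0 of an ad525x dpot via sysfs.
 * The sysfs path below is hypothetical; the attribute name rdac0 comes
 * from the driver code in this diff.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/bus/i2c/devices/1-002f/rdac0";
	char buf[16];
	int fd, len;

	fd = open(path, O_WRONLY);
	if (fd < 0) {
		perror(path);
		return 1;
	}
	/* 128 is mid-scale for a 256-position part */
	len = snprintf(buf, sizeof(buf), "%u\n", 128U);
	if (write(fd, buf, len) != len)
		perror("write");
	close(fd);
	return 0;
}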
+
+config IBM_ASM
+	tristate "Device driver for IBM RSA service processor"
+	depends on X86 && PCI && INPUT
+	---help---
+	  This option enables device driver support for in-band access to the
+	  IBM RSA (Condor) service processor in eServer xSeries systems.
+	  The ibmasm device driver allows user space applications to access
+	  ASM (Advanced Systems Management) functions on the service
+	  processor. The driver is meant to be used in conjunction with
+	  a user space API.
+	  The ibmasm driver also enables the OS to use the UART on the
+	  service processor board as a regular serial port. To make use of
+	  this feature, serial driver support (CONFIG_SERIAL_8250) must be
+	  enabled.
+
+	  WARNING: This software may not be supported or function
+	  correctly on your IBM server. Please consult the IBM ServerProven
+	  website <http://www-03.ibm.com/systems/info/x86servers/serverproven/compat/us/>
+	  for information on the specific driver level and support statement
+	  for your IBM server.
+
+config HWLAT_DETECTOR
+	tristate "Testing module to detect hardware-induced latencies"
+	depends on DEBUG_FS
+	depends on RING_BUFFER
+	default m
+	---help---
+	  A simple hardware latency detector. Use this module to detect
+	  large latencies introduced by the behavior of the underlying
+	  system firmware external to Linux. We do this using periodic
+	  use of stop_machine to grab all available CPUs and measure
+	  for unexplainable gaps in the CPU timestamp counter(s). By
+	  default, the module is not enabled until the "enable" file
+	  within the "hwlat_detector" debugfs directory is toggled.
+
+	  This module is often used to detect SMI (System Management
+	  Interrupts) on x86 systems, though it is not x86-specific. To
+	  this end, we default to using a sample window of 1 second,
+	  during which we will sample for 0.5 seconds. If an SMI or
+	  similar event occurs during that time, it is recorded
+	  into an 8K samples global ring buffer until retrieved.
+
+	  WARNING: This software should never be enabled (it can be built
+	  but should not be turned on after it is loaded) in a production
+	  environment where high latencies are a concern, since the
+	  sampling mechanism actually introduces latencies for
+	  regular tasks while the CPU(s) are being held.
+
+	  If unsure, say N.
+
+config PHANTOM
+	tristate "Sensable PHANToM (PCI)"
+	depends on PCI
+	help
+	  Say Y here if you want to build a driver for the Sensable PHANToM
+	  device.
+
+	  This driver is only for PCI PHANToMs.
+
+	  If you choose to build it as a module, its name will be phantom.
+	  If unsure, say N here.
+
+config INTEL_MID_PTI
+	tristate "Parallel Trace Interface for MIPI P1149.7 cJTAG standard"
+	depends on PCI && TTY && (X86_INTEL_MID || COMPILE_TEST)
+	default n
+	help
+	  The PTI (Parallel Trace Interface) driver directs
+	  trace data routed from various parts in the system out
+	  through an Intel Penwell PTI port and out of the mobile
+	  device for analysis with a debugging tool (Lauterbach or Fido).
+
+	  You should select this driver if the target kernel is meant for
+	  an Intel Atom (non-netbook) mobile device containing a MIPI
+	  P1149.7 standard implementation.
+
+config SGI_IOC4
+	tristate "SGI IOC4 Base IO support"
+	depends on PCI
+	---help---
+	  This option enables basic support for the IOC4 chip on certain
+	  SGI IO controller cards (IO9, IO10, and PCI-RT). This option
+	  does not enable any specific functions on such a card, but provides
+	  necessary infrastructure for other drivers to utilize.
+
+	  If you have an SGI Altix with an IOC4-based card, say Y.
+	  Otherwise say N.
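The HWLAT_DETECTOR help above describes a debugfs control surface: an "enable" toggle, a 1 second window with a 0.5 second sampling width, and a ring of samples held until retrieved. Below is a hedged userspace sketch of draining that buffer; it assumes debugfs is mounted at /sys/kernel/debug and that the module exposes "enable" and "sample" files under hwlat_detector/, as the help text suggests (file names may differ by kernel version).

/*
 * hwlat_read.c - hedged sketch: enable the detector and drain samples.
 * Paths assume debugfs at /sys/kernel/debug; the file names follow the
 * Kconfig help text above.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[256];
	ssize_t n;
	int fd;

	fd = open("/sys/kernel/debug/hwlat_detector/enable", O_WRONLY);
	if (fd < 0) {
		perror("enable");
		return 1;
	}
	if (write(fd, "1", 1) != 1)	/* start sampling */
		perror("enable write");
	close(fd);

	fd = open("/sys/kernel/debug/hwlat_detector/sample", O_RDONLY);
	if (fd < 0) {
		perror("sample");
		return 1;
	}
	/* each read returns one recorded latency sample as text */
	while ((n = read(fd, buf, sizeof(buf) - 1)) > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);
	}
	close(fd);
	return 0;
}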
+ +config TIFM_CORE + tristate "TI Flash Media interface support" + depends on PCI + help + If you want support for Texas Instruments(R) Flash Media adapters + you should select this option and then also choose an appropriate + host adapter, such as 'TI Flash Media PCI74xx/PCI76xx host adapter + support', if you have a TI PCI74xx compatible card reader, for + example. + You will also have to select some flash card format drivers. MMC/SD + cards are supported via 'MMC/SD Card support: TI Flash Media MMC/SD + Interface support (MMC_TIFM_SD)'. + + To compile this driver as a module, choose M here: the module will + be called tifm_core. + +config TIFM_7XX1 + tristate "TI Flash Media PCI74xx/PCI76xx host adapter support" + depends on PCI && TIFM_CORE + default TIFM_CORE + help + This option enables support for Texas Instruments(R) PCI74xx and + PCI76xx families of Flash Media adapters, found in many laptops. + To make actual use of the device, you will have to select some + flash card format drivers, as outlined in the TIFM_CORE Help. + + To compile this driver as a module, choose M here: the module will + be called tifm_7xx1. + +config ICS932S401 + tristate "Integrated Circuits ICS932S401" + depends on I2C + help + If you say yes here you get support for the Integrated Circuits + ICS932S401 clock control chips. + + This driver can also be built as a module. If so, the module + will be called ics932s401. + +config ATMEL_SSC + tristate "Device driver for Atmel SSC peripheral" + depends on HAS_IOMEM && (AVR32 || ARCH_AT91 || COMPILE_TEST) + ---help--- + This option enables device driver support for Atmel Synchronized + Serial Communication peripheral (SSC). + + The SSC peripheral supports a wide variety of serial frame based + communications, i.e. I2S, SPI, etc. + + If unsure, say N. + +config ENCLOSURE_SERVICES + tristate "Enclosure Services" + default n + help + Provides support for intelligent enclosures (bays which + contain storage devices). You also need either a host + driver (SCSI/ATA) which supports enclosures + or a SCSI enclosure device (SES) to use these services. + +config SGI_XP + tristate "Support communication between SGI SSIs" + depends on NET + depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_UV) && SMP + select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 + select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 + select SGI_GRU if X86_64 && SMP + ---help--- + An SGI machine can be divided into multiple Single System + Images which act independently of each other and have + hardware based memory protection from the others. Enabling + this feature will allow for direct communication between SSIs + based on a network adapter and DMA messaging. + +config CS5535_MFGPT + tristate "CS5535/CS5536 Geode Multi-Function General Purpose Timer (MFGPT) support" + depends on MFD_CS5535 + default n + help + This driver provides access to MFGPT functionality for other + drivers that need timers. MFGPTs are available in the CS5535 and + CS5536 companion chips that are found in AMD Geode and several + other platforms. They have a better resolution and max interval + than the generic PIT, and are suitable for use as high-res timers. + You probably don't want to enable this manually; other drivers that + make use of it should enable it. + +config CS5535_MFGPT_DEFAULT_IRQ + int + depends on CS5535_MFGPT + default 7 + help + MFGPTs on the CS5535 require an interrupt. 
The selected IRQ
+	  can be overridden as a module option, as well as by drivers that
+	  use the cs5535_mfgpt_ API; however, different architectures might
+	  want to use a different IRQ by default. This is here for
+	  architectures to set as necessary.
+
+config CS5535_CLOCK_EVENT_SRC
+	tristate "CS5535/CS5536 high-res timer (MFGPT) events"
+	depends on GENERIC_CLOCKEVENTS && CS5535_MFGPT
+	help
+	  This driver provides a clock event source based on the MFGPT
+	  timer(s) in the CS5535 and CS5536 companion chips.
+	  MFGPTs have a better resolution and max interval than the
+	  generic PIT, and are suitable for use as high-res timers.
+
+config HP_ILO
+	tristate "Channel interface driver for the HP iLO processor"
+	depends on PCI
+	default n
+	help
+	  The channel interface driver allows applications to communicate
+	  with iLO management processors present on HP ProLiant servers.
+	  Upon loading, the driver creates /dev/hpilo/dXccbN files, which
+	  can be used to gather data from the management processor, via
+	  read and write system calls.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called hpilo.
+
+config SGI_GRU
+	tristate "SGI GRU driver"
+	depends on X86_UV && SMP
+	default n
+	select MMU_NOTIFIER
+	---help---
+	  The GRU is a hardware resource located in the system chipset. The
+	  GRU contains memory that can be mmapped into the user address
+	  space. This memory is used to communicate with the GRU to perform
+	  functions such as load/store, scatter/gather, bcopy and AMOs. The
+	  GRU is directly accessed by user instructions using user virtual
+	  addresses. GRU instructions (ex., bcopy) use user virtual
+	  addresses for operands.
+
+	  If you are not running on an SGI UV system, say N.
+
+config SGI_GRU_DEBUG
+	bool "SGI GRU driver debug"
+	depends on SGI_GRU
+	default n
+	---help---
+	  This option enables additional debugging code for the SGI GRU
+	  driver. If you are unsure, say N.
+
+config APDS9802ALS
+	tristate "Medfield Avago APDS9802 ALS Sensor module"
+	depends on I2C
+	help
+	  If you say yes here you get support for the APDS9802 ambient
+	  light sensor.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called apds9802als.
+
+config ISL29003
+	tristate "Intersil ISL29003 ambient light sensor"
+	depends on I2C && SYSFS
+	help
+	  If you say yes here you get support for the Intersil ISL29003
+	  ambient light sensor.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called isl29003.
+
+config ISL29020
+	tristate "Intersil ISL29020 ambient light sensor"
+	depends on I2C
+	help
+	  If you say yes here you get support for the Intersil ISL29020
+	  ambient light sensor.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called isl29020.
+
+config SENSORS_TSL2550
+	tristate "Taos TSL2550 ambient light sensor"
+	depends on I2C && SYSFS
+	help
+	  If you say yes here you get support for the Taos TSL2550
+	  ambient light sensor.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called tsl2550.
+
+config SENSORS_BH1780
+	tristate "ROHM BH1780GLI ambient light sensor"
+	depends on I2C && SYSFS
+	help
+	  If you say yes here you get support for the ROHM BH1780GLI
+	  ambient light sensor.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called bh1780gli.
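The HP_ILO help above says the driver creates /dev/hpilo/dXccbN nodes that applications drive with plain read() and write() system calls. A hedged sketch of reading one such node follows; the instance name d0ccb0 is an assumption (actual names follow the dXccbN pattern), and the message format carried on the channel is firmware-defined and not shown here.

/*
 * ilo_poll.c - hedged sketch: read raw bytes from an iLO channel node.
 * "d0ccb0" is a hypothetical instance name.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	unsigned char pkt[512];
	ssize_t n;
	int fd = open("/dev/hpilo/d0ccb0", O_RDONLY);

	if (fd < 0) {
		perror("/dev/hpilo/d0ccb0");
		return 1;
	}
	/* one read() returns one management-processor packet */
	n = read(fd, pkt, sizeof(pkt));
	if (n > 0)
		printf("got %zd bytes from iLO\n", n);
	close(fd);
	return 0;
}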
+ +config SENSORS_BH1770 + tristate "BH1770GLC / SFH7770 combined ALS - Proximity sensor" + depends on I2C + ---help--- + Say Y here if you want to build a driver for BH1770GLC (ROHM) or + SFH7770 (Osram) combined ambient light and proximity sensor chip. + + To compile this driver as a module, choose M here: the + module will be called bh1770glc. If unsure, say N here. + +config SENSORS_APDS990X + tristate "APDS990X combined als and proximity sensors" + depends on I2C + default n + ---help--- + Say Y here if you want to build a driver for Avago APDS990x + combined ambient light and proximity sensor chip. + + To compile this driver as a module, choose M here: the + module will be called apds990x. If unsure, say N here. + +config HMC6352 + tristate "Honeywell HMC6352 compass" + depends on I2C + help + This driver provides support for the Honeywell HMC6352 compass, + providing configuration and heading data via sysfs. + +config DS1682 + tristate "Dallas DS1682 Total Elapsed Time Recorder with Alarm" + depends on I2C + help + If you say yes here you get support for Dallas Semiconductor + DS1682 Total Elapsed Time Recorder. + + This driver can also be built as a module. If so, the module + will be called ds1682. + +config SPEAR13XX_PCIE_GADGET + bool "PCIe gadget support for SPEAr13XX platform" + depends on ARCH_SPEAR13XX && BROKEN + default n + help + This option enables gadget support for PCIe controller. If + board file defines any controller as PCIe endpoint then a sysfs + entry will be created for that controller. User can use these + sysfs node to configure PCIe EP as per his requirements. + +config TI_DAC7512 + tristate "Texas Instruments DAC7512" + depends on SPI && SYSFS + help + If you say yes here you get support for the Texas Instruments + DAC7512 16-bit digital-to-analog converter. + + This driver can also be built as a module. If so, the module + will be called ti_dac7512. + +config VMWARE_BALLOON + tristate "VMware Balloon Driver" + depends on X86 && HYPERVISOR_GUEST + help + This is VMware physical memory management driver which acts + like a "balloon" that can be inflated to reclaim physical pages + by reserving them in the guest and invalidating them in the + monitor, freeing up the underlying machine pages so they can + be allocated to other guests. The balloon can also be deflated + to allow the guest to use more physical memory. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called vmw_balloon. + +config ARM_CHARLCD + bool "ARM Ltd. Character LCD Driver" + depends on PLAT_VERSATILE + help + This is a driver for the character LCD found on the ARM Ltd. + Versatile and RealView Platform Baseboards. It doesn't do + very much more than display the text "ARM Linux" on the first + line and the Linux version on the second line, but that's + still useful. + +config BMP085 + bool + depends on SYSFS + +config BMP085_I2C + tristate "BMP085 digital pressure sensor on I2C" + select BMP085 + select REGMAP_I2C + depends on I2C && SYSFS + help + Say Y here if you want to support Bosch Sensortec's digital pressure + sensor hooked to an I2C bus. + + To compile this driver as a module, choose M here: the + module will be called bmp085-i2c. + +config BMP085_SPI + tristate "BMP085 digital pressure sensor on SPI" + select BMP085 + select REGMAP_SPI + depends on SPI_MASTER && SYSFS + help + Say Y here if you want to support Bosch Sensortec's digital pressure + sensor hooked to an SPI bus. 
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called bmp085-spi.
+
+config PCH_PHUB
+	tristate "Intel EG20T PCH/LAPIS Semicon IOH(ML7213/ML7223/ML7831) PHUB"
+	select GENERIC_NET_UTILS
+	depends on PCI && (X86_32 || COMPILE_TEST)
+	help
+	  This driver is for the PCH (Platform Controller Hub) PHUB (Packet
+	  Hub) of Intel Topcliff, an IOH (Input/Output Hub) for x86 embedded
+	  processors. The Topcliff stores its MAC address and Option ROM
+	  data in SROM, and this driver provides access to both.
+
+	  This driver can also be used for the LAPIS Semiconductor IOHs
+	  ML7213, ML7223 and ML7831: the ML7213 is for IVI (In-Vehicle
+	  Infotainment) use, the ML7223 for MP (Media Phone) use and the
+	  ML7831 for general purpose use. All three are companion chips
+	  for the Intel Atom E6xx series and are fully compatible with the
+	  Intel EG20T PCH.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called pch_phub.
+
+config USB_SWITCH_FSA9480
+	tristate "FSA9480 USB Switch"
+	depends on I2C
+	help
+	  The FSA9480 is a USB port accessory detector and switch.
+	  The FSA9480 is fully controlled using I2C and enables USB data,
+	  stereo and mono audio, video, microphone and UART data to use
+	  a common connector port.
+
+config LATTICE_ECP3_CONFIG
+	tristate "Lattice ECP3 FPGA bitstream configuration via SPI"
+	depends on SPI && SYSFS
+	select FW_LOADER
+	default n
+	help
+	  This option enables support for bitstream configuration (programming
+	  or loading) of the Lattice ECP3 FPGA family via SPI.
+
+	  If unsure, say N.
+
+config SRAM
+	bool "Generic on-chip SRAM driver"
+	depends on HAS_IOMEM
+	select GENERIC_ALLOCATOR
+	help
+	  This driver allows you to declare a memory region to be managed by
+	  the genalloc API. It is supposed to be used for small on-chip SRAM
+	  areas found on many SoCs.
+
+config VEXPRESS_SYSCFG
+	bool "Versatile Express System Configuration driver"
+	depends on VEXPRESS_CONFIG
+	default y
+	help
+	  ARM Ltd. Versatile Express uses a specialised platform configuration
+	  bus. The System Configuration interface is one of the possible
+	  means of generating transactions on this bus.
+
+source "drivers/misc/c2port/Kconfig"
+source "drivers/misc/eeprom/Kconfig"
+source "drivers/misc/cb710/Kconfig"
+source "drivers/misc/ti-st/Kconfig"
+source "drivers/misc/lis3lv02d/Kconfig"
+source "drivers/misc/carma/Kconfig"
+source "drivers/misc/altera-stapl/Kconfig"
+source "drivers/misc/mei/Kconfig"
+source "drivers/misc/vmw_vmci/Kconfig"
+source "drivers/misc/mic/Kconfig"
+source "drivers/misc/genwqe/Kconfig"
+source "drivers/misc/echo/Kconfig"
+source "drivers/misc/cxl/Kconfig"
+endmenu
diff --git a/kernel/drivers/misc/Makefile b/kernel/drivers/misc/Makefile
new file mode 100644
index 000000000..6a8e39388
--- /dev/null
+++ b/kernel/drivers/misc/Makefile
@@ -0,0 +1,59 @@
+#
+# Makefile for misc devices that really don't fit anywhere else.
+# + +obj-$(CONFIG_IBM_ASM) += ibmasm/ +obj-$(CONFIG_AD525X_DPOT) += ad525x_dpot.o +obj-$(CONFIG_AD525X_DPOT_I2C) += ad525x_dpot-i2c.o +obj-$(CONFIG_AD525X_DPOT_SPI) += ad525x_dpot-spi.o +obj-$(CONFIG_INTEL_MID_PTI) += pti.o +obj-$(CONFIG_ATMEL_SSC) += atmel-ssc.o +obj-$(CONFIG_ATMEL_TCLIB) += atmel_tclib.o +obj-$(CONFIG_BMP085) += bmp085.o +obj-$(CONFIG_BMP085_I2C) += bmp085-i2c.o +obj-$(CONFIG_BMP085_SPI) += bmp085-spi.o +obj-$(CONFIG_DUMMY_IRQ) += dummy-irq.o +obj-$(CONFIG_ICS932S401) += ics932s401.o +obj-$(CONFIG_LKDTM) += lkdtm.o +obj-$(CONFIG_TIFM_CORE) += tifm_core.o +obj-$(CONFIG_TIFM_7XX1) += tifm_7xx1.o +obj-$(CONFIG_PHANTOM) += phantom.o +obj-$(CONFIG_SENSORS_BH1780) += bh1780gli.o +obj-$(CONFIG_SENSORS_BH1770) += bh1770glc.o +obj-$(CONFIG_SENSORS_APDS990X) += apds990x.o +obj-$(CONFIG_SGI_IOC4) += ioc4.o +obj-$(CONFIG_ENCLOSURE_SERVICES) += enclosure.o +obj-$(CONFIG_KGDB_TESTS) += kgdbts.o +obj-$(CONFIG_SGI_XP) += sgi-xp/ +obj-$(CONFIG_SGI_GRU) += sgi-gru/ +obj-$(CONFIG_CS5535_MFGPT) += cs5535-mfgpt.o +obj-$(CONFIG_HP_ILO) += hpilo.o +obj-$(CONFIG_APDS9802ALS) += apds9802als.o +obj-$(CONFIG_ISL29003) += isl29003.o +obj-$(CONFIG_ISL29020) += isl29020.o +obj-$(CONFIG_SENSORS_TSL2550) += tsl2550.o +obj-$(CONFIG_DS1682) += ds1682.o +obj-$(CONFIG_TI_DAC7512) += ti_dac7512.o +obj-$(CONFIG_C2PORT) += c2port/ +obj-$(CONFIG_HMC6352) += hmc6352.o +obj-y += eeprom/ +obj-y += cb710/ +obj-$(CONFIG_HWLAT_DETECTOR) += hwlat_detector.o +obj-$(CONFIG_SPEAR13XX_PCIE_GADGET) += spear13xx_pcie_gadget.o +obj-$(CONFIG_VMWARE_BALLOON) += vmw_balloon.o +obj-$(CONFIG_ARM_CHARLCD) += arm-charlcd.o +obj-$(CONFIG_PCH_PHUB) += pch_phub.o +obj-y += ti-st/ +obj-y += lis3lv02d/ +obj-y += carma/ +obj-$(CONFIG_USB_SWITCH_FSA9480) += fsa9480.o +obj-$(CONFIG_ALTERA_STAPL) +=altera-stapl/ +obj-$(CONFIG_INTEL_MEI) += mei/ +obj-$(CONFIG_VMWARE_VMCI) += vmw_vmci/ +obj-$(CONFIG_LATTICE_ECP3_CONFIG) += lattice-ecp3-config.o +obj-$(CONFIG_SRAM) += sram.o +obj-y += mic/ +obj-$(CONFIG_GENWQE) += genwqe/ +obj-$(CONFIG_ECHO) += echo/ +obj-$(CONFIG_VEXPRESS_SYSCFG) += vexpress-syscfg.o +obj-$(CONFIG_CXL_BASE) += cxl/ diff --git a/kernel/drivers/misc/ad525x_dpot-i2c.c b/kernel/drivers/misc/ad525x_dpot-i2c.c new file mode 100644 index 000000000..705b881e1 --- /dev/null +++ b/kernel/drivers/misc/ad525x_dpot-i2c.c @@ -0,0 +1,121 @@ +/* + * Driver for the Analog Devices digital potentiometers (I2C bus) + * + * Copyright (C) 2010-2011 Michael Hennerich, Analog Devices Inc. + * + * Licensed under the GPL-2 or later. 
+ */ + +#include <linux/i2c.h> +#include <linux/module.h> + +#include "ad525x_dpot.h" + +/* I2C bus functions */ +static int write_d8(void *client, u8 val) +{ + return i2c_smbus_write_byte(client, val); +} + +static int write_r8d8(void *client, u8 reg, u8 val) +{ + return i2c_smbus_write_byte_data(client, reg, val); +} + +static int write_r8d16(void *client, u8 reg, u16 val) +{ + return i2c_smbus_write_word_data(client, reg, val); +} + +static int read_d8(void *client) +{ + return i2c_smbus_read_byte(client); +} + +static int read_r8d8(void *client, u8 reg) +{ + return i2c_smbus_read_byte_data(client, reg); +} + +static int read_r8d16(void *client, u8 reg) +{ + return i2c_smbus_read_word_data(client, reg); +} + +static const struct ad_dpot_bus_ops bops = { + .read_d8 = read_d8, + .read_r8d8 = read_r8d8, + .read_r8d16 = read_r8d16, + .write_d8 = write_d8, + .write_r8d8 = write_r8d8, + .write_r8d16 = write_r8d16, +}; + +static int ad_dpot_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct ad_dpot_bus_data bdata = { + .client = client, + .bops = &bops, + }; + + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_WORD_DATA)) { + dev_err(&client->dev, "SMBUS Word Data not Supported\n"); + return -EIO; + } + + return ad_dpot_probe(&client->dev, &bdata, id->driver_data, id->name); +} + +static int ad_dpot_i2c_remove(struct i2c_client *client) +{ + return ad_dpot_remove(&client->dev); +} + +static const struct i2c_device_id ad_dpot_id[] = { + {"ad5258", AD5258_ID}, + {"ad5259", AD5259_ID}, + {"ad5251", AD5251_ID}, + {"ad5252", AD5252_ID}, + {"ad5253", AD5253_ID}, + {"ad5254", AD5254_ID}, + {"ad5255", AD5255_ID}, + {"ad5241", AD5241_ID}, + {"ad5242", AD5242_ID}, + {"ad5243", AD5243_ID}, + {"ad5245", AD5245_ID}, + {"ad5246", AD5246_ID}, + {"ad5247", AD5247_ID}, + {"ad5248", AD5248_ID}, + {"ad5280", AD5280_ID}, + {"ad5282", AD5282_ID}, + {"adn2860", ADN2860_ID}, + {"ad5273", AD5273_ID}, + {"ad5161", AD5161_ID}, + {"ad5171", AD5171_ID}, + {"ad5170", AD5170_ID}, + {"ad5172", AD5172_ID}, + {"ad5173", AD5173_ID}, + {"ad5272", AD5272_ID}, + {"ad5274", AD5274_ID}, + {} +}; +MODULE_DEVICE_TABLE(i2c, ad_dpot_id); + +static struct i2c_driver ad_dpot_i2c_driver = { + .driver = { + .name = "ad_dpot", + .owner = THIS_MODULE, + }, + .probe = ad_dpot_i2c_probe, + .remove = ad_dpot_i2c_remove, + .id_table = ad_dpot_id, +}; + +module_i2c_driver(ad_dpot_i2c_driver); + +MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>"); +MODULE_DESCRIPTION("digital potentiometer I2C bus driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("i2c:ad_dpot"); diff --git a/kernel/drivers/misc/ad525x_dpot-spi.c b/kernel/drivers/misc/ad525x_dpot-spi.c new file mode 100644 index 000000000..f4c82eafa --- /dev/null +++ b/kernel/drivers/misc/ad525x_dpot-spi.c @@ -0,0 +1,147 @@ +/* + * Driver for the Analog Devices digital potentiometers (SPI bus) + * + * Copyright (C) 2010-2011 Michael Hennerich, Analog Devices Inc. + * + * Licensed under the GPL-2 or later. 
+ */ + +#include <linux/spi/spi.h> +#include <linux/module.h> + +#include "ad525x_dpot.h" + +/* SPI bus functions */ +static int write8(void *client, u8 val) +{ + u8 data = val; + + return spi_write(client, &data, 1); +} + +static int write16(void *client, u8 reg, u8 val) +{ + u8 data[2] = {reg, val}; + + return spi_write(client, data, 2); +} + +static int write24(void *client, u8 reg, u16 val) +{ + u8 data[3] = {reg, val >> 8, val}; + + return spi_write(client, data, 3); +} + +static int read8(void *client) +{ + int ret; + u8 data; + + ret = spi_read(client, &data, 1); + if (ret < 0) + return ret; + + return data; +} + +static int read16(void *client, u8 reg) +{ + int ret; + u8 buf_rx[2]; + + write16(client, reg, 0); + ret = spi_read(client, buf_rx, 2); + if (ret < 0) + return ret; + + return (buf_rx[0] << 8) | buf_rx[1]; +} + +static int read24(void *client, u8 reg) +{ + int ret; + u8 buf_rx[3]; + + write24(client, reg, 0); + ret = spi_read(client, buf_rx, 3); + if (ret < 0) + return ret; + + return (buf_rx[1] << 8) | buf_rx[2]; +} + +static const struct ad_dpot_bus_ops bops = { + .read_d8 = read8, + .read_r8d8 = read16, + .read_r8d16 = read24, + .write_d8 = write8, + .write_r8d8 = write16, + .write_r8d16 = write24, +}; +static int ad_dpot_spi_probe(struct spi_device *spi) +{ + struct ad_dpot_bus_data bdata = { + .client = spi, + .bops = &bops, + }; + + return ad_dpot_probe(&spi->dev, &bdata, + spi_get_device_id(spi)->driver_data, + spi_get_device_id(spi)->name); +} + +static int ad_dpot_spi_remove(struct spi_device *spi) +{ + return ad_dpot_remove(&spi->dev); +} + +static const struct spi_device_id ad_dpot_spi_id[] = { + {"ad5160", AD5160_ID}, + {"ad5161", AD5161_ID}, + {"ad5162", AD5162_ID}, + {"ad5165", AD5165_ID}, + {"ad5200", AD5200_ID}, + {"ad5201", AD5201_ID}, + {"ad5203", AD5203_ID}, + {"ad5204", AD5204_ID}, + {"ad5206", AD5206_ID}, + {"ad5207", AD5207_ID}, + {"ad5231", AD5231_ID}, + {"ad5232", AD5232_ID}, + {"ad5233", AD5233_ID}, + {"ad5235", AD5235_ID}, + {"ad5260", AD5260_ID}, + {"ad5262", AD5262_ID}, + {"ad5263", AD5263_ID}, + {"ad5290", AD5290_ID}, + {"ad5291", AD5291_ID}, + {"ad5292", AD5292_ID}, + {"ad5293", AD5293_ID}, + {"ad7376", AD7376_ID}, + {"ad8400", AD8400_ID}, + {"ad8402", AD8402_ID}, + {"ad8403", AD8403_ID}, + {"adn2850", ADN2850_ID}, + {"ad5270", AD5270_ID}, + {"ad5271", AD5271_ID}, + {} +}; +MODULE_DEVICE_TABLE(spi, ad_dpot_spi_id); + +static struct spi_driver ad_dpot_spi_driver = { + .driver = { + .name = "ad_dpot", + .owner = THIS_MODULE, + }, + .probe = ad_dpot_spi_probe, + .remove = ad_dpot_spi_remove, + .id_table = ad_dpot_spi_id, +}; + +module_spi_driver(ad_dpot_spi_driver); + +MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>"); +MODULE_DESCRIPTION("digital potentiometer SPI bus driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("spi:ad_dpot"); diff --git a/kernel/drivers/misc/ad525x_dpot.c b/kernel/drivers/misc/ad525x_dpot.c new file mode 100644 index 000000000..15e88078b --- /dev/null +++ b/kernel/drivers/misc/ad525x_dpot.c @@ -0,0 +1,765 @@ +/* + * ad525x_dpot: Driver for the Analog Devices digital potentiometers + * Copyright (c) 2009-2010 Analog Devices, Inc. 
+ * Author: Michael Hennerich <hennerich@blackfin.uclinux.org> + * + * DEVID #Wipers #Positions Resistor Options (kOhm) + * AD5258 1 64 1, 10, 50, 100 + * AD5259 1 256 5, 10, 50, 100 + * AD5251 2 64 1, 10, 50, 100 + * AD5252 2 256 1, 10, 50, 100 + * AD5255 3 512 25, 250 + * AD5253 4 64 1, 10, 50, 100 + * AD5254 4 256 1, 10, 50, 100 + * AD5160 1 256 5, 10, 50, 100 + * AD5161 1 256 5, 10, 50, 100 + * AD5162 2 256 2.5, 10, 50, 100 + * AD5165 1 256 100 + * AD5200 1 256 10, 50 + * AD5201 1 33 10, 50 + * AD5203 4 64 10, 100 + * AD5204 4 256 10, 50, 100 + * AD5206 6 256 10, 50, 100 + * AD5207 2 256 10, 50, 100 + * AD5231 1 1024 10, 50, 100 + * AD5232 2 256 10, 50, 100 + * AD5233 4 64 10, 50, 100 + * AD5235 2 1024 25, 250 + * AD5260 1 256 20, 50, 200 + * AD5262 2 256 20, 50, 200 + * AD5263 4 256 20, 50, 200 + * AD5290 1 256 10, 50, 100 + * AD5291 1 256 20, 50, 100 (20-TP) + * AD5292 1 1024 20, 50, 100 (20-TP) + * AD5293 1 1024 20, 50, 100 + * AD7376 1 128 10, 50, 100, 1M + * AD8400 1 256 1, 10, 50, 100 + * AD8402 2 256 1, 10, 50, 100 + * AD8403 4 256 1, 10, 50, 100 + * ADN2850 3 512 25, 250 + * AD5241 1 256 10, 100, 1M + * AD5246 1 128 5, 10, 50, 100 + * AD5247 1 128 5, 10, 50, 100 + * AD5245 1 256 5, 10, 50, 100 + * AD5243 2 256 2.5, 10, 50, 100 + * AD5248 2 256 2.5, 10, 50, 100 + * AD5242 2 256 20, 50, 200 + * AD5280 1 256 20, 50, 200 + * AD5282 2 256 20, 50, 200 + * ADN2860 3 512 25, 250 + * AD5273 1 64 1, 10, 50, 100 (OTP) + * AD5171 1 64 5, 10, 50, 100 (OTP) + * AD5170 1 256 2.5, 10, 50, 100 (OTP) + * AD5172 2 256 2.5, 10, 50, 100 (OTP) + * AD5173 2 256 2.5, 10, 50, 100 (OTP) + * AD5270 1 1024 20, 50, 100 (50-TP) + * AD5271 1 256 20, 50, 100 (50-TP) + * AD5272 1 1024 20, 50, 100 (50-TP) + * AD5274 1 256 20, 50, 100 (50-TP) + * + * See Documentation/misc-devices/ad525x_dpot.txt for more info. + * + * derived from ad5258.c + * Copyright (c) 2009 Cyber Switching, Inc. + * Author: Chris Verges <chrisv@cyberswitching.com> + * + * derived from ad5252.c + * Copyright (c) 2006-2011 Michael Hennerich <hennerich@blackfin.uclinux.org> + * + * Licensed under the GPL-2 or later. 
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+
+#include "ad525x_dpot.h"
+
+/*
+ * Client data (each client gets its own)
+ */
+
+struct dpot_data {
+	struct ad_dpot_bus_data bdata;
+	struct mutex update_lock;
+	unsigned rdac_mask;
+	unsigned max_pos;
+	unsigned long devid;
+	unsigned uid;
+	unsigned feat;
+	unsigned wipers;
+	u16 rdac_cache[MAX_RDACS];
+	DECLARE_BITMAP(otp_en_mask, MAX_RDACS);
+};
+
+static inline int dpot_read_d8(struct dpot_data *dpot)
+{
+	return dpot->bdata.bops->read_d8(dpot->bdata.client);
+}
+
+static inline int dpot_read_r8d8(struct dpot_data *dpot, u8 reg)
+{
+	return dpot->bdata.bops->read_r8d8(dpot->bdata.client, reg);
+}
+
+static inline int dpot_read_r8d16(struct dpot_data *dpot, u8 reg)
+{
+	return dpot->bdata.bops->read_r8d16(dpot->bdata.client, reg);
+}
+
+static inline int dpot_write_d8(struct dpot_data *dpot, u8 val)
+{
+	return dpot->bdata.bops->write_d8(dpot->bdata.client, val);
+}
+
+static inline int dpot_write_r8d8(struct dpot_data *dpot, u8 reg, u16 val)
+{
+	return dpot->bdata.bops->write_r8d8(dpot->bdata.client, reg, val);
+}
+
+static inline int dpot_write_r8d16(struct dpot_data *dpot, u8 reg, u16 val)
+{
+	return dpot->bdata.bops->write_r8d16(dpot->bdata.client, reg, val);
+}
+
+static s32 dpot_read_spi(struct dpot_data *dpot, u8 reg)
+{
+	unsigned ctrl = 0;
+	int value;
+
+	if (!(reg & (DPOT_ADDR_EEPROM | DPOT_ADDR_CMD))) {
+
+		if (dpot->feat & F_RDACS_WONLY)
+			return dpot->rdac_cache[reg & DPOT_RDAC_MASK];
+		if (dpot->uid == DPOT_UID(AD5291_ID) ||
+			dpot->uid == DPOT_UID(AD5292_ID) ||
+			dpot->uid == DPOT_UID(AD5293_ID)) {
+
+			value = dpot_read_r8d8(dpot,
+				DPOT_AD5291_READ_RDAC << 2);
+
+			if (dpot->uid == DPOT_UID(AD5291_ID))
+				value = value >> 2;
+
+			return value;
+		} else if (dpot->uid == DPOT_UID(AD5270_ID) ||
+			dpot->uid == DPOT_UID(AD5271_ID)) {
+
+			value = dpot_read_r8d8(dpot,
+				DPOT_AD5270_1_2_4_READ_RDAC << 2);
+
+			if (value < 0)
+				return value;
+
+			if (dpot->uid == DPOT_UID(AD5271_ID))
+				value = value >> 2;
+
+			return value;
+		}
+
+		ctrl = DPOT_SPI_READ_RDAC;
+	} else if (reg & DPOT_ADDR_EEPROM) {
+		ctrl = DPOT_SPI_READ_EEPROM;
+	}
+
+	if (dpot->feat & F_SPI_16BIT)
+		return dpot_read_r8d8(dpot, ctrl);
+	else if (dpot->feat & F_SPI_24BIT)
+		return dpot_read_r8d16(dpot, ctrl);
+
+	return -EFAULT;
+}
+
+static s32 dpot_read_i2c(struct dpot_data *dpot, u8 reg)
+{
+	int value;
+	unsigned ctrl = 0;
+
+	switch (dpot->uid) {
+	case DPOT_UID(AD5246_ID):
+	case DPOT_UID(AD5247_ID):
+		return dpot_read_d8(dpot);
+	case DPOT_UID(AD5245_ID):
+	case DPOT_UID(AD5241_ID):
+	case DPOT_UID(AD5242_ID):
+	case DPOT_UID(AD5243_ID):
+	case DPOT_UID(AD5248_ID):
+	case DPOT_UID(AD5280_ID):
+	case DPOT_UID(AD5282_ID):
+		ctrl = ((reg & DPOT_RDAC_MASK) == DPOT_RDAC0) ?
+			0 : DPOT_AD5282_RDAC_AB;
+		return dpot_read_r8d8(dpot, ctrl);
+	case DPOT_UID(AD5170_ID):
+	case DPOT_UID(AD5171_ID):
+	case DPOT_UID(AD5273_ID):
+		return dpot_read_d8(dpot);
+	case DPOT_UID(AD5172_ID):
+	case DPOT_UID(AD5173_ID):
+		ctrl = ((reg & DPOT_RDAC_MASK) == DPOT_RDAC0) ?
+			0 : DPOT_AD5172_3_A0;
+		return dpot_read_r8d8(dpot, ctrl);
+	case DPOT_UID(AD5272_ID):
+	case DPOT_UID(AD5274_ID):
+		dpot_write_r8d8(dpot,
+				(DPOT_AD5270_1_2_4_READ_RDAC << 2), 0);
+
+		value = dpot_read_r8d16(dpot,
+			DPOT_AD5270_1_2_4_RDAC << 2);
+
+		if (value < 0)
+			return value;
+		/*
+		 * AD5272/AD5274 returns the high byte first; however, the
+		 * underlying SMBus expects the low byte first.
+ */ + value = swab16(value); + + if (dpot->uid == DPOT_UID(AD5271_ID)) + value = value >> 2; + return value; + default: + if ((reg & DPOT_REG_TOL) || (dpot->max_pos > 256)) + return dpot_read_r8d16(dpot, (reg & 0xF8) | + ((reg & 0x7) << 1)); + else + return dpot_read_r8d8(dpot, reg); + } +} + +static s32 dpot_read(struct dpot_data *dpot, u8 reg) +{ + if (dpot->feat & F_SPI) + return dpot_read_spi(dpot, reg); + else + return dpot_read_i2c(dpot, reg); +} + +static s32 dpot_write_spi(struct dpot_data *dpot, u8 reg, u16 value) +{ + unsigned val = 0; + + if (!(reg & (DPOT_ADDR_EEPROM | DPOT_ADDR_CMD | DPOT_ADDR_OTP))) { + if (dpot->feat & F_RDACS_WONLY) + dpot->rdac_cache[reg & DPOT_RDAC_MASK] = value; + + if (dpot->feat & F_AD_APPDATA) { + if (dpot->feat & F_SPI_8BIT) { + val = ((reg & DPOT_RDAC_MASK) << + DPOT_MAX_POS(dpot->devid)) | + value; + return dpot_write_d8(dpot, val); + } else if (dpot->feat & F_SPI_16BIT) { + val = ((reg & DPOT_RDAC_MASK) << + DPOT_MAX_POS(dpot->devid)) | + value; + return dpot_write_r8d8(dpot, val >> 8, + val & 0xFF); + } else + BUG(); + } else { + if (dpot->uid == DPOT_UID(AD5291_ID) || + dpot->uid == DPOT_UID(AD5292_ID) || + dpot->uid == DPOT_UID(AD5293_ID)) { + + dpot_write_r8d8(dpot, DPOT_AD5291_CTRLREG << 2, + DPOT_AD5291_UNLOCK_CMD); + + if (dpot->uid == DPOT_UID(AD5291_ID)) + value = value << 2; + + return dpot_write_r8d8(dpot, + (DPOT_AD5291_RDAC << 2) | + (value >> 8), value & 0xFF); + } else if (dpot->uid == DPOT_UID(AD5270_ID) || + dpot->uid == DPOT_UID(AD5271_ID)) { + dpot_write_r8d8(dpot, + DPOT_AD5270_1_2_4_CTRLREG << 2, + DPOT_AD5270_1_2_4_UNLOCK_CMD); + + if (dpot->uid == DPOT_UID(AD5271_ID)) + value = value << 2; + + return dpot_write_r8d8(dpot, + (DPOT_AD5270_1_2_4_RDAC << 2) | + (value >> 8), value & 0xFF); + } + val = DPOT_SPI_RDAC | (reg & DPOT_RDAC_MASK); + } + } else if (reg & DPOT_ADDR_EEPROM) { + val = DPOT_SPI_EEPROM | (reg & DPOT_RDAC_MASK); + } else if (reg & DPOT_ADDR_CMD) { + switch (reg) { + case DPOT_DEC_ALL_6DB: + val = DPOT_SPI_DEC_ALL_6DB; + break; + case DPOT_INC_ALL_6DB: + val = DPOT_SPI_INC_ALL_6DB; + break; + case DPOT_DEC_ALL: + val = DPOT_SPI_DEC_ALL; + break; + case DPOT_INC_ALL: + val = DPOT_SPI_INC_ALL; + break; + } + } else if (reg & DPOT_ADDR_OTP) { + if (dpot->uid == DPOT_UID(AD5291_ID) || + dpot->uid == DPOT_UID(AD5292_ID)) { + return dpot_write_r8d8(dpot, + DPOT_AD5291_STORE_XTPM << 2, 0); + } else if (dpot->uid == DPOT_UID(AD5270_ID) || + dpot->uid == DPOT_UID(AD5271_ID)) { + return dpot_write_r8d8(dpot, + DPOT_AD5270_1_2_4_STORE_XTPM << 2, 0); + } + } else + BUG(); + + if (dpot->feat & F_SPI_16BIT) + return dpot_write_r8d8(dpot, val, value); + else if (dpot->feat & F_SPI_24BIT) + return dpot_write_r8d16(dpot, val, value); + + return -EFAULT; +} + +static s32 dpot_write_i2c(struct dpot_data *dpot, u8 reg, u16 value) +{ + /* Only write the instruction byte for certain commands */ + unsigned tmp = 0, ctrl = 0; + + switch (dpot->uid) { + case DPOT_UID(AD5246_ID): + case DPOT_UID(AD5247_ID): + return dpot_write_d8(dpot, value); + + case DPOT_UID(AD5245_ID): + case DPOT_UID(AD5241_ID): + case DPOT_UID(AD5242_ID): + case DPOT_UID(AD5243_ID): + case DPOT_UID(AD5248_ID): + case DPOT_UID(AD5280_ID): + case DPOT_UID(AD5282_ID): + ctrl = ((reg & DPOT_RDAC_MASK) == DPOT_RDAC0) ? + 0 : DPOT_AD5282_RDAC_AB; + return dpot_write_r8d8(dpot, ctrl, value); + case DPOT_UID(AD5171_ID): + case DPOT_UID(AD5273_ID): + if (reg & DPOT_ADDR_OTP) { + tmp = dpot_read_d8(dpot); + if (tmp >> 6) /* Ready to Program? 
*/ + return -EFAULT; + ctrl = DPOT_AD5273_FUSE; + } + return dpot_write_r8d8(dpot, ctrl, value); + case DPOT_UID(AD5172_ID): + case DPOT_UID(AD5173_ID): + ctrl = ((reg & DPOT_RDAC_MASK) == DPOT_RDAC0) ? + 0 : DPOT_AD5172_3_A0; + if (reg & DPOT_ADDR_OTP) { + tmp = dpot_read_r8d16(dpot, ctrl); + if (tmp >> 14) /* Ready to Program? */ + return -EFAULT; + ctrl |= DPOT_AD5170_2_3_FUSE; + } + return dpot_write_r8d8(dpot, ctrl, value); + case DPOT_UID(AD5170_ID): + if (reg & DPOT_ADDR_OTP) { + tmp = dpot_read_r8d16(dpot, tmp); + if (tmp >> 14) /* Ready to Program? */ + return -EFAULT; + ctrl = DPOT_AD5170_2_3_FUSE; + } + return dpot_write_r8d8(dpot, ctrl, value); + case DPOT_UID(AD5272_ID): + case DPOT_UID(AD5274_ID): + dpot_write_r8d8(dpot, DPOT_AD5270_1_2_4_CTRLREG << 2, + DPOT_AD5270_1_2_4_UNLOCK_CMD); + + if (reg & DPOT_ADDR_OTP) + return dpot_write_r8d8(dpot, + DPOT_AD5270_1_2_4_STORE_XTPM << 2, 0); + + if (dpot->uid == DPOT_UID(AD5274_ID)) + value = value << 2; + + return dpot_write_r8d8(dpot, (DPOT_AD5270_1_2_4_RDAC << 2) | + (value >> 8), value & 0xFF); + default: + if (reg & DPOT_ADDR_CMD) + return dpot_write_d8(dpot, reg); + + if (dpot->max_pos > 256) + return dpot_write_r8d16(dpot, (reg & 0xF8) | + ((reg & 0x7) << 1), value); + else + /* All other registers require instruction + data bytes */ + return dpot_write_r8d8(dpot, reg, value); + } +} + +static s32 dpot_write(struct dpot_data *dpot, u8 reg, u16 value) +{ + if (dpot->feat & F_SPI) + return dpot_write_spi(dpot, reg, value); + else + return dpot_write_i2c(dpot, reg, value); +} + +/* sysfs functions */ + +static ssize_t sysfs_show_reg(struct device *dev, + struct device_attribute *attr, + char *buf, u32 reg) +{ + struct dpot_data *data = dev_get_drvdata(dev); + s32 value; + + if (reg & DPOT_ADDR_OTP_EN) + return sprintf(buf, "%s\n", + test_bit(DPOT_RDAC_MASK & reg, data->otp_en_mask) ? + "enabled" : "disabled"); + + + mutex_lock(&data->update_lock); + value = dpot_read(data, reg); + mutex_unlock(&data->update_lock); + + if (value < 0) + return -EINVAL; + /* + * Let someone else deal with converting this ... + * the tolerance is a two-byte value where the MSB + * is a sign + integer value, and the LSB is a + * decimal value. See page 18 of the AD5258 + * datasheet (Rev. A) for more details. 
+ */
+
+	if (reg & DPOT_REG_TOL)
+		return sprintf(buf, "0x%04x\n", value & 0xFFFF);
+	else
+		return sprintf(buf, "%u\n", value & data->rdac_mask);
+}
+
+static ssize_t sysfs_set_reg(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t count, u32 reg)
+{
+	struct dpot_data *data = dev_get_drvdata(dev);
+	unsigned long value;
+	int err;
+
+	if (reg & DPOT_ADDR_OTP_EN) {
+		/* compare without the trailing NUL so "enabled\n" matches */
+		if (!strncmp(buf, "enabled", sizeof("enabled") - 1))
+			set_bit(DPOT_RDAC_MASK & reg, data->otp_en_mask);
+		else
+			clear_bit(DPOT_RDAC_MASK & reg, data->otp_en_mask);
+
+		return count;
+	}
+
+	if ((reg & DPOT_ADDR_OTP) &&
+		!test_bit(DPOT_RDAC_MASK & reg, data->otp_en_mask))
+		return -EPERM;
+
+	err = kstrtoul(buf, 10, &value);
+	if (err)
+		return err;
+
+	if (value > data->rdac_mask)
+		value = data->rdac_mask;
+
+	mutex_lock(&data->update_lock);
+	dpot_write(data, reg, value);
+	if (reg & DPOT_ADDR_EEPROM)
+		msleep(26);	/* Sleep while the EEPROM updates */
+	else if (reg & DPOT_ADDR_OTP)
+		msleep(400);	/* Sleep while the OTP updates */
+	mutex_unlock(&data->update_lock);
+
+	return count;
+}
+
+static ssize_t sysfs_do_cmd(struct device *dev,
+			    struct device_attribute *attr,
+			    const char *buf, size_t count, u32 reg)
+{
+	struct dpot_data *data = dev_get_drvdata(dev);
+
+	mutex_lock(&data->update_lock);
+	dpot_write(data, reg, 0);
+	mutex_unlock(&data->update_lock);
+
+	return count;
+}
+
+/* ------------------------------------------------------------------------- */
+
+#define DPOT_DEVICE_SHOW(_name, _reg) static ssize_t \
+show_##_name(struct device *dev, \
+			 struct device_attribute *attr, char *buf) \
+{ \
+	return sysfs_show_reg(dev, attr, buf, _reg); \
+}
+
+#define DPOT_DEVICE_SET(_name, _reg) static ssize_t \
+set_##_name(struct device *dev, \
+			 struct device_attribute *attr, \
+			 const char *buf, size_t count) \
+{ \
+	return sysfs_set_reg(dev, attr, buf, count, _reg); \
+}
+
+#define DPOT_DEVICE_SHOW_SET(name, reg) \
+DPOT_DEVICE_SHOW(name, reg) \
+DPOT_DEVICE_SET(name, reg) \
+static DEVICE_ATTR(name, S_IWUSR | S_IRUGO, show_##name, set_##name);
+
+#define DPOT_DEVICE_SHOW_ONLY(name, reg) \
+DPOT_DEVICE_SHOW(name, reg) \
+static DEVICE_ATTR(name, S_IWUSR | S_IRUGO, show_##name, NULL);
+
+DPOT_DEVICE_SHOW_SET(rdac0, DPOT_ADDR_RDAC | DPOT_RDAC0);
+DPOT_DEVICE_SHOW_SET(eeprom0, DPOT_ADDR_EEPROM | DPOT_RDAC0);
+DPOT_DEVICE_SHOW_ONLY(tolerance0, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC0);
+DPOT_DEVICE_SHOW_SET(otp0, DPOT_ADDR_OTP | DPOT_RDAC0);
+DPOT_DEVICE_SHOW_SET(otp0en, DPOT_ADDR_OTP_EN | DPOT_RDAC0);
+
+DPOT_DEVICE_SHOW_SET(rdac1, DPOT_ADDR_RDAC | DPOT_RDAC1);
+DPOT_DEVICE_SHOW_SET(eeprom1, DPOT_ADDR_EEPROM | DPOT_RDAC1);
+DPOT_DEVICE_SHOW_ONLY(tolerance1, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC1);
+DPOT_DEVICE_SHOW_SET(otp1, DPOT_ADDR_OTP | DPOT_RDAC1);
+DPOT_DEVICE_SHOW_SET(otp1en, DPOT_ADDR_OTP_EN | DPOT_RDAC1);
+
+DPOT_DEVICE_SHOW_SET(rdac2, DPOT_ADDR_RDAC | DPOT_RDAC2);
+DPOT_DEVICE_SHOW_SET(eeprom2, DPOT_ADDR_EEPROM | DPOT_RDAC2);
+DPOT_DEVICE_SHOW_ONLY(tolerance2, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC2);
+DPOT_DEVICE_SHOW_SET(otp2, DPOT_ADDR_OTP | DPOT_RDAC2);
+DPOT_DEVICE_SHOW_SET(otp2en, DPOT_ADDR_OTP_EN | DPOT_RDAC2);
+
+DPOT_DEVICE_SHOW_SET(rdac3, DPOT_ADDR_RDAC | DPOT_RDAC3);
+DPOT_DEVICE_SHOW_SET(eeprom3, DPOT_ADDR_EEPROM | DPOT_RDAC3);
+DPOT_DEVICE_SHOW_ONLY(tolerance3, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC3);
+DPOT_DEVICE_SHOW_SET(otp3, DPOT_ADDR_OTP | DPOT_RDAC3);
+DPOT_DEVICE_SHOW_SET(otp3en, DPOT_ADDR_OTP_EN | DPOT_RDAC3);
+
+DPOT_DEVICE_SHOW_SET(rdac4, DPOT_ADDR_RDAC | DPOT_RDAC4);
+DPOT_DEVICE_SHOW_SET(eeprom4, DPOT_ADDR_EEPROM | DPOT_RDAC4); +DPOT_DEVICE_SHOW_ONLY(tolerance4, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC4); +DPOT_DEVICE_SHOW_SET(otp4, DPOT_ADDR_OTP | DPOT_RDAC4); +DPOT_DEVICE_SHOW_SET(otp4en, DPOT_ADDR_OTP_EN | DPOT_RDAC4); + +DPOT_DEVICE_SHOW_SET(rdac5, DPOT_ADDR_RDAC | DPOT_RDAC5); +DPOT_DEVICE_SHOW_SET(eeprom5, DPOT_ADDR_EEPROM | DPOT_RDAC5); +DPOT_DEVICE_SHOW_ONLY(tolerance5, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC5); +DPOT_DEVICE_SHOW_SET(otp5, DPOT_ADDR_OTP | DPOT_RDAC5); +DPOT_DEVICE_SHOW_SET(otp5en, DPOT_ADDR_OTP_EN | DPOT_RDAC5); + +static const struct attribute *dpot_attrib_wipers[] = { + &dev_attr_rdac0.attr, + &dev_attr_rdac1.attr, + &dev_attr_rdac2.attr, + &dev_attr_rdac3.attr, + &dev_attr_rdac4.attr, + &dev_attr_rdac5.attr, + NULL +}; + +static const struct attribute *dpot_attrib_eeprom[] = { + &dev_attr_eeprom0.attr, + &dev_attr_eeprom1.attr, + &dev_attr_eeprom2.attr, + &dev_attr_eeprom3.attr, + &dev_attr_eeprom4.attr, + &dev_attr_eeprom5.attr, + NULL +}; + +static const struct attribute *dpot_attrib_otp[] = { + &dev_attr_otp0.attr, + &dev_attr_otp1.attr, + &dev_attr_otp2.attr, + &dev_attr_otp3.attr, + &dev_attr_otp4.attr, + &dev_attr_otp5.attr, + NULL +}; + +static const struct attribute *dpot_attrib_otp_en[] = { + &dev_attr_otp0en.attr, + &dev_attr_otp1en.attr, + &dev_attr_otp2en.attr, + &dev_attr_otp3en.attr, + &dev_attr_otp4en.attr, + &dev_attr_otp5en.attr, + NULL +}; + +static const struct attribute *dpot_attrib_tolerance[] = { + &dev_attr_tolerance0.attr, + &dev_attr_tolerance1.attr, + &dev_attr_tolerance2.attr, + &dev_attr_tolerance3.attr, + &dev_attr_tolerance4.attr, + &dev_attr_tolerance5.attr, + NULL +}; + +/* ------------------------------------------------------------------------- */ + +#define DPOT_DEVICE_DO_CMD(_name, _cmd) static ssize_t \ +set_##_name(struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, size_t count) \ +{ \ + return sysfs_do_cmd(dev, attr, buf, count, _cmd); \ +} \ +static DEVICE_ATTR(_name, S_IWUSR | S_IRUGO, NULL, set_##_name); + +DPOT_DEVICE_DO_CMD(inc_all, DPOT_INC_ALL); +DPOT_DEVICE_DO_CMD(dec_all, DPOT_DEC_ALL); +DPOT_DEVICE_DO_CMD(inc_all_6db, DPOT_INC_ALL_6DB); +DPOT_DEVICE_DO_CMD(dec_all_6db, DPOT_DEC_ALL_6DB); + +static struct attribute *ad525x_attributes_commands[] = { + &dev_attr_inc_all.attr, + &dev_attr_dec_all.attr, + &dev_attr_inc_all_6db.attr, + &dev_attr_dec_all_6db.attr, + NULL +}; + +static const struct attribute_group ad525x_group_commands = { + .attrs = ad525x_attributes_commands, +}; + +static int ad_dpot_add_files(struct device *dev, + unsigned features, unsigned rdac) +{ + int err = sysfs_create_file(&dev->kobj, + dpot_attrib_wipers[rdac]); + if (features & F_CMD_EEP) + err |= sysfs_create_file(&dev->kobj, + dpot_attrib_eeprom[rdac]); + if (features & F_CMD_TOL) + err |= sysfs_create_file(&dev->kobj, + dpot_attrib_tolerance[rdac]); + if (features & F_CMD_OTP) { + err |= sysfs_create_file(&dev->kobj, + dpot_attrib_otp_en[rdac]); + err |= sysfs_create_file(&dev->kobj, + dpot_attrib_otp[rdac]); + } + + if (err) + dev_err(dev, "failed to register sysfs hooks for RDAC%d\n", + rdac); + + return err; +} + +static inline void ad_dpot_remove_files(struct device *dev, + unsigned features, unsigned rdac) +{ + sysfs_remove_file(&dev->kobj, + dpot_attrib_wipers[rdac]); + if (features & F_CMD_EEP) + sysfs_remove_file(&dev->kobj, + dpot_attrib_eeprom[rdac]); + if (features & F_CMD_TOL) + sysfs_remove_file(&dev->kobj, + dpot_attrib_tolerance[rdac]); + if (features & F_CMD_OTP) { + 
sysfs_remove_file(&dev->kobj, + dpot_attrib_otp_en[rdac]); + sysfs_remove_file(&dev->kobj, + dpot_attrib_otp[rdac]); + } +} + +int ad_dpot_probe(struct device *dev, + struct ad_dpot_bus_data *bdata, unsigned long devid, + const char *name) +{ + + struct dpot_data *data; + int i, err = 0; + + data = kzalloc(sizeof(struct dpot_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + dev_set_drvdata(dev, data); + mutex_init(&data->update_lock); + + data->bdata = *bdata; + data->devid = devid; + + data->max_pos = 1 << DPOT_MAX_POS(devid); + data->rdac_mask = data->max_pos - 1; + data->feat = DPOT_FEAT(devid); + data->uid = DPOT_UID(devid); + data->wipers = DPOT_WIPERS(devid); + + for (i = DPOT_RDAC0; i < MAX_RDACS; i++) + if (data->wipers & (1 << i)) { + err = ad_dpot_add_files(dev, data->feat, i); + if (err) + goto exit_remove_files; + /* power-up midscale */ + if (data->feat & F_RDACS_WONLY) + data->rdac_cache[i] = data->max_pos / 2; + } + + if (data->feat & F_CMD_INC) + err = sysfs_create_group(&dev->kobj, &ad525x_group_commands); + + if (err) { + dev_err(dev, "failed to register sysfs hooks\n"); + goto exit_free; + } + + dev_info(dev, "%s %d-Position Digital Potentiometer registered\n", + name, data->max_pos); + + return 0; + +exit_remove_files: + for (i = DPOT_RDAC0; i < MAX_RDACS; i++) + if (data->wipers & (1 << i)) + ad_dpot_remove_files(dev, data->feat, i); + +exit_free: + kfree(data); + dev_set_drvdata(dev, NULL); +exit: + dev_err(dev, "failed to create client for %s ID 0x%lX\n", + name, devid); + return err; +} +EXPORT_SYMBOL(ad_dpot_probe); + +int ad_dpot_remove(struct device *dev) +{ + struct dpot_data *data = dev_get_drvdata(dev); + int i; + + for (i = DPOT_RDAC0; i < MAX_RDACS; i++) + if (data->wipers & (1 << i)) + ad_dpot_remove_files(dev, data->feat, i); + + kfree(data); + + return 0; +} +EXPORT_SYMBOL(ad_dpot_remove); + + +MODULE_AUTHOR("Chris Verges <chrisv@cyberswitching.com>, " + "Michael Hennerich <hennerich@blackfin.uclinux.org>"); +MODULE_DESCRIPTION("Digital potentiometer driver"); +MODULE_LICENSE("GPL"); diff --git a/kernel/drivers/misc/ad525x_dpot.h b/kernel/drivers/misc/ad525x_dpot.h new file mode 100644 index 000000000..6bd1eba23 --- /dev/null +++ b/kernel/drivers/misc/ad525x_dpot.h @@ -0,0 +1,215 @@ +/* + * Driver for the Analog Devices digital potentiometers + * + * Copyright (C) 2010 Michael Hennerich, Analog Devices Inc. + * + * Licensed under the GPL-2 or later. 
+ */ + +#ifndef _AD_DPOT_H_ +#define _AD_DPOT_H_ + +#include <linux/types.h> + +#define DPOT_CONF(features, wipers, max_pos, uid) \ + (((features) << 18) | (((wipers) & 0xFF) << 10) | \ + ((max_pos & 0xF) << 6) | (uid & 0x3F)) + +#define DPOT_UID(conf) (conf & 0x3F) +#define DPOT_MAX_POS(conf) ((conf >> 6) & 0xF) +#define DPOT_WIPERS(conf) ((conf >> 10) & 0xFF) +#define DPOT_FEAT(conf) (conf >> 18) + +#define BRDAC0 (1 << 0) +#define BRDAC1 (1 << 1) +#define BRDAC2 (1 << 2) +#define BRDAC3 (1 << 3) +#define BRDAC4 (1 << 4) +#define BRDAC5 (1 << 5) +#define MAX_RDACS 6 + +#define F_CMD_INC (1 << 0) /* Features INC/DEC ALL, 6dB */ +#define F_CMD_EEP (1 << 1) /* Features EEPROM */ +#define F_CMD_OTP (1 << 2) /* Features OTP */ +#define F_CMD_TOL (1 << 3) /* RDACS feature Tolerance REG */ +#define F_RDACS_RW (1 << 4) /* RDACS are Read/Write */ +#define F_RDACS_WONLY (1 << 5) /* RDACS are Write only */ +#define F_AD_APPDATA (1 << 6) /* RDAC Address append to data */ +#define F_SPI_8BIT (1 << 7) /* All SPI XFERS are 8-bit */ +#define F_SPI_16BIT (1 << 8) /* All SPI XFERS are 16-bit */ +#define F_SPI_24BIT (1 << 9) /* All SPI XFERS are 24-bit */ + +#define F_RDACS_RW_TOL (F_RDACS_RW | F_CMD_EEP | F_CMD_TOL) +#define F_RDACS_RW_EEP (F_RDACS_RW | F_CMD_EEP) +#define F_SPI (F_SPI_8BIT | F_SPI_16BIT | F_SPI_24BIT) + +enum dpot_devid { + AD5258_ID = DPOT_CONF(F_RDACS_RW_TOL, BRDAC0, 6, 0), /* I2C */ + AD5259_ID = DPOT_CONF(F_RDACS_RW_TOL, BRDAC0, 8, 1), + AD5251_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC, + BRDAC1 | BRDAC3, 6, 2), + AD5252_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC, + BRDAC1 | BRDAC3, 8, 3), + AD5253_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC, + BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 6, 4), + AD5254_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC, + BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 8, 5), + AD5255_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC, + BRDAC0 | BRDAC1 | BRDAC2, 9, 6), + AD5160_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 8, 7), /* SPI */ + AD5161_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 8, 8), + AD5162_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1, 8, 9), + AD5165_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 8, 10), + AD5200_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 8, 11), + AD5201_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 5, 12), + AD5203_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 6, 13), + AD5204_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 8, 14), + AD5206_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3 | BRDAC4 | BRDAC5, + 8, 15), + AD5207_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1, 8, 16), + AD5231_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_24BIT, + BRDAC0, 10, 17), + AD5232_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_16BIT, + BRDAC0 | BRDAC1, 8, 18), + AD5233_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_16BIT, + BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 6, 19), + AD5235_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_24BIT, + BRDAC0 | BRDAC1, 10, 20), + AD5260_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 8, 21), + AD5262_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1, 8, 22), + AD5263_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 8, 
23), + AD5290_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 8, 24), + AD5291_ID = DPOT_CONF(F_RDACS_RW | F_SPI_16BIT | F_CMD_OTP, + BRDAC0, 8, 25), + AD5292_ID = DPOT_CONF(F_RDACS_RW | F_SPI_16BIT | F_CMD_OTP, + BRDAC0, 10, 26), + AD5293_ID = DPOT_CONF(F_RDACS_RW | F_SPI_16BIT, BRDAC0, 10, 27), + AD7376_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 7, 28), + AD8400_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0, 8, 29), + AD8402_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1, 8, 30), + AD8403_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1 | BRDAC2, 8, 31), + ADN2850_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_24BIT, + BRDAC0 | BRDAC1, 10, 32), + AD5241_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 8, 33), + AD5242_ID = DPOT_CONF(F_RDACS_RW, BRDAC0 | BRDAC1, 8, 34), + AD5243_ID = DPOT_CONF(F_RDACS_RW, BRDAC0 | BRDAC1, 8, 35), + AD5245_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 8, 36), + AD5246_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 7, 37), + AD5247_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 7, 38), + AD5248_ID = DPOT_CONF(F_RDACS_RW, BRDAC0 | BRDAC1, 8, 39), + AD5280_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 8, 40), + AD5282_ID = DPOT_CONF(F_RDACS_RW, BRDAC0 | BRDAC1, 8, 41), + ADN2860_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC, + BRDAC0 | BRDAC1 | BRDAC2, 9, 42), + AD5273_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 6, 43), + AD5171_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 6, 44), + AD5170_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 8, 45), + AD5172_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0 | BRDAC1, 8, 46), + AD5173_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0 | BRDAC1, 8, 47), + AD5270_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP | F_SPI_16BIT, + BRDAC0, 10, 48), + AD5271_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP | F_SPI_16BIT, + BRDAC0, 8, 49), + AD5272_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 10, 50), + AD5274_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 8, 51), +}; + +#define DPOT_RDAC0 0 +#define DPOT_RDAC1 1 +#define DPOT_RDAC2 2 +#define DPOT_RDAC3 3 +#define DPOT_RDAC4 4 +#define DPOT_RDAC5 5 + +#define DPOT_RDAC_MASK 0x1F + +#define DPOT_REG_TOL 0x18 +#define DPOT_TOL_RDAC0 (DPOT_REG_TOL | DPOT_RDAC0) +#define DPOT_TOL_RDAC1 (DPOT_REG_TOL | DPOT_RDAC1) +#define DPOT_TOL_RDAC2 (DPOT_REG_TOL | DPOT_RDAC2) +#define DPOT_TOL_RDAC3 (DPOT_REG_TOL | DPOT_RDAC3) +#define DPOT_TOL_RDAC4 (DPOT_REG_TOL | DPOT_RDAC4) +#define DPOT_TOL_RDAC5 (DPOT_REG_TOL | DPOT_RDAC5) + +/* RDAC-to-EEPROM Interface Commands */ +#define DPOT_ADDR_RDAC (0x0 << 5) +#define DPOT_ADDR_EEPROM (0x1 << 5) +#define DPOT_ADDR_OTP (0x1 << 6) +#define DPOT_ADDR_CMD (0x1 << 7) +#define DPOT_ADDR_OTP_EN (0x1 << 9) + +#define DPOT_DEC_ALL_6DB (DPOT_ADDR_CMD | (0x4 << 3)) +#define DPOT_INC_ALL_6DB (DPOT_ADDR_CMD | (0x9 << 3)) +#define DPOT_DEC_ALL (DPOT_ADDR_CMD | (0x6 << 3)) +#define DPOT_INC_ALL (DPOT_ADDR_CMD | (0xB << 3)) + +#define DPOT_SPI_RDAC 0xB0 +#define DPOT_SPI_EEPROM 0x30 +#define DPOT_SPI_READ_RDAC 0xA0 +#define DPOT_SPI_READ_EEPROM 0x90 +#define DPOT_SPI_DEC_ALL_6DB 0x50 +#define DPOT_SPI_INC_ALL_6DB 0xD0 +#define DPOT_SPI_DEC_ALL 0x70 +#define DPOT_SPI_INC_ALL 0xF0 + +/* AD5291/2/3 use special commands */ +#define DPOT_AD5291_RDAC 0x01 +#define DPOT_AD5291_READ_RDAC 0x02 +#define DPOT_AD5291_STORE_XTPM 0x03 +#define DPOT_AD5291_CTRLREG 0x06 +#define DPOT_AD5291_UNLOCK_CMD 0x03 + +/* AD5270/1/2/4 use special commands */ +#define DPOT_AD5270_1_2_4_RDAC 0x01 +#define DPOT_AD5270_1_2_4_READ_RDAC 0x02 +#define 
DPOT_AD5270_1_2_4_STORE_XTPM 0x03 +#define DPOT_AD5270_1_2_4_CTRLREG 0x07 +#define DPOT_AD5270_1_2_4_UNLOCK_CMD 0x03 + +#define DPOT_AD5282_RDAC_AB 0x80 + +#define DPOT_AD5273_FUSE 0x80 +#define DPOT_AD5170_2_3_FUSE 0x20 +#define DPOT_AD5170_2_3_OW 0x08 +#define DPOT_AD5172_3_A0 0x08 +#define DPOT_AD5170_2FUSE 0x80 + +struct dpot_data; + +struct ad_dpot_bus_ops { + int (*read_d8) (void *client); + int (*read_r8d8) (void *client, u8 reg); + int (*read_r8d16) (void *client, u8 reg); + int (*write_d8) (void *client, u8 val); + int (*write_r8d8) (void *client, u8 reg, u8 val); + int (*write_r8d16) (void *client, u8 reg, u16 val); +}; + +struct ad_dpot_bus_data { + void *client; + const struct ad_dpot_bus_ops *bops; +}; + +int ad_dpot_probe(struct device *dev, struct ad_dpot_bus_data *bdata, + unsigned long devid, const char *name); +int ad_dpot_remove(struct device *dev); + +#endif diff --git a/kernel/drivers/misc/altera-stapl/Kconfig b/kernel/drivers/misc/altera-stapl/Kconfig new file mode 100644 index 000000000..7f01d8e93 --- /dev/null +++ b/kernel/drivers/misc/altera-stapl/Kconfig @@ -0,0 +1,8 @@ +comment "Altera FPGA firmware download module" + +config ALTERA_STAPL + tristate "Altera FPGA firmware download module" + depends on I2C + default n + help + This driver programs Altera FPGAs by playing back STAPL (Jam STAPL byte-code) firmware files over JTAG. Say Y if you need to download FPGA firmware from within the kernel. diff --git a/kernel/drivers/misc/altera-stapl/Makefile b/kernel/drivers/misc/altera-stapl/Makefile new file mode 100644 index 000000000..055f61ee7 --- /dev/null +++ b/kernel/drivers/misc/altera-stapl/Makefile @@ -0,0 +1,3 @@ +altera-stapl-objs = altera-lpt.o altera-jtag.o altera-comp.o altera.o + +obj-$(CONFIG_ALTERA_STAPL) += altera-stapl.o diff --git a/kernel/drivers/misc/altera-stapl/altera-comp.c b/kernel/drivers/misc/altera-stapl/altera-comp.c new file mode 100644 index 000000000..49b103bed --- /dev/null +++ b/kernel/drivers/misc/altera-stapl/altera-comp.c @@ -0,0 +1,142 @@ +/* + * altera-comp.c + * + * altera FPGA driver + * + * Copyright (C) Altera Corporation 1998-2001 + * Copyright (C) 2010 NetUP Inc. + * Copyright (C) 2010 Igor M. Liplianin <liplianin@netup.ru> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include <linux/kernel.h> +#include "altera-exprt.h" + +#define SHORT_BITS 16 +#define CHAR_BITS 8 +#define DATA_BLOB_LENGTH 3 +#define MATCH_DATA_LENGTH 8192 +#define ALTERA_REQUEST_SIZE 1024 +#define ALTERA_BUFFER_SIZE (MATCH_DATA_LENGTH + ALTERA_REQUEST_SIZE) + +static u32 altera_bits_req(u32 n) +{ + u32 result = SHORT_BITS; + + if (n == 0) + result = 1; + else { + /* Look for the highest non-zero bit position */ + while ((n & (1 << (SHORT_BITS - 1))) == 0) { + n <<= 1; + --result; + } + } + + return result; +} + +static u32 altera_read_packed(u8 *buffer, u32 bits, u32 *bits_avail, + u32 *in_index) +{ + u32 result = 0; + u32 shift = 0; + u32 databyte = 0; + + while (bits > 0) { + databyte = buffer[*in_index]; + result |= (((databyte >> (CHAR_BITS - *bits_avail)) + & (0xff >> (CHAR_BITS - *bits_avail))) << shift); + + if (bits <= *bits_avail) { + result &= (0xffff >> (SHORT_BITS - (bits + shift))); + *bits_avail -= bits; + bits = 0; + } else { + ++(*in_index); + shift += *bits_avail; + bits -= *bits_avail; + *bits_avail = CHAR_BITS; + } + } + + return result; +} + +u32 altera_shrink(u8 *in, u32 in_length, u8 *out, u32 out_length, s32 version) +{ + u32 i, j, data_length = 0L; + u32 offset, length; + u32 match_data_length = MATCH_DATA_LENGTH; + u32 bits_avail = CHAR_BITS; + u32 in_index = 0L; + + if (version > 0) + --match_data_length; + + for (i = 0; i < out_length; ++i) + out[i] = 0; + + /* Read number of bytes in data. */ + for (i = 0; i < sizeof(in_length); ++i) { + data_length = data_length | ( + altera_read_packed(in, + CHAR_BITS, + &bits_avail, + &in_index) << (i * CHAR_BITS)); + } + + if (data_length > out_length) { + data_length = 0L; + return data_length; + } + + i = 0; + while (i < data_length) { + /* A 0 bit indicates literal data. */ + if (altera_read_packed(in, 1, &bits_avail, + &in_index) == 0) { + for (j = 0; j < DATA_BLOB_LENGTH; ++j) { + if (i < data_length) { + out[i] = (u8)altera_read_packed(in, + CHAR_BITS, + &bits_avail, + &in_index); + i++; + } + } + } else { + /* A 1 bit indicates offset/length to follow. */ + offset = altera_read_packed(in, altera_bits_req((s16) + (i > match_data_length ? + match_data_length : i)), + &bits_avail, + &in_index); + length = altera_read_packed(in, CHAR_BITS, + &bits_avail, + &in_index); + for (j = 0; j < length; ++j) { + if (i < data_length) { + out[i] = out[i - offset]; + i++; + } + } + } + } + + return data_length; +} diff --git a/kernel/drivers/misc/altera-stapl/altera-exprt.h b/kernel/drivers/misc/altera-stapl/altera-exprt.h new file mode 100644 index 000000000..39c38d84a --- /dev/null +++ b/kernel/drivers/misc/altera-stapl/altera-exprt.h @@ -0,0 +1,33 @@ +/* + * altera-exprt.h + * + * altera FPGA driver + * + * Copyright (C) Altera Corporation 1998-2001 + * Copyright (C) 2010 NetUP Inc. + * Copyright (C) 2010 Igor M. Liplianin <liplianin@netup.ru> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef ALTERA_EXPRT_H +#define ALTERA_EXPRT_H + + +u32 altera_shrink(u8 *in, u32 in_length, u8 *out, u32 out_length, s32 version); +int netup_jtag_io_lpt(void *device, int tms, int tdi, int read_tdo); + +#endif /* ALTERA_EXPRT_H */ diff --git a/kernel/drivers/misc/altera-stapl/altera-jtag.c b/kernel/drivers/misc/altera-stapl/altera-jtag.c new file mode 100644 index 000000000..f4bf20096 --- /dev/null +++ b/kernel/drivers/misc/altera-stapl/altera-jtag.c @@ -0,0 +1,1021 @@ +/* + * altera-jtag.c + * + * altera FPGA driver + * + * Copyright (C) Altera Corporation 1998-2001 + * Copyright (C) 2010 NetUP Inc. + * Copyright (C) 2010 Igor M. Liplianin <liplianin@netup.ru> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/delay.h> +#include <linux/firmware.h> +#include <linux/slab.h> +#include <misc/altera.h> +#include "altera-exprt.h" +#include "altera-jtag.h" + +#define alt_jtag_io(a, b, c)\ + astate->config->jtag_io(astate->config->dev, a, b, c); + +#define alt_malloc(a) kzalloc(a, GFP_KERNEL); + +/* + * This structure shows, for each JTAG state, which state is reached after + * a single TCK clock cycle with TMS high or TMS low, respectively. This + * describes all possible state transitions in the JTAG state machine. + */ +struct altera_jtag_machine { + enum altera_jtag_state tms_high; + enum altera_jtag_state tms_low; +}; + +static const struct altera_jtag_machine altera_transitions[] = { + /* RESET */ { RESET, IDLE }, + /* IDLE */ { DRSELECT, IDLE }, + /* DRSELECT */ { IRSELECT, DRCAPTURE }, + /* DRCAPTURE */ { DREXIT1, DRSHIFT }, + /* DRSHIFT */ { DREXIT1, DRSHIFT }, + /* DREXIT1 */ { DRUPDATE, DRPAUSE }, + /* DRPAUSE */ { DREXIT2, DRPAUSE }, + /* DREXIT2 */ { DRUPDATE, DRSHIFT }, + /* DRUPDATE */ { DRSELECT, IDLE }, + /* IRSELECT */ { RESET, IRCAPTURE }, + /* IRCAPTURE */ { IREXIT1, IRSHIFT }, + /* IRSHIFT */ { IREXIT1, IRSHIFT }, + /* IREXIT1 */ { IRUPDATE, IRPAUSE }, + /* IRPAUSE */ { IREXIT2, IRPAUSE }, + /* IREXIT2 */ { IRUPDATE, IRSHIFT }, + /* IRUPDATE */ { DRSELECT, IDLE } +}; + +/* + * This table contains the TMS value to be used to take the NEXT STEP on + * the path to the desired state. The array index is the current state, + * and the bit position is the desired endstate. To find out which state + * is used as the intermediate state, look up the TMS value in the + * altera_transitions[] table. 
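+ *
+ * Worked example (derived from these two tables): to go from IDLE (1)
+ * to DRSHIFT (4), bit 4 of altera_jtag_path_map[IDLE] = 0xFFFD is set,
+ * so TMS is driven high and altera_transitions[] moves us to DRSELECT;
+ * bit 4 of 0xFE01 (DRSELECT) is clear, so a TMS-low clock reaches
+ * DRCAPTURE; bit 4 of 0xFFE7 (DRCAPTURE) is clear as well, and one
+ * more TMS-low clock lands in DRSHIFT.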
+ */ +static const u16 altera_jtag_path_map[16] = { + /* RST RTI SDRS CDR SDR E1DR PDR E2DR */ + 0x0001, 0xFFFD, 0xFE01, 0xFFE7, 0xFFEF, 0xFF0F, 0xFFBF, 0xFFFF, + /* UDR SIRS CIR SIR E1IR PIR E2IR UIR */ + 0xFEFD, 0x0001, 0xF3FF, 0xF7FF, 0x87FF, 0xDFFF, 0xFFFF, 0x7FFD +}; + +/* Flag bits for alt_jtag_io() function */ +#define TMS_HIGH 1 +#define TMS_LOW 0 +#define TDI_HIGH 1 +#define TDI_LOW 0 +#define READ_TDO 1 +#define IGNORE_TDO 0 + +int altera_jinit(struct altera_state *astate) +{ + struct altera_jtag *js = &astate->js; + + /* initial JTAG state is unknown */ + js->jtag_state = ILLEGAL_JTAG_STATE; + + /* initialize to default state */ + js->drstop_state = IDLE; + js->irstop_state = IDLE; + js->dr_pre = 0; + js->dr_post = 0; + js->ir_pre = 0; + js->ir_post = 0; + js->dr_length = 0; + js->ir_length = 0; + + js->dr_pre_data = NULL; + js->dr_post_data = NULL; + js->ir_pre_data = NULL; + js->ir_post_data = NULL; + js->dr_buffer = NULL; + js->ir_buffer = NULL; + + return 0; +} + +int altera_set_drstop(struct altera_jtag *js, enum altera_jtag_state state) +{ + js->drstop_state = state; + + return 0; +} + +int altera_set_irstop(struct altera_jtag *js, enum altera_jtag_state state) +{ + js->irstop_state = state; + + return 0; +} + +int altera_set_dr_pre(struct altera_jtag *js, + u32 count, u32 start_index, + u8 *preamble_data) +{ + int status = 0; + u32 i; + u32 j; + + if (count > js->dr_pre) { + kfree(js->dr_pre_data); + js->dr_pre_data = (u8 *)alt_malloc((count + 7) >> 3); + if (js->dr_pre_data == NULL) + status = -ENOMEM; + else + js->dr_pre = count; + } else + js->dr_pre = count; + + if (status == 0) { + for (i = 0; i < count; ++i) { + j = i + start_index; + + if (preamble_data == NULL) + js->dr_pre_data[i >> 3] |= (1 << (i & 7)); + else { + if (preamble_data[j >> 3] & (1 << (j & 7))) + js->dr_pre_data[i >> 3] |= + (1 << (i & 7)); + else + js->dr_pre_data[i >> 3] &= + ~(u32)(1 << (i & 7)); + + } + } + } + + return status; +} + +int altera_set_ir_pre(struct altera_jtag *js, u32 count, u32 start_index, + u8 *preamble_data) +{ + int status = 0; + u32 i; + u32 j; + + if (count > js->ir_pre) { + kfree(js->ir_pre_data); + js->ir_pre_data = (u8 *)alt_malloc((count + 7) >> 3); + if (js->ir_pre_data == NULL) + status = -ENOMEM; + else + js->ir_pre = count; + + } else + js->ir_pre = count; + + if (status == 0) { + for (i = 0; i < count; ++i) { + j = i + start_index; + if (preamble_data == NULL) + js->ir_pre_data[i >> 3] |= (1 << (i & 7)); + else { + if (preamble_data[j >> 3] & (1 << (j & 7))) + js->ir_pre_data[i >> 3] |= + (1 << (i & 7)); + else + js->ir_pre_data[i >> 3] &= + ~(u32)(1 << (i & 7)); + + } + } + } + + return status; +} + +int altera_set_dr_post(struct altera_jtag *js, u32 count, u32 start_index, + u8 *postamble_data) +{ + int status = 0; + u32 i; + u32 j; + + if (count > js->dr_post) { + kfree(js->dr_post_data); + js->dr_post_data = (u8 *)alt_malloc((count + 7) >> 3); + + if (js->dr_post_data == NULL) + status = -ENOMEM; + else + js->dr_post = count; + + } else + js->dr_post = count; + + if (status == 0) { + for (i = 0; i < count; ++i) { + j = i + start_index; + + if (postamble_data == NULL) + js->dr_post_data[i >> 3] |= (1 << (i & 7)); + else { + if (postamble_data[j >> 3] & (1 << (j & 7))) + js->dr_post_data[i >> 3] |= + (1 << (i & 7)); + else + js->dr_post_data[i >> 3] &= + ~(u32)(1 << (i & 7)); + + } + } + } + + return status; +} + +int altera_set_ir_post(struct altera_jtag *js, u32 count, u32 start_index, + u8 *postamble_data) +{ + int status = 0; + u32 i; + u32 j; + + if (count > 
js->ir_post) { + kfree(js->ir_post_data); + js->ir_post_data = (u8 *)alt_malloc((count + 7) >> 3); + if (js->ir_post_data == NULL) + status = -ENOMEM; + else + js->ir_post = count; + + } else + js->ir_post = count; + + if (status != 0) + return status; + + for (i = 0; i < count; ++i) { + j = i + start_index; + + if (postamble_data == NULL) + js->ir_post_data[i >> 3] |= (1 << (i & 7)); + else { + if (postamble_data[j >> 3] & (1 << (j & 7))) + js->ir_post_data[i >> 3] |= (1 << (i & 7)); + else + js->ir_post_data[i >> 3] &= + ~(u32)(1 << (i & 7)); + + } + } + + return status; +} + +static void altera_jreset_idle(struct altera_state *astate) +{ + struct altera_jtag *js = &astate->js; + int i; + /* Go to Test Logic Reset (no matter what the starting state may be) */ + for (i = 0; i < 5; ++i) + alt_jtag_io(TMS_HIGH, TDI_LOW, IGNORE_TDO); + + /* Now step to Run Test / Idle */ + alt_jtag_io(TMS_LOW, TDI_LOW, IGNORE_TDO); + js->jtag_state = IDLE; +} + +int altera_goto_jstate(struct altera_state *astate, + enum altera_jtag_state state) +{ + struct altera_jtag *js = &astate->js; + int tms; + int count = 0; + int status = 0; + + if (js->jtag_state == ILLEGAL_JTAG_STATE) + /* initialize JTAG chain to known state */ + altera_jreset_idle(astate); + + if (js->jtag_state == state) { + /* + * We are already in the desired state. + * If it is a stable state, loop here. + * Otherwise do nothing (no clock cycles). + */ + if ((state == IDLE) || (state == DRSHIFT) || + (state == DRPAUSE) || (state == IRSHIFT) || + (state == IRPAUSE)) { + alt_jtag_io(TMS_LOW, TDI_LOW, IGNORE_TDO); + } else if (state == RESET) + alt_jtag_io(TMS_HIGH, TDI_LOW, IGNORE_TDO); + + } else { + while ((js->jtag_state != state) && (count < 9)) { + /* Get TMS value to take a step toward desired state */ + tms = (altera_jtag_path_map[js->jtag_state] & + (1 << state)) + ? TMS_HIGH : TMS_LOW; + + /* Take a step */ + alt_jtag_io(tms, TDI_LOW, IGNORE_TDO); + + if (tms) + js->jtag_state = + altera_transitions[js->jtag_state].tms_high; + else + js->jtag_state = + altera_transitions[js->jtag_state].tms_low; + + ++count; + } + } + + if (js->jtag_state != state) + status = -EREMOTEIO; + + return status; +} + +int altera_wait_cycles(struct altera_state *astate, + s32 cycles, + enum altera_jtag_state wait_state) +{ + struct altera_jtag *js = &astate->js; + int tms; + s32 count; + int status = 0; + + if (js->jtag_state != wait_state) + status = altera_goto_jstate(astate, wait_state); + + if (status == 0) { + /* + * Set TMS high to loop in RESET state + * Set TMS low to loop in any other stable state + */ + tms = (wait_state == RESET) ? TMS_HIGH : TMS_LOW; + + for (count = 0L; count < cycles; count++) + alt_jtag_io(tms, TDI_LOW, IGNORE_TDO); + + } + + return status; +} + +int altera_wait_msecs(struct altera_state *astate, + s32 microseconds, enum altera_jtag_state wait_state) +/* + * Causes JTAG hardware to sit in the specified stable + * state for the specified duration of real time. If + * no JTAG operations have been performed yet, then only + * a delay is performed. This permits the WAIT USECS + * statement to be used in VECTOR programs without causing + * any JTAG operations. + * Returns 0 for success, else appropriate error code. 
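+ *
+ * Note: despite the "msecs" in this function's name, the delay
+ * argument is in microseconds and is passed directly to udelay()
+ * below.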
+ */ +{ + struct altera_jtag *js = &astate->js; + int status = 0; + + if ((js->jtag_state != ILLEGAL_JTAG_STATE) && + (js->jtag_state != wait_state)) + status = altera_goto_jstate(astate, wait_state); + + if (status == 0) + /* Wait for specified time interval */ + udelay(microseconds); + + return status; +} + +static void altera_concatenate_data(u8 *buffer, + u8 *preamble_data, + u32 preamble_count, + u8 *target_data, + u32 start_index, + u32 target_count, + u8 *postamble_data, + u32 postamble_count) +/* + * Copies preamble data, target data, and postamble data + * into one buffer for IR or DR scans. + */ +{ + u32 i, j, k; + + for (i = 0L; i < preamble_count; ++i) { + if (preamble_data[i >> 3L] & (1L << (i & 7L))) + buffer[i >> 3L] |= (1L << (i & 7L)); + else + buffer[i >> 3L] &= ~(u32)(1L << (i & 7L)); + + } + + j = start_index; + k = preamble_count + target_count; + for (; i < k; ++i, ++j) { + if (target_data[j >> 3L] & (1L << (j & 7L))) + buffer[i >> 3L] |= (1L << (i & 7L)); + else + buffer[i >> 3L] &= ~(u32)(1L << (i & 7L)); + + } + + j = 0L; + k = preamble_count + target_count + postamble_count; + for (; i < k; ++i, ++j) { + if (postamble_data[j >> 3L] & (1L << (j & 7L))) + buffer[i >> 3L] |= (1L << (i & 7L)); + else + buffer[i >> 3L] &= ~(u32)(1L << (i & 7L)); + + } +} + +static int alt_jtag_drscan(struct altera_state *astate, + int start_state, + int count, + u8 *tdi, + u8 *tdo) +{ + int i = 0; + int tdo_bit = 0; + int status = 1; + + /* First go to DRSHIFT state */ + switch (start_state) { + case 0: /* IDLE */ + alt_jtag_io(1, 0, 0); /* DRSELECT */ + alt_jtag_io(0, 0, 0); /* DRCAPTURE */ + alt_jtag_io(0, 0, 0); /* DRSHIFT */ + break; + + case 1: /* DRPAUSE */ + alt_jtag_io(1, 0, 0); /* DREXIT2 */ + alt_jtag_io(1, 0, 0); /* DRUPDATE */ + alt_jtag_io(1, 0, 0); /* DRSELECT */ + alt_jtag_io(0, 0, 0); /* DRCAPTURE */ + alt_jtag_io(0, 0, 0); /* DRSHIFT */ + break; + + case 2: /* IRPAUSE */ + alt_jtag_io(1, 0, 0); /* IREXIT2 */ + alt_jtag_io(1, 0, 0); /* IRUPDATE */ + alt_jtag_io(1, 0, 0); /* DRSELECT */ + alt_jtag_io(0, 0, 0); /* DRCAPTURE */ + alt_jtag_io(0, 0, 0); /* DRSHIFT */ + break; + + default: + status = 0; + } + + if (status) { + /* loop in the SHIFT-DR state */ + for (i = 0; i < count; i++) { + tdo_bit = alt_jtag_io( + (i == count - 1), + tdi[i >> 3] & (1 << (i & 7)), + (tdo != NULL)); + + if (tdo != NULL) { + if (tdo_bit) + tdo[i >> 3] |= (1 << (i & 7)); + else + tdo[i >> 3] &= ~(u32)(1 << (i & 7)); + + } + } + + alt_jtag_io(0, 0, 0); /* DRPAUSE */ + } + + return status; +} + +static int alt_jtag_irscan(struct altera_state *astate, + int start_state, + int count, + u8 *tdi, + u8 *tdo) +{ + int i = 0; + int tdo_bit = 0; + int status = 1; + + /* First go to IRSHIFT state */ + switch (start_state) { + case 0: /* IDLE */ + alt_jtag_io(1, 0, 0); /* DRSELECT */ + alt_jtag_io(1, 0, 0); /* IRSELECT */ + alt_jtag_io(0, 0, 0); /* IRCAPTURE */ + alt_jtag_io(0, 0, 0); /* IRSHIFT */ + break; + + case 1: /* DRPAUSE */ + alt_jtag_io(1, 0, 0); /* DREXIT2 */ + alt_jtag_io(1, 0, 0); /* DRUPDATE */ + alt_jtag_io(1, 0, 0); /* DRSELECT */ + alt_jtag_io(1, 0, 0); /* IRSELECT */ + alt_jtag_io(0, 0, 0); /* IRCAPTURE */ + alt_jtag_io(0, 0, 0); /* IRSHIFT */ + break; + + case 2: /* IRPAUSE */ + alt_jtag_io(1, 0, 0); /* IREXIT2 */ + alt_jtag_io(1, 0, 0); /* IRUPDATE */ + alt_jtag_io(1, 0, 0); /* DRSELECT */ + alt_jtag_io(1, 0, 0); /* IRSELECT */ + alt_jtag_io(0, 0, 0); /* IRCAPTURE */ + alt_jtag_io(0, 0, 0); /* IRSHIFT */ + break; + + default: + status = 0; + } + + if (status) { + /* loop in the 
SHIFT-IR state */ + for (i = 0; i < count; i++) { + tdo_bit = alt_jtag_io( + (i == count - 1), + tdi[i >> 3] & (1 << (i & 7)), + (tdo != NULL)); + if (tdo != NULL) { + if (tdo_bit) + tdo[i >> 3] |= (1 << (i & 7)); + else + tdo[i >> 3] &= ~(u32)(1 << (i & 7)); + + } + } + + alt_jtag_io(0, 0, 0); /* IRPAUSE */ + } + + return status; +} + +static void altera_extract_target_data(u8 *buffer, + u8 *target_data, + u32 start_index, + u32 preamble_count, + u32 target_count) +/* + * Copies target data from scan buffer, filtering out + * preamble and postamble data. + */ +{ + u32 i; + u32 j; + u32 k; + + j = preamble_count; + k = start_index + target_count; + for (i = start_index; i < k; ++i, ++j) { + if (buffer[j >> 3] & (1 << (j & 7))) + target_data[i >> 3] |= (1 << (i & 7)); + else + target_data[i >> 3] &= ~(u32)(1 << (i & 7)); + + } +} + +int altera_irscan(struct altera_state *astate, + u32 count, + u8 *tdi_data, + u32 start_index) +/* Shifts data into instruction register */ +{ + struct altera_jtag *js = &astate->js; + int start_code = 0; + u32 alloc_chars = 0; + u32 shift_count = js->ir_pre + count + js->ir_post; + int status = 0; + enum altera_jtag_state start_state = ILLEGAL_JTAG_STATE; + + switch (js->jtag_state) { + case ILLEGAL_JTAG_STATE: + case RESET: + case IDLE: + start_code = 0; + start_state = IDLE; + break; + + case DRSELECT: + case DRCAPTURE: + case DRSHIFT: + case DREXIT1: + case DRPAUSE: + case DREXIT2: + case DRUPDATE: + start_code = 1; + start_state = DRPAUSE; + break; + + case IRSELECT: + case IRCAPTURE: + case IRSHIFT: + case IREXIT1: + case IRPAUSE: + case IREXIT2: + case IRUPDATE: + start_code = 2; + start_state = IRPAUSE; + break; + + default: + status = -EREMOTEIO; + break; + } + + if (status == 0) + if (js->jtag_state != start_state) + status = altera_goto_jstate(astate, start_state); + + if (status == 0) { + if (shift_count > js->ir_length) { + alloc_chars = (shift_count + 7) >> 3; + kfree(js->ir_buffer); + js->ir_buffer = (u8 *)alt_malloc(alloc_chars); + if (js->ir_buffer == NULL) + status = -ENOMEM; + else + js->ir_length = alloc_chars * 8; + + } + } + + if (status == 0) { + /* + * Copy preamble data, IR data, + * and postamble data into a buffer + */ + altera_concatenate_data(js->ir_buffer, + js->ir_pre_data, + js->ir_pre, + tdi_data, + start_index, + count, + js->ir_post_data, + js->ir_post); + /* Do the IRSCAN */ + alt_jtag_irscan(astate, + start_code, + shift_count, + js->ir_buffer, + NULL); + + /* alt_jtag_irscan() always ends in IRPAUSE state */ + js->jtag_state = IRPAUSE; + } + + if (status == 0) + if (js->irstop_state != IRPAUSE) + status = altera_goto_jstate(astate, js->irstop_state); + + + return status; +} + +int altera_swap_ir(struct altera_state *astate, + u32 count, + u8 *in_data, + u32 in_index, + u8 *out_data, + u32 out_index) +/* Shifts data into instruction register, capturing output data */ +{ + struct altera_jtag *js = &astate->js; + int start_code = 0; + u32 alloc_chars = 0; + u32 shift_count = js->ir_pre + count + js->ir_post; + int status = 0; + enum altera_jtag_state start_state = ILLEGAL_JTAG_STATE; + + switch (js->jtag_state) { + case ILLEGAL_JTAG_STATE: + case RESET: + case IDLE: + start_code = 0; + start_state = IDLE; + break; + + case DRSELECT: + case DRCAPTURE: + case DRSHIFT: + case DREXIT1: + case DRPAUSE: + case DREXIT2: + case DRUPDATE: + start_code = 1; + start_state = DRPAUSE; + break; + + case IRSELECT: + case IRCAPTURE: + case IRSHIFT: + case IREXIT1: + case IRPAUSE: + case IREXIT2: + case IRUPDATE: + start_code = 2; + start_state 
= IRPAUSE; + break; + + default: + status = -EREMOTEIO; + break; + } + + if (status == 0) + if (js->jtag_state != start_state) + status = altera_goto_jstate(astate, start_state); + + if (status == 0) { + if (shift_count > js->ir_length) { + alloc_chars = (shift_count + 7) >> 3; + kfree(js->ir_buffer); + js->ir_buffer = (u8 *)alt_malloc(alloc_chars); + if (js->ir_buffer == NULL) + status = -ENOMEM; + else + js->ir_length = alloc_chars * 8; + + } + } + + if (status == 0) { + /* + * Copy preamble data, IR data, + * and postamble data into a buffer + */ + altera_concatenate_data(js->ir_buffer, + js->ir_pre_data, + js->ir_pre, + in_data, + in_index, + count, + js->ir_post_data, + js->ir_post); + + /* Do the IRSCAN */ + alt_jtag_irscan(astate, + start_code, + shift_count, + js->ir_buffer, + js->ir_buffer); + + /* alt_jtag_irscan() always ends in IRPAUSE state */ + js->jtag_state = IRPAUSE; + } + + if (status == 0) + if (js->irstop_state != IRPAUSE) + status = altera_goto_jstate(astate, js->irstop_state); + + + if (status == 0) + /* Now extract the returned data from the buffer */ + altera_extract_target_data(js->ir_buffer, + out_data, out_index, + js->ir_pre, count); + + return status; +} + +int altera_drscan(struct altera_state *astate, + u32 count, + u8 *tdi_data, + u32 start_index) +/* Shifts data into data register (ignoring output data) */ +{ + struct altera_jtag *js = &astate->js; + int start_code = 0; + u32 alloc_chars = 0; + u32 shift_count = js->dr_pre + count + js->dr_post; + int status = 0; + enum altera_jtag_state start_state = ILLEGAL_JTAG_STATE; + + switch (js->jtag_state) { + case ILLEGAL_JTAG_STATE: + case RESET: + case IDLE: + start_code = 0; + start_state = IDLE; + break; + + case DRSELECT: + case DRCAPTURE: + case DRSHIFT: + case DREXIT1: + case DRPAUSE: + case DREXIT2: + case DRUPDATE: + start_code = 1; + start_state = DRPAUSE; + break; + + case IRSELECT: + case IRCAPTURE: + case IRSHIFT: + case IREXIT1: + case IRPAUSE: + case IREXIT2: + case IRUPDATE: + start_code = 2; + start_state = IRPAUSE; + break; + + default: + status = -EREMOTEIO; + break; + } + + if (status == 0) + if (js->jtag_state != start_state) + status = altera_goto_jstate(astate, start_state); + + if (status == 0) { + if (shift_count > js->dr_length) { + alloc_chars = (shift_count + 7) >> 3; + kfree(js->dr_buffer); + js->dr_buffer = (u8 *)alt_malloc(alloc_chars); + if (js->dr_buffer == NULL) + status = -ENOMEM; + else + js->dr_length = alloc_chars * 8; + + } + } + + if (status == 0) { + /* + * Copy preamble data, DR data, + * and postamble data into a buffer + */ + altera_concatenate_data(js->dr_buffer, + js->dr_pre_data, + js->dr_pre, + tdi_data, + start_index, + count, + js->dr_post_data, + js->dr_post); + /* Do the DRSCAN */ + alt_jtag_drscan(astate, start_code, shift_count, + js->dr_buffer, NULL); + /* alt_jtag_drscan() always ends in DRPAUSE state */ + js->jtag_state = DRPAUSE; + } + + if (status == 0) + if (js->drstop_state != DRPAUSE) + status = altera_goto_jstate(astate, js->drstop_state); + + return status; +} + +int altera_swap_dr(struct altera_state *astate, u32 count, + u8 *in_data, u32 in_index, + u8 *out_data, u32 out_index) +/* Shifts data into data register, capturing output data */ +{ + struct altera_jtag *js = &astate->js; + int start_code = 0; + u32 alloc_chars = 0; + u32 shift_count = js->dr_pre + count + js->dr_post; + int status = 0; + enum altera_jtag_state start_state = ILLEGAL_JTAG_STATE; + + switch (js->jtag_state) { + case ILLEGAL_JTAG_STATE: + case RESET: + case IDLE: + start_code = 
0; + start_state = IDLE; + break; + + case DRSELECT: + case DRCAPTURE: + case DRSHIFT: + case DREXIT1: + case DRPAUSE: + case DREXIT2: + case DRUPDATE: + start_code = 1; + start_state = DRPAUSE; + break; + + case IRSELECT: + case IRCAPTURE: + case IRSHIFT: + case IREXIT1: + case IRPAUSE: + case IREXIT2: + case IRUPDATE: + start_code = 2; + start_state = IRPAUSE; + break; + + default: + status = -EREMOTEIO; + break; + } + + if (status == 0) + if (js->jtag_state != start_state) + status = altera_goto_jstate(astate, start_state); + + if (status == 0) { + if (shift_count > js->dr_length) { + alloc_chars = (shift_count + 7) >> 3; + kfree(js->dr_buffer); + js->dr_buffer = (u8 *)alt_malloc(alloc_chars); + + if (js->dr_buffer == NULL) + status = -ENOMEM; + else + js->dr_length = alloc_chars * 8; + + } + } + + if (status == 0) { + /* + * Copy preamble data, DR data, + * and postamble data into a buffer + */ + altera_concatenate_data(js->dr_buffer, + js->dr_pre_data, + js->dr_pre, + in_data, + in_index, + count, + js->dr_post_data, + js->dr_post); + + /* Do the DRSCAN */ + alt_jtag_drscan(astate, + start_code, + shift_count, + js->dr_buffer, + js->dr_buffer); + + /* alt_jtag_drscan() always ends in DRPAUSE state */ + js->jtag_state = DRPAUSE; + } + + if (status == 0) + if (js->drstop_state != DRPAUSE) + status = altera_goto_jstate(astate, js->drstop_state); + + if (status == 0) + /* Now extract the returned data from the buffer */ + altera_extract_target_data(js->dr_buffer, + out_data, + out_index, + js->dr_pre, + count); + + return status; +} + +void altera_free_buffers(struct altera_state *astate) +{ + struct altera_jtag *js = &astate->js; + /* If the JTAG interface was used, reset it to TLR */ + if (js->jtag_state != ILLEGAL_JTAG_STATE) + altera_jreset_idle(astate); + + kfree(js->dr_pre_data); + js->dr_pre_data = NULL; + + kfree(js->dr_post_data); + js->dr_post_data = NULL; + + kfree(js->dr_buffer); + js->dr_buffer = NULL; + + kfree(js->ir_pre_data); + js->ir_pre_data = NULL; + + kfree(js->ir_post_data); + js->ir_post_data = NULL; + + kfree(js->ir_buffer); + js->ir_buffer = NULL; +} diff --git a/kernel/drivers/misc/altera-stapl/altera-jtag.h b/kernel/drivers/misc/altera-stapl/altera-jtag.h new file mode 100644 index 000000000..2f97e36a2 --- /dev/null +++ b/kernel/drivers/misc/altera-stapl/altera-jtag.h @@ -0,0 +1,113 @@ +/* + * altera-jtag.h + * + * altera FPGA driver + * + * Copyright (C) Altera Corporation 1998-2001 + * Copyright (C) 2010 NetUP Inc. + * Copyright (C) 2010 Igor M. Liplianin <liplianin@netup.ru> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef ALTERA_JTAG_H +#define ALTERA_JTAG_H + +/* Function Prototypes */ +enum altera_jtag_state { + ILLEGAL_JTAG_STATE = -1, + RESET = 0, + IDLE = 1, + DRSELECT = 2, + DRCAPTURE = 3, + DRSHIFT = 4, + DREXIT1 = 5, + DRPAUSE = 6, + DREXIT2 = 7, + DRUPDATE = 8, + IRSELECT = 9, + IRCAPTURE = 10, + IRSHIFT = 11, + IREXIT1 = 12, + IRPAUSE = 13, + IREXIT2 = 14, + IRUPDATE = 15 + +}; + +struct altera_jtag { + /* Global variable to store the current JTAG state */ + enum altera_jtag_state jtag_state; + + /* Store current stop-state for DR and IR scan commands */ + enum altera_jtag_state drstop_state; + enum altera_jtag_state irstop_state; + + /* Store current padding values */ + u32 dr_pre; + u32 dr_post; + u32 ir_pre; + u32 ir_post; + u32 dr_length; + u32 ir_length; + u8 *dr_pre_data; + u8 *dr_post_data; + u8 *ir_pre_data; + u8 *ir_post_data; + u8 *dr_buffer; + u8 *ir_buffer; +}; + +#define ALTERA_STACK_SIZE 128 +#define ALTERA_MESSAGE_LENGTH 1024 + +struct altera_state { + struct altera_config *config; + struct altera_jtag js; + char msg_buff[ALTERA_MESSAGE_LENGTH + 1]; + long stack[ALTERA_STACK_SIZE]; +}; + +int altera_jinit(struct altera_state *astate); +int altera_set_drstop(struct altera_jtag *js, enum altera_jtag_state state); +int altera_set_irstop(struct altera_jtag *js, enum altera_jtag_state state); +int altera_set_dr_pre(struct altera_jtag *js, u32 count, u32 start_index, + u8 *preamble_data); +int altera_set_ir_pre(struct altera_jtag *js, u32 count, u32 start_index, + u8 *preamble_data); +int altera_set_dr_post(struct altera_jtag *js, u32 count, u32 start_index, + u8 *postamble_data); +int altera_set_ir_post(struct altera_jtag *js, u32 count, u32 start_index, + u8 *postamble_data); +int altera_goto_jstate(struct altera_state *astate, + enum altera_jtag_state state); +int altera_wait_cycles(struct altera_state *astate, s32 cycles, + enum altera_jtag_state wait_state); +int altera_wait_msecs(struct altera_state *astate, s32 microseconds, + enum altera_jtag_state wait_state); +int altera_irscan(struct altera_state *astate, u32 count, + u8 *tdi_data, u32 start_index); +int altera_swap_ir(struct altera_state *astate, + u32 count, u8 *in_data, + u32 in_index, u8 *out_data, + u32 out_index); +int altera_drscan(struct altera_state *astate, u32 count, + u8 *tdi_data, u32 start_index); +int altera_swap_dr(struct altera_state *astate, u32 count, + u8 *in_data, u32 in_index, + u8 *out_data, u32 out_index); +void altera_free_buffers(struct altera_state *astate); +#endif /* ALTERA_JTAG_H */ diff --git a/kernel/drivers/misc/altera-stapl/altera-lpt.c b/kernel/drivers/misc/altera-stapl/altera-lpt.c new file mode 100644 index 000000000..91456a036 --- /dev/null +++ b/kernel/drivers/misc/altera-stapl/altera-lpt.c @@ -0,0 +1,70 @@ +/* + * altera-lpt.c + * + * altera FPGA driver + * + * Copyright (C) Altera Corporation 1998-2001 + * Copyright (C) 2010 NetUP Inc. + * Copyright (C) 2010 Abylay Ospan <aospan@netup.ru> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/io.h> +#include <linux/kernel.h> +#include "altera-exprt.h" + +static int lpt_hardware_initialized; + +static void byteblaster_write(int port, int data) +{ + outb((u8)data, (u16)(port + 0x378)); +}; + +static int byteblaster_read(int port) +{ + int data = 0; + data = inb((u16)(port + 0x378)); + return data & 0xff; +}; + +int netup_jtag_io_lpt(void *device, int tms, int tdi, int read_tdo) +{ + int data = 0; + int tdo = 0; + int initial_lpt_ctrl = 0; + + if (!lpt_hardware_initialized) { + initial_lpt_ctrl = byteblaster_read(2); + byteblaster_write(2, (initial_lpt_ctrl | 0x02) & 0xdf); + lpt_hardware_initialized = 1; + } + + data = ((tdi ? 0x40 : 0) | (tms ? 0x02 : 0)); + + byteblaster_write(0, data); + + if (read_tdo) { + tdo = byteblaster_read(1); + tdo = ((tdo & 0x80) ? 0 : 1); + } + + byteblaster_write(0, data | 0x01); + + byteblaster_write(0, data); + + return tdo; +} diff --git a/kernel/drivers/misc/altera-stapl/altera.c b/kernel/drivers/misc/altera-stapl/altera.c new file mode 100644 index 000000000..bca2630d0 --- /dev/null +++ b/kernel/drivers/misc/altera-stapl/altera.c @@ -0,0 +1,2537 @@ +/* + * altera.c + * + * altera FPGA driver + * + * Copyright (C) Altera Corporation 1998-2001 + * Copyright (C) 2010,2011 NetUP Inc. + * Copyright (C) 2010,2011 Igor M. Liplianin <liplianin@netup.ru> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <asm/unaligned.h> +#include <linux/ctype.h> +#include <linux/string.h> +#include <linux/firmware.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <misc/altera.h> +#include "altera-exprt.h" +#include "altera-jtag.h" + +static int debug = 1; +module_param(debug, int, 0644); +MODULE_PARM_DESC(debug, "enable debugging information"); + +MODULE_DESCRIPTION("altera FPGA kernel module"); +MODULE_AUTHOR("Igor M. Liplianin <liplianin@netup.ru>"); +MODULE_LICENSE("GPL"); + +#define dprintk(args...) 
\ + do { \ + if (debug) \ + printk(KERN_DEBUG args); \ + } while (0) + +enum altera_fpga_opcode { + OP_NOP = 0, + OP_DUP, + OP_SWP, + OP_ADD, + OP_SUB, + OP_MULT, + OP_DIV, + OP_MOD, + OP_SHL, + OP_SHR, + OP_NOT, + OP_AND, + OP_OR, + OP_XOR, + OP_INV, + OP_GT, + OP_LT, + OP_RET, + OP_CMPS, + OP_PINT, + OP_PRNT, + OP_DSS, + OP_DSSC, + OP_ISS, + OP_ISSC, + OP_DPR = 0x1c, + OP_DPRL, + OP_DPO, + OP_DPOL, + OP_IPR, + OP_IPRL, + OP_IPO, + OP_IPOL, + OP_PCHR, + OP_EXIT, + OP_EQU, + OP_POPT, + OP_ABS = 0x2c, + OP_BCH0, + OP_PSH0 = 0x2f, + OP_PSHL = 0x40, + OP_PSHV, + OP_JMP, + OP_CALL, + OP_NEXT, + OP_PSTR, + OP_SINT = 0x47, + OP_ST, + OP_ISTP, + OP_DSTP, + OP_SWPN, + OP_DUPN, + OP_POPV, + OP_POPE, + OP_POPA, + OP_JMPZ, + OP_DS, + OP_IS, + OP_DPRA, + OP_DPOA, + OP_IPRA, + OP_IPOA, + OP_EXPT, + OP_PSHE, + OP_PSHA, + OP_DYNA, + OP_EXPV = 0x5c, + OP_COPY = 0x80, + OP_REVA, + OP_DSC, + OP_ISC, + OP_WAIT, + OP_VS, + OP_CMPA = 0xc0, + OP_VSC, +}; + +struct altera_procinfo { + char *name; + u8 attrs; + struct altera_procinfo *next; +}; + +/* This function checks if enough parameters are available on the stack. */ +static int altera_check_stack(int stack_ptr, int count, int *status) +{ + if (stack_ptr < count) { + *status = -EOVERFLOW; + return 0; + } + + return 1; +} + +static void altera_export_int(char *key, s32 value) +{ + dprintk("Export: key = \"%s\", value = %d\n", key, value); +} + +#define HEX_LINE_CHARS 72 +#define HEX_LINE_BITS (HEX_LINE_CHARS * 4) + +static void altera_export_bool_array(char *key, u8 *data, s32 count) +{ + char string[HEX_LINE_CHARS + 1]; + s32 i, offset; + u32 size, line, lines, linebits, value, j, k; + + if (count > HEX_LINE_BITS) { + dprintk("Export: key = \"%s\", %d bits, value = HEX\n", + key, count); + lines = (count + (HEX_LINE_BITS - 1)) / HEX_LINE_BITS; + + for (line = 0; line < lines; ++line) { + if (line < (lines - 1)) { + linebits = HEX_LINE_BITS; + size = HEX_LINE_CHARS; + offset = count - ((line + 1) * HEX_LINE_BITS); + } else { + linebits = + count - ((lines - 1) * HEX_LINE_BITS); + size = (linebits + 3) / 4; + offset = 0L; + } + + string[size] = '\0'; + j = size - 1; + value = 0; + + for (k = 0; k < linebits; ++k) { + i = k + offset; + if (data[i >> 3] & (1 << (i & 7))) + value |= (1 << (i & 3)); + if ((i & 3) == 3) { + sprintf(&string[j], "%1x", value); + value = 0; + --j; + } + } + if ((k & 3) > 0) + sprintf(&string[j], "%1x", value); + + dprintk("%s\n", string); + } + + } else { + size = (count + 3) / 4; + string[size] = '\0'; + j = size - 1; + value = 0; + + for (i = 0; i < count; ++i) { + if (data[i >> 3] & (1 << (i & 7))) + value |= (1 << (i & 3)); + if ((i & 3) == 3) { + sprintf(&string[j], "%1x", value); + value = 0; + --j; + } + } + if ((i & 3) > 0) + sprintf(&string[j], "%1x", value); + + dprintk("Export: key = \"%s\", %d bits, value = HEX %s\n", + key, count, string); + } +} + +static int altera_execute(struct altera_state *astate, + u8 *p, + s32 program_size, + s32 *error_address, + int *exit_code, + int *format_version) +{ + struct altera_config *aconf = astate->config; + char *msg_buff = astate->msg_buff; + long *stack = astate->stack; + int status = 0; + u32 first_word = 0L; + u32 action_table = 0L; + u32 proc_table = 0L; + u32 str_table = 0L; + u32 sym_table = 0L; + u32 data_sect = 0L; + u32 code_sect = 0L; + u32 debug_sect = 0L; + u32 action_count = 0L; + u32 proc_count = 0L; + u32 sym_count = 0L; + long *vars = NULL; + s32 *var_size = NULL; + char *attrs = NULL; + u8 *proc_attributes = NULL; + u32 pc; + u32 opcode_address; + u32 args[3]; + u32 opcode; + u32 
name_id; + u8 charbuf[4]; + long long_tmp; + u32 variable_id; + u8 *charptr_tmp; + u8 *charptr_tmp2; + long *longptr_tmp; + int version = 0; + int delta = 0; + int stack_ptr = 0; + u32 arg_count; + int done = 0; + int bad_opcode = 0; + u32 count; + u32 index; + u32 index2; + s32 long_count; + s32 long_idx; + s32 long_idx2; + u32 i; + u32 j; + u32 uncomp_size; + u32 offset; + u32 value; + int current_proc = 0; + int reverse; + + char *name; + + dprintk("%s\n", __func__); + + /* Read header information */ + if (program_size > 52L) { + first_word = get_unaligned_be32(&p[0]); + version = (first_word & 1L); + *format_version = version + 1; + delta = version * 8; + + action_table = get_unaligned_be32(&p[4]); + proc_table = get_unaligned_be32(&p[8]); + str_table = get_unaligned_be32(&p[4 + delta]); + sym_table = get_unaligned_be32(&p[16 + delta]); + data_sect = get_unaligned_be32(&p[20 + delta]); + code_sect = get_unaligned_be32(&p[24 + delta]); + debug_sect = get_unaligned_be32(&p[28 + delta]); + action_count = get_unaligned_be32(&p[40 + delta]); + proc_count = get_unaligned_be32(&p[44 + delta]); + sym_count = get_unaligned_be32(&p[48 + (2 * delta)]); + } + + if ((first_word != 0x4A414D00L) && (first_word != 0x4A414D01L)) { + done = 1; + status = -EIO; + goto exit_done; + } + + if (sym_count <= 0) + goto exit_done; + + vars = kzalloc(sym_count * sizeof(long), GFP_KERNEL); + + if (vars == NULL) + status = -ENOMEM; + + if (status == 0) { + var_size = kzalloc(sym_count * sizeof(s32), GFP_KERNEL); + + if (var_size == NULL) + status = -ENOMEM; + } + + if (status == 0) { + attrs = kzalloc(sym_count, GFP_KERNEL); + + if (attrs == NULL) + status = -ENOMEM; + } + + if ((status == 0) && (version > 0)) { + proc_attributes = kzalloc(proc_count, GFP_KERNEL); + + if (proc_attributes == NULL) + status = -ENOMEM; + } + + if (status != 0) + goto exit_done; + + delta = version * 2; + + for (i = 0; i < sym_count; ++i) { + offset = (sym_table + ((11 + delta) * i)); + + value = get_unaligned_be32(&p[offset + 3 + delta]); + + attrs[i] = p[offset]; + + /* + * use bit 7 of attribute byte to indicate that + * this buffer was dynamically allocated + * and should be freed later + */ + attrs[i] &= 0x7f; + + var_size[i] = get_unaligned_be32(&p[offset + 7 + delta]); + + /* + * Attribute bits: + * bit 0: 0 = read-only, 1 = read-write + * bit 1: 0 = not compressed, 1 = compressed + * bit 2: 0 = not initialized, 1 = initialized + * bit 3: 0 = scalar, 1 = array + * bit 4: 0 = Boolean, 1 = integer + * bit 5: 0 = declared variable, + * 1 = compiler created temporary variable + */ + + if ((attrs[i] & 0x0c) == 0x04) + /* initialized scalar variable */ + vars[i] = value; + else if ((attrs[i] & 0x1e) == 0x0e) { + /* initialized compressed Boolean array */ + uncomp_size = get_unaligned_le32(&p[data_sect + value]); + + /* allocate a buffer for the uncompressed data */ + vars[i] = (long)kzalloc(uncomp_size, GFP_KERNEL); + if (vars[i] == 0L) + status = -ENOMEM; + else { + /* set flag so buffer will be freed later */ + attrs[i] |= 0x80; + + /* uncompress the data */ + if (altera_shrink(&p[data_sect + value], + var_size[i], + (u8 *)vars[i], + uncomp_size, + version) != uncomp_size) + /* decompression failed */ + status = -EIO; + else + var_size[i] = uncomp_size * 8L; + + } + } else if ((attrs[i] & 0x1e) == 0x0c) { + /* initialized Boolean array */ + vars[i] = value + data_sect + (long)p; + } else if ((attrs[i] & 0x1c) == 0x1c) { + /* initialized integer array */ + vars[i] = value + data_sect; + } else if ((attrs[i] & 0x0c) == 0x08) { + /* 
uninitialized array */ + + /* flag attrs so that memory is freed */ + attrs[i] |= 0x80; + + if (var_size[i] > 0) { + u32 size; + + if (attrs[i] & 0x10) + /* integer array */ + size = (var_size[i] * sizeof(s32)); + else + /* Boolean array */ + size = ((var_size[i] + 7L) / 8L); + + vars[i] = (long)kzalloc(size, GFP_KERNEL); + + if (vars[i] == 0) { + status = -ENOMEM; + } else { + /* zero out memory */ + for (j = 0; j < size; ++j) + ((u8 *)(vars[i]))[j] = 0; + + } + } else + vars[i] = 0; + + } else + vars[i] = 0; + + } + +exit_done: + if (status != 0) + done = 1; + + altera_jinit(astate); + + pc = code_sect; + msg_buff[0] = '\0'; + + /* + * For JBC version 2, we will execute the procedures corresponding to + * the selected ACTION + */ + if (version > 0) { + if (aconf->action == NULL) { + status = -EINVAL; + done = 1; + } else { + int action_found = 0; + for (i = 0; (i < action_count) && !action_found; ++i) { + name_id = get_unaligned_be32(&p[action_table + + (12 * i)]); + + name = &p[str_table + name_id]; + + if (strncasecmp(aconf->action, name, strlen(name)) == 0) { + action_found = 1; + current_proc = + get_unaligned_be32(&p[action_table + + (12 * i) + 8]); + } + } + + if (!action_found) { + status = -EINVAL; + done = 1; + } + } + + if (status == 0) { + int first_time = 1; + i = current_proc; + while ((i != 0) || first_time) { + first_time = 0; + /* check procedure attribute byte */ + proc_attributes[i] = + (p[proc_table + + (13 * i) + 8] & + 0x03); + + /* + * BIT0 - OPTIONAL + * BIT1 - RECOMMENDED + * BIT6 - FORCED OFF + * BIT7 - FORCED ON + */ + + i = get_unaligned_be32(&p[proc_table + + (13 * i) + 4]); + } + + /* + * Set current_proc to the first procedure + * to be executed + */ + i = current_proc; + while ((i != 0) && + ((proc_attributes[i] == 1) || + ((proc_attributes[i] & 0xc0) == 0x40))) { + i = get_unaligned_be32(&p[proc_table + + (13 * i) + 4]); + } + + if ((i != 0) || ((i == 0) && (current_proc == 0) && + ((proc_attributes[0] != 1) && + ((proc_attributes[0] & 0xc0) != 0x40)))) { + current_proc = i; + pc = code_sect + + get_unaligned_be32(&p[proc_table + + (13 * i) + 9]); + if ((pc < code_sect) || (pc >= debug_sect)) + status = -ERANGE; + } else + /* there are no procedures to execute! 
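+ * (every procedure reachable in this action's chain is marked
+ * OPTIONAL or FORCED OFF, so nothing is eligible to run)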
*/ + done = 1; + + } + } + + msg_buff[0] = '\0'; + + while (!done) { + opcode = (p[pc] & 0xff); + opcode_address = pc; + ++pc; + + if (debug > 1) + printk("opcode: %02x\n", opcode); + + arg_count = (opcode >> 6) & 3; + for (i = 0; i < arg_count; ++i) { + args[i] = get_unaligned_be32(&p[pc]); + pc += 4; + } + + switch (opcode) { + case OP_NOP: + break; + case OP_DUP: + if (altera_check_stack(stack_ptr, 1, &status)) { + stack[stack_ptr] = stack[stack_ptr - 1]; + ++stack_ptr; + } + break; + case OP_SWP: + if (altera_check_stack(stack_ptr, 2, &status)) { + long_tmp = stack[stack_ptr - 2]; + stack[stack_ptr - 2] = stack[stack_ptr - 1]; + stack[stack_ptr - 1] = long_tmp; + } + break; + case OP_ADD: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] += stack[stack_ptr]; + } + break; + case OP_SUB: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] -= stack[stack_ptr]; + } + break; + case OP_MULT: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] *= stack[stack_ptr]; + } + break; + case OP_DIV: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] /= stack[stack_ptr]; + } + break; + case OP_MOD: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] %= stack[stack_ptr]; + } + break; + case OP_SHL: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] <<= stack[stack_ptr]; + } + break; + case OP_SHR: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] >>= stack[stack_ptr]; + } + break; + case OP_NOT: + if (altera_check_stack(stack_ptr, 1, &status)) + stack[stack_ptr - 1] ^= (-1L); + + break; + case OP_AND: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] &= stack[stack_ptr]; + } + break; + case OP_OR: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] |= stack[stack_ptr]; + } + break; + case OP_XOR: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] ^= stack[stack_ptr]; + } + break; + case OP_INV: + if (!altera_check_stack(stack_ptr, 1, &status)) + break; + stack[stack_ptr - 1] = stack[stack_ptr - 1] ? 0L : 1L; + break; + case OP_GT: + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + --stack_ptr; + stack[stack_ptr - 1] = + (stack[stack_ptr - 1] > stack[stack_ptr]) ? + 1L : 0L; + + break; + case OP_LT: + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + --stack_ptr; + stack[stack_ptr - 1] = + (stack[stack_ptr - 1] < stack[stack_ptr]) ? + 1L : 0L; + + break; + case OP_RET: + if ((version > 0) && (stack_ptr == 0)) { + /* + * We completed one of the main procedures + * of an ACTION. + * Find the next procedure + * to be executed and jump to it. + * If there are no more procedures, then EXIT. + */ + i = get_unaligned_be32(&p[proc_table + + (13 * current_proc) + 4]); + while ((i != 0) && + ((proc_attributes[i] == 1) || + ((proc_attributes[i] & 0xc0) == 0x40))) + i = get_unaligned_be32(&p[proc_table + + (13 * i) + 4]); + + if (i == 0) { + /* no procedures to execute! 
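+ * (the action's procedure chain is exhausted; fall through to a
+ * successful exit)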
*/ + done = 1; + *exit_code = 0; /* success */ + } else { + current_proc = i; + pc = code_sect + get_unaligned_be32( + &p[proc_table + + (13 * i) + 9]); + if ((pc < code_sect) || + (pc >= debug_sect)) + status = -ERANGE; + } + + } else + if (altera_check_stack(stack_ptr, 1, &status)) { + pc = stack[--stack_ptr] + code_sect; + if ((pc <= code_sect) || + (pc >= debug_sect)) + status = -ERANGE; + + } + + break; + case OP_CMPS: + /* + * Array short compare + * ...stack 0 is source 1 value + * ...stack 1 is source 2 value + * ...stack 2 is mask value + * ...stack 3 is count + */ + if (altera_check_stack(stack_ptr, 4, &status)) { + s32 a = stack[--stack_ptr]; + s32 b = stack[--stack_ptr]; + long_tmp = stack[--stack_ptr]; + count = stack[stack_ptr - 1]; + + if ((count < 1) || (count > 32)) + status = -ERANGE; + else { + long_tmp &= ((-1L) >> (32 - count)); + + stack[stack_ptr - 1] = + ((a & long_tmp) == (b & long_tmp)) + ? 1L : 0L; + } + } + break; + case OP_PINT: + /* + * PRINT add integer + * ...stack 0 is integer value + */ + if (!altera_check_stack(stack_ptr, 1, &status)) + break; + sprintf(&msg_buff[strlen(msg_buff)], + "%ld", stack[--stack_ptr]); + break; + case OP_PRNT: + /* PRINT finish; msg_buff is data, not a format string */ + if (debug) + printk(KERN_DEBUG "%s\n", msg_buff); + + msg_buff[0] = '\0'; + break; + case OP_DSS: + /* + * DRSCAN short + * ...stack 0 is scan data + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + long_tmp = stack[--stack_ptr]; + count = stack[--stack_ptr]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_drscan(astate, count, charbuf, 0); + break; + case OP_DSSC: + /* + * DRSCAN short with capture + * ...stack 0 is scan data + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + long_tmp = stack[--stack_ptr]; + count = stack[stack_ptr - 1]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_swap_dr(astate, count, charbuf, + 0, charbuf, 0); + stack[stack_ptr - 1] = get_unaligned_le32(&charbuf[0]); + break; + case OP_ISS: + /* + * IRSCAN short + * ...stack 0 is scan data + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + long_tmp = stack[--stack_ptr]; + count = stack[--stack_ptr]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_irscan(astate, count, charbuf, 0); + break; + case OP_ISSC: + /* + * IRSCAN short with capture + * ...stack 0 is scan data + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + long_tmp = stack[--stack_ptr]; + count = stack[stack_ptr - 1]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_swap_ir(astate, count, charbuf, + 0, charbuf, 0); + stack[stack_ptr - 1] = get_unaligned_le32(&charbuf[0]); + break; + case OP_DPR: + if (!altera_check_stack(stack_ptr, 1, &status)) + break; + count = stack[--stack_ptr]; + status = altera_set_dr_pre(&astate->js, count, 0, NULL); + break; + case OP_DPRL: + /* + * DRPRE with literal data + * ...stack 0 is count + * ...stack 1 is literal data + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + count = stack[--stack_ptr]; + long_tmp = stack[--stack_ptr]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_set_dr_pre(&astate->js, count, 0, + charbuf); + break; + case OP_DPO: + /* + * DRPOST + * ...stack 0 is count + */ + if (altera_check_stack(stack_ptr, 1, &status)) { + count = stack[--stack_ptr]; + status = altera_set_dr_post(&astate->js, count, + 0, NULL); + } + break; + case OP_DPOL: + /* + * DRPOST with literal data + * ...stack 0 
is count + * ...stack 1 is literal data + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + count = stack[--stack_ptr]; + long_tmp = stack[--stack_ptr]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_set_dr_post(&astate->js, count, 0, + charbuf); + break; + case OP_IPR: + if (altera_check_stack(stack_ptr, 1, &status)) { + count = stack[--stack_ptr]; + status = altera_set_ir_pre(&astate->js, count, + 0, NULL); + } + break; + case OP_IPRL: + /* + * IRPRE with literal data + * ...stack 0 is count + * ...stack 1 is literal data + */ + if (altera_check_stack(stack_ptr, 2, &status)) { + count = stack[--stack_ptr]; + long_tmp = stack[--stack_ptr]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_set_ir_pre(&astate->js, count, + 0, charbuf); + } + break; + case OP_IPO: + /* + * IRPOST + * ...stack 0 is count + */ + if (altera_check_stack(stack_ptr, 1, &status)) { + count = stack[--stack_ptr]; + status = altera_set_ir_post(&astate->js, count, + 0, NULL); + } + break; + case OP_IPOL: + /* + * IRPOST with literal data + * ...stack 0 is count + * ...stack 1 is literal data + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + count = stack[--stack_ptr]; + long_tmp = stack[--stack_ptr]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_set_ir_post(&astate->js, count, 0, + charbuf); + break; + case OP_PCHR: + if (altera_check_stack(stack_ptr, 1, &status)) { + u8 ch; + count = strlen(msg_buff); + ch = (char) stack[--stack_ptr]; + if ((ch < 1) || (ch > 127)) { + /* + * character code out of range + * instead of flagging an error, + * force the value to 127 + */ + ch = 127; + } + msg_buff[count] = ch; + msg_buff[count + 1] = '\0'; + } + break; + case OP_EXIT: + if (altera_check_stack(stack_ptr, 1, &status)) + *exit_code = stack[--stack_ptr]; + + done = 1; + break; + case OP_EQU: + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + --stack_ptr; + stack[stack_ptr - 1] = + (stack[stack_ptr - 1] == stack[stack_ptr]) ? 
+ 1L : 0L; + break; + case OP_POPT: + if (altera_check_stack(stack_ptr, 1, &status)) + --stack_ptr; + + break; + case OP_ABS: + if (!altera_check_stack(stack_ptr, 1, &status)) + break; + if (stack[stack_ptr - 1] < 0) + stack[stack_ptr - 1] = 0 - stack[stack_ptr - 1]; + + break; + case OP_BCH0: + /* + * Batch operation 0 + * SWP + * SWPN 7 + * SWP + * SWPN 6 + * DUPN 8 + * SWPN 2 + * SWP + * DUPN 6 + * DUPN 6 + */ + + /* SWP */ + if (altera_check_stack(stack_ptr, 2, &status)) { + long_tmp = stack[stack_ptr - 2]; + stack[stack_ptr - 2] = stack[stack_ptr - 1]; + stack[stack_ptr - 1] = long_tmp; + } + + /* SWPN 7 */ + index = 7 + 1; + if (altera_check_stack(stack_ptr, index, &status)) { + long_tmp = stack[stack_ptr - index]; + stack[stack_ptr - index] = stack[stack_ptr - 1]; + stack[stack_ptr - 1] = long_tmp; + } + + /* SWP */ + if (altera_check_stack(stack_ptr, 2, &status)) { + long_tmp = stack[stack_ptr - 2]; + stack[stack_ptr - 2] = stack[stack_ptr - 1]; + stack[stack_ptr - 1] = long_tmp; + } + + /* SWPN 6 */ + index = 6 + 1; + if (altera_check_stack(stack_ptr, index, &status)) { + long_tmp = stack[stack_ptr - index]; + stack[stack_ptr - index] = stack[stack_ptr - 1]; + stack[stack_ptr - 1] = long_tmp; + } + + /* DUPN 8 */ + index = 8 + 1; + if (altera_check_stack(stack_ptr, index, &status)) { + stack[stack_ptr] = stack[stack_ptr - index]; + ++stack_ptr; + } + + /* SWPN 2 */ + index = 2 + 1; + if (altera_check_stack(stack_ptr, index, &status)) { + long_tmp = stack[stack_ptr - index]; + stack[stack_ptr - index] = stack[stack_ptr - 1]; + stack[stack_ptr - 1] = long_tmp; + } + + /* SWP */ + if (altera_check_stack(stack_ptr, 2, &status)) { + long_tmp = stack[stack_ptr - 2]; + stack[stack_ptr - 2] = stack[stack_ptr - 1]; + stack[stack_ptr - 1] = long_tmp; + } + + /* DUPN 6 */ + index = 6 + 1; + if (altera_check_stack(stack_ptr, index, &status)) { + stack[stack_ptr] = stack[stack_ptr - index]; + ++stack_ptr; + } + + /* DUPN 6 */ + index = 6 + 1; + if (altera_check_stack(stack_ptr, index, &status)) { + stack[stack_ptr] = stack[stack_ptr - index]; + ++stack_ptr; + } + break; + case OP_PSH0: + stack[stack_ptr++] = 0; + break; + case OP_PSHL: + stack[stack_ptr++] = (s32) args[0]; + break; + case OP_PSHV: + stack[stack_ptr++] = vars[args[0]]; + break; + case OP_JMP: + pc = args[0] + code_sect; + if ((pc < code_sect) || (pc >= debug_sect)) + status = -ERANGE; + break; + case OP_CALL: + stack[stack_ptr++] = pc; + pc = args[0] + code_sect; + if ((pc < code_sect) || (pc >= debug_sect)) + status = -ERANGE; + break; + case OP_NEXT: + /* + * Process FOR / NEXT loop + * ...argument 0 is variable ID + * ...stack 0 is step value + * ...stack 1 is end value + * ...stack 2 is top address + */ + if (altera_check_stack(stack_ptr, 3, &status)) { + s32 step = stack[stack_ptr - 1]; + s32 end = stack[stack_ptr - 2]; + s32 top = stack[stack_ptr - 3]; + s32 iterator = vars[args[0]]; + int break_out = 0; + + if (step < 0) { + if (iterator <= end) + break_out = 1; + } else if (iterator >= end) + break_out = 1; + + if (break_out) { + stack_ptr -= 3; + } else { + vars[args[0]] = iterator + step; + pc = top + code_sect; + if ((pc < code_sect) || + (pc >= debug_sect)) + status = -ERANGE; + } + } + break; + case OP_PSTR: + /* + * PRINT add string + * ...argument 0 is string ID + */ + count = strlen(msg_buff); + strlcpy(&msg_buff[count], + &p[str_table + args[0]], + ALTERA_MESSAGE_LENGTH - count); + break; + case OP_SINT: + /* + * STATE intermediate state + * ...argument 0 is state code + */ + status = altera_goto_jstate(astate, 
args[0]); + break; + case OP_ST: + /* + * STATE final state + * ...argument 0 is state code + */ + status = altera_goto_jstate(astate, args[0]); + break; + case OP_ISTP: + /* + * IRSTOP state + * ...argument 0 is state code + */ + status = altera_set_irstop(&astate->js, args[0]); + break; + case OP_DSTP: + /* + * DRSTOP state + * ...argument 0 is state code + */ + status = altera_set_drstop(&astate->js, args[0]); + break; + + case OP_SWPN: + /* + * Exchange top with Nth stack value + * ...argument 0 is 0-based stack entry + * to swap with top element + */ + index = (args[0]) + 1; + if (altera_check_stack(stack_ptr, index, &status)) { + long_tmp = stack[stack_ptr - index]; + stack[stack_ptr - index] = stack[stack_ptr - 1]; + stack[stack_ptr - 1] = long_tmp; + } + break; + case OP_DUPN: + /* + * Duplicate Nth stack value + * ...argument 0 is 0-based stack entry to duplicate + */ + index = (args[0]) + 1; + if (altera_check_stack(stack_ptr, index, &status)) { + stack[stack_ptr] = stack[stack_ptr - index]; + ++stack_ptr; + } + break; + case OP_POPV: + /* + * Pop stack into scalar variable + * ...argument 0 is variable ID + * ...stack 0 is value + */ + if (altera_check_stack(stack_ptr, 1, &status)) + vars[args[0]] = stack[--stack_ptr]; + + break; + case OP_POPE: + /* + * Pop stack into integer array element + * ...argument 0 is variable ID + * ...stack 0 is array index + * ...stack 1 is value + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + variable_id = args[0]; + + /* + * If variable is read-only, + * convert to writable array + */ + if ((version > 0) && + ((attrs[variable_id] & 0x9c) == 0x1c)) { + /* Allocate a writable buffer for this array */ + count = var_size[variable_id]; + long_tmp = vars[variable_id]; + longptr_tmp = kzalloc(count * sizeof(long), + GFP_KERNEL); + vars[variable_id] = (long)longptr_tmp; + + if (vars[variable_id] == 0) { + status = -ENOMEM; + break; + } + + /* copy previous contents into buffer */ + for (i = 0; i < count; ++i) { + longptr_tmp[i] = + get_unaligned_be32(&p[long_tmp]); + long_tmp += sizeof(long); + } + + /* + * set bit 7 - buffer was + * dynamically allocated + */ + attrs[variable_id] |= 0x80; + + /* clear bit 2 - variable is writable */ + attrs[variable_id] &= ~0x04; + attrs[variable_id] |= 0x01; + + } + + /* check that variable is a writable integer array */ + if ((attrs[variable_id] & 0x1c) != 0x18) + status = -ERANGE; + else { + longptr_tmp = (long *)vars[variable_id]; + + /* pop the array index */ + index = stack[--stack_ptr]; + + /* pop the value and store it into the array */ + longptr_tmp[index] = stack[--stack_ptr]; + } + + break; + case OP_POPA: + /* + * Pop stack into Boolean array + * ...argument 0 is variable ID + * ...stack 0 is count + * ...stack 1 is array index + * ...stack 2 is value + */ + if (!altera_check_stack(stack_ptr, 3, &status)) + break; + variable_id = args[0]; + + /* + * If variable is read-only, + * convert to writable array + */ + if ((version > 0) && + ((attrs[variable_id] & 0x9c) == 0x0c)) { + /* Allocate a writable buffer for this array */ + long_tmp = + (var_size[variable_id] + 7L) >> 3L; + charptr_tmp2 = (u8 *)vars[variable_id]; + charptr_tmp = + kzalloc(long_tmp, GFP_KERNEL); + vars[variable_id] = (long)charptr_tmp; + + if (vars[variable_id] == 0) { + status = -ENOMEM; + break; + } + + /* zero the buffer */ + for (long_idx = 0L; + long_idx < long_tmp; + ++long_idx) { + charptr_tmp[long_idx] = 0; + } + + /* copy previous contents into buffer */ + for (long_idx = 0L; + long_idx < var_size[variable_id]; + 
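+ /*
+ * Key to the attrs[] bit tests used throughout this interpreter
+ * (inferred from the masks in the code, not from a published spec):
+ *	0x80	buffer was kzalloc'd here; freed when execution ends
+ *	0x10	integer array (one long per element), else Boolean
+ *		(bit-packed)
+ *	0x08	array variable
+ *	0x04	read-only, i.e. data still lives inside the JBC image
+ *	0x01	writable converted copy
+ * Hence the (attrs & 0x9c) == 0x0c test above selects a read-only
+ * Boolean array that has not been converted yet.
+ */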
++long_idx) { + long_idx2 = long_idx; + + if (charptr_tmp2[long_idx2 >> 3] & + (1 << (long_idx2 & 7))) { + charptr_tmp[long_idx >> 3] |= + (1 << (long_idx & 7)); + } + } + + /* + * set bit 7 - buffer was + * dynamically allocated + */ + attrs[variable_id] |= 0x80; + + /* clear bit 2 - variable is writable */ + attrs[variable_id] &= ~0x04; + attrs[variable_id] |= 0x01; + + } + + /* + * check that variable is + * a writable Boolean array + */ + if ((attrs[variable_id] & 0x1c) != 0x08) { + status = -ERANGE; + break; + } + + charptr_tmp = (u8 *)vars[variable_id]; + + /* pop the count (number of bits to copy) */ + long_count = stack[--stack_ptr]; + + /* pop the array index */ + long_idx = stack[--stack_ptr]; + + reverse = 0; + + if (version > 0) { + /* + * stack 0 = array right index + * stack 1 = array left index + */ + + if (long_idx > long_count) { + reverse = 1; + long_tmp = long_count; + long_count = 1 + long_idx - + long_count; + long_idx = long_tmp; + + /* reverse POPA is not supported */ + status = -ERANGE; + break; + } else + long_count = 1 + long_count - + long_idx; + + } + + /* pop the data */ + long_tmp = stack[--stack_ptr]; + + if (long_count < 1) { + status = -ERANGE; + break; + } + + for (i = 0; i < long_count; ++i) { + if (long_tmp & (1L << (s32) i)) + charptr_tmp[long_idx >> 3L] |= + (1L << (long_idx & 7L)); + else + charptr_tmp[long_idx >> 3L] &= + ~(1L << (long_idx & 7L)); + + ++long_idx; + } + + break; + case OP_JMPZ: + /* + * Pop stack and branch if zero + * ...argument 0 is address + * ...stack 0 is condition value + */ + if (altera_check_stack(stack_ptr, 1, &status)) { + if (stack[--stack_ptr] == 0) { + pc = args[0] + code_sect; + if ((pc < code_sect) || + (pc >= debug_sect)) + status = -ERANGE; + } + } + break; + case OP_DS: + case OP_IS: + /* + * DRSCAN + * IRSCAN + * ...argument 0 is scan data variable ID + * ...stack 0 is array index + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + long_idx = stack[--stack_ptr]; + long_count = stack[--stack_ptr]; + reverse = 0; + if (version > 0) { + /* + * stack 0 = array right index + * stack 1 = array left index + * stack 2 = count + */ + long_tmp = long_count; + long_count = stack[--stack_ptr]; + + if (long_idx > long_tmp) { + reverse = 1; + long_idx = long_tmp; + } + } + + charptr_tmp = (u8 *)vars[args[0]]; + + if (reverse) { + /* + * allocate a buffer + * and reverse the data order + */ + charptr_tmp2 = charptr_tmp; + charptr_tmp = kzalloc((long_count >> 3) + 1, + GFP_KERNEL); + if (charptr_tmp == NULL) { + status = -ENOMEM; + break; + } + + long_tmp = long_idx + long_count - 1; + long_idx2 = 0; + while (long_idx2 < long_count) { + if (charptr_tmp2[long_tmp >> 3] & + (1 << (long_tmp & 7))) + charptr_tmp[long_idx2 >> 3] |= + (1 << (long_idx2 & 7)); + else + charptr_tmp[long_idx2 >> 3] &= + ~(1 << (long_idx2 & 7)); + + --long_tmp; + ++long_idx2; + } + } + + if (opcode == 0x51) /* DS */ + status = altera_drscan(astate, long_count, + charptr_tmp, long_idx); + else /* IS */ + status = altera_irscan(astate, long_count, + charptr_tmp, long_idx); + + if (reverse) + kfree(charptr_tmp); + + break; + case OP_DPRA: + /* + * DRPRE with array data + * ...argument 0 is variable ID + * ...stack 0 is array index + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + index = stack[--stack_ptr]; + count = stack[--stack_ptr]; + + if (version > 0) + /* + * stack 0 = array right index + * stack 1 = array left index + */ + count = 1 + count - index; + + charptr_tmp = (u8 
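+ /*
+ * Boolean arrays are bit-packed, LSB first: bit i lives in byte
+ * (i >> 3) under mask (1 << (i & 7)). The idiom the scan, copy and
+ * reverse loops above keep repeating is simply:
+ *
+ *	set:	arr[i >> 3] |=  (1 << (i & 7));
+ *	clear:	arr[i >> 3] &= ~(1 << (i & 7));
+ *	test:	arr[i >> 3] &   (1 << (i & 7))
+ */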
*)vars[args[0]]; + status = altera_set_dr_pre(&astate->js, count, index, + charptr_tmp); + break; + case OP_DPOA: + /* + * DRPOST with array data + * ...argument 0 is variable ID + * ...stack 0 is array index + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + index = stack[--stack_ptr]; + count = stack[--stack_ptr]; + + if (version > 0) + /* + * stack 0 = array right index + * stack 1 = array left index + */ + count = 1 + count - index; + + charptr_tmp = (u8 *)vars[args[0]]; + status = altera_set_dr_post(&astate->js, count, index, + charptr_tmp); + break; + case OP_IPRA: + /* + * IRPRE with array data + * ...argument 0 is variable ID + * ...stack 0 is array index + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + index = stack[--stack_ptr]; + count = stack[--stack_ptr]; + + if (version > 0) + /* + * stack 0 = array right index + * stack 1 = array left index + */ + count = 1 + count - index; + + charptr_tmp = (u8 *)vars[args[0]]; + status = altera_set_ir_pre(&astate->js, count, index, + charptr_tmp); + + break; + case OP_IPOA: + /* + * IRPOST with array data + * ...argument 0 is variable ID + * ...stack 0 is array index + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + index = stack[--stack_ptr]; + count = stack[--stack_ptr]; + + if (version > 0) + /* + * stack 0 = array right index + * stack 1 = array left index + */ + count = 1 + count - index; + + charptr_tmp = (u8 *)vars[args[0]]; + status = altera_set_ir_post(&astate->js, count, index, + charptr_tmp); + + break; + case OP_EXPT: + /* + * EXPORT + * ...argument 0 is string ID + * ...stack 0 is integer expression + */ + if (altera_check_stack(stack_ptr, 1, &status)) { + name = &p[str_table + args[0]]; + long_tmp = stack[--stack_ptr]; + altera_export_int(name, long_tmp); + } + break; + case OP_PSHE: + /* + * Push integer array element + * ...argument 0 is variable ID + * ...stack 0 is array index + */ + if (!altera_check_stack(stack_ptr, 1, &status)) + break; + variable_id = args[0]; + index = stack[stack_ptr - 1]; + + /* check variable type */ + if ((attrs[variable_id] & 0x1f) == 0x19) { + /* writable integer array */ + longptr_tmp = (long *)vars[variable_id]; + stack[stack_ptr - 1] = longptr_tmp[index]; + } else if ((attrs[variable_id] & 0x1f) == 0x1c) { + /* read-only integer array */ + long_tmp = vars[variable_id] + + (index * sizeof(long)); + stack[stack_ptr - 1] = + get_unaligned_be32(&p[long_tmp]); + } else + status = -ERANGE; + + break; + case OP_PSHA: + /* + * Push Boolean array + * ...argument 0 is variable ID + * ...stack 0 is count + * ...stack 1 is array index + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + variable_id = args[0]; + + /* check that variable is a Boolean array */ + if ((attrs[variable_id] & 0x18) != 0x08) { + status = -ERANGE; + break; + } + + charptr_tmp = (u8 *)vars[variable_id]; + + /* pop the count (number of bits to copy) */ + count = stack[--stack_ptr]; + + /* pop the array index */ + index = stack[stack_ptr - 1]; + + if (version > 0) + /* + * stack 0 = array right index + * stack 1 = array left index + */ + count = 1 + count - index; + + if ((count < 1) || (count > 32)) { + status = -ERANGE; + break; + } + + long_tmp = 0L; + + for (i = 0; i < count; ++i) + if (charptr_tmp[(i + index) >> 3] & + (1 << ((i + index) & 7))) + long_tmp |= (1L << i); + + stack[stack_ptr - 1] = long_tmp; + + break; + case OP_DYNA: + /* + * Dynamically change size of array + * ...argument 0 is 
variable ID + * ...stack 0 is new size + */ + if (!altera_check_stack(stack_ptr, 1, &status)) + break; + variable_id = args[0]; + long_tmp = stack[--stack_ptr]; + + if (long_tmp > var_size[variable_id]) { + var_size[variable_id] = long_tmp; + + if (attrs[variable_id] & 0x10) + /* allocate integer array */ + long_tmp *= sizeof(long); + else + /* allocate Boolean array */ + long_tmp = (long_tmp + 7) >> 3; + + /* + * If the buffer was previously allocated, + * free it + */ + if (attrs[variable_id] & 0x80) { + kfree((void *)vars[variable_id]); + vars[variable_id] = 0; + } + + /* + * Allocate a new buffer + * of the requested size + */ + vars[variable_id] = (long) + kzalloc(long_tmp, GFP_KERNEL); + + if (vars[variable_id] == 0) { + status = -ENOMEM; + break; + } + + /* + * Set the attribute bit to indicate that + * this buffer was dynamically allocated and + * should be freed later + */ + attrs[variable_id] |= 0x80; + + /* zero out memory */ + count = ((var_size[variable_id] + 7L) / + 8L); + charptr_tmp = (u8 *)(vars[variable_id]); + for (index = 0; index < count; ++index) + charptr_tmp[index] = 0; + + } + + break; + case OP_EXPV: + /* + * Export Boolean array + * ...argument 0 is string ID + * ...stack 0 is variable ID + * ...stack 1 is array right index + * ...stack 2 is array left index + */ + if (!altera_check_stack(stack_ptr, 3, &status)) + break; + if (version == 0) { + /* EXPV is not supported in JBC 1.0 */ + bad_opcode = 1; + break; + } + name = &p[str_table + args[0]]; + variable_id = stack[--stack_ptr]; + long_idx = stack[--stack_ptr];/* right indx */ + long_idx2 = stack[--stack_ptr];/* left indx */ + + if (long_idx > long_idx2) { + /* reverse indices not supported */ + status = -ERANGE; + break; + } + + long_count = 1 + long_idx2 - long_idx; + + charptr_tmp = (u8 *)vars[variable_id]; + charptr_tmp2 = NULL; + + if ((long_idx & 7L) != 0) { + s32 k = long_idx; + charptr_tmp2 = + kzalloc(((long_count + 7L) / 8L), + GFP_KERNEL); + if (charptr_tmp2 == NULL) { + status = -ENOMEM; + break; + } + + for (i = 0; i < long_count; ++i) { + if (charptr_tmp[k >> 3] & + (1 << (k & 7))) + charptr_tmp2[i >> 3] |= + (1 << (i & 7)); + else + charptr_tmp2[i >> 3] &= + ~(1 << (i & 7)); + + ++k; + } + charptr_tmp = charptr_tmp2; + + } else if (long_idx != 0) + charptr_tmp = &charptr_tmp[long_idx >> 3]; + + altera_export_bool_array(name, charptr_tmp, + long_count); + + /* free allocated buffer */ + if ((long_idx & 7L) != 0) + kfree(charptr_tmp2); + + break; + case OP_COPY: { + /* + * Array copy + * ...argument 0 is dest ID + * ...argument 1 is source ID + * ...stack 0 is count + * ...stack 1 is dest index + * ...stack 2 is source index + */ + s32 copy_count; + s32 copy_index; + s32 copy_index2; + s32 destleft; + s32 src_count; + s32 dest_count; + int src_reverse = 0; + int dest_reverse = 0; + + if (!altera_check_stack(stack_ptr, 3, &status)) + break; + + copy_count = stack[--stack_ptr]; + copy_index = stack[--stack_ptr]; + copy_index2 = stack[--stack_ptr]; + reverse = 0; + + if (version > 0) { + /* + * stack 0 = source right index + * stack 1 = source left index + * stack 2 = destination right index + * stack 3 = destination left index + */ + destleft = stack[--stack_ptr]; + + if (copy_count > copy_index) { + src_reverse = 1; + reverse = 1; + src_count = 1 + copy_count - copy_index; + /* copy_index = source start index */ + } else { + src_count = 1 + copy_index - copy_count; + /* source start index */ + copy_index = copy_count; + } + + if (copy_index2 > destleft) { + dest_reverse = 1; + reverse = !reverse; + 
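+ /*
+ * As in OP_DS/OP_IS, JBC 2.0 (version > 0) encodes each range as a
+ * left/right index pair rather than index-plus-count, and the code
+ * here normalizes every pair to (start, count):
+ *
+ *	right <= left:	count = 1 + left - right, start = right
+ *	right >  left:	count = 1 + right - left, start = left, reversed
+ *
+ * The per-range "reversed" flags are XORed into reverse, since
+ * copying both arrays backwards is equivalent to copying forwards.
+ */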
dest_count = 1 + copy_index2 - destleft; + /* destination start index */ + copy_index2 = destleft; + } else + dest_count = 1 + destleft - copy_index2; + + copy_count = (src_count < dest_count) ? + src_count : dest_count; + + if ((src_reverse || dest_reverse) && + (src_count != dest_count)) + /* + * If either the source or destination + * is reversed, we can't tolerate + * a length mismatch, because we + * "left justify" arrays when copying. + * This won't work correctly + * with reversed arrays. + */ + status = -ERANGE; + + } + + count = copy_count; + index = copy_index; + index2 = copy_index2; + + /* + * If destination is a read-only array, + * allocate a buffer and convert it to a writable array + */ + variable_id = args[1]; + if ((version > 0) && + ((attrs[variable_id] & 0x9c) == 0x0c)) { + /* Allocate a writable buffer for this array */ + long_tmp = + (var_size[variable_id] + 7L) >> 3L; + charptr_tmp2 = (u8 *)vars[variable_id]; + charptr_tmp = + kzalloc(long_tmp, GFP_KERNEL); + vars[variable_id] = (long)charptr_tmp; + + if (vars[variable_id] == 0) { + status = -ENOMEM; + break; + } + + /* zero the buffer */ + for (long_idx = 0L; long_idx < long_tmp; + ++long_idx) + charptr_tmp[long_idx] = 0; + + /* copy previous contents into buffer */ + for (long_idx = 0L; + long_idx < var_size[variable_id]; + ++long_idx) { + long_idx2 = long_idx; + + if (charptr_tmp2[long_idx2 >> 3] & + (1 << (long_idx2 & 7))) + charptr_tmp[long_idx >> 3] |= + (1 << (long_idx & 7)); + + } + + /* + set bit 7 - buffer was dynamically allocated */ + attrs[variable_id] |= 0x80; + + /* clear bit 2 - variable is writable */ + attrs[variable_id] &= ~0x04; + attrs[variable_id] |= 0x01; + } + + charptr_tmp = (u8 *)vars[args[1]]; + charptr_tmp2 = (u8 *)vars[args[0]]; + + /* check if destination is a writable Boolean array */ + if ((attrs[args[1]] & 0x1c) != 0x08) { + status = -ERANGE; + break; + } + + if (count < 1) { + status = -ERANGE; + break; + } + + if (reverse) + index2 += (count - 1); + + for (i = 0; i < count; ++i) { + if (charptr_tmp2[index >> 3] & + (1 << (index & 7))) + charptr_tmp[index2 >> 3] |= + (1 << (index2 & 7)); + else + charptr_tmp[index2 >> 3] &= + ~(1 << (index2 & 7)); + + ++index; + if (reverse) + --index2; + else + ++index2; + } + + break; + } + case OP_DSC: + case OP_ISC: { + /* + * DRSCAN with capture + * IRSCAN with capture + * ...argument 0 is scan data variable ID + * ...argument 1 is capture variable ID + * ...stack 0 is capture index + * ...stack 1 is scan data index + * ...stack 2 is count + */ + s32 scan_right, scan_left; + s32 capture_count = 0; + s32 scan_count = 0; + s32 capture_index; + s32 scan_index; + + if (!altera_check_stack(stack_ptr, 3, &status)) + break; + + capture_index = stack[--stack_ptr]; + scan_index = stack[--stack_ptr]; + + if (version > 0) { + /* + * stack 0 = capture right index + * stack 1 = capture left index + * stack 2 = scan right index + * stack 3 = scan left index + * stack 4 = count + */ + scan_right = stack[--stack_ptr]; + scan_left = stack[--stack_ptr]; + capture_count = 1 + scan_index - capture_index; + scan_count = 1 + scan_left - scan_right; + scan_index = scan_right; + } + + long_count = stack[--stack_ptr]; + /* + * If capture array is read-only, allocate a buffer + * and convert it to a writable array + */ + variable_id = args[1]; + if ((version > 0) && + ((attrs[variable_id] & 0x9c) == 0x0c)) { + /* Allocate a writable buffer for this array */ + long_tmp = + (var_size[variable_id] + 7L) >> 3L; + charptr_tmp2 = (u8 *)vars[variable_id]; + charptr_tmp = + 
kzalloc(long_tmp, GFP_KERNEL); + vars[variable_id] = (long)charptr_tmp; + + if (vars[variable_id] == 0) { + status = -ENOMEM; + break; + } + + /* zero the buffer */ + for (long_idx = 0L; long_idx < long_tmp; + ++long_idx) + charptr_tmp[long_idx] = 0; + + /* copy previous contents into buffer */ + for (long_idx = 0L; + long_idx < var_size[variable_id]; + ++long_idx) { + long_idx2 = long_idx; + + if (charptr_tmp2[long_idx2 >> 3] & + (1 << (long_idx2 & 7))) + charptr_tmp[long_idx >> 3] |= + (1 << (long_idx & 7)); + + } + + /* + * set bit 7 - buffer was + * dynamically allocated + */ + attrs[variable_id] |= 0x80; + + /* clear bit 2 - variable is writable */ + attrs[variable_id] &= ~0x04; + attrs[variable_id] |= 0x01; + + } + + charptr_tmp = (u8 *)vars[args[0]]; + charptr_tmp2 = (u8 *)vars[args[1]]; + + if ((version > 0) && + ((long_count > capture_count) || + (long_count > scan_count))) { + status = -ERANGE; + break; + } + + /* + * check that capture array + * is a writable Boolean array + */ + if ((attrs[args[1]] & 0x1c) != 0x08) { + status = -ERANGE; + break; + } + + if (status == 0) { + if (opcode == 0x82) /* DSC */ + status = altera_swap_dr(astate, + long_count, + charptr_tmp, + scan_index, + charptr_tmp2, + capture_index); + else /* ISC */ + status = altera_swap_ir(astate, + long_count, + charptr_tmp, + scan_index, + charptr_tmp2, + capture_index); + + } + + break; + } + case OP_WAIT: + /* + * WAIT + * ...argument 0 is wait state + * ...argument 1 is end state + * ...stack 0 is cycles + * ...stack 1 is microseconds + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + long_tmp = stack[--stack_ptr]; + + if (long_tmp != 0L) + status = altera_wait_cycles(astate, long_tmp, + args[0]); + + long_tmp = stack[--stack_ptr]; + + if ((status == 0) && (long_tmp != 0L)) + status = altera_wait_msecs(astate, + long_tmp, + args[0]); + + if ((status == 0) && (args[1] != args[0])) + status = altera_goto_jstate(astate, + args[1]); + + if (version > 0) { + --stack_ptr; /* throw away MAX cycles */ + --stack_ptr; /* throw away MAX microseconds */ + } + break; + case OP_CMPA: { + /* + * Array compare + * ...argument 0 is source 1 ID + * ...argument 1 is source 2 ID + * ...argument 2 is mask ID + * ...stack 0 is source 1 index + * ...stack 1 is source 2 index + * ...stack 2 is mask index + * ...stack 3 is count + */ + s32 a, b; + u8 *source1 = (u8 *)vars[args[0]]; + u8 *source2 = (u8 *)vars[args[1]]; + u8 *mask = (u8 *)vars[args[2]]; + u32 index1; + u32 index2; + u32 mask_index; + + if (!altera_check_stack(stack_ptr, 4, &status)) + break; + + index1 = stack[--stack_ptr]; + index2 = stack[--stack_ptr]; + mask_index = stack[--stack_ptr]; + long_count = stack[--stack_ptr]; + + if (version > 0) { + /* + * stack 0 = source 1 right index + * stack 1 = source 1 left index + * stack 2 = source 2 right index + * stack 3 = source 2 left index + * stack 4 = mask right index + * stack 5 = mask left index + */ + s32 mask_right = stack[--stack_ptr]; + s32 mask_left = stack[--stack_ptr]; + /* source 1 count */ + a = 1 + index2 - index1; + /* source 2 count */ + b = 1 + long_count - mask_index; + a = (a < b) ? a : b; + /* mask count */ + b = 1 + mask_left - mask_right; + a = (a < b) ? 
a : b; + /* source 2 start index */ + index2 = mask_index; + /* mask start index */ + mask_index = mask_right; + long_count = a; + } + + long_tmp = 1L; + + if (long_count < 1) + status = -ERANGE; + else { + count = long_count; + + for (i = 0; i < count; ++i) { + if (mask[mask_index >> 3] & + (1 << (mask_index & 7))) { + a = source1[index1 >> 3] & + (1 << (index1 & 7)) + ? 1 : 0; + b = source2[index2 >> 3] & + (1 << (index2 & 7)) + ? 1 : 0; + + if (a != b) /* failure */ + long_tmp = 0L; + } + ++index1; + ++index2; + ++mask_index; + } + } + + stack[stack_ptr++] = long_tmp; + + break; + } + default: + /* Unrecognized opcode -- ERROR! */ + bad_opcode = 1; + break; + } + + if (bad_opcode) + status = -ENOSYS; + + if ((stack_ptr < 0) || (stack_ptr >= ALTERA_STACK_SIZE)) + status = -EOVERFLOW; + + if (status != 0) { + done = 1; + *error_address = (s32)(opcode_address - code_sect); + } + } + + altera_free_buffers(astate); + + /* Free all dynamically allocated arrays */ + if ((attrs != NULL) && (vars != NULL)) + for (i = 0; i < sym_count; ++i) + if (attrs[i] & 0x80) + kfree((void *)vars[i]); + + kfree(vars); + kfree(var_size); + kfree(attrs); + kfree(proc_attributes); + + return status; +} + +static int altera_get_note(u8 *p, s32 program_size, + s32 *offset, char *key, char *value, int length) +/* + * Gets key and value of NOTE fields in the JBC file. + * Can be called in two modes: if offset pointer is NULL, + * then the function searches for note fields which match + * the key string provided. If offset is not NULL, then + * the function finds the next note field of any key, + * starting at the offset specified by the offset pointer. + * Returns 0 for success, else appropriate error code + */ +{ + int status = -ENODATA; + u32 note_strings = 0L; + u32 note_table = 0L; + u32 note_count = 0L; + u32 first_word = 0L; + int version = 0; + int delta = 0; + char *key_ptr; + char *value_ptr; + int i; + + /* Read header information */ + if (program_size > 52L) { + first_word = get_unaligned_be32(&p[0]); + version = (first_word & 1L); + delta = version * 8; + + note_strings = get_unaligned_be32(&p[8 + delta]); + note_table = get_unaligned_be32(&p[12 + delta]); + note_count = get_unaligned_be32(&p[44 + (2 * delta)]); + } + + if ((first_word != 0x4A414D00L) && (first_word != 0x4A414D01L)) + return -EIO; + + if (note_count <= 0L) + return status; + + if (offset == NULL) { + /* + * We will search for the first note with a specific key, + * and return only the value + */ + for (i = 0; (i < note_count) && + (status != 0); ++i) { + key_ptr = &p[note_strings + + get_unaligned_be32( + &p[note_table + (8 * i)])]; + if ((strncasecmp(key, key_ptr, strlen(key_ptr)) == 0) && + (key != NULL)) { + status = 0; + + value_ptr = &p[note_strings + + get_unaligned_be32( + &p[note_table + (8 * i) + 4])]; + + if (value != NULL) + strlcpy(value, value_ptr, length); + + } + } + } else { + /* + * We will search for the next note, regardless of the key, + * and return both the value and the key + */ + + i = *offset; + + if ((i >= 0) && (i < note_count)) { + status = 0; + + if (key != NULL) + strlcpy(key, &p[note_strings + + get_unaligned_be32( + &p[note_table + (8 * i)])], + length); + + if (value != NULL) + strlcpy(value, &p[note_strings + + get_unaligned_be32( + &p[note_table + (8 * i) + 4])], + length); + + *offset = i + 1; + } + } + + return status; +} + +static int altera_check_crc(u8 *p, s32 program_size) +{ + int status = 0; + u16 local_expected = 0, + local_actual = 0, + shift_reg = 0xffff; + int bit, feedback; + u8 databyte; + 
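+ /*
+ * The loop below computes CRC-16/CCITT in its bit-reflected form:
+ * polynomial 0x8408, initial register 0xffff, final complement,
+ * over every byte preceding the CRC section. For a well-formed file
+ * the check therefore reduces to:
+ *
+ *	(u16)~shift_reg == get_unaligned_be16(&p[crc_section])
+ */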
u32 i; + u32 crc_section = 0L; + u32 first_word = 0L; + int version = 0; + int delta = 0; + + if (program_size > 52L) { + first_word = get_unaligned_be32(&p[0]); + version = (first_word & 1L); + delta = version * 8; + + crc_section = get_unaligned_be32(&p[32 + delta]); + } + + if ((first_word != 0x4A414D00L) && (first_word != 0x4A414D01L)) + status = -EIO; + + if (crc_section >= program_size) + status = -EIO; + + if (status == 0) { + local_expected = (u16)get_unaligned_be16(&p[crc_section]); + + for (i = 0; i < crc_section; ++i) { + databyte = p[i]; + for (bit = 0; bit < 8; bit++) { + feedback = (databyte ^ shift_reg) & 0x01; + shift_reg >>= 1; + if (feedback) + shift_reg ^= 0x8408; + + databyte >>= 1; + } + } + + local_actual = (u16)~shift_reg; + + if (local_expected != local_actual) + status = -EILSEQ; + + } + + if (debug || status) { + switch (status) { + case 0: + printk(KERN_INFO "%s: CRC matched: %04x\n", __func__, + local_actual); + break; + case -EILSEQ: + printk(KERN_ERR "%s: CRC mismatch: expected %04x, " + "actual %04x\n", __func__, local_expected, + local_actual); + break; + case -ENODATA: + printk(KERN_ERR "%s: expected CRC not found, " + "actual CRC = %04x\n", __func__, + local_actual); + break; + case -EIO: + printk(KERN_ERR "%s: error: format isn't " + "recognized.\n", __func__); + break; + default: + printk(KERN_ERR "%s: CRC function returned error " + "code %d\n", __func__, status); + break; + } + } + + return status; +} + +static int altera_get_file_info(u8 *p, + s32 program_size, + int *format_version, + int *action_count, + int *procedure_count) +{ + int status = -EIO; + u32 first_word = 0; + int version = 0; + + if (program_size <= 52L) + return status; + + first_word = get_unaligned_be32(&p[0]); + + if ((first_word == 0x4A414D00L) || (first_word == 0x4A414D01L)) { + status = 0; + + version = (first_word & 1L); + *format_version = version + 1; + + if (version > 0) { + *action_count = get_unaligned_be32(&p[48]); + *procedure_count = get_unaligned_be32(&p[52]); + } + } + + return status; +} + +static int altera_get_act_info(u8 *p, + s32 program_size, + int index, + char **name, + char **description, + struct altera_procinfo **proc_list) +{ + int status = -EIO; + struct altera_procinfo *procptr = NULL; + struct altera_procinfo *tmpptr = NULL; + u32 first_word = 0L; + u32 action_table = 0L; + u32 proc_table = 0L; + u32 str_table = 0L; + u32 note_strings = 0L; + u32 action_count = 0L; + u32 proc_count = 0L; + u32 act_name_id = 0L; + u32 act_desc_id = 0L; + u32 act_proc_id = 0L; + u32 act_proc_name = 0L; + u8 act_proc_attribute = 0; + + if (program_size <= 52L) + return status; + /* Read header information */ + first_word = get_unaligned_be32(&p[0]); + + if (first_word != 0x4A414D01L) + return status; + + action_table = get_unaligned_be32(&p[4]); + proc_table = get_unaligned_be32(&p[8]); + str_table = get_unaligned_be32(&p[12]); + note_strings = get_unaligned_be32(&p[16]); + action_count = get_unaligned_be32(&p[48]); + proc_count = get_unaligned_be32(&p[52]); + + if (index >= action_count) + return status; + + act_name_id = get_unaligned_be32(&p[action_table + (12 * index)]); + act_desc_id = get_unaligned_be32(&p[action_table + (12 * index) + 4]); + act_proc_id = get_unaligned_be32(&p[action_table + (12 * index) + 8]); + + *name = &p[str_table + act_name_id]; + + if (act_desc_id < (note_strings - str_table)) + *description = &p[str_table + act_desc_id]; + + do { + act_proc_name = get_unaligned_be32( + &p[proc_table + (13 * act_proc_id)]); + act_proc_attribute = + 
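+ /*
+ * Byte offsets consumed from the JBC 2.0 header in this function,
+ * all fields big-endian: 0 = signature 0x4A414D01, 4 = action table,
+ * 8 = procedure table, 12 = string table, 16 = note strings,
+ * 48 = action count, 52 = procedure count. Action records are
+ * 12 bytes (name id, description id, first procedure id); procedure
+ * records are 13 bytes with the next-procedure link at offset 4 and
+ * the attribute byte at offset 8, which is what this loop follows.
+ */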
(p[proc_table + (13 * act_proc_id) + 8] & 0x03); + + procptr = + kzalloc(sizeof(struct altera_procinfo), + GFP_KERNEL); + + if (procptr == NULL) + status = -ENOMEM; + else { + procptr->name = &p[str_table + act_proc_name]; + procptr->attrs = act_proc_attribute; + procptr->next = NULL; + + /* add record to end of linked list */ + if (*proc_list == NULL) + *proc_list = procptr; + else { + tmpptr = *proc_list; + while (tmpptr->next != NULL) + tmpptr = tmpptr->next; + tmpptr->next = procptr; + } + } + + act_proc_id = get_unaligned_be32( + &p[proc_table + (13 * act_proc_id) + 4]); + } while ((act_proc_id != 0) && (act_proc_id < proc_count)); + + return status; +} + +int altera_init(struct altera_config *config, const struct firmware *fw) +{ + struct altera_state *astate = NULL; + struct altera_procinfo *proc_list = NULL; + struct altera_procinfo *procptr = NULL; + char *key = NULL; + char *value = NULL; + char *action_name = NULL; + char *description = NULL; + int exec_result = 0; + int exit_code = 0; + int format_version = 0; + int action_count = 0; + int procedure_count = 0; + int index = 0; + s32 offset = 0L; + s32 error_address = 0L; + int retval = 0; + + key = kzalloc(33, GFP_KERNEL); + if (!key) { + retval = -ENOMEM; + goto out; + } + value = kzalloc(257, GFP_KERNEL); + if (!value) { + retval = -ENOMEM; + goto free_key; + } + astate = kzalloc(sizeof(struct altera_state), GFP_KERNEL); + if (!astate) { + retval = -ENOMEM; + goto free_value; + } + + astate->config = config; + if (!astate->config->jtag_io) { + dprintk(KERN_INFO "%s: using byteblaster!\n", __func__); + astate->config->jtag_io = netup_jtag_io_lpt; + } + + altera_check_crc((u8 *)fw->data, fw->size); + + if (debug) { + altera_get_file_info((u8 *)fw->data, fw->size, &format_version, + &action_count, &procedure_count); + printk(KERN_INFO "%s: File format is %s ByteCode format\n", + __func__, (format_version == 2) ? "Jam STAPL" : + "pre-standardized Jam 1.1"); + while (altera_get_note((u8 *)fw->data, fw->size, + &offset, key, value, 256) == 0) + printk(KERN_INFO "%s: NOTE \"%s\" = \"%s\"\n", + __func__, key, value); + } + + if (debug && (format_version == 2) && (action_count > 0)) { + printk(KERN_INFO "%s: Actions available:\n", __func__); + for (index = 0; index < action_count; ++index) { + altera_get_act_info((u8 *)fw->data, fw->size, + index, &action_name, + &description, + &proc_list); + + if (description == NULL) + printk(KERN_INFO "%s: %s\n", + __func__, + action_name); + else + printk(KERN_INFO "%s: %s \"%s\"\n", + __func__, + action_name, + description); + + procptr = proc_list; + while (procptr != NULL) { + if (procptr->attrs != 0) + printk(KERN_INFO "%s: %s (%s)\n", + __func__, + procptr->name, + (procptr->attrs == 1) ? 
+ "optional" : "recommended"); + + proc_list = procptr->next; + kfree(procptr); + procptr = proc_list; + } + } + + printk(KERN_INFO "\n"); + } + + exec_result = altera_execute(astate, (u8 *)fw->data, fw->size, + &error_address, &exit_code, &format_version); + + if (exit_code) + exec_result = -EREMOTEIO; + + if ((format_version == 2) && (exec_result == -EINVAL)) { + if (astate->config->action == NULL) + printk(KERN_ERR "%s: error: no action specified for " + "Jam STAPL file.\nprogram terminated.\n", + __func__); + else + printk(KERN_ERR "%s: error: action \"%s\"" + " is not supported " + "for this Jam STAPL file.\n" + "Program terminated.\n", __func__, + astate->config->action); + + } else if (exec_result) + printk(KERN_ERR "%s: error %d\n", __func__, exec_result); + + kfree(astate); +free_value: + kfree(value); +free_key: + kfree(key); +out: + return retval; +} +EXPORT_SYMBOL(altera_init); diff --git a/kernel/drivers/misc/apds9802als.c b/kernel/drivers/misc/apds9802als.c new file mode 100644 index 000000000..c6cc3dc8a --- /dev/null +++ b/kernel/drivers/misc/apds9802als.c @@ -0,0 +1,322 @@ +/* + * apds9802als.c - apds9802 ALS Driver + * + * Copyright (C) 2009 Intel Corp + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/i2c.h> +#include <linux/err.h> +#include <linux/delay.h> +#include <linux/mutex.h> +#include <linux/sysfs.h> +#include <linux/pm_runtime.h> + +#define ALS_MIN_RANGE_VAL 1 +#define ALS_MAX_RANGE_VAL 2 +#define POWER_STA_ENABLE 1 +#define POWER_STA_DISABLE 0 + +#define DRIVER_NAME "apds9802als" + +struct als_data { + struct mutex mutex; +}; + +static ssize_t als_sensing_range_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + int val; + + val = i2c_smbus_read_byte_data(client, 0x81); + if (val < 0) + return val; + if (val & 1) + return sprintf(buf, "4095\n"); + else + return sprintf(buf, "65535\n"); +} + +static int als_wait_for_data_ready(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + int ret; + int retry = 10; + + do { + msleep(30); + ret = i2c_smbus_read_byte_data(client, 0x86); + } while (!(ret & 0x80) && retry--); + + if (retry < 0) { + dev_warn(dev, "timeout waiting for data ready\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static ssize_t als_lux0_input_data_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct als_data *data = i2c_get_clientdata(client); + int ret_val; + int temp; + + /* Protect against parallel reads */ + pm_runtime_get_sync(dev); + mutex_lock(&data->mutex); + + /* clear EOC interrupt status */ + i2c_smbus_write_byte(client, 0x40); + /* start measurement */ + temp = i2c_smbus_read_byte_data(client, 0x81); + i2c_smbus_write_byte_data(client, 0x81, temp | 0x08); + + ret_val = als_wait_for_data_ready(dev); + if (ret_val < 0) + goto failed; + + temp = i2c_smbus_read_byte_data(client, 0x8C); /* LSB data */ + if (temp < 0) { + ret_val = temp; + goto failed; + } + ret_val = i2c_smbus_read_byte_data(client, 0x8D); /* MSB data */ + if (ret_val < 0) + goto failed; + + mutex_unlock(&data->mutex); + pm_runtime_put_sync(dev); + + temp = (ret_val << 8) | temp; + return sprintf(buf, "%d\n", temp); +failed: + mutex_unlock(&data->mutex); + pm_runtime_put_sync(dev); + return ret_val; +} + +static ssize_t als_sensing_range_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct als_data *data = i2c_get_clientdata(client); + int ret_val; + unsigned long val; + + ret_val = kstrtoul(buf, 10, &val); + if (ret_val) + return ret_val; + + if (val < 4096) + val = 1; + else if (val < 65536) + val = 2; + else + return -ERANGE; + + pm_runtime_get_sync(dev); + + /* Make sure nobody else reads/modifies/writes 0x81 while we + are active */ + mutex_lock(&data->mutex); + + ret_val = i2c_smbus_read_byte_data(client, 0x81); + if (ret_val < 0) + goto fail; + + /* Reset the bits before setting them */ + ret_val = ret_val & 0xFA; + + if (val == 1) /* Setting detection range up to 4k LUX */ + ret_val = (ret_val | 0x01); + else /* Setting detection range up to 64k LUX*/ + ret_val = (ret_val | 0x00); + + ret_val = i2c_smbus_write_byte_data(client, 0x81, ret_val); + + if (ret_val >= 0) { + /* All OK */ + mutex_unlock(&data->mutex); + pm_runtime_put_sync(dev); + return count; + } +fail: + mutex_unlock(&data->mutex); + pm_runtime_put_sync(dev); + return ret_val; +} + +static int als_set_power_state(struct i2c_client *client, bool on_off) +{ + int ret_val; + struct als_data *data = 
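+ /*
+ * Register usage in this driver, inferred from the accesses above
+ * (the code itself never names the datasheet registers):
+ *	0x80 bit 0	power on/off
+ *	0x81 bit 0	range select (1 = 4095 counts, 0 = 65535)
+ *	0x81 bit 3	start one manual measurement
+ *	0x86 bit 7	conversion complete
+ *	0x8C/0x8D	result LSB/MSB
+ */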
i2c_get_clientdata(client); + + mutex_lock(&data->mutex); + ret_val = i2c_smbus_read_byte_data(client, 0x80); + if (ret_val < 0) + goto fail; + if (on_off) + ret_val = ret_val | 0x01; + else + ret_val = ret_val & 0xFE; + ret_val = i2c_smbus_write_byte_data(client, 0x80, ret_val); +fail: + mutex_unlock(&data->mutex); + return ret_val; +} + +static DEVICE_ATTR(lux0_sensor_range, S_IRUGO | S_IWUSR, + als_sensing_range_show, als_sensing_range_store); +static DEVICE_ATTR(lux0_input, S_IRUGO, als_lux0_input_data_show, NULL); + +static struct attribute *mid_att_als[] = { + &dev_attr_lux0_sensor_range.attr, + &dev_attr_lux0_input.attr, + NULL +}; + +static struct attribute_group m_als_gr = { + .name = "apds9802als", + .attrs = mid_att_als +}; + +static int als_set_default_config(struct i2c_client *client) +{ + int ret_val; + /* Write the command and then switch on */ + ret_val = i2c_smbus_write_byte_data(client, 0x80, 0x01); + if (ret_val < 0) { + dev_err(&client->dev, "failed default switch on write\n"); + return ret_val; + } + /* detection range: 1~64K Lux, maunal measurement */ + ret_val = i2c_smbus_write_byte_data(client, 0x81, 0x08); + if (ret_val < 0) + dev_err(&client->dev, "failed default LUX on write\n"); + + /* We always get 0 for the 1st measurement after system power on, + * so make sure it is finished before user asks for data. + */ + als_wait_for_data_ready(&client->dev); + + return ret_val; +} + +static int apds9802als_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int res; + struct als_data *data; + + data = kzalloc(sizeof(struct als_data), GFP_KERNEL); + if (data == NULL) { + dev_err(&client->dev, "Memory allocation failed\n"); + return -ENOMEM; + } + i2c_set_clientdata(client, data); + res = sysfs_create_group(&client->dev.kobj, &m_als_gr); + if (res) { + dev_err(&client->dev, "device create file failed\n"); + goto als_error1; + } + dev_info(&client->dev, "ALS chip found\n"); + als_set_default_config(client); + mutex_init(&data->mutex); + + pm_runtime_set_active(&client->dev); + pm_runtime_enable(&client->dev); + + return res; +als_error1: + kfree(data); + return res; +} + +static int apds9802als_remove(struct i2c_client *client) +{ + struct als_data *data = i2c_get_clientdata(client); + + pm_runtime_get_sync(&client->dev); + + als_set_power_state(client, false); + sysfs_remove_group(&client->dev.kobj, &m_als_gr); + + pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); + pm_runtime_put_noidle(&client->dev); + + kfree(data); + return 0; +} + +#ifdef CONFIG_PM + +static int apds9802als_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + als_set_power_state(client, false); + return 0; +} + +static int apds9802als_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + als_set_power_state(client, true); + return 0; +} + +static UNIVERSAL_DEV_PM_OPS(apds9802als_pm_ops, apds9802als_suspend, + apds9802als_resume, NULL); + +#define APDS9802ALS_PM_OPS (&apds9802als_pm_ops) + +#else /* CONFIG_PM */ +#define APDS9802ALS_PM_OPS NULL +#endif /* CONFIG_PM */ + +static struct i2c_device_id apds9802als_id[] = { + { DRIVER_NAME, 0 }, + { } +}; + +MODULE_DEVICE_TABLE(i2c, apds9802als_id); + +static struct i2c_driver apds9802als_driver = { + .driver = { + .name = DRIVER_NAME, + .pm = APDS9802ALS_PM_OPS, + }, + .probe = apds9802als_probe, + .remove = apds9802als_remove, + .id_table = apds9802als_id, +}; + +module_i2c_driver(apds9802als_driver); + +MODULE_AUTHOR("Anantha Narayanan 
<Anantha.Narayanan@intel.com"); +MODULE_DESCRIPTION("Avago apds9802als ALS Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/misc/apds990x.c b/kernel/drivers/misc/apds990x.c new file mode 100644 index 000000000..3739ffa9c --- /dev/null +++ b/kernel/drivers/misc/apds990x.c @@ -0,0 +1,1290 @@ +/* + * This file is part of the APDS990x sensor driver. + * Chip is combined proximity and ambient light sensor. + * + * Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies). + * + * Contact: Samu Onkalo <samu.p.onkalo@nokia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/i2c.h> +#include <linux/interrupt.h> +#include <linux/mutex.h> +#include <linux/regulator/consumer.h> +#include <linux/pm_runtime.h> +#include <linux/delay.h> +#include <linux/wait.h> +#include <linux/slab.h> +#include <linux/i2c/apds990x.h> + +/* Register map */ +#define APDS990X_ENABLE 0x00 /* Enable of states and interrupts */ +#define APDS990X_ATIME 0x01 /* ALS ADC time */ +#define APDS990X_PTIME 0x02 /* Proximity ADC time */ +#define APDS990X_WTIME 0x03 /* Wait time */ +#define APDS990X_AILTL 0x04 /* ALS interrupt low threshold low byte */ +#define APDS990X_AILTH 0x05 /* ALS interrupt low threshold hi byte */ +#define APDS990X_AIHTL 0x06 /* ALS interrupt hi threshold low byte */ +#define APDS990X_AIHTH 0x07 /* ALS interrupt hi threshold hi byte */ +#define APDS990X_PILTL 0x08 /* Proximity interrupt low threshold low byte */ +#define APDS990X_PILTH 0x09 /* Proximity interrupt low threshold hi byte */ +#define APDS990X_PIHTL 0x0a /* Proximity interrupt hi threshold low byte */ +#define APDS990X_PIHTH 0x0b /* Proximity interrupt hi threshold hi byte */ +#define APDS990X_PERS 0x0c /* Interrupt persistence filters */ +#define APDS990X_CONFIG 0x0d /* Configuration */ +#define APDS990X_PPCOUNT 0x0e /* Proximity pulse count */ +#define APDS990X_CONTROL 0x0f /* Gain control register */ +#define APDS990X_REV 0x11 /* Revision Number */ +#define APDS990X_ID 0x12 /* Device ID */ +#define APDS990X_STATUS 0x13 /* Device status */ +#define APDS990X_CDATAL 0x14 /* Clear ADC low data register */ +#define APDS990X_CDATAH 0x15 /* Clear ADC high data register */ +#define APDS990X_IRDATAL 0x16 /* IR ADC low data register */ +#define APDS990X_IRDATAH 0x17 /* IR ADC high data register */ +#define APDS990X_PDATAL 0x18 /* Proximity ADC low data register */ +#define APDS990X_PDATAH 0x19 /* Proximity ADC high data register */ + +/* Control */ +#define APDS990X_MAX_AGAIN 3 + +/* Enable register */ +#define APDS990X_EN_PIEN (0x1 << 5) +#define APDS990X_EN_AIEN (0x1 << 4) +#define APDS990X_EN_WEN (0x1 << 3) +#define APDS990X_EN_PEN (0x1 << 2) +#define APDS990X_EN_AEN (0x1 << 1) +#define APDS990X_EN_PON (0x1 << 0) +#define APDS990X_EN_DISABLE_ALL 0 + +/* Status register */ +#define APDS990X_ST_PINT (0x1 << 5) +#define APDS990X_ST_AINT (0x1 << 
4) + +/* I2C access types */ +#define APDS990x_CMD_TYPE_MASK (0x03 << 5) +#define APDS990x_CMD_TYPE_RB (0x00 << 5) /* Repeated byte */ +#define APDS990x_CMD_TYPE_INC (0x01 << 5) /* Auto increment */ +#define APDS990x_CMD_TYPE_SPE (0x03 << 5) /* Special function */ + +#define APDS990x_ADDR_SHIFT 0 +#define APDS990x_CMD 0x80 + +/* Interrupt ack commands */ +#define APDS990X_INT_ACK_ALS 0x6 +#define APDS990X_INT_ACK_PS 0x5 +#define APDS990X_INT_ACK_BOTH 0x7 + +/* ptime */ +#define APDS990X_PTIME_DEFAULT 0xff /* Recommended conversion time 2.7ms*/ + +/* wtime */ +#define APDS990X_WTIME_DEFAULT 0xee /* ~50ms wait time */ + +#define APDS990X_TIME_TO_ADC 1024 /* One timetick as ADC count value */ + +/* Persistence */ +#define APDS990X_APERS_SHIFT 0 +#define APDS990X_PPERS_SHIFT 4 + +/* Supported ID:s */ +#define APDS990X_ID_0 0x0 +#define APDS990X_ID_4 0x4 +#define APDS990X_ID_29 0x29 + +/* pgain and pdiode settings */ +#define APDS_PGAIN_1X 0x0 +#define APDS_PDIODE_IR 0x2 + +#define APDS990X_LUX_OUTPUT_SCALE 10 + +/* Reverse chip factors for threshold calculation */ +struct reverse_factors { + u32 afactor; + int cf1; + int irf1; + int cf2; + int irf2; +}; + +struct apds990x_chip { + struct apds990x_platform_data *pdata; + struct i2c_client *client; + struct mutex mutex; /* avoid parallel access */ + struct regulator_bulk_data regs[2]; + wait_queue_head_t wait; + + int prox_en; + bool prox_continuous_mode; + bool lux_wait_fresh_res; + + /* Chip parameters */ + struct apds990x_chip_factors cf; + struct reverse_factors rcf; + u16 atime; /* als integration time */ + u16 arate; /* als reporting rate */ + u16 a_max_result; /* Max possible ADC value with current atime */ + u8 again_meas; /* Gain used in last measurement */ + u8 again_next; /* Next calculated gain */ + u8 pgain; + u8 pdiode; + u8 pdrive; + u8 lux_persistence; + u8 prox_persistence; + + u32 lux_raw; + u32 lux; + u16 lux_clear; + u16 lux_ir; + u16 lux_calib; + u32 lux_thres_hi; + u32 lux_thres_lo; + + u32 prox_thres; + u16 prox_data; + u16 prox_calib; + + char chipname[10]; + u8 revision; +}; + +#define APDS_CALIB_SCALER 8192 +#define APDS_LUX_NEUTRAL_CALIB_VALUE (1 * APDS_CALIB_SCALER) +#define APDS_PROX_NEUTRAL_CALIB_VALUE (1 * APDS_CALIB_SCALER) + +#define APDS_PROX_DEF_THRES 600 +#define APDS_PROX_HYSTERESIS 50 +#define APDS_LUX_DEF_THRES_HI 101 +#define APDS_LUX_DEF_THRES_LO 100 +#define APDS_DEFAULT_PROX_PERS 1 + +#define APDS_TIMEOUT 2000 +#define APDS_STARTUP_DELAY 25000 /* us */ +#define APDS_RANGE 65535 +#define APDS_PROX_RANGE 1023 +#define APDS_LUX_GAIN_LO_LIMIT 100 +#define APDS_LUX_GAIN_LO_LIMIT_STRICT 25 + +#define TIMESTEP 87 /* 2.7ms is about 87 / 32 */ +#define TIME_STEP_SCALER 32 + +#define APDS_LUX_AVERAGING_TIME 50 /* tolerates 50/60Hz ripple */ +#define APDS_LUX_DEFAULT_RATE 200 + +static const u8 again[] = {1, 8, 16, 120}; /* ALS gain steps */ +static const u8 ir_currents[] = {100, 50, 25, 12}; /* IRled currents in mA */ + +/* Following two tables must match i.e 10Hz rate means 1 as persistence value */ +static const u16 arates_hz[] = {10, 5, 2, 1}; +static const u8 apersis[] = {1, 2, 4, 5}; + +/* Regulators */ +static const char reg_vcc[] = "Vdd"; +static const char reg_vled[] = "Vled"; + +static int apds990x_read_byte(struct apds990x_chip *chip, u8 reg, u8 *data) +{ + struct i2c_client *client = chip->client; + s32 ret; + + reg &= ~APDS990x_CMD_TYPE_MASK; + reg |= APDS990x_CMD | APDS990x_CMD_TYPE_RB; + + ret = i2c_smbus_read_byte_data(client, reg); + *data = ret; + return (int)ret; +} + +static int 
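+ /*
+ * All chip accesses go through the command byte: APDS990x_CMD (0x80)
+ * marks the transfer as a command, the type field selects
+ * repeated-byte mode for single registers or auto-increment mode for
+ * the 16-bit low/high pairs, and the low bits carry the register
+ * address. The word helpers below rely on auto-increment so each
+ * L/H pair moves in a single SMBus word transaction.
+ */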
apds990x_read_word(struct apds990x_chip *chip, u8 reg, u16 *data) +{ + struct i2c_client *client = chip->client; + s32 ret; + + reg &= ~APDS990x_CMD_TYPE_MASK; + reg |= APDS990x_CMD | APDS990x_CMD_TYPE_INC; + + ret = i2c_smbus_read_word_data(client, reg); + *data = ret; + return (int)ret; +} + +static int apds990x_write_byte(struct apds990x_chip *chip, u8 reg, u8 data) +{ + struct i2c_client *client = chip->client; + s32 ret; + + reg &= ~APDS990x_CMD_TYPE_MASK; + reg |= APDS990x_CMD | APDS990x_CMD_TYPE_RB; + + ret = i2c_smbus_write_byte_data(client, reg, data); + return (int)ret; +} + +static int apds990x_write_word(struct apds990x_chip *chip, u8 reg, u16 data) +{ + struct i2c_client *client = chip->client; + s32 ret; + + reg &= ~APDS990x_CMD_TYPE_MASK; + reg |= APDS990x_CMD | APDS990x_CMD_TYPE_INC; + + ret = i2c_smbus_write_word_data(client, reg, data); + return (int)ret; +} + +static int apds990x_mode_on(struct apds990x_chip *chip) +{ + /* ALS is mandatory, proximity optional */ + u8 reg = APDS990X_EN_AIEN | APDS990X_EN_PON | APDS990X_EN_AEN | + APDS990X_EN_WEN; + + if (chip->prox_en) + reg |= APDS990X_EN_PIEN | APDS990X_EN_PEN; + + return apds990x_write_byte(chip, APDS990X_ENABLE, reg); +} + +static u16 apds990x_lux_to_threshold(struct apds990x_chip *chip, u32 lux) +{ + u32 thres; + u32 cpl; + u32 ir; + + if (lux == 0) + return 0; + else if (lux == APDS_RANGE) + return APDS_RANGE; + + /* + * Reported LUX value is a combination of the IR and CLEAR channel + * values. However, interrupt threshold is only for clear channel. + * This function approximates needed HW threshold value for a given + * LUX value in the current lightning type. + * IR level compared to visible light varies heavily depending on the + * source of the light + * + * Calculate threshold value for the next measurement period. + * Math: threshold = lux * cpl where + * cpl = atime * again / (glass_attenuation * device_factor) + * (count-per-lux) + * + * First remove calibration. Division by four is to avoid overflow + */ + lux = lux * (APDS_CALIB_SCALER / 4) / (chip->lux_calib / 4); + + /* Multiplication by 64 is to increase accuracy */ + cpl = ((u32)chip->atime * (u32)again[chip->again_next] * + APDS_PARAM_SCALE * 64) / (chip->cf.ga * chip->cf.df); + + thres = lux * cpl / 64; + /* + * Convert IR light from the latest result to match with + * new gain step. This helps to adapt with the current + * source of light. 
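+ * (Concretely: ir below is the last raw IR count rescaled from the
+ * gain it was measured with to the gain the next measurement will
+ * use, so the clear-vs-IR comparison further down remains valid
+ * across a gain change.)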
+ */ + ir = (u32)chip->lux_ir * (u32)again[chip->again_next] / + (u32)again[chip->again_meas]; + + /* + * Compensate count with IR light impact + * IAC1 > IAC2 (see apds990x_get_lux for formulas) + */ + if (chip->lux_clear * APDS_PARAM_SCALE >= + chip->rcf.afactor * chip->lux_ir) + thres = (chip->rcf.cf1 * thres + chip->rcf.irf1 * ir) / + APDS_PARAM_SCALE; + else + thres = (chip->rcf.cf2 * thres + chip->rcf.irf2 * ir) / + APDS_PARAM_SCALE; + + if (thres >= chip->a_max_result) + thres = chip->a_max_result - 1; + return thres; +} + +static inline int apds990x_set_atime(struct apds990x_chip *chip, u32 time_ms) +{ + u8 reg_value; + + chip->atime = time_ms; + /* Formula is specified in the data sheet */ + reg_value = 256 - ((time_ms * TIME_STEP_SCALER) / TIMESTEP); + /* Calculate max ADC value for given integration time */ + chip->a_max_result = (u16)(256 - reg_value) * APDS990X_TIME_TO_ADC; + return apds990x_write_byte(chip, APDS990X_ATIME, reg_value); +} + +/* Called always with mutex locked */ +static int apds990x_refresh_pthres(struct apds990x_chip *chip, int data) +{ + int ret, lo, hi; + + /* If the chip is not in use, don't try to access it */ + if (pm_runtime_suspended(&chip->client->dev)) + return 0; + + if (data < chip->prox_thres) { + lo = 0; + hi = chip->prox_thres; + } else { + lo = chip->prox_thres - APDS_PROX_HYSTERESIS; + if (chip->prox_continuous_mode) + hi = chip->prox_thres; + else + hi = APDS_RANGE; + } + + ret = apds990x_write_word(chip, APDS990X_PILTL, lo); + ret |= apds990x_write_word(chip, APDS990X_PIHTL, hi); + return ret; +} + +/* Called always with mutex locked */ +static int apds990x_refresh_athres(struct apds990x_chip *chip) +{ + int ret; + /* If the chip is not in use, don't try to access it */ + if (pm_runtime_suspended(&chip->client->dev)) + return 0; + + ret = apds990x_write_word(chip, APDS990X_AILTL, + apds990x_lux_to_threshold(chip, chip->lux_thres_lo)); + ret |= apds990x_write_word(chip, APDS990X_AIHTL, + apds990x_lux_to_threshold(chip, chip->lux_thres_hi)); + + return ret; +} + +/* Called always with mutex locked */ +static void apds990x_force_a_refresh(struct apds990x_chip *chip) +{ + /* This will force ALS interrupt after the next measurement. */ + apds990x_write_word(chip, APDS990X_AILTL, APDS_LUX_DEF_THRES_LO); + apds990x_write_word(chip, APDS990X_AIHTL, APDS_LUX_DEF_THRES_HI); +} + +/* Called always with mutex locked */ +static void apds990x_force_p_refresh(struct apds990x_chip *chip) +{ + /* This will force proximity interrupt after the next measurement. */ + apds990x_write_word(chip, APDS990X_PILTL, APDS_PROX_DEF_THRES - 1); + apds990x_write_word(chip, APDS990X_PIHTL, APDS_PROX_DEF_THRES); +} + +/* Called always with mutex locked */ +static int apds990x_calc_again(struct apds990x_chip *chip) +{ + int curr_again = chip->again_meas; + int next_again = chip->again_meas; + int ret = 0; + + /* Calculate suitable als gain */ + if (chip->lux_clear == chip->a_max_result) + next_again -= 2; /* ALS saturated. Decrease gain by 2 steps */ + else if (chip->lux_clear > chip->a_max_result / 2) + next_again--; + else if (chip->lux_clear < APDS_LUX_GAIN_LO_LIMIT_STRICT) + next_again += 2; /* Too dark. 
Increase gain by 2 steps */ + else if (chip->lux_clear < APDS_LUX_GAIN_LO_LIMIT) + next_again++; + + /* Limit gain to available range */ + if (next_again < 0) + next_again = 0; + else if (next_again > APDS990X_MAX_AGAIN) + next_again = APDS990X_MAX_AGAIN; + + /* Let's check can we trust the measured result */ + if (chip->lux_clear == chip->a_max_result) + /* Result can be totally garbage due to saturation */ + ret = -ERANGE; + else if (next_again != curr_again && + chip->lux_clear < APDS_LUX_GAIN_LO_LIMIT_STRICT) + /* + * Gain is changed and measurement result is very small. + * Result can be totally garbage due to underflow + */ + ret = -ERANGE; + + chip->again_next = next_again; + apds990x_write_byte(chip, APDS990X_CONTROL, + (chip->pdrive << 6) | + (chip->pdiode << 4) | + (chip->pgain << 2) | + (chip->again_next << 0)); + + /* + * Error means bad result -> re-measurement is needed. The forced + * refresh uses fastest possible persistence setting to get result + * as soon as possible. + */ + if (ret < 0) + apds990x_force_a_refresh(chip); + else + apds990x_refresh_athres(chip); + + return ret; +} + +/* Called always with mutex locked */ +static int apds990x_get_lux(struct apds990x_chip *chip, int clear, int ir) +{ + int iac, iac1, iac2; /* IR adjusted counts */ + u32 lpc; /* Lux per count */ + + /* Formulas: + * iac1 = CF1 * CLEAR_CH - IRF1 * IR_CH + * iac2 = CF2 * CLEAR_CH - IRF2 * IR_CH + */ + iac1 = (chip->cf.cf1 * clear - chip->cf.irf1 * ir) / APDS_PARAM_SCALE; + iac2 = (chip->cf.cf2 * clear - chip->cf.irf2 * ir) / APDS_PARAM_SCALE; + + iac = max(iac1, iac2); + iac = max(iac, 0); + + lpc = APDS990X_LUX_OUTPUT_SCALE * (chip->cf.df * chip->cf.ga) / + (u32)(again[chip->again_meas] * (u32)chip->atime); + + return (iac * lpc) / APDS_PARAM_SCALE; +} + +static int apds990x_ack_int(struct apds990x_chip *chip, u8 mode) +{ + struct i2c_client *client = chip->client; + s32 ret; + u8 reg = APDS990x_CMD | APDS990x_CMD_TYPE_SPE; + + switch (mode & (APDS990X_ST_AINT | APDS990X_ST_PINT)) { + case APDS990X_ST_AINT: + reg |= APDS990X_INT_ACK_ALS; + break; + case APDS990X_ST_PINT: + reg |= APDS990X_INT_ACK_PS; + break; + default: + reg |= APDS990X_INT_ACK_BOTH; + break; + } + + ret = i2c_smbus_read_byte_data(client, reg); + return (int)ret; +} + +static irqreturn_t apds990x_irq(int irq, void *data) +{ + struct apds990x_chip *chip = data; + u8 status; + + apds990x_read_byte(chip, APDS990X_STATUS, &status); + apds990x_ack_int(chip, status); + + mutex_lock(&chip->mutex); + if (!pm_runtime_suspended(&chip->client->dev)) { + if (status & APDS990X_ST_AINT) { + apds990x_read_word(chip, APDS990X_CDATAL, + &chip->lux_clear); + apds990x_read_word(chip, APDS990X_IRDATAL, + &chip->lux_ir); + /* Store used gain for calculations */ + chip->again_meas = chip->again_next; + + chip->lux_raw = apds990x_get_lux(chip, + chip->lux_clear, + chip->lux_ir); + + if (apds990x_calc_again(chip) == 0) { + /* Result is valid */ + chip->lux = chip->lux_raw; + chip->lux_wait_fresh_res = false; + wake_up(&chip->wait); + sysfs_notify(&chip->client->dev.kobj, + NULL, "lux0_input"); + } + } + + if ((status & APDS990X_ST_PINT) && chip->prox_en) { + u16 clr_ch; + + apds990x_read_word(chip, APDS990X_CDATAL, &clr_ch); + /* + * If ALS channel is saturated at min gain, + * proximity gives false posivite values. + * Just ignore them. 
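+ * (A clear-channel count equal to a_max_result at minimum gain means
+ * ambient light alone saturates the ALS engine; the reflected IR
+ * pulse cannot be told apart from that background, so reporting 0
+ * here avoids phantom "near" events in bright light.)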
+ */ + if (chip->again_meas == 0 && + clr_ch == chip->a_max_result) + chip->prox_data = 0; + else + apds990x_read_word(chip, + APDS990X_PDATAL, + &chip->prox_data); + + apds990x_refresh_pthres(chip, chip->prox_data); + if (chip->prox_data < chip->prox_thres) + chip->prox_data = 0; + else if (!chip->prox_continuous_mode) + chip->prox_data = APDS_PROX_RANGE; + sysfs_notify(&chip->client->dev.kobj, + NULL, "prox0_raw"); + } + } + mutex_unlock(&chip->mutex); + return IRQ_HANDLED; +} + +static int apds990x_configure(struct apds990x_chip *chip) +{ + /* It is recommended to use disabled mode during these operations */ + apds990x_write_byte(chip, APDS990X_ENABLE, APDS990X_EN_DISABLE_ALL); + + /* conversion and wait times for different state machince states */ + apds990x_write_byte(chip, APDS990X_PTIME, APDS990X_PTIME_DEFAULT); + apds990x_write_byte(chip, APDS990X_WTIME, APDS990X_WTIME_DEFAULT); + apds990x_set_atime(chip, APDS_LUX_AVERAGING_TIME); + + apds990x_write_byte(chip, APDS990X_CONFIG, 0); + + /* Persistence levels */ + apds990x_write_byte(chip, APDS990X_PERS, + (chip->lux_persistence << APDS990X_APERS_SHIFT) | + (chip->prox_persistence << APDS990X_PPERS_SHIFT)); + + apds990x_write_byte(chip, APDS990X_PPCOUNT, chip->pdata->ppcount); + + /* Start with relatively small gain */ + chip->again_meas = 1; + chip->again_next = 1; + apds990x_write_byte(chip, APDS990X_CONTROL, + (chip->pdrive << 6) | + (chip->pdiode << 4) | + (chip->pgain << 2) | + (chip->again_next << 0)); + return 0; +} + +static int apds990x_detect(struct apds990x_chip *chip) +{ + struct i2c_client *client = chip->client; + int ret; + u8 id; + + ret = apds990x_read_byte(chip, APDS990X_ID, &id); + if (ret < 0) { + dev_err(&client->dev, "ID read failed\n"); + return ret; + } + + ret = apds990x_read_byte(chip, APDS990X_REV, &chip->revision); + if (ret < 0) { + dev_err(&client->dev, "REV read failed\n"); + return ret; + } + + switch (id) { + case APDS990X_ID_0: + case APDS990X_ID_4: + case APDS990X_ID_29: + snprintf(chip->chipname, sizeof(chip->chipname), "APDS-990x"); + break; + default: + ret = -ENODEV; + break; + } + return ret; +} + +#ifdef CONFIG_PM +static int apds990x_chip_on(struct apds990x_chip *chip) +{ + int err = regulator_bulk_enable(ARRAY_SIZE(chip->regs), + chip->regs); + if (err < 0) + return err; + + usleep_range(APDS_STARTUP_DELAY, 2 * APDS_STARTUP_DELAY); + + /* Refresh all configs in case of regulators were off */ + chip->prox_data = 0; + apds990x_configure(chip); + apds990x_mode_on(chip); + return 0; +} +#endif + +static int apds990x_chip_off(struct apds990x_chip *chip) +{ + apds990x_write_byte(chip, APDS990X_ENABLE, APDS990X_EN_DISABLE_ALL); + regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs); + return 0; +} + +static ssize_t apds990x_lux_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + ssize_t ret; + u32 result; + long timeout; + + if (pm_runtime_suspended(dev)) + return -EIO; + + timeout = wait_event_interruptible_timeout(chip->wait, + !chip->lux_wait_fresh_res, + msecs_to_jiffies(APDS_TIMEOUT)); + if (!timeout) + return -EIO; + + mutex_lock(&chip->mutex); + result = (chip->lux * chip->lux_calib) / APDS_CALIB_SCALER; + if (result > (APDS_RANGE * APDS990X_LUX_OUTPUT_SCALE)) + result = APDS_RANGE * APDS990X_LUX_OUTPUT_SCALE; + + ret = sprintf(buf, "%d.%d\n", + result / APDS990X_LUX_OUTPUT_SCALE, + result % APDS990X_LUX_OUTPUT_SCALE); + mutex_unlock(&chip->mutex); + return ret; +} + +static DEVICE_ATTR(lux0_input, S_IRUGO, 
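+ /*
+ * Reads of lux0_input (implemented just above) block on chip->wait
+ * until the interrupt handler has delivered a result measured with a
+ * gain it trusts, or fail with -EIO after APDS_TIMEOUT ms. The raw
+ * lux is then scaled by the user-settable lux0_calibscale value and
+ * printed with one decimal digit (APDS990X_LUX_OUTPUT_SCALE).
+ */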
apds990x_lux_show, NULL); + +static ssize_t apds990x_lux_range_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", APDS_RANGE); +} + +static DEVICE_ATTR(lux0_sensor_range, S_IRUGO, apds990x_lux_range_show, NULL); + +static ssize_t apds990x_lux_calib_format_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", APDS_CALIB_SCALER); +} + +static DEVICE_ATTR(lux0_calibscale_default, S_IRUGO, + apds990x_lux_calib_format_show, NULL); + +static ssize_t apds990x_lux_calib_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%u\n", chip->lux_calib); +} + +static ssize_t apds990x_lux_calib_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + chip->lux_calib = value; + + return len; +} + +static DEVICE_ATTR(lux0_calibscale, S_IRUGO | S_IWUSR, apds990x_lux_calib_show, + apds990x_lux_calib_store); + +static ssize_t apds990x_rate_avail(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int i; + int pos = 0; + for (i = 0; i < ARRAY_SIZE(arates_hz); i++) + pos += sprintf(buf + pos, "%d ", arates_hz[i]); + sprintf(buf + pos - 1, "\n"); + return pos; +} + +static ssize_t apds990x_rate_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", chip->arate); +} + +static int apds990x_set_arate(struct apds990x_chip *chip, int rate) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(arates_hz); i++) + if (rate >= arates_hz[i]) + break; + + if (i == ARRAY_SIZE(arates_hz)) + return -EINVAL; + + /* Pick up corresponding persistence value */ + chip->lux_persistence = apersis[i]; + chip->arate = arates_hz[i]; + + /* If the chip is not in use, don't try to access it */ + if (pm_runtime_suspended(&chip->client->dev)) + return 0; + + /* Persistence levels */ + return apds990x_write_byte(chip, APDS990X_PERS, + (chip->lux_persistence << APDS990X_APERS_SHIFT) | + (chip->prox_persistence << APDS990X_PPERS_SHIFT)); +} + +static ssize_t apds990x_rate_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + mutex_lock(&chip->mutex); + ret = apds990x_set_arate(chip, value); + mutex_unlock(&chip->mutex); + + if (ret < 0) + return ret; + return len; +} + +static DEVICE_ATTR(lux0_rate_avail, S_IRUGO, apds990x_rate_avail, NULL); + +static DEVICE_ATTR(lux0_rate, S_IRUGO | S_IWUSR, apds990x_rate_show, + apds990x_rate_store); + +static ssize_t apds990x_prox_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + ssize_t ret; + struct apds990x_chip *chip = dev_get_drvdata(dev); + if (pm_runtime_suspended(dev) || !chip->prox_en) + return -EIO; + + mutex_lock(&chip->mutex); + ret = sprintf(buf, "%d\n", chip->prox_data); + mutex_unlock(&chip->mutex); + return ret; +} + +static DEVICE_ATTR(prox0_raw, S_IRUGO, apds990x_prox_show, NULL); + +static ssize_t apds990x_prox_range_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", APDS_PROX_RANGE); +} + +static DEVICE_ATTR(prox0_sensor_range, S_IRUGO, 
apds990x_prox_range_show, NULL); + +static ssize_t apds990x_prox_enable_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", chip->prox_en); +} + +static ssize_t apds990x_prox_enable_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + mutex_lock(&chip->mutex); + + if (!chip->prox_en) + chip->prox_data = 0; + + if (value) + chip->prox_en++; + else if (chip->prox_en > 0) + chip->prox_en--; + + if (!pm_runtime_suspended(dev)) + apds990x_mode_on(chip); + mutex_unlock(&chip->mutex); + return len; +} + +static DEVICE_ATTR(prox0_raw_en, S_IRUGO | S_IWUSR, apds990x_prox_enable_show, + apds990x_prox_enable_store); + +static const char reporting_modes[][9] = {"trigger", "periodic"}; + +static ssize_t apds990x_prox_reporting_mode_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%s\n", + reporting_modes[!!chip->prox_continuous_mode]); +} + +static ssize_t apds990x_prox_reporting_mode_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + + if (sysfs_streq(buf, reporting_modes[0])) + chip->prox_continuous_mode = 0; + else if (sysfs_streq(buf, reporting_modes[1])) + chip->prox_continuous_mode = 1; + else + return -EINVAL; + return len; +} + +static DEVICE_ATTR(prox0_reporting_mode, S_IRUGO | S_IWUSR, + apds990x_prox_reporting_mode_show, + apds990x_prox_reporting_mode_store); + +static ssize_t apds990x_prox_reporting_avail_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%s %s\n", reporting_modes[0], reporting_modes[1]); +} + +static DEVICE_ATTR(prox0_reporting_mode_avail, S_IRUGO, + apds990x_prox_reporting_avail_show, NULL); + + +static ssize_t apds990x_lux_thresh_above_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", chip->lux_thres_hi); +} + +static ssize_t apds990x_lux_thresh_below_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", chip->lux_thres_lo); +} + +static ssize_t apds990x_set_lux_thresh(struct apds990x_chip *chip, u32 *target, + const char *buf) +{ + unsigned long thresh; + int ret; + + ret = kstrtoul(buf, 0, &thresh); + if (ret) + return ret; + + if (thresh > APDS_RANGE) + return -EINVAL; + + mutex_lock(&chip->mutex); + *target = thresh; + /* + * Don't update values in HW if we are still waiting for + * the first interrupt to come after device handle open call. 
+ */ + if (!chip->lux_wait_fresh_res) + apds990x_refresh_athres(chip); + mutex_unlock(&chip->mutex); + return ret; +} + +static ssize_t apds990x_lux_thresh_above_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + int ret = apds990x_set_lux_thresh(chip, &chip->lux_thres_hi, buf); + if (ret < 0) + return ret; + return len; +} + +static ssize_t apds990x_lux_thresh_below_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + int ret = apds990x_set_lux_thresh(chip, &chip->lux_thres_lo, buf); + if (ret < 0) + return ret; + return len; +} + +static DEVICE_ATTR(lux0_thresh_above_value, S_IRUGO | S_IWUSR, + apds990x_lux_thresh_above_show, + apds990x_lux_thresh_above_store); + +static DEVICE_ATTR(lux0_thresh_below_value, S_IRUGO | S_IWUSR, + apds990x_lux_thresh_below_show, + apds990x_lux_thresh_below_store); + +static ssize_t apds990x_prox_threshold_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", chip->prox_thres); +} + +static ssize_t apds990x_prox_threshold_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + if ((value > APDS_RANGE) || (value == 0) || + (value < APDS_PROX_HYSTERESIS)) + return -EINVAL; + + mutex_lock(&chip->mutex); + chip->prox_thres = value; + + apds990x_force_p_refresh(chip); + mutex_unlock(&chip->mutex); + return len; +} + +static DEVICE_ATTR(prox0_thresh_above_value, S_IRUGO | S_IWUSR, + apds990x_prox_threshold_show, + apds990x_prox_threshold_store); + +static ssize_t apds990x_power_state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", !pm_runtime_suspended(dev)); +} + +static ssize_t apds990x_power_state_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + if (value) { + pm_runtime_get_sync(dev); + mutex_lock(&chip->mutex); + chip->lux_wait_fresh_res = true; + apds990x_force_a_refresh(chip); + apds990x_force_p_refresh(chip); + mutex_unlock(&chip->mutex); + } else { + if (!pm_runtime_suspended(dev)) + pm_runtime_put(dev); + } + return len; +} + +static DEVICE_ATTR(power_state, S_IRUGO | S_IWUSR, + apds990x_power_state_show, + apds990x_power_state_store); + +static ssize_t apds990x_chip_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%s %d\n", chip->chipname, chip->revision); +} + +static DEVICE_ATTR(chip_id, S_IRUGO, apds990x_chip_id_show, NULL); + +static struct attribute *sysfs_attrs_ctrl[] = { + &dev_attr_lux0_calibscale.attr, + &dev_attr_lux0_calibscale_default.attr, + &dev_attr_lux0_input.attr, + &dev_attr_lux0_sensor_range.attr, + &dev_attr_lux0_rate.attr, + &dev_attr_lux0_rate_avail.attr, + &dev_attr_lux0_thresh_above_value.attr, + &dev_attr_lux0_thresh_below_value.attr, + &dev_attr_prox0_raw_en.attr, + &dev_attr_prox0_raw.attr, + &dev_attr_prox0_sensor_range.attr, + &dev_attr_prox0_thresh_above_value.attr, + 
&dev_attr_prox0_reporting_mode.attr, + &dev_attr_prox0_reporting_mode_avail.attr, + &dev_attr_chip_id.attr, + &dev_attr_power_state.attr, + NULL +}; + +static struct attribute_group apds990x_attribute_group[] = { + {.attrs = sysfs_attrs_ctrl }, +}; + +static int apds990x_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct apds990x_chip *chip; + int err; + + chip = kzalloc(sizeof *chip, GFP_KERNEL); + if (!chip) + return -ENOMEM; + + i2c_set_clientdata(client, chip); + chip->client = client; + + init_waitqueue_head(&chip->wait); + mutex_init(&chip->mutex); + chip->pdata = client->dev.platform_data; + + if (chip->pdata == NULL) { + dev_err(&client->dev, "platform data is mandatory\n"); + err = -EINVAL; + goto fail1; + } + + if (chip->pdata->cf.ga == 0) { + /* set uncovered sensor default parameters */ + chip->cf.ga = 1966; /* 0.48 * APDS_PARAM_SCALE */ + chip->cf.cf1 = 4096; /* 1.00 * APDS_PARAM_SCALE */ + chip->cf.irf1 = 9134; /* 2.23 * APDS_PARAM_SCALE */ + chip->cf.cf2 = 2867; /* 0.70 * APDS_PARAM_SCALE */ + chip->cf.irf2 = 5816; /* 1.42 * APDS_PARAM_SCALE */ + chip->cf.df = 52; + } else { + chip->cf = chip->pdata->cf; + } + + /* precalculate inverse chip factors for threshold control */ + chip->rcf.afactor = + (chip->cf.irf1 - chip->cf.irf2) * APDS_PARAM_SCALE / + (chip->cf.cf1 - chip->cf.cf2); + chip->rcf.cf1 = APDS_PARAM_SCALE * APDS_PARAM_SCALE / + chip->cf.cf1; + chip->rcf.irf1 = chip->cf.irf1 * APDS_PARAM_SCALE / + chip->cf.cf1; + chip->rcf.cf2 = APDS_PARAM_SCALE * APDS_PARAM_SCALE / + chip->cf.cf2; + chip->rcf.irf2 = chip->cf.irf2 * APDS_PARAM_SCALE / + chip->cf.cf2; + + /* Set something to start with */ + chip->lux_thres_hi = APDS_LUX_DEF_THRES_HI; + chip->lux_thres_lo = APDS_LUX_DEF_THRES_LO; + chip->lux_calib = APDS_LUX_NEUTRAL_CALIB_VALUE; + + chip->prox_thres = APDS_PROX_DEF_THRES; + chip->pdrive = chip->pdata->pdrive; + chip->pdiode = APDS_PDIODE_IR; + chip->pgain = APDS_PGAIN_1X; + chip->prox_calib = APDS_PROX_NEUTRAL_CALIB_VALUE; + chip->prox_persistence = APDS_DEFAULT_PROX_PERS; + chip->prox_continuous_mode = false; + + chip->regs[0].supply = reg_vcc; + chip->regs[1].supply = reg_vled; + + err = regulator_bulk_get(&client->dev, + ARRAY_SIZE(chip->regs), chip->regs); + if (err < 0) { + dev_err(&client->dev, "Cannot get regulators\n"); + goto fail1; + } + + err = regulator_bulk_enable(ARRAY_SIZE(chip->regs), chip->regs); + if (err < 0) { + dev_err(&client->dev, "Cannot enable regulators\n"); + goto fail2; + } + + usleep_range(APDS_STARTUP_DELAY, 2 * APDS_STARTUP_DELAY); + + err = apds990x_detect(chip); + if (err < 0) { + dev_err(&client->dev, "APDS990X not found\n"); + goto fail3; + } + + pm_runtime_set_active(&client->dev); + + apds990x_configure(chip); + apds990x_set_arate(chip, APDS_LUX_DEFAULT_RATE); + apds990x_mode_on(chip); + + pm_runtime_enable(&client->dev); + + if (chip->pdata->setup_resources) { + err = chip->pdata->setup_resources(); + if (err) { + err = -EINVAL; + goto fail3; + } + } + + err = sysfs_create_group(&chip->client->dev.kobj, + apds990x_attribute_group); + if (err < 0) { + dev_err(&chip->client->dev, "Sysfs registration failed\n"); + goto fail4; + } + + err = request_threaded_irq(client->irq, NULL, + apds990x_irq, + IRQF_TRIGGER_FALLING | IRQF_TRIGGER_LOW | + IRQF_ONESHOT, + "apds990x", chip); + if (err) { + dev_err(&client->dev, "could not get IRQ %d\n", + client->irq); + goto fail5; + } + return err; +fail5: + sysfs_remove_group(&chip->client->dev.kobj, + &apds990x_attribute_group[0]); +fail4: + if (chip->pdata && 
chip->pdata->release_resources) + chip->pdata->release_resources(); +fail3: + regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs); +fail2: + regulator_bulk_free(ARRAY_SIZE(chip->regs), chip->regs); +fail1: + kfree(chip); + return err; +} + +static int apds990x_remove(struct i2c_client *client) +{ + struct apds990x_chip *chip = i2c_get_clientdata(client); + + free_irq(client->irq, chip); + sysfs_remove_group(&chip->client->dev.kobj, + apds990x_attribute_group); + + if (chip->pdata && chip->pdata->release_resources) + chip->pdata->release_resources(); + + if (!pm_runtime_suspended(&client->dev)) + apds990x_chip_off(chip); + + pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); + + regulator_bulk_free(ARRAY_SIZE(chip->regs), chip->regs); + + kfree(chip); + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int apds990x_suspend(struct device *dev) +{ + struct i2c_client *client = container_of(dev, struct i2c_client, dev); + struct apds990x_chip *chip = i2c_get_clientdata(client); + + apds990x_chip_off(chip); + return 0; +} + +static int apds990x_resume(struct device *dev) +{ + struct i2c_client *client = container_of(dev, struct i2c_client, dev); + struct apds990x_chip *chip = i2c_get_clientdata(client); + + /* + * If we were enabled at suspend time, it is expected that + * everything works nicely and smoothly. chip_on is enough + */ + apds990x_chip_on(chip); + + return 0; +} +#endif + +#ifdef CONFIG_PM +static int apds990x_runtime_suspend(struct device *dev) +{ + struct i2c_client *client = container_of(dev, struct i2c_client, dev); + struct apds990x_chip *chip = i2c_get_clientdata(client); + + apds990x_chip_off(chip); + return 0; +} + +static int apds990x_runtime_resume(struct device *dev) +{ + struct i2c_client *client = container_of(dev, struct i2c_client, dev); + struct apds990x_chip *chip = i2c_get_clientdata(client); + + apds990x_chip_on(chip); + return 0; +} + +#endif + +static const struct i2c_device_id apds990x_id[] = { + {"apds990x", 0 }, + {} +}; + +MODULE_DEVICE_TABLE(i2c, apds990x_id); + +static const struct dev_pm_ops apds990x_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(apds990x_suspend, apds990x_resume) + SET_RUNTIME_PM_OPS(apds990x_runtime_suspend, + apds990x_runtime_resume, + NULL) +}; + +static struct i2c_driver apds990x_driver = { + .driver = { + .name = "apds990x", + .owner = THIS_MODULE, + .pm = &apds990x_pm_ops, + }, + .probe = apds990x_probe, + .remove = apds990x_remove, + .id_table = apds990x_id, +}; + +module_i2c_driver(apds990x_driver); + +MODULE_DESCRIPTION("APDS990X combined ALS and proximity sensor"); +MODULE_AUTHOR("Samu Onkalo, Nokia Corporation"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/misc/arm-charlcd.c b/kernel/drivers/misc/arm-charlcd.c new file mode 100644 index 000000000..c65b5ea5d --- /dev/null +++ b/kernel/drivers/misc/arm-charlcd.c @@ -0,0 +1,388 @@ +/* + * Driver for the on-board character LCD found on some ARM reference boards + * This is basically a Hitachi HD44780 LCD with a custom IP block to drive it + * http://en.wikipedia.org/wiki/HD44780_Character_LCD + * Currently it will just display the text "ARM Linux" and the Linux version + * + * License terms: GNU General Public License (GPL) version 2 + * Author: Linus Walleij <triad@df.lth.se> + */ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/of.h> +#include <linux/completion.h> +#include <linux/delay.h> +#include <linux/io.h> +#include <linux/slab.h> +#include <linux/workqueue.h> 
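+/* UTS_RELEASE, the kernel version string printed on the display at init */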
+#include <generated/utsrelease.h> + +#define DRIVERNAME "arm-charlcd" +#define CHARLCD_TIMEOUT (msecs_to_jiffies(1000)) + +/* Offsets to registers */ +#define CHAR_COM 0x00U +#define CHAR_DAT 0x04U +#define CHAR_RD 0x08U +#define CHAR_RAW 0x0CU +#define CHAR_MASK 0x10U +#define CHAR_STAT 0x14U + +#define CHAR_RAW_CLEAR 0x00000000U +#define CHAR_RAW_VALID 0x00000100U + +/* Hitachi HD44780 display commands */ +#define HD_CLEAR 0x01U +#define HD_HOME 0x02U +#define HD_ENTRYMODE 0x04U +#define HD_ENTRYMODE_INCREMENT 0x02U +#define HD_ENTRYMODE_SHIFT 0x01U +#define HD_DISPCTRL 0x08U +#define HD_DISPCTRL_ON 0x04U +#define HD_DISPCTRL_CURSOR_ON 0x02U +#define HD_DISPCTRL_CURSOR_BLINK 0x01U +#define HD_CRSR_SHIFT 0x10U +#define HD_CRSR_SHIFT_DISPLAY 0x08U +#define HD_CRSR_SHIFT_DISPLAY_RIGHT 0x04U +#define HD_FUNCSET 0x20U +#define HD_FUNCSET_8BIT 0x10U +#define HD_FUNCSET_2_LINES 0x08U +#define HD_FUNCSET_FONT_5X10 0x04U +#define HD_SET_CGRAM 0x40U +#define HD_SET_DDRAM 0x80U +#define HD_BUSY_FLAG 0x80U + +/** + * struct charlcd - state for one ARM character LCD instance + * @dev: a pointer back to containing device + * @phybase: the offset to the controller in physical memory + * @physize: the size of the physical page + * @virtbase: the offset to the controller in virtual memory + * @irq: reserved interrupt number + * @complete: completion structure for the last LCD command + * @init_work: delayed work that runs the slow display initialization + */ +struct charlcd { + struct device *dev; + u32 phybase; + u32 physize; + void __iomem *virtbase; + int irq; + struct completion complete; + struct delayed_work init_work; +}; + +static irqreturn_t charlcd_interrupt(int irq, void *data) +{ + struct charlcd *lcd = data; + u8 status; + + status = readl(lcd->virtbase + CHAR_STAT) & 0x01; + /* Clear IRQ */ + writel(CHAR_RAW_CLEAR, lcd->virtbase + CHAR_RAW); + if (status) + complete(&lcd->complete); + else + dev_info(lcd->dev, "Spurious IRQ (%02x)\n", status); + return IRQ_HANDLED; +} + + +static void charlcd_wait_complete_irq(struct charlcd *lcd) +{ + int ret; + + ret = wait_for_completion_interruptible_timeout(&lcd->complete, + CHARLCD_TIMEOUT); + /* Disable IRQ after completion */ + writel(0x00, lcd->virtbase + CHAR_MASK); + + if (ret < 0) { + dev_err(lcd->dev, + "wait_for_completion_interruptible_timeout() " + "returned %d waiting for ready\n", ret); + return; + } + + if (ret == 0) { + dev_err(lcd->dev, "charlcd controller timed out " + "waiting for ready\n"); + return; + } +} + +static u8 charlcd_4bit_read_char(struct charlcd *lcd) +{ + u8 data; + u32 val; + int i; + + /* If we can, use an IRQ to wait for the data, else poll */ + if (lcd->irq >= 0) + charlcd_wait_complete_irq(lcd); + else { + i = 0; + val = 0; + while (!(val & CHAR_RAW_VALID) && i < 10) { + udelay(100); + val = readl(lcd->virtbase + CHAR_RAW); + i++; + } + + writel(CHAR_RAW_CLEAR, lcd->virtbase + CHAR_RAW); + } + msleep(1); + + /* Read the 4 high bits of the data */ + data = readl(lcd->virtbase + CHAR_RD) & 0xf0; + + /* + * The second read for the low bits does not trigger an IRQ + * so in this case we have to poll for the 4 lower bits + */ + i = 0; + val = 0; + while (!(val & CHAR_RAW_VALID) && i < 10) { + udelay(100); + val = readl(lcd->virtbase + CHAR_RAW); + i++; + } + writel(CHAR_RAW_CLEAR, lcd->virtbase + CHAR_RAW); + msleep(1); + + /* Read the 4 low bits of the data */ + data |= (readl(lcd->virtbase + CHAR_RD) >> 4) & 0x0f; + + return data; +} + +static bool charlcd_4bit_read_bf(struct charlcd *lcd) +{ + if (lcd->irq >= 0) { + /* + * If we'll use IRQs to wait for the busyflag, clear any + * pending flag and enable IRQ + */ + writel(CHAR_RAW_CLEAR, 
lcd->virtbase + CHAR_RAW); + init_completion(&lcd->complete); + writel(0x01, lcd->virtbase + CHAR_MASK); + } + readl(lcd->virtbase + CHAR_COM); + return charlcd_4bit_read_char(lcd) & HD_BUSY_FLAG ? true : false; +} + +static void charlcd_4bit_wait_busy(struct charlcd *lcd) +{ + int retries = 50; + + udelay(100); + while (charlcd_4bit_read_bf(lcd) && retries) + retries--; + if (!retries) + dev_err(lcd->dev, "timeout waiting for busyflag\n"); +} + +static void charlcd_4bit_command(struct charlcd *lcd, u8 cmd) +{ + u32 cmdlo = (cmd << 4) & 0xf0; + u32 cmdhi = (cmd & 0xf0); + + writel(cmdhi, lcd->virtbase + CHAR_COM); + udelay(10); + writel(cmdlo, lcd->virtbase + CHAR_COM); + charlcd_4bit_wait_busy(lcd); +} + +static void charlcd_4bit_char(struct charlcd *lcd, u8 ch) +{ + u32 chlo = (ch << 4) & 0xf0; + u32 chhi = (ch & 0xf0); + + writel(chhi, lcd->virtbase + CHAR_DAT); + udelay(10); + writel(chlo, lcd->virtbase + CHAR_DAT); + charlcd_4bit_wait_busy(lcd); +} + +static void charlcd_4bit_print(struct charlcd *lcd, int line, const char *str) +{ + u8 offset; + int i; + + /* + * We support lines 0 and 1: + * line 0 runs from 0x00..0x27 + * line 1 runs from 0x28..0x4f + */ + if (line == 0) + offset = 0; + else if (line == 1) + offset = 0x28; + else + return; + + /* Set offset */ + charlcd_4bit_command(lcd, HD_SET_DDRAM | offset); + + /* Send string */ + for (i = 0; i < strlen(str) && i < 0x28; i++) + charlcd_4bit_char(lcd, str[i]); +} + +static void charlcd_4bit_init(struct charlcd *lcd) +{ + /* These commands cannot be checked with the busy flag */ + writel(HD_FUNCSET | HD_FUNCSET_8BIT, lcd->virtbase + CHAR_COM); + msleep(5); + writel(HD_FUNCSET | HD_FUNCSET_8BIT, lcd->virtbase + CHAR_COM); + udelay(100); + writel(HD_FUNCSET | HD_FUNCSET_8BIT, lcd->virtbase + CHAR_COM); + udelay(100); + /* Go to 4bit mode */ + writel(HD_FUNCSET, lcd->virtbase + CHAR_COM); + udelay(100); + /* + * 4bit mode, 2 lines, 5x8 font, after this the number of lines + * and the font cannot be changed until the next initialization sequence + */ + charlcd_4bit_command(lcd, HD_FUNCSET | HD_FUNCSET_2_LINES); + charlcd_4bit_command(lcd, HD_DISPCTRL | HD_DISPCTRL_ON); + charlcd_4bit_command(lcd, HD_ENTRYMODE | HD_ENTRYMODE_INCREMENT); + charlcd_4bit_command(lcd, HD_CLEAR); + charlcd_4bit_command(lcd, HD_HOME); + /* Put something useful in the display */ + charlcd_4bit_print(lcd, 0, "ARM Linux"); + charlcd_4bit_print(lcd, 1, UTS_RELEASE); +} + +static void charlcd_init_work(struct work_struct *work) +{ + struct charlcd *lcd = + container_of(work, struct charlcd, init_work.work); + + charlcd_4bit_init(lcd); +} + +static int __init charlcd_probe(struct platform_device *pdev) +{ + int ret; + struct charlcd *lcd; + struct resource *res; + + lcd = kzalloc(sizeof(struct charlcd), GFP_KERNEL); + if (!lcd) + return -ENOMEM; + + lcd->dev = &pdev->dev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + ret = -ENOENT; + goto out_no_resource; + } + lcd->phybase = res->start; + lcd->physize = resource_size(res); + + if (request_mem_region(lcd->phybase, lcd->physize, + DRIVERNAME) == NULL) { + ret = -EBUSY; + goto out_no_resource; + } + + lcd->virtbase = ioremap(lcd->phybase, lcd->physize); + if (!lcd->virtbase) { + ret = -ENOMEM; + goto out_no_memregion; + } + + lcd->irq = platform_get_irq(pdev, 0); + /* If no IRQ is supplied, we'll survive without it */ + if (lcd->irq >= 0) { + if (request_irq(lcd->irq, charlcd_interrupt, 0, + DRIVERNAME, lcd)) { + ret = -EIO; + goto out_no_irq; + } + } + + platform_set_drvdata(pdev, lcd); + + 
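/* The PM callbacks and remove() fetch this context back via drvdata */ + 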
/* + * Initialize the display in a delayed work, because + * it is VERY slow and would slow down the boot of the system. + */ + INIT_DELAYED_WORK(&lcd->init_work, charlcd_init_work); + schedule_delayed_work(&lcd->init_work, 0); + + dev_info(&pdev->dev, "initialized ARM character LCD at %08x\n", + lcd->phybase); + + return 0; + +out_no_irq: + iounmap(lcd->virtbase); +out_no_memregion: + release_mem_region(lcd->phybase, lcd->physize); +out_no_resource: + kfree(lcd); + return ret; +} + +static int __exit charlcd_remove(struct platform_device *pdev) +{ + struct charlcd *lcd = platform_get_drvdata(pdev); + + if (lcd) { + cancel_delayed_work_sync(&lcd->init_work); + if (lcd->irq >= 0) + free_irq(lcd->irq, lcd); + iounmap(lcd->virtbase); + release_mem_region(lcd->phybase, lcd->physize); + kfree(lcd); + } + + return 0; +} + +static int charlcd_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct charlcd *lcd = platform_get_drvdata(pdev); + + /* Power the display off */ + charlcd_4bit_command(lcd, HD_DISPCTRL); + return 0; +} + +static int charlcd_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct charlcd *lcd = platform_get_drvdata(pdev); + + /* Turn the display back on */ + charlcd_4bit_command(lcd, HD_DISPCTRL | HD_DISPCTRL_ON); + return 0; +} + +static const struct dev_pm_ops charlcd_pm_ops = { + .suspend = charlcd_suspend, + .resume = charlcd_resume, +}; + +static const struct of_device_id charlcd_match[] = { + { .compatible = "arm,versatile-lcd", }, + {} +}; + +static struct platform_driver charlcd_driver = { + .driver = { + .name = DRIVERNAME, + .pm = &charlcd_pm_ops, + .of_match_table = of_match_ptr(charlcd_match), + }, + .remove = __exit_p(charlcd_remove), +}; + +module_platform_driver_probe(charlcd_driver, charlcd_probe); + +MODULE_AUTHOR("Linus Walleij <triad@df.lth.se>"); +MODULE_DESCRIPTION("ARM Character LCD Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/misc/atmel-ssc.c b/kernel/drivers/misc/atmel-ssc.c new file mode 100644 index 000000000..e11a0bd6c --- /dev/null +++ b/kernel/drivers/misc/atmel-ssc.c @@ -0,0 +1,234 @@ +/* + * Atmel SSC driver + * + * Copyright (C) 2007 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/platform_device.h> +#include <linux/list.h> +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/spinlock.h> +#include <linux/atmel-ssc.h> +#include <linux/slab.h> +#include <linux/module.h> + +#include <linux/of.h> + +/* Serialize access to ssc_list and user count */ +static DEFINE_SPINLOCK(user_lock); +static LIST_HEAD(ssc_list); + +struct ssc_device *ssc_request(unsigned int ssc_num) +{ + int ssc_valid = 0; + struct ssc_device *ssc; + + spin_lock(&user_lock); + list_for_each_entry(ssc, &ssc_list, list) { + if (ssc->pdev->dev.of_node) { + if (of_alias_get_id(ssc->pdev->dev.of_node, "ssc") + == ssc_num) { + ssc_valid = 1; + break; + } + } else if (ssc->pdev->id == ssc_num) { + ssc_valid = 1; + break; + } + } + + if (!ssc_valid) { + spin_unlock(&user_lock); + pr_err("ssc: ssc%d platform device is missing\n", ssc_num); + return ERR_PTR(-ENODEV); + } + + if (ssc->user) { + spin_unlock(&user_lock); + dev_dbg(&ssc->pdev->dev, "module busy\n"); + return ERR_PTR(-EBUSY); + } + ssc->user++; + spin_unlock(&user_lock); + + clk_prepare(ssc->clk); + + return ssc; +} +EXPORT_SYMBOL(ssc_request); + +void ssc_free(struct ssc_device *ssc) +{ + bool disable_clk = true; + + spin_lock(&user_lock); + if (ssc->user) + ssc->user--; + else { + disable_clk = false; + dev_dbg(&ssc->pdev->dev, "device already free\n"); + } + spin_unlock(&user_lock); + + if (disable_clk) + clk_unprepare(ssc->clk); +} +EXPORT_SYMBOL(ssc_free); + +static struct atmel_ssc_platform_data at91rm9200_config = { + .use_dma = 0, + .has_fslen_ext = 0, +}; + +static struct atmel_ssc_platform_data at91sam9rl_config = { + .use_dma = 0, + .has_fslen_ext = 1, +}; + +static struct atmel_ssc_platform_data at91sam9g45_config = { + .use_dma = 1, + .has_fslen_ext = 1, +}; + +static const struct platform_device_id atmel_ssc_devtypes[] = { + { + .name = "at91rm9200_ssc", + .driver_data = (unsigned long) &at91rm9200_config, + }, { + .name = "at91sam9rl_ssc", + .driver_data = (unsigned long) &at91sam9rl_config, + }, { + .name = "at91sam9g45_ssc", + .driver_data = (unsigned long) &at91sam9g45_config, + }, { + /* sentinel */ + } +}; + +#ifdef CONFIG_OF +static const struct of_device_id atmel_ssc_dt_ids[] = { + { + .compatible = "atmel,at91rm9200-ssc", + .data = &at91rm9200_config, + }, { + .compatible = "atmel,at91sam9rl-ssc", + .data = &at91sam9rl_config, + }, { + .compatible = "atmel,at91sam9g45-ssc", + .data = &at91sam9g45_config, + }, { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(of, atmel_ssc_dt_ids); +#endif + +static inline const struct atmel_ssc_platform_data * __init + atmel_ssc_get_driver_data(struct platform_device *pdev) +{ + if (pdev->dev.of_node) { + const struct of_device_id *match; + match = of_match_node(atmel_ssc_dt_ids, pdev->dev.of_node); + if (match == NULL) + return NULL; + return match->data; + } + + return (struct atmel_ssc_platform_data *) + platform_get_device_id(pdev)->driver_data; +} + +static int ssc_probe(struct platform_device *pdev) +{ + struct resource *regs; + struct ssc_device *ssc; + const struct atmel_ssc_platform_data *plat_dat; + + ssc = devm_kzalloc(&pdev->dev, sizeof(struct ssc_device), GFP_KERNEL); + if (!ssc) { + dev_dbg(&pdev->dev, "out of memory\n"); + return -ENOMEM; + } + + ssc->pdev = pdev; + + plat_dat = atmel_ssc_get_driver_data(pdev); + if (!plat_dat) + return -ENODEV; + ssc->pdata = (struct atmel_ssc_platform_data *)plat_dat; + + if (pdev->dev.of_node) { + struct device_node *np = pdev->dev.of_node; + ssc->clk_from_rk_pin = + 
of_property_read_bool(np, "atmel,clk-from-rk-pin"); + } + + regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + ssc->regs = devm_ioremap_resource(&pdev->dev, regs); + if (IS_ERR(ssc->regs)) + return PTR_ERR(ssc->regs); + + ssc->phybase = regs->start; + + ssc->clk = devm_clk_get(&pdev->dev, "pclk"); + if (IS_ERR(ssc->clk)) { + dev_dbg(&pdev->dev, "no pclk clock defined\n"); + return -ENXIO; + } + + /* disable all interrupts */ + clk_prepare_enable(ssc->clk); + ssc_writel(ssc->regs, IDR, -1); + ssc_readl(ssc->regs, SR); + clk_disable_unprepare(ssc->clk); + + ssc->irq = platform_get_irq(pdev, 0); + if (ssc->irq < 0) { + dev_dbg(&pdev->dev, "could not get irq\n"); + return -ENXIO; + } + + spin_lock(&user_lock); + list_add_tail(&ssc->list, &ssc_list); + spin_unlock(&user_lock); + + platform_set_drvdata(pdev, ssc); + + dev_info(&pdev->dev, "Atmel SSC device at 0x%p (irq %d)\n", + ssc->regs, ssc->irq); + + return 0; +} + +static int ssc_remove(struct platform_device *pdev) +{ + struct ssc_device *ssc = platform_get_drvdata(pdev); + + spin_lock(&user_lock); + list_del(&ssc->list); + spin_unlock(&user_lock); + + return 0; +} + +static struct platform_driver ssc_driver = { + .driver = { + .name = "ssc", + .of_match_table = of_match_ptr(atmel_ssc_dt_ids), + }, + .id_table = atmel_ssc_devtypes, + .probe = ssc_probe, + .remove = ssc_remove, +}; +module_platform_driver(ssc_driver); + +MODULE_AUTHOR("Hans-Christian Egtvedt <hcegtvedt@atmel.com>"); +MODULE_DESCRIPTION("SSC driver for Atmel AVR32 and AT91"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:ssc"); diff --git a/kernel/drivers/misc/atmel_tclib.c b/kernel/drivers/misc/atmel_tclib.c new file mode 100644 index 000000000..0ca05c3ec --- /dev/null +++ b/kernel/drivers/misc/atmel_tclib.c @@ -0,0 +1,194 @@ +#include <linux/atmel_tc.h> +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/ioport.h> +#include <linux/kernel.h> +#include <linux/platform_device.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/export.h> +#include <linux/of.h> + +/* + * This is a thin library to solve the problem of how to portably allocate + * one of the TC blocks. For simplicity, it doesn't currently expect to + * share individual timers between different drivers. + */ + +#if defined(CONFIG_AVR32) +/* On AVR32 these divide the PBB clock */ +const u8 atmel_tc_divisors[5] = { 0, 4, 8, 16, 32, }; +EXPORT_SYMBOL(atmel_tc_divisors); + +#elif defined(CONFIG_ARCH_AT91) +/* On AT91 these divide MCK */ +const u8 atmel_tc_divisors[5] = { 2, 8, 32, 128, 0, }; +EXPORT_SYMBOL(atmel_tc_divisors); + +#endif + +static DEFINE_SPINLOCK(tc_list_lock); +static LIST_HEAD(tc_list); + +/** + * atmel_tc_alloc - allocate a specified TC block + * @block: which block to allocate + * + * Caller allocates a block. If it is available, a pointer to a + * pre-initialized struct atmel_tc is returned. The caller can access + * the registers directly through the "regs" field. + */ +struct atmel_tc *atmel_tc_alloc(unsigned block) +{ + struct atmel_tc *tc; + struct platform_device *pdev = NULL; + + spin_lock(&tc_list_lock); + list_for_each_entry(tc, &tc_list, node) { + if (tc->allocated) + continue; + + if ((tc->pdev->dev.of_node && tc->id == block) || + (tc->pdev->id == block)) { + pdev = tc->pdev; + tc->allocated = true; + break; + } + } + spin_unlock(&tc_list_lock); + + return pdev ? 
tc : NULL; +} +EXPORT_SYMBOL_GPL(atmel_tc_alloc); + +/** + * atmel_tc_free - release a specified TC block + * @tc: Timer/counter block that was returned by atmel_tc_alloc() + * + * This reverses the effect of atmel_tc_alloc(), invalidating the resource + * returned by that routine and making the TC available to other drivers. + */ +void atmel_tc_free(struct atmel_tc *tc) +{ + spin_lock(&tc_list_lock); + if (tc->allocated) + tc->allocated = false; + spin_unlock(&tc_list_lock); +} +EXPORT_SYMBOL_GPL(atmel_tc_free); + +#if defined(CONFIG_OF) +static struct atmel_tcb_config tcb_rm9200_config = { + .counter_width = 16, +}; + +static struct atmel_tcb_config tcb_sam9x5_config = { + .counter_width = 32, +}; + +static const struct of_device_id atmel_tcb_dt_ids[] = { + { + .compatible = "atmel,at91rm9200-tcb", + .data = &tcb_rm9200_config, + }, { + .compatible = "atmel,at91sam9x5-tcb", + .data = &tcb_sam9x5_config, + }, { + /* sentinel */ + } +}; + +MODULE_DEVICE_TABLE(of, atmel_tcb_dt_ids); +#endif + +static int __init tc_probe(struct platform_device *pdev) +{ + struct atmel_tc *tc; + struct clk *clk; + int irq; + struct resource *r; + unsigned int i; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return -EINVAL; + + tc = devm_kzalloc(&pdev->dev, sizeof(struct atmel_tc), GFP_KERNEL); + if (!tc) + return -ENOMEM; + + tc->pdev = pdev; + + clk = devm_clk_get(&pdev->dev, "t0_clk"); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + tc->regs = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(tc->regs)) + return PTR_ERR(tc->regs); + + /* Now take SoC information if available */ + if (pdev->dev.of_node) { + const struct of_device_id *match; + match = of_match_node(atmel_tcb_dt_ids, pdev->dev.of_node); + if (match) + tc->tcb_config = match->data; + + tc->id = of_alias_get_id(tc->pdev->dev.of_node, "tcb"); + } else { + tc->id = pdev->id; + } + + tc->clk[0] = clk; + tc->clk[1] = devm_clk_get(&pdev->dev, "t1_clk"); + if (IS_ERR(tc->clk[1])) + tc->clk[1] = clk; + tc->clk[2] = devm_clk_get(&pdev->dev, "t2_clk"); + if (IS_ERR(tc->clk[2])) + tc->clk[2] = clk; + + tc->irq[0] = irq; + tc->irq[1] = platform_get_irq(pdev, 1); + if (tc->irq[1] < 0) + tc->irq[1] = irq; + tc->irq[2] = platform_get_irq(pdev, 2); + if (tc->irq[2] < 0) + tc->irq[2] = irq; + + for (i = 0; i < 3; i++) + writel(ATMEL_TC_ALL_IRQ, tc->regs + ATMEL_TC_REG(i, IDR)); + + spin_lock(&tc_list_lock); + list_add_tail(&tc->node, &tc_list); + spin_unlock(&tc_list_lock); + + platform_set_drvdata(pdev, tc); + + return 0; +} + +static void tc_shutdown(struct platform_device *pdev) +{ + int i; + struct atmel_tc *tc = platform_get_drvdata(pdev); + + for (i = 0; i < 3; i++) + writel(ATMEL_TC_ALL_IRQ, tc->regs + ATMEL_TC_REG(i, IDR)); +} + +static struct platform_driver tc_driver = { + .driver = { + .name = "atmel_tcb", + .of_match_table = of_match_ptr(atmel_tcb_dt_ids), + }, + .shutdown = tc_shutdown, +}; + +static int __init tc_init(void) +{ + return platform_driver_probe(&tc_driver, tc_probe); +} +arch_initcall(tc_init); diff --git a/kernel/drivers/misc/bh1770glc.c b/kernel/drivers/misc/bh1770glc.c new file mode 100644 index 000000000..b756381b8 --- /dev/null +++ b/kernel/drivers/misc/bh1770glc.c @@ -0,0 +1,1411 @@ +/* + * This file is part of the ROHM BH1770GLC / OSRAM SFH7770 sensor driver. + * Chip is combined proximity and ambient light sensor. + * + * Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies). 
+ * + * Contact: Samu Onkalo <samu.p.onkalo@nokia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/i2c.h> +#include <linux/interrupt.h> +#include <linux/mutex.h> +#include <linux/i2c/bh1770glc.h> +#include <linux/regulator/consumer.h> +#include <linux/pm_runtime.h> +#include <linux/workqueue.h> +#include <linux/delay.h> +#include <linux/wait.h> +#include <linux/slab.h> + +#define BH1770_ALS_CONTROL 0x80 /* ALS operation mode control */ +#define BH1770_PS_CONTROL 0x81 /* PS operation mode control */ +#define BH1770_I_LED 0x82 /* active LED and LED1, LED2 current */ +#define BH1770_I_LED3 0x83 /* LED3 current setting */ +#define BH1770_ALS_PS_MEAS 0x84 /* Forced mode trigger */ +#define BH1770_PS_MEAS_RATE 0x85 /* PS meas. rate at stand alone mode */ +#define BH1770_ALS_MEAS_RATE 0x86 /* ALS meas. rate at stand alone mode */ +#define BH1770_PART_ID 0x8a /* Part number and revision ID */ +#define BH1770_MANUFACT_ID 0x8b /* Manufacturer ID */ +#define BH1770_ALS_DATA_0 0x8c /* ALS DATA low byte */ +#define BH1770_ALS_DATA_1 0x8d /* ALS DATA high byte */ +#define BH1770_ALS_PS_STATUS 0x8e /* Measurement data and int status */ +#define BH1770_PS_DATA_LED1 0x8f /* PS data from LED1 */ +#define BH1770_PS_DATA_LED2 0x90 /* PS data from LED2 */ +#define BH1770_PS_DATA_LED3 0x91 /* PS data from LED3 */ +#define BH1770_INTERRUPT 0x92 /* Interrupt setting */ +#define BH1770_PS_TH_LED1 0x93 /* PS interrupt threshold for LED1 */ +#define BH1770_PS_TH_LED2 0x94 /* PS interrupt threshold for LED2 */ +#define BH1770_PS_TH_LED3 0x95 /* PS interrupt threshold for LED3 */ +#define BH1770_ALS_TH_UP_0 0x96 /* ALS upper threshold low byte */ +#define BH1770_ALS_TH_UP_1 0x97 /* ALS upper threshold high byte */ +#define BH1770_ALS_TH_LOW_0 0x98 /* ALS lower threshold low byte */ +#define BH1770_ALS_TH_LOW_1 0x99 /* ALS lower threshold high byte */ + +/* MANUFACT_ID */ +#define BH1770_MANUFACT_ROHM 0x01 +#define BH1770_MANUFACT_OSRAM 0x03 + +/* PART_ID */ +#define BH1770_PART 0x90 +#define BH1770_PART_MASK 0xf0 +#define BH1770_REV_MASK 0x0f +#define BH1770_REV_SHIFT 0 +#define BH1770_REV_0 0x00 +#define BH1770_REV_1 0x01 + +/* Operating modes for both */ +#define BH1770_STANDBY 0x00 +#define BH1770_FORCED 0x02 +#define BH1770_STANDALONE 0x03 +#define BH1770_SWRESET (0x01 << 2) + +#define BH1770_PS_TRIG_MEAS (1 << 0) +#define BH1770_ALS_TRIG_MEAS (1 << 1) + +/* Interrupt control */ +#define BH1770_INT_OUTPUT_MODE (1 << 3) /* 0 = latched */ +#define BH1770_INT_POLARITY (1 << 2) /* 1 = active high */ +#define BH1770_INT_ALS_ENA (1 << 1) +#define BH1770_INT_PS_ENA (1 << 0) + +/* Interrupt status */ +#define BH1770_INT_LED1_DATA (1 << 0) +#define BH1770_INT_LED1_INT (1 << 1) +#define BH1770_INT_LED2_DATA (1 << 2) +#define BH1770_INT_LED2_INT (1 << 3) +#define BH1770_INT_LED3_DATA (1 << 4) +#define BH1770_INT_LED3_INT (1 << 5) 
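+/* All three LED channel interrupt bits; the IRQ handler uses this mask to detect a proximity event */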
+#define BH1770_INT_LEDS_INT ((1 << 1) | (1 << 3) | (1 << 5)) +#define BH1770_INT_ALS_DATA (1 << 6) +#define BH1770_INT_ALS_INT (1 << 7) + +/* Led channels */ +#define BH1770_LED1 0x00 + +#define BH1770_DISABLE 0 +#define BH1770_ENABLE 1 +#define BH1770_PROX_CHANNELS 1 + +#define BH1770_LUX_DEFAULT_RATE 1 /* Index to lux rate table */ +#define BH1770_PROX_DEFAULT_RATE 1 /* Direct HW value =~ 50Hz */ +#define BH1770_PROX_DEF_RATE_THRESH 6 /* Direct HW value =~ 5 Hz */ +#define BH1770_STARTUP_DELAY 50 +#define BH1770_RESET_TIME 10 +#define BH1770_TIMEOUT 2100 /* Timeout in 2.1 seconds */ + +#define BH1770_LUX_RANGE 65535 +#define BH1770_PROX_RANGE 255 +#define BH1770_COEF_SCALER 1024 +#define BH1770_CALIB_SCALER 8192 +#define BH1770_LUX_NEUTRAL_CALIB_VALUE (1 * BH1770_CALIB_SCALER) +#define BH1770_LUX_DEF_THRES 1000 +#define BH1770_PROX_DEF_THRES 70 +#define BH1770_PROX_DEF_ABS_THRES 100 +#define BH1770_DEFAULT_PERSISTENCE 10 +#define BH1770_PROX_MAX_PERSISTENCE 50 +#define BH1770_LUX_GA_SCALE 16384 +#define BH1770_LUX_CF_SCALE 2048 /* CF ChipFactor */ +#define BH1770_NEUTRAL_CF BH1770_LUX_CF_SCALE +#define BH1770_LUX_CORR_SCALE 4096 + +#define PROX_ABOVE_THRESHOLD 1 +#define PROX_BELOW_THRESHOLD 0 + +#define PROX_IGNORE_LUX_LIMIT 500 + +struct bh1770_chip { + struct bh1770_platform_data *pdata; + char chipname[10]; + u8 revision; + struct i2c_client *client; + struct regulator_bulk_data regs[2]; + struct mutex mutex; /* avoid parallel access */ + wait_queue_head_t wait; + + bool int_mode_prox; + bool int_mode_lux; + struct delayed_work prox_work; + u32 lux_cf; /* Chip specific factor */ + u32 lux_ga; + u32 lux_calib; + int lux_rate_index; + u32 lux_corr; + u16 lux_data_raw; + u16 lux_threshold_hi; + u16 lux_threshold_lo; + u16 lux_thres_hi_onchip; + u16 lux_thres_lo_onchip; + bool lux_wait_result; + + int prox_enable_count; + u16 prox_coef; + u16 prox_const; + int prox_rate; + int prox_rate_threshold; + u8 prox_persistence; + u8 prox_persistence_counter; + u8 prox_data; + u8 prox_threshold; + u8 prox_threshold_hw; + bool prox_force_update; + u8 prox_abs_thres; + u8 prox_led; +}; + +static const char reg_vcc[] = "Vcc"; +static const char reg_vleds[] = "Vleds"; + +/* + * Supported stand alone rates in ms from chip data sheet + * {10, 20, 30, 40, 70, 100, 200, 500, 1000, 2000}; + */ +static const s16 prox_rates_hz[] = {100, 50, 33, 25, 14, 10, 5, 2}; +static const s16 prox_rates_ms[] = {10, 20, 30, 40, 70, 100, 200, 500}; + +/* Supported IR-led currents in mA */ +static const u8 prox_curr_ma[] = {5, 10, 20, 50, 100, 150, 200}; + +/* + * Supported stand alone rates in ms from chip data sheet + * {100, 200, 500, 1000, 2000}; + */ +static const s16 lux_rates_hz[] = {10, 5, 2, 1, 0}; + +/* + * interrupt control functions are called while keeping chip->mutex + * excluding module probe / remove + */ +static inline int bh1770_lux_interrupt_control(struct bh1770_chip *chip, + int lux) +{ + chip->int_mode_lux = lux; + /* Set interrupt modes, interrupt active low, latched */ + return i2c_smbus_write_byte_data(chip->client, + BH1770_INTERRUPT, + (lux << 1) | chip->int_mode_prox); +} + +static inline int bh1770_prox_interrupt_control(struct bh1770_chip *chip, + int ps) +{ + chip->int_mode_prox = ps; + return i2c_smbus_write_byte_data(chip->client, + BH1770_INTERRUPT, + (chip->int_mode_lux << 1) | (ps << 0)); +} + +/* chip->mutex is always kept here */ +static int bh1770_lux_rate(struct bh1770_chip *chip, int rate_index) +{ + /* sysfs may call this when the chip is powered off */ + if 
(pm_runtime_suspended(&chip->client->dev)) + return 0; + + /* Proper proximity response needs the fastest lux rate (100 ms) */ + if (chip->prox_enable_count) + rate_index = 0; + + return i2c_smbus_write_byte_data(chip->client, + BH1770_ALS_MEAS_RATE, + rate_index); +} + +static int bh1770_prox_rate(struct bh1770_chip *chip, int mode) +{ + int rate; + + rate = (mode == PROX_ABOVE_THRESHOLD) ? + chip->prox_rate_threshold : chip->prox_rate; + + return i2c_smbus_write_byte_data(chip->client, + BH1770_PS_MEAS_RATE, + rate); +} + +/* The infrared LED is controlled by the chip during proximity scanning */ +static inline int bh1770_led_cfg(struct bh1770_chip *chip) +{ + /* LED cfg, current for leds 1 and 2 */ + return i2c_smbus_write_byte_data(chip->client, + BH1770_I_LED, + (BH1770_LED1 << 6) | + (BH1770_LED_5mA << 3) | + chip->prox_led); +} + +/* + * The following two functions convert raw PS values from HW to normalized + * values. The purpose is to compensate for differences between sensor + * versions and variants so that results mean about the same across + * versions. + */ +static inline u8 bh1770_psraw_to_adjusted(struct bh1770_chip *chip, u8 psraw) +{ + u16 adjusted; + adjusted = (u16)(((u32)(psraw + chip->prox_const) * chip->prox_coef) / + BH1770_COEF_SCALER); + if (adjusted > BH1770_PROX_RANGE) + adjusted = BH1770_PROX_RANGE; + return adjusted; +} + +static inline u8 bh1770_psadjusted_to_raw(struct bh1770_chip *chip, u8 ps) +{ + u16 raw; + + raw = (((u32)ps * BH1770_COEF_SCALER) / chip->prox_coef); + if (raw > chip->prox_const) + raw = raw - chip->prox_const; + else + raw = 0; + return raw; +} + +/* + * The following two functions convert raw lux values from HW to normalized + * values. The purpose is to compensate for differences between sensor + * versions and variants so that results mean about the same across + * versions. Chip->mutex is kept when this is called. + */ +static int bh1770_prox_set_threshold(struct bh1770_chip *chip) +{ + u8 tmp = 0; + + /* sysfs may call this when the chip is powered off */ + if (pm_runtime_suspended(&chip->client->dev)) + return 0; + + tmp = bh1770_psadjusted_to_raw(chip, chip->prox_threshold); + chip->prox_threshold_hw = tmp; + + return i2c_smbus_write_byte_data(chip->client, BH1770_PS_TH_LED1, + tmp); +} + +static inline u16 bh1770_lux_raw_to_adjusted(struct bh1770_chip *chip, u16 raw) +{ + u32 lux; + lux = ((u32)raw * chip->lux_corr) / BH1770_LUX_CORR_SCALE; + return min(lux, (u32)BH1770_LUX_RANGE); +} + +static inline u16 bh1770_lux_adjusted_to_raw(struct bh1770_chip *chip, + u16 adjusted) +{ + return (u32)adjusted * BH1770_LUX_CORR_SCALE / chip->lux_corr; +} + +/* chip->mutex is kept when this is called */ +static int bh1770_lux_update_thresholds(struct bh1770_chip *chip, + u16 threshold_hi, u16 threshold_lo) +{ + u8 data[4]; + int ret; + + /* sysfs may call this when the chip is powered off */ + if (pm_runtime_suspended(&chip->client->dev)) + return 0; + + /* + * Compensate threshold values with the correction factors if not + * set to minimum or maximum. + * Min & max values disable interrupts. 
+ */ + if (threshold_hi != BH1770_LUX_RANGE && threshold_hi != 0) + threshold_hi = bh1770_lux_adjusted_to_raw(chip, threshold_hi); + + if (threshold_lo != BH1770_LUX_RANGE && threshold_lo != 0) + threshold_lo = bh1770_lux_adjusted_to_raw(chip, threshold_lo); + + if (chip->lux_thres_hi_onchip == threshold_hi && + chip->lux_thres_lo_onchip == threshold_lo) + return 0; + + chip->lux_thres_hi_onchip = threshold_hi; + chip->lux_thres_lo_onchip = threshold_lo; + + data[0] = threshold_hi; + data[1] = threshold_hi >> 8; + data[2] = threshold_lo; + data[3] = threshold_lo >> 8; + + ret = i2c_smbus_write_i2c_block_data(chip->client, + BH1770_ALS_TH_UP_0, + ARRAY_SIZE(data), + data); + return ret; +} + +static int bh1770_lux_get_result(struct bh1770_chip *chip) +{ + u16 data; + int ret; + + ret = i2c_smbus_read_byte_data(chip->client, BH1770_ALS_DATA_0); + if (ret < 0) + return ret; + + data = ret & 0xff; + ret = i2c_smbus_read_byte_data(chip->client, BH1770_ALS_DATA_1); + if (ret < 0) + return ret; + + chip->lux_data_raw = data | ((ret & 0xff) << 8); + + return 0; +} + +/* Calculate correction value which contains chip and device specific parts */ +static u32 bh1770_get_corr_value(struct bh1770_chip *chip) +{ + u32 tmp; + /* Impact of glass attenuation correction */ + tmp = (BH1770_LUX_CORR_SCALE * chip->lux_ga) / BH1770_LUX_GA_SCALE; + /* Impact of chip factor correction */ + tmp = (tmp * chip->lux_cf) / BH1770_LUX_CF_SCALE; + /* Impact of device specific calibration correction */ + tmp = (tmp * chip->lux_calib) / BH1770_CALIB_SCALER; + return tmp; +} + +static int bh1770_lux_read_result(struct bh1770_chip *chip) +{ + bh1770_lux_get_result(chip); + return bh1770_lux_raw_to_adjusted(chip, chip->lux_data_raw); +} + +/* + * Chip on / off functions are called while keeping the mutex, except + * during the probe and remove phases + */ +static int bh1770_chip_on(struct bh1770_chip *chip) +{ + int ret = regulator_bulk_enable(ARRAY_SIZE(chip->regs), + chip->regs); + if (ret < 0) + return ret; + + usleep_range(BH1770_STARTUP_DELAY, BH1770_STARTUP_DELAY * 2); + + /* Reset the chip */ + i2c_smbus_write_byte_data(chip->client, BH1770_ALS_CONTROL, + BH1770_SWRESET); + usleep_range(BH1770_RESET_TIME, BH1770_RESET_TIME * 2); + + /* + * ALS is always started, since proximity needs ALS results + * for reliability estimation. + * Let's assume dark until the first ALS measurement is ready. 
+ */ + chip->lux_data_raw = 0; + chip->prox_data = 0; + ret = i2c_smbus_write_byte_data(chip->client, + BH1770_ALS_CONTROL, BH1770_STANDALONE); + + /* Assume reset defaults */ + chip->lux_thres_hi_onchip = BH1770_LUX_RANGE; + chip->lux_thres_lo_onchip = 0; + + return ret; +} + +static void bh1770_chip_off(struct bh1770_chip *chip) +{ + i2c_smbus_write_byte_data(chip->client, + BH1770_INTERRUPT, BH1770_DISABLE); + i2c_smbus_write_byte_data(chip->client, + BH1770_ALS_CONTROL, BH1770_STANDBY); + i2c_smbus_write_byte_data(chip->client, + BH1770_PS_CONTROL, BH1770_STANDBY); + regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs); +} + +/* chip->mutex is kept when this is called */ +static int bh1770_prox_mode_control(struct bh1770_chip *chip) +{ + if (chip->prox_enable_count) { + chip->prox_force_update = true; /* Force immediate update */ + + bh1770_lux_rate(chip, chip->lux_rate_index); + bh1770_prox_set_threshold(chip); + bh1770_led_cfg(chip); + bh1770_prox_rate(chip, PROX_BELOW_THRESHOLD); + bh1770_prox_interrupt_control(chip, BH1770_ENABLE); + i2c_smbus_write_byte_data(chip->client, + BH1770_PS_CONTROL, BH1770_STANDALONE); + } else { + chip->prox_data = 0; + bh1770_lux_rate(chip, chip->lux_rate_index); + bh1770_prox_interrupt_control(chip, BH1770_DISABLE); + i2c_smbus_write_byte_data(chip->client, + BH1770_PS_CONTROL, BH1770_STANDBY); + } + return 0; +} + +/* chip->mutex is kept when this is called */ +static int bh1770_prox_read_result(struct bh1770_chip *chip) +{ + int ret; + bool above; + u8 mode; + + ret = i2c_smbus_read_byte_data(chip->client, BH1770_PS_DATA_LED1); + if (ret < 0) + goto out; + + if (ret > chip->prox_threshold_hw) + above = true; + else + above = false; + + /* + * When the ALS level goes above the limit, the proximity result may + * be a false positive, so ignore the result. With real proximity + * there is a shadow causing low ALS levels. 
+ */ + if (chip->lux_data_raw > PROX_IGNORE_LUX_LIMIT) + ret = 0; + + chip->prox_data = bh1770_psraw_to_adjusted(chip, ret); + + /* Strong proximity level or force mode requires immediate response */ + if (chip->prox_data >= chip->prox_abs_thres || + chip->prox_force_update) + chip->prox_persistence_counter = chip->prox_persistence; + + chip->prox_force_update = false; + + /* Persistence filtering to reduce false proximity events */ + if (likely(above)) { + if (chip->prox_persistence_counter < chip->prox_persistence) { + chip->prox_persistence_counter++; + ret = -ENODATA; + } else { + mode = PROX_ABOVE_THRESHOLD; + ret = 0; + } + } else { + chip->prox_persistence_counter = 0; + mode = PROX_BELOW_THRESHOLD; + chip->prox_data = 0; + ret = 0; + } + + /* Set proximity detection rate based on above or below value */ + if (ret == 0) { + bh1770_prox_rate(chip, mode); + sysfs_notify(&chip->client->dev.kobj, NULL, "prox0_raw"); + } +out: + return ret; +} + +static int bh1770_detect(struct bh1770_chip *chip) +{ + struct i2c_client *client = chip->client; + s32 ret; + u8 manu, part; + + ret = i2c_smbus_read_byte_data(client, BH1770_MANUFACT_ID); + if (ret < 0) + goto error; + manu = (u8)ret; + + ret = i2c_smbus_read_byte_data(client, BH1770_PART_ID); + if (ret < 0) + goto error; + part = (u8)ret; + + chip->revision = (part & BH1770_REV_MASK) >> BH1770_REV_SHIFT; + chip->prox_coef = BH1770_COEF_SCALER; + chip->prox_const = 0; + chip->lux_cf = BH1770_NEUTRAL_CF; + + if ((manu == BH1770_MANUFACT_ROHM) && + ((part & BH1770_PART_MASK) == BH1770_PART)) { + snprintf(chip->chipname, sizeof(chip->chipname), "BH1770GLC"); + return 0; + } + + if ((manu == BH1770_MANUFACT_OSRAM) && + ((part & BH1770_PART_MASK) == BH1770_PART)) { + snprintf(chip->chipname, sizeof(chip->chipname), "SFH7770"); + /* Values selected by comparing different versions */ + chip->prox_coef = 819; /* 0.8 * BH1770_COEF_SCALER */ + chip->prox_const = 40; + return 0; + } + + ret = -ENODEV; +error: + dev_dbg(&client->dev, "BH1770 or SFH7770 not found\n"); + + return ret; +} + +/* + * This work is re-scheduled at every proximity interrupt. + * If this work is running, it means that there hasn't been any + * proximity interrupt in time. The situation is handled as no-proximity. + * It would be nice to have a low-threshold interrupt, or an interrupt + * when measurement and hi-threshold are both 0, but neither of those exists. + * This is a workaround for a missing HW feature. + */ + +static void bh1770_prox_work(struct work_struct *work) +{ + struct bh1770_chip *chip = + container_of(work, struct bh1770_chip, prox_work.work); + + mutex_lock(&chip->mutex); + bh1770_prox_read_result(chip); + mutex_unlock(&chip->mutex); +} + +/* This is the threaded IRQ handler */ +static irqreturn_t bh1770_irq(int irq, void *data) +{ + struct bh1770_chip *chip = data; + int status; + int rate = 0; + + mutex_lock(&chip->mutex); + status = i2c_smbus_read_byte_data(chip->client, BH1770_ALS_PS_STATUS); + + /* Acknowledge interrupt by reading this register */ + i2c_smbus_read_byte_data(chip->client, BH1770_INTERRUPT); + + /* + * Check if there is fresh data available for ALS. + * If this is the very first data, update thresholds after that. 
+ */ + if (status & BH1770_INT_ALS_DATA) { + bh1770_lux_get_result(chip); + if (unlikely(chip->lux_wait_result)) { + chip->lux_wait_result = false; + wake_up(&chip->wait); + bh1770_lux_update_thresholds(chip, + chip->lux_threshold_hi, + chip->lux_threshold_lo); + } + } + + /* Disable interrupt logic to guarantee acknowledgement */ + i2c_smbus_write_byte_data(chip->client, BH1770_INTERRUPT, + (0 << 1) | (0 << 0)); + + if ((status & BH1770_INT_ALS_INT)) + sysfs_notify(&chip->client->dev.kobj, NULL, "lux0_input"); + + if (chip->int_mode_prox && (status & BH1770_INT_LEDS_INT)) { + rate = prox_rates_ms[chip->prox_rate_threshold]; + bh1770_prox_read_result(chip); + } + + /* Re-enable interrupt logic */ + i2c_smbus_write_byte_data(chip->client, BH1770_INTERRUPT, + (chip->int_mode_lux << 1) | + (chip->int_mode_prox << 0)); + mutex_unlock(&chip->mutex); + + /* + * Can't cancel work while keeping mutex since the work uses the + * same mutex. + */ + if (rate) { + /* + * Simulate missing no-proximity interrupt 50ms after the + * next expected interrupt time. + */ + cancel_delayed_work_sync(&chip->prox_work); + schedule_delayed_work(&chip->prox_work, + msecs_to_jiffies(rate + 50)); + } + return IRQ_HANDLED; +} + +static ssize_t bh1770_power_state_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + ssize_t ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + mutex_lock(&chip->mutex); + if (value) { + pm_runtime_get_sync(dev); + + ret = bh1770_lux_rate(chip, chip->lux_rate_index); + if (ret < 0) { + pm_runtime_put(dev); + goto leave; + } + + ret = bh1770_lux_interrupt_control(chip, BH1770_ENABLE); + if (ret < 0) { + pm_runtime_put(dev); + goto leave; + } + + /* This causes interrupt after the next measurement cycle */ + bh1770_lux_update_thresholds(chip, BH1770_LUX_DEF_THRES, + BH1770_LUX_DEF_THRES); + /* Inform that we are waiting for a result from ALS */ + chip->lux_wait_result = true; + bh1770_prox_mode_control(chip); + } else if (!pm_runtime_suspended(dev)) { + pm_runtime_put(dev); + } + ret = count; +leave: + mutex_unlock(&chip->mutex); + return ret; +} + +static ssize_t bh1770_power_state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", !pm_runtime_suspended(dev)); +} + +static ssize_t bh1770_lux_result_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + ssize_t ret; + long timeout; + + if (pm_runtime_suspended(dev)) + return -EIO; /* Chip is not enabled at all */ + + timeout = wait_event_interruptible_timeout(chip->wait, + !chip->lux_wait_result, + msecs_to_jiffies(BH1770_TIMEOUT)); + if (!timeout) + return -EIO; + + mutex_lock(&chip->mutex); + ret = sprintf(buf, "%d\n", bh1770_lux_read_result(chip)); + mutex_unlock(&chip->mutex); + + return ret; +} + +static ssize_t bh1770_lux_range_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", BH1770_LUX_RANGE); +} + +static ssize_t bh1770_prox_enable_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + mutex_lock(&chip->mutex); + /* Assume no proximity. 
Sensor will tell real state soon */ + if (!chip->prox_enable_count) + chip->prox_data = 0; + + if (value) + chip->prox_enable_count++; + else if (chip->prox_enable_count > 0) + chip->prox_enable_count--; + else + goto leave; + + /* Run control only when chip is powered on */ + if (!pm_runtime_suspended(dev)) + bh1770_prox_mode_control(chip); +leave: + mutex_unlock(&chip->mutex); + return count; +} + +static ssize_t bh1770_prox_enable_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + ssize_t len; + + mutex_lock(&chip->mutex); + len = sprintf(buf, "%d\n", chip->prox_enable_count); + mutex_unlock(&chip->mutex); + return len; +} + +static ssize_t bh1770_prox_result_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + ssize_t ret; + + mutex_lock(&chip->mutex); + if (chip->prox_enable_count && !pm_runtime_suspended(dev)) + ret = sprintf(buf, "%d\n", chip->prox_data); + else + ret = -EIO; + mutex_unlock(&chip->mutex); + return ret; +} + +static ssize_t bh1770_prox_range_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", BH1770_PROX_RANGE); +} + +static ssize_t bh1770_get_prox_rate_avail(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int i; + int pos = 0; + for (i = 0; i < ARRAY_SIZE(prox_rates_hz); i++) + pos += sprintf(buf + pos, "%d ", prox_rates_hz[i]); + sprintf(buf + pos - 1, "\n"); + return pos; +} + +static ssize_t bh1770_get_prox_rate_above(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", prox_rates_hz[chip->prox_rate_threshold]); +} + +static ssize_t bh1770_get_prox_rate_below(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", prox_rates_hz[chip->prox_rate]); +} + +static int bh1770_prox_rate_validate(int rate) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(prox_rates_hz) - 1; i++) + if (rate >= prox_rates_hz[i]) + break; + return i; +} + +static ssize_t bh1770_set_prox_rate_above(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + mutex_lock(&chip->mutex); + chip->prox_rate_threshold = bh1770_prox_rate_validate(value); + mutex_unlock(&chip->mutex); + return count; +} + +static ssize_t bh1770_set_prox_rate_below(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + mutex_lock(&chip->mutex); + chip->prox_rate = bh1770_prox_rate_validate(value); + mutex_unlock(&chip->mutex); + return count; +} + +static ssize_t bh1770_get_prox_thres(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", chip->prox_threshold); +} + +static ssize_t bh1770_set_prox_thres(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + if (value > BH1770_PROX_RANGE) + return 
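+ /*
+  * Editorial sketch, not part of the original driver: the prox rate
+  * table is assumed to be sorted in descending order, so
+  * bh1770_prox_rate_validate() above returns the index of the fastest
+  * supported rate that does not exceed the request, falling back to
+  * the slowest entry. For a hypothetical table {100, 50, 10, 5} Hz:
+  *
+  *	bh1770_prox_rate_validate(200);	 returns 0 (clamped to 100 Hz)
+  *	bh1770_prox_rate_validate(40);	 returns 2 (10 Hz)
+  *	bh1770_prox_rate_validate(1);	 returns 3 (5 Hz, last entry)
+  */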
-EINVAL; + + mutex_lock(&chip->mutex); + chip->prox_threshold = value; + ret = bh1770_prox_set_threshold(chip); + mutex_unlock(&chip->mutex); + if (ret < 0) + return ret; + return count; +} + +static ssize_t bh1770_prox_persistence_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%u\n", chip->prox_persistence); +} + +static ssize_t bh1770_prox_persistence_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + if (value > BH1770_PROX_MAX_PERSISTENCE) + return -EINVAL; + + chip->prox_persistence = value; + + return len; +} + +static ssize_t bh1770_prox_abs_thres_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%u\n", chip->prox_abs_thres); +} + +static ssize_t bh1770_prox_abs_thres_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + if (value > BH1770_PROX_RANGE) + return -EINVAL; + + chip->prox_abs_thres = value; + + return len; +} + +static ssize_t bh1770_chip_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%s rev %d\n", chip->chipname, chip->revision); +} + +static ssize_t bh1770_lux_calib_default_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", BH1770_CALIB_SCALER); +} + +static ssize_t bh1770_lux_calib_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + ssize_t len; + + mutex_lock(&chip->mutex); + len = sprintf(buf, "%u\n", chip->lux_calib); + mutex_unlock(&chip->mutex); + return len; +} + +static ssize_t bh1770_lux_calib_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + u32 old_calib; + u32 new_corr; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + mutex_lock(&chip->mutex); + old_calib = chip->lux_calib; + chip->lux_calib = value; + new_corr = bh1770_get_corr_value(chip); + if (new_corr == 0) { + chip->lux_calib = old_calib; + mutex_unlock(&chip->mutex); + return -EINVAL; + } + chip->lux_corr = new_corr; + /* Refresh thresholds on HW after changing correction value */ + bh1770_lux_update_thresholds(chip, chip->lux_threshold_hi, + chip->lux_threshold_lo); + + mutex_unlock(&chip->mutex); + + return len; +} + +static ssize_t bh1770_get_lux_rate_avail(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int i; + int pos = 0; + for (i = 0; i < ARRAY_SIZE(lux_rates_hz); i++) + pos += sprintf(buf + pos, "%d ", lux_rates_hz[i]); + sprintf(buf + pos - 1, "\n"); + return pos; +} + +static ssize_t bh1770_get_lux_rate(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", lux_rates_hz[chip->lux_rate_index]); +} + +static ssize_t bh1770_set_lux_rate(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct bh1770_chip 
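+ /*
+  * Editorial note (an assumption, not stated in this file): lux0_calibscale
+  * acts as a fixed-point gain in which BH1770_CALIB_SCALER represents 1.0,
+  * which is why bh1770_lux_calib_default_show() above reports
+  * BH1770_CALIB_SCALER as the neutral value. Under that assumption, writing
+  * twice the default doubles the reported lux, and bh1770_lux_calib_store()
+  * rolls the value back if the derived correction factor would become 0.
+  */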
*chip = dev_get_drvdata(dev); + unsigned long rate_hz; + int ret, i; + + ret = kstrtoul(buf, 0, &rate_hz); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(lux_rates_hz) - 1; i++) + if (rate_hz >= lux_rates_hz[i]) + break; + + mutex_lock(&chip->mutex); + chip->lux_rate_index = i; + ret = bh1770_lux_rate(chip, i); + mutex_unlock(&chip->mutex); + + if (ret < 0) + return ret; + + return count; +} + +static ssize_t bh1770_get_lux_thresh_above(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", chip->lux_threshold_hi); +} + +static ssize_t bh1770_get_lux_thresh_below(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", chip->lux_threshold_lo); +} + +static ssize_t bh1770_set_lux_thresh(struct bh1770_chip *chip, u16 *target, + const char *buf) +{ + unsigned long thresh; + int ret; + + ret = kstrtoul(buf, 0, &thresh); + if (ret) + return ret; + + if (thresh > BH1770_LUX_RANGE) + return -EINVAL; + + mutex_lock(&chip->mutex); + *target = thresh; + /* + * Don't update values in HW if we are still waiting for + * first interrupt to come after device handle open call. + */ + if (!chip->lux_wait_result) + ret = bh1770_lux_update_thresholds(chip, + chip->lux_threshold_hi, + chip->lux_threshold_lo); + mutex_unlock(&chip->mutex); + return ret; + +} + +static ssize_t bh1770_set_lux_thresh_above(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + int ret = bh1770_set_lux_thresh(chip, &chip->lux_threshold_hi, buf); + if (ret < 0) + return ret; + return len; +} + +static ssize_t bh1770_set_lux_thresh_below(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + int ret = bh1770_set_lux_thresh(chip, &chip->lux_threshold_lo, buf); + if (ret < 0) + return ret; + return len; +} + +static DEVICE_ATTR(prox0_raw_en, S_IRUGO | S_IWUSR, bh1770_prox_enable_show, + bh1770_prox_enable_store); +static DEVICE_ATTR(prox0_thresh_above1_value, S_IRUGO | S_IWUSR, + bh1770_prox_abs_thres_show, + bh1770_prox_abs_thres_store); +static DEVICE_ATTR(prox0_thresh_above0_value, S_IRUGO | S_IWUSR, + bh1770_get_prox_thres, + bh1770_set_prox_thres); +static DEVICE_ATTR(prox0_raw, S_IRUGO, bh1770_prox_result_show, NULL); +static DEVICE_ATTR(prox0_sensor_range, S_IRUGO, bh1770_prox_range_show, NULL); +static DEVICE_ATTR(prox0_thresh_above_count, S_IRUGO | S_IWUSR, + bh1770_prox_persistence_show, + bh1770_prox_persistence_store); +static DEVICE_ATTR(prox0_rate_above, S_IRUGO | S_IWUSR, + bh1770_get_prox_rate_above, + bh1770_set_prox_rate_above); +static DEVICE_ATTR(prox0_rate_below, S_IRUGO | S_IWUSR, + bh1770_get_prox_rate_below, + bh1770_set_prox_rate_below); +static DEVICE_ATTR(prox0_rate_avail, S_IRUGO, bh1770_get_prox_rate_avail, NULL); + +static DEVICE_ATTR(lux0_calibscale, S_IRUGO | S_IWUSR, bh1770_lux_calib_show, + bh1770_lux_calib_store); +static DEVICE_ATTR(lux0_calibscale_default, S_IRUGO, + bh1770_lux_calib_default_show, + NULL); +static DEVICE_ATTR(lux0_input, S_IRUGO, bh1770_lux_result_show, NULL); +static DEVICE_ATTR(lux0_sensor_range, S_IRUGO, bh1770_lux_range_show, NULL); +static DEVICE_ATTR(lux0_rate, S_IRUGO | S_IWUSR, bh1770_get_lux_rate, + bh1770_set_lux_rate); +static DEVICE_ATTR(lux0_rate_avail, S_IRUGO, bh1770_get_lux_rate_avail, NULL); +static 
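+ /*
+  * Reader's note (added commentary): DEVICE_ATTR(name, mode, show, store)
+  * declares a struct device_attribute named dev_attr_<name>; that is why
+  * the sysfs_attrs[] table below can take addresses such as
+  * &dev_attr_lux0_input.attr. A generic sketch:
+  *
+  *	static DEVICE_ATTR(foo, S_IRUGO, foo_show, NULL);
+  *	(provides: struct device_attribute dev_attr_foo)
+  */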
DEVICE_ATTR(lux0_thresh_above_value, S_IRUGO | S_IWUSR, + bh1770_get_lux_thresh_above, + bh1770_set_lux_thresh_above); +static DEVICE_ATTR(lux0_thresh_below_value, S_IRUGO | S_IWUSR, + bh1770_get_lux_thresh_below, + bh1770_set_lux_thresh_below); +static DEVICE_ATTR(chip_id, S_IRUGO, bh1770_chip_id_show, NULL); +static DEVICE_ATTR(power_state, S_IRUGO | S_IWUSR, bh1770_power_state_show, + bh1770_power_state_store); + + +static struct attribute *sysfs_attrs[] = { + &dev_attr_lux0_calibscale.attr, + &dev_attr_lux0_calibscale_default.attr, + &dev_attr_lux0_input.attr, + &dev_attr_lux0_sensor_range.attr, + &dev_attr_lux0_rate.attr, + &dev_attr_lux0_rate_avail.attr, + &dev_attr_lux0_thresh_above_value.attr, + &dev_attr_lux0_thresh_below_value.attr, + &dev_attr_prox0_raw.attr, + &dev_attr_prox0_sensor_range.attr, + &dev_attr_prox0_raw_en.attr, + &dev_attr_prox0_thresh_above_count.attr, + &dev_attr_prox0_rate_above.attr, + &dev_attr_prox0_rate_below.attr, + &dev_attr_prox0_rate_avail.attr, + &dev_attr_prox0_thresh_above0_value.attr, + &dev_attr_prox0_thresh_above1_value.attr, + &dev_attr_chip_id.attr, + &dev_attr_power_state.attr, + NULL +}; + +static struct attribute_group bh1770_attribute_group = { + .attrs = sysfs_attrs +}; + +static int bh1770_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct bh1770_chip *chip; + int err; + + chip = devm_kzalloc(&client->dev, sizeof *chip, GFP_KERNEL); + if (!chip) + return -ENOMEM; + + i2c_set_clientdata(client, chip); + chip->client = client; + + mutex_init(&chip->mutex); + init_waitqueue_head(&chip->wait); + INIT_DELAYED_WORK(&chip->prox_work, bh1770_prox_work); + + if (client->dev.platform_data == NULL) { + dev_err(&client->dev, "platform data is mandatory\n"); + return -EINVAL; + } + + chip->pdata = client->dev.platform_data; + chip->lux_calib = BH1770_LUX_NEUTRAL_CALIB_VALUE; + chip->lux_rate_index = BH1770_LUX_DEFAULT_RATE; + chip->lux_threshold_lo = BH1770_LUX_DEF_THRES; + chip->lux_threshold_hi = BH1770_LUX_DEF_THRES; + + if (chip->pdata->glass_attenuation == 0) + chip->lux_ga = BH1770_NEUTRAL_GA; + else + chip->lux_ga = chip->pdata->glass_attenuation; + + chip->prox_threshold = BH1770_PROX_DEF_THRES; + chip->prox_led = chip->pdata->led_def_curr; + chip->prox_abs_thres = BH1770_PROX_DEF_ABS_THRES; + chip->prox_persistence = BH1770_DEFAULT_PERSISTENCE; + chip->prox_rate_threshold = BH1770_PROX_DEF_RATE_THRESH; + chip->prox_rate = BH1770_PROX_DEFAULT_RATE; + chip->prox_data = 0; + + chip->regs[0].supply = reg_vcc; + chip->regs[1].supply = reg_vleds; + + err = devm_regulator_bulk_get(&client->dev, + ARRAY_SIZE(chip->regs), chip->regs); + if (err < 0) { + dev_err(&client->dev, "Cannot get regulators\n"); + return err; + } + + err = regulator_bulk_enable(ARRAY_SIZE(chip->regs), + chip->regs); + if (err < 0) { + dev_err(&client->dev, "Cannot enable regulators\n"); + return err; + } + + usleep_range(BH1770_STARTUP_DELAY, BH1770_STARTUP_DELAY * 2); + err = bh1770_detect(chip); + if (err < 0) + goto fail0; + + /* Start chip */ + bh1770_chip_on(chip); + pm_runtime_set_active(&client->dev); + pm_runtime_enable(&client->dev); + + chip->lux_corr = bh1770_get_corr_value(chip); + if (chip->lux_corr == 0) { + dev_err(&client->dev, "Improper correction values\n"); + err = -EINVAL; + goto fail0; + } + + if (chip->pdata->setup_resources) { + err = chip->pdata->setup_resources(); + if (err) { + err = -EINVAL; + goto fail0; + } + } + + err = sysfs_create_group(&chip->client->dev.kobj, + &bh1770_attribute_group); + if (err < 0) { + 
dev_err(&chip->client->dev, "Sysfs registration failed\n"); + goto fail1; + } + + /* + * The chip needs a level-triggered interrupt to work. However, + * level triggering doesn't always work correctly with power + * management, so select both. + */ + err = request_threaded_irq(client->irq, NULL, + bh1770_irq, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT | + IRQF_TRIGGER_LOW, + "bh1770", chip); + if (err) { + dev_err(&client->dev, "could not get IRQ %d\n", + client->irq); + goto fail2; + } + regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs); + return err; +fail2: + sysfs_remove_group(&chip->client->dev.kobj, + &bh1770_attribute_group); +fail1: + if (chip->pdata->release_resources) + chip->pdata->release_resources(); +fail0: + regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs); + return err; +} + +static int bh1770_remove(struct i2c_client *client) +{ + struct bh1770_chip *chip = i2c_get_clientdata(client); + + free_irq(client->irq, chip); + + sysfs_remove_group(&chip->client->dev.kobj, + &bh1770_attribute_group); + + if (chip->pdata->release_resources) + chip->pdata->release_resources(); + + cancel_delayed_work_sync(&chip->prox_work); + + if (!pm_runtime_suspended(&client->dev)) + bh1770_chip_off(chip); + + pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int bh1770_suspend(struct device *dev) +{ + struct i2c_client *client = container_of(dev, struct i2c_client, dev); + struct bh1770_chip *chip = i2c_get_clientdata(client); + + bh1770_chip_off(chip); + + return 0; +} + +static int bh1770_resume(struct device *dev) +{ + struct i2c_client *client = container_of(dev, struct i2c_client, dev); + struct bh1770_chip *chip = i2c_get_clientdata(client); + int ret = 0; + + bh1770_chip_on(chip); + + if (!pm_runtime_suspended(dev)) { + /* + * If we were enabled at suspend time, everything is + * expected to work nicely and smoothly + */ + ret = bh1770_lux_rate(chip, chip->lux_rate_index); + ret |= bh1770_lux_interrupt_control(chip, BH1770_ENABLE); + + /* This causes interrupt after the next measurement cycle */ + bh1770_lux_update_thresholds(chip, BH1770_LUX_DEF_THRES, + BH1770_LUX_DEF_THRES); + /* Inform that we are waiting for a result from ALS */ + chip->lux_wait_result = true; + bh1770_prox_mode_control(chip); + } + return ret; +} +#endif + +#ifdef CONFIG_PM +static int bh1770_runtime_suspend(struct device *dev) +{ + struct i2c_client *client = container_of(dev, struct i2c_client, dev); + struct bh1770_chip *chip = i2c_get_clientdata(client); + + bh1770_chip_off(chip); + + return 0; +} + +static int bh1770_runtime_resume(struct device *dev) +{ + struct i2c_client *client = container_of(dev, struct i2c_client, dev); + struct bh1770_chip *chip = i2c_get_clientdata(client); + + bh1770_chip_on(chip); + + return 0; +} +#endif + +static const struct i2c_device_id bh1770_id[] = { + {"bh1770glc", 0 }, + {"sfh7770", 0 }, + {} +}; + +MODULE_DEVICE_TABLE(i2c, bh1770_id); + +static const struct dev_pm_ops bh1770_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(bh1770_suspend, bh1770_resume) + SET_RUNTIME_PM_OPS(bh1770_runtime_suspend, bh1770_runtime_resume, NULL) +}; + +static struct i2c_driver bh1770_driver = { + .driver = { + .name = "bh1770glc", + .owner = THIS_MODULE, + .pm = &bh1770_pm_ops, + }, + .probe = bh1770_probe, + .remove = bh1770_remove, + .id_table = bh1770_id, +}; + +module_i2c_driver(bh1770_driver); + +MODULE_DESCRIPTION("BH1770GLC / SFH7770 combined ALS and proximity sensor"); +MODULE_AUTHOR("Samu Onkalo, Nokia Corporation");
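+ /*
+  * Usage sketch (editorial addition, not part of the driver): the sysfs
+  * files above carry plain text, so userspace only needs open() and
+  * read(). The device path below is hypothetical; the real one depends on
+  * the I2C bus and address the sensor sits on, and lux0_input returns
+  * -EIO unless power_state has been enabled first.
+  *
+  *	#include <fcntl.h>
+  *	#include <stdio.h>
+  *	#include <unistd.h>
+  *
+  *	int main(void)
+  *	{
+  *		char buf[16];
+  *		ssize_t n;
+  *		int fd = open("/sys/bus/i2c/devices/2-0038/lux0_input",
+  *			      O_RDONLY);
+  *		if (fd < 0)
+  *			return 1;
+  *		n = read(fd, buf, sizeof(buf) - 1);
+  *		if (n > 0) {
+  *			buf[n] = '\0';
+  *			printf("lux: %s", buf);
+  *		}
+  *		close(fd);
+  *		return 0;
+  *	}
+  */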
+MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/misc/bh1780gli.c b/kernel/drivers/misc/bh1780gli.c new file mode 100644 index 000000000..7f90ce5a5 --- /dev/null +++ b/kernel/drivers/misc/bh1780gli.c @@ -0,0 +1,259 @@ +/* + * bh1780gli.c + * ROHM Ambient Light Sensor Driver + * + * Copyright (C) 2010 Texas Instruments + * Author: Hemanth V <hemanthv@ti.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/i2c.h> +#include <linux/slab.h> +#include <linux/mutex.h> +#include <linux/platform_device.h> +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/of.h> + +#define BH1780_REG_CONTROL 0x80 +#define BH1780_REG_PARTID 0x8A +#define BH1780_REG_MANFID 0x8B +#define BH1780_REG_DLOW 0x8C +#define BH1780_REG_DHIGH 0x8D + +#define BH1780_REVMASK (0xf) +#define BH1780_POWMASK (0x3) +#define BH1780_POFF (0x0) +#define BH1780_PON (0x3) + +/* power on settling time in ms */ +#define BH1780_PON_DELAY 2 + +struct bh1780_data { + struct i2c_client *client; + int power_state; + /* lock for sysfs operations */ + struct mutex lock; +}; + +static int bh1780_write(struct bh1780_data *ddata, u8 reg, u8 val, char *msg) +{ + int ret = i2c_smbus_write_byte_data(ddata->client, reg, val); + if (ret < 0) + dev_err(&ddata->client->dev, + "i2c_smbus_write_byte_data failed error %d Register (%s)\n", + ret, msg); + return ret; +} + +static int bh1780_read(struct bh1780_data *ddata, u8 reg, char *msg) +{ + int ret = i2c_smbus_read_byte_data(ddata->client, reg); + if (ret < 0) + dev_err(&ddata->client->dev, + "i2c_smbus_read_byte_data failed error %d Register (%s)\n", + ret, msg); + return ret; +} + +static ssize_t bh1780_show_lux(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct platform_device *pdev = to_platform_device(dev); + struct bh1780_data *ddata = platform_get_drvdata(pdev); + int lsb, msb; + + lsb = bh1780_read(ddata, BH1780_REG_DLOW, "DLOW"); + if (lsb < 0) + return lsb; + + msb = bh1780_read(ddata, BH1780_REG_DHIGH, "DHIGH"); + if (msb < 0) + return msb; + + return sprintf(buf, "%d\n", (msb << 8) | lsb); +} + +static ssize_t bh1780_show_power_state(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct platform_device *pdev = to_platform_device(dev); + struct bh1780_data *ddata = platform_get_drvdata(pdev); + int state; + + state = bh1780_read(ddata, BH1780_REG_CONTROL, "CONTROL"); + if (state < 0) + return state; + + return sprintf(buf, "%d\n", state & BH1780_POWMASK); +} + +static ssize_t bh1780_store_power_state(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct platform_device *pdev = to_platform_device(dev); + struct bh1780_data *ddata = platform_get_drvdata(pdev); + unsigned long val; + int error; + + error = kstrtoul(buf, 0, &val); + if (error) + return error; + + if (val < BH1780_POFF || val > BH1780_PON) + return -EINVAL; + + mutex_lock(&ddata->lock); + + error = bh1780_write(ddata, BH1780_REG_CONTROL, val, "CONTROL"); + if 
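+ /*
+  * Added note: the range check earlier in this function derives from
+  * BH1780_POWMASK, accepting only 0..3; BH1780_POFF (0) and BH1780_PON (3)
+  * are the two meaningful states. The msleep(BH1780_PON_DELAY) below
+  * honours the chip's documented power-on settling time before the new
+  * state is recorded in ddata->power_state.
+  */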
(error < 0) { + mutex_unlock(&ddata->lock); + return error; + } + + msleep(BH1780_PON_DELAY); + ddata->power_state = val; + mutex_unlock(&ddata->lock); + + return count; +} + +static DEVICE_ATTR(lux, S_IRUGO, bh1780_show_lux, NULL); + +static DEVICE_ATTR(power_state, S_IWUSR | S_IRUGO, + bh1780_show_power_state, bh1780_store_power_state); + +static struct attribute *bh1780_attributes[] = { + &dev_attr_power_state.attr, + &dev_attr_lux.attr, + NULL +}; + +static const struct attribute_group bh1780_attr_group = { + .attrs = bh1780_attributes, +}; + +static int bh1780_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int ret; + struct bh1780_data *ddata; + struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE)) + return -EIO; + + ddata = devm_kzalloc(&client->dev, sizeof(struct bh1780_data), + GFP_KERNEL); + if (ddata == NULL) + return -ENOMEM; + + ddata->client = client; + i2c_set_clientdata(client, ddata); + + ret = bh1780_read(ddata, BH1780_REG_PARTID, "PART ID"); + if (ret < 0) + return ret; + + dev_info(&client->dev, "Ambient Light Sensor, Rev : %d\n", + (ret & BH1780_REVMASK)); + + mutex_init(&ddata->lock); + + return sysfs_create_group(&client->dev.kobj, &bh1780_attr_group); +} + +static int bh1780_remove(struct i2c_client *client) +{ + sysfs_remove_group(&client->dev.kobj, &bh1780_attr_group); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int bh1780_suspend(struct device *dev) +{ + struct bh1780_data *ddata; + int state, ret; + struct i2c_client *client = to_i2c_client(dev); + + ddata = i2c_get_clientdata(client); + state = bh1780_read(ddata, BH1780_REG_CONTROL, "CONTROL"); + if (state < 0) + return state; + + ddata->power_state = state & BH1780_POWMASK; + + ret = bh1780_write(ddata, BH1780_REG_CONTROL, BH1780_POFF, + "CONTROL"); + + if (ret < 0) + return ret; + + return 0; +} + +static int bh1780_resume(struct device *dev) +{ + struct bh1780_data *ddata; + int state, ret; + struct i2c_client *client = to_i2c_client(dev); + + ddata = i2c_get_clientdata(client); + state = ddata->power_state; + ret = bh1780_write(ddata, BH1780_REG_CONTROL, state, + "CONTROL"); + + if (ret < 0) + return ret; + + return 0; +} +#endif /* CONFIG_PM_SLEEP */ + +static SIMPLE_DEV_PM_OPS(bh1780_pm, bh1780_suspend, bh1780_resume); + +static const struct i2c_device_id bh1780_id[] = { + { "bh1780", 0 }, + { }, +}; + +MODULE_DEVICE_TABLE(i2c, bh1780_id); + +#ifdef CONFIG_OF +static const struct of_device_id of_bh1780_match[] = { + { .compatible = "rohm,bh1780gli", }, + {}, +}; + +MODULE_DEVICE_TABLE(of, of_bh1780_match); +#endif + +static struct i2c_driver bh1780_driver = { + .probe = bh1780_probe, + .remove = bh1780_remove, + .id_table = bh1780_id, + .driver = { + .name = "bh1780", + .pm = &bh1780_pm, + .of_match_table = of_match_ptr(of_bh1780_match), + }, +}; + +module_i2c_driver(bh1780_driver); + +MODULE_DESCRIPTION("BH1780GLI Ambient Light Sensor Driver"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Hemanth V <hemanthv@ti.com>"); diff --git a/kernel/drivers/misc/bmp085-i2c.c b/kernel/drivers/misc/bmp085-i2c.c new file mode 100644 index 000000000..a7c16295b --- /dev/null +++ b/kernel/drivers/misc/bmp085-i2c.c @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2012 Bosch Sensortec GmbH + * Copyright (c) 2012 Unixphere AB + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the 
License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/module.h> +#include <linux/i2c.h> +#include <linux/err.h> +#include "bmp085.h" + +#define BMP085_I2C_ADDRESS 0x77 + +static const unsigned short normal_i2c[] = { BMP085_I2C_ADDRESS, + I2C_CLIENT_END }; + +static int bmp085_i2c_detect(struct i2c_client *client, + struct i2c_board_info *info) +{ + if (client->addr != BMP085_I2C_ADDRESS) + return -ENODEV; + + return bmp085_detect(&client->dev); +} + +static int bmp085_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int err; + struct regmap *regmap = devm_regmap_init_i2c(client, + &bmp085_regmap_config); + + if (IS_ERR(regmap)) { + err = PTR_ERR(regmap); + dev_err(&client->dev, "Failed to init regmap: %d\n", err); + return err; + } + + return bmp085_probe(&client->dev, regmap, client->irq); +} + +static int bmp085_i2c_remove(struct i2c_client *client) +{ + return bmp085_remove(&client->dev); +} + +static const struct i2c_device_id bmp085_id[] = { + { BMP085_NAME, 0 }, + { "bmp180", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, bmp085_id); + +static struct i2c_driver bmp085_i2c_driver = { + .driver = { + .owner = THIS_MODULE, + .name = BMP085_NAME, + }, + .id_table = bmp085_id, + .probe = bmp085_i2c_probe, + .remove = bmp085_i2c_remove, + + .detect = bmp085_i2c_detect, + .address_list = normal_i2c +}; + +module_i2c_driver(bmp085_i2c_driver); + +MODULE_AUTHOR("Eric Andersson <eric.andersson@unixphere.com>"); +MODULE_DESCRIPTION("BMP085 I2C bus driver"); +MODULE_LICENSE("GPL"); diff --git a/kernel/drivers/misc/bmp085-spi.c b/kernel/drivers/misc/bmp085-spi.c new file mode 100644 index 000000000..864ecac32 --- /dev/null +++ b/kernel/drivers/misc/bmp085-spi.c @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2012 Bosch Sensortec GmbH + * Copyright (c) 2012 Unixphere AB + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include <linux/module.h> +#include <linux/spi/spi.h> +#include <linux/err.h> +#include "bmp085.h" + +static int bmp085_spi_probe(struct spi_device *client) +{ + int err; + struct regmap *regmap; + + client->bits_per_word = 8; + err = spi_setup(client); + if (err < 0) { + dev_err(&client->dev, "spi_setup failed!\n"); + return err; + } + + regmap = devm_regmap_init_spi(client, &bmp085_regmap_config); + if (IS_ERR(regmap)) { + err = PTR_ERR(regmap); + dev_err(&client->dev, "Failed to init regmap: %d\n", err); + return err; + } + + return bmp085_probe(&client->dev, regmap, client->irq); +} + +static int bmp085_spi_remove(struct spi_device *client) +{ + return bmp085_remove(&client->dev); +} + +static const struct of_device_id bmp085_of_match[] = { + { .compatible = "bosch,bmp085", }, + { }, +}; +MODULE_DEVICE_TABLE(of, bmp085_of_match); + +static const struct spi_device_id bmp085_id[] = { + { "bmp180", 0 }, + { "bmp181", 0 }, + { } +}; +MODULE_DEVICE_TABLE(spi, bmp085_id); + +static struct spi_driver bmp085_spi_driver = { + .driver = { + .owner = THIS_MODULE, + .name = BMP085_NAME, + .of_match_table = bmp085_of_match + }, + .id_table = bmp085_id, + .probe = bmp085_spi_probe, + .remove = bmp085_spi_remove +}; + +module_spi_driver(bmp085_spi_driver); + +MODULE_AUTHOR("Eric Andersson <eric.andersson@unixphere.com>"); +MODULE_DESCRIPTION("BMP085 SPI bus driver"); +MODULE_LICENSE("GPL"); diff --git a/kernel/drivers/misc/bmp085.c b/kernel/drivers/misc/bmp085.c new file mode 100644 index 000000000..9b313f781 --- /dev/null +++ b/kernel/drivers/misc/bmp085.c @@ -0,0 +1,506 @@ +/* Copyright (c) 2010 Christoph Mair <christoph.mair@gmail.com> + * Copyright (c) 2012 Bosch Sensortec GmbH + * Copyright (c) 2012 Unixphere AB + * + * This driver supports the bmp085 and bmp18x digital barometric pressure + * and temperature sensors from Bosch Sensortec. The datasheets + * are available from their website: + * http://www.bosch-sensortec.com/content/language1/downloads/BST-BMP085-DS000-05.pdf + * http://www.bosch-sensortec.com/content/language1/downloads/BST-BMP180-DS000-07.pdf + * + * A pressure measurement is issued by reading from pressure0_input. + * The return value ranges from 30000 to 110000 pascal with a resolution + * of 1 pascal (0.01 millibar) which enables measurements from 9000m above + * to 500m below sea level. + * + * The temperature can be read from temp0_input. Values range from + * -400 to 850 representing the ambient temperature in degrees celsius + * multiplied by 10. The resolution is 0.1 celsius. + * + * Because ambient pressure is temperature dependent, a temperature + * measurement will be executed automatically even if the user is reading + * from pressure0_input. This happens if the last temperature measurement + * has been executed more than one second ago. + * + * To decrease RMS noise from pressure measurements, the bmp085 can + * autonomously calculate the average of up to eight samples. This is + * set up by writing to the oversampling sysfs file. Accepted values + * are 0, 1, 2 and 3. 2^x, where x is the value written to this file, + * specifies the number of samples used to calculate the ambient pressure. + * RMS noise is specified as six pascal (without averaging) and decreases + * to 3 pascal when using an oversampling setting of 3.
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/module.h> +#include <linux/device.h> +#include <linux/slab.h> +#include <linux/of.h> +#include "bmp085.h" +#include <linux/interrupt.h> +#include <linux/completion.h> +#include <linux/gpio.h> + +#define BMP085_CHIP_ID 0x55 +#define BMP085_CALIBRATION_DATA_START 0xAA +#define BMP085_CALIBRATION_DATA_LENGTH 11 /* 16 bit values */ +#define BMP085_CHIP_ID_REG 0xD0 +#define BMP085_CTRL_REG 0xF4 +#define BMP085_TEMP_MEASUREMENT 0x2E +#define BMP085_PRESSURE_MEASUREMENT 0x34 +#define BMP085_CONVERSION_REGISTER_MSB 0xF6 +#define BMP085_CONVERSION_REGISTER_LSB 0xF7 +#define BMP085_CONVERSION_REGISTER_XLSB 0xF8 +#define BMP085_TEMP_CONVERSION_TIME 5 + +struct bmp085_calibration_data { + s16 AC1, AC2, AC3; + u16 AC4, AC5, AC6; + s16 B1, B2; + s16 MB, MC, MD; +}; + +struct bmp085_data { + struct device *dev; + struct regmap *regmap; + struct mutex lock; + struct bmp085_calibration_data calibration; + u8 oversampling_setting; + u32 raw_temperature; + u32 raw_pressure; + u32 temp_measurement_period; + unsigned long last_temp_measurement; + u8 chip_id; + s32 b6; /* calculated temperature correction coefficient */ + int irq; + struct completion done; +}; + +static irqreturn_t bmp085_eoc_isr(int irq, void *devid) +{ + struct bmp085_data *data = devid; + + complete(&data->done); + + return IRQ_HANDLED; +} + +static s32 bmp085_read_calibration_data(struct bmp085_data *data) +{ + u16 tmp[BMP085_CALIBRATION_DATA_LENGTH]; + struct bmp085_calibration_data *cali = &(data->calibration); + s32 status = regmap_bulk_read(data->regmap, + BMP085_CALIBRATION_DATA_START, (u8 *)tmp, + (BMP085_CALIBRATION_DATA_LENGTH << 1)); + if (status < 0) + return status; + + cali->AC1 = be16_to_cpu(tmp[0]); + cali->AC2 = be16_to_cpu(tmp[1]); + cali->AC3 = be16_to_cpu(tmp[2]); + cali->AC4 = be16_to_cpu(tmp[3]); + cali->AC5 = be16_to_cpu(tmp[4]); + cali->AC6 = be16_to_cpu(tmp[5]); + cali->B1 = be16_to_cpu(tmp[6]); + cali->B2 = be16_to_cpu(tmp[7]); + cali->MB = be16_to_cpu(tmp[8]); + cali->MC = be16_to_cpu(tmp[9]); + cali->MD = be16_to_cpu(tmp[10]); + return 0; +} + +static s32 bmp085_update_raw_temperature(struct bmp085_data *data) +{ + u16 tmp; + s32 status; + + mutex_lock(&data->lock); + + init_completion(&data->done); + + status = regmap_write(data->regmap, BMP085_CTRL_REG, + BMP085_TEMP_MEASUREMENT); + if (status < 0) { + dev_err(data->dev, + "Error while requesting temperature measurement.\n"); + goto exit; + } + wait_for_completion_timeout(&data->done, 1 + msecs_to_jiffies( + BMP085_TEMP_CONVERSION_TIME)); + + status = regmap_bulk_read(data->regmap, BMP085_CONVERSION_REGISTER_MSB, + &tmp, sizeof(tmp)); + if (status < 0) { + dev_err(data->dev, + "Error while reading temperature measurement result\n"); + goto exit; + } + data->raw_temperature = be16_to_cpu(tmp); + data->last_temp_measurement = jiffies; + 
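+ /*
+  * Added illustration: the conversion result just stored is a big-endian
+  * 16-bit word read from BMP085_CONVERSION_REGISTER_MSB/LSB (0xF6/0xF7).
+  * For example, raw bytes 0x6C 0xFA would decode to 0x6CFA = 27898, the
+  * kind of uncompensated temperature value that bmp085_get_temperature()
+  * feeds into the calibration math. The example bytes are made up for
+  * illustration only.
+  */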
status = 0; /* everything ok, return 0 */ + +exit: + mutex_unlock(&data->lock); + return status; +} + +static s32 bmp085_update_raw_pressure(struct bmp085_data *data) +{ + u32 tmp = 0; + s32 status; + + mutex_lock(&data->lock); + + init_completion(&data->done); + + status = regmap_write(data->regmap, BMP085_CTRL_REG, + BMP085_PRESSURE_MEASUREMENT + + (data->oversampling_setting << 6)); + if (status < 0) { + dev_err(data->dev, + "Error while requesting pressure measurement.\n"); + goto exit; + } + + /* wait for the end of conversion */ + wait_for_completion_timeout(&data->done, 1 + msecs_to_jiffies( + 2+(3 << data->oversampling_setting))); + /* copy data into a u32 (4 bytes), but skip the first byte. */ + status = regmap_bulk_read(data->regmap, BMP085_CONVERSION_REGISTER_MSB, + ((u8 *)&tmp)+1, 3); + if (status < 0) { + dev_err(data->dev, + "Error while reading pressure measurement results\n"); + goto exit; + } + data->raw_pressure = be32_to_cpu((tmp)); + data->raw_pressure >>= (8-data->oversampling_setting); + status = 0; /* everything ok, return 0 */ + +exit: + mutex_unlock(&data->lock); + return status; +} + +/* + * This function starts the temperature measurement and returns the value + * in tenths of a degree celsius. + */ +static s32 bmp085_get_temperature(struct bmp085_data *data, int *temperature) +{ + struct bmp085_calibration_data *cali = &data->calibration; + long x1, x2; + int status; + + status = bmp085_update_raw_temperature(data); + if (status < 0) + goto exit; + + x1 = ((data->raw_temperature - cali->AC6) * cali->AC5) >> 15; + x2 = (cali->MC << 11) / (x1 + cali->MD); + data->b6 = x1 + x2 - 4000; + /* if NULL just update b6. Used for pressure only measurements */ + if (temperature != NULL) + *temperature = (x1+x2+8) >> 4; + +exit: + return status; +} + +/* + * This function starts the pressure measurement and returns the value + * in pascal. Since the pressure depends on the ambient temperature, + * a temperature measurement is executed according to the given temperature + * measurement period (default is 1 sec boundary). This period could vary + * and needs to be adjusted according to the sensor environment, i.e. if + * there are big temperature variations then the temperature needs to be + * read out more often. + */ +static s32 bmp085_get_pressure(struct bmp085_data *data, int *pressure) +{ + struct bmp085_calibration_data *cali = &data->calibration; + s32 x1, x2, x3, b3; + u32 b4, b7; + s32 p; + int status; + + /* at least every second force an update of the ambient temperature */ + if ((data->last_temp_measurement == 0) || + time_is_before_jiffies(data->last_temp_measurement + 1*HZ)) { + status = bmp085_get_temperature(data, NULL); + if (status < 0) + return status; + } + + status = bmp085_update_raw_pressure(data); + if (status < 0) + return status; + + x1 = (data->b6 * data->b6) >> 12; + x1 *= cali->B2; + x1 >>= 11; + + x2 = cali->AC2 * data->b6; + x2 >>= 11; + + x3 = x1 + x2; + + b3 = (((((s32)cali->AC1) * 4 + x3) << data->oversampling_setting) + 2); + b3 >>= 2; + + x1 = (cali->AC3 * data->b6) >> 13; + x2 = (cali->B1 * ((data->b6 * data->b6) >> 12)) >> 16; + x3 = (x1 + x2 + 2) >> 2; + b4 = (cali->AC4 * (u32)(x3 + 32768)) >> 15; + + b7 = ((u32)data->raw_pressure - b3) * + (50000 >> data->oversampling_setting); + p = ((b7 < 0x80000000) ? ((b7 << 1) / b4) : ((b7 / b4) * 2)); + + x1 = p >> 8; + x1 *= x1; + x1 = (x1 * 3038) >> 16; + x2 = (-7357 * p) >> 16; + p += (x1 + x2 + 3791) >> 4; + + *pressure = p; + + return 0; +} + +/* + * This function sets the chip-internal oversampling.
Valid values are 0..3. + * The chip will use 2^oversampling samples for internal averaging. + * This influences the measurement time and the accuracy; larger values + * increase both. The datasheet gives an overview on how measurement time, + * accuracy and noise correlate. + */ +static void bmp085_set_oversampling(struct bmp085_data *data, + unsigned char oversampling) +{ + if (oversampling > 3) + oversampling = 3; + data->oversampling_setting = oversampling; +} + +/* + * Returns the currently selected oversampling. Range: 0..3 + */ +static unsigned char bmp085_get_oversampling(struct bmp085_data *data) +{ + return data->oversampling_setting; +} + +/* sysfs callbacks */ +static ssize_t set_oversampling(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct bmp085_data *data = dev_get_drvdata(dev); + unsigned long oversampling; + int err = kstrtoul(buf, 10, &oversampling); + + if (err == 0) { + mutex_lock(&data->lock); + bmp085_set_oversampling(data, oversampling); + mutex_unlock(&data->lock); + return count; + } + + return err; +} + +static ssize_t show_oversampling(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bmp085_data *data = dev_get_drvdata(dev); + + return sprintf(buf, "%u\n", bmp085_get_oversampling(data)); +} +static DEVICE_ATTR(oversampling, S_IWUSR | S_IRUGO, + show_oversampling, set_oversampling); + + +static ssize_t show_temperature(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int temperature; + int status; + struct bmp085_data *data = dev_get_drvdata(dev); + + status = bmp085_get_temperature(data, &temperature); + if (status < 0) + return status; + else + return sprintf(buf, "%d\n", temperature); +} +static DEVICE_ATTR(temp0_input, S_IRUGO, show_temperature, NULL); + + +static ssize_t show_pressure(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int pressure; + int status; + struct bmp085_data *data = dev_get_drvdata(dev); + + status = bmp085_get_pressure(data, &pressure); + if (status < 0) + return status; + else + return sprintf(buf, "%d\n", pressure); +} +static DEVICE_ATTR(pressure0_input, S_IRUGO, show_pressure, NULL); + + +static struct attribute *bmp085_attributes[] = { + &dev_attr_temp0_input.attr, + &dev_attr_pressure0_input.attr, + &dev_attr_oversampling.attr, + NULL +}; + +static const struct attribute_group bmp085_attr_group = { + .attrs = bmp085_attributes, +}; + +int bmp085_detect(struct device *dev) +{ + struct bmp085_data *data = dev_get_drvdata(dev); + unsigned int id; + int ret; + + ret = regmap_read(data->regmap, BMP085_CHIP_ID_REG, &id); + if (ret < 0) + return ret; + + if (id != data->chip_id) + return -ENODEV; + + return 0; +} +EXPORT_SYMBOL_GPL(bmp085_detect); + +static void bmp085_get_of_properties(struct bmp085_data *data) +{ +#ifdef CONFIG_OF + struct device_node *np = data->dev->of_node; + u32 prop; + + if (!np) + return; + + if (!of_property_read_u32(np, "chip-id", &prop)) + data->chip_id = prop & 0xff; + + if (!of_property_read_u32(np, "temp-measurement-period", &prop)) + data->temp_measurement_period = (prop/100)*HZ; + + if (!of_property_read_u32(np, "default-oversampling", &prop)) + data->oversampling_setting = prop & 0xff; +#endif +} + +static int bmp085_init_client(struct bmp085_data *data) +{ + int status = bmp085_read_calibration_data(data); + + if (status < 0) + return status; + + /* default settings */ + data->chip_id = BMP085_CHIP_ID; + data->last_temp_measurement = 0; + data->temp_measurement_period = 1*HZ; + 
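+ /*
+  * Added note: the default below is oversampling 3, i.e. 2^3 = 8 internal
+  * samples per pressure reading. With the conversion wait used in
+  * bmp085_update_raw_pressure(), oss = 3 budgets 2 + (3 << 3) = 26 ms per
+  * measurement versus 2 + (3 << 0) = 5 ms at oss = 0.
+  */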
data->oversampling_setting = 3; + + bmp085_get_of_properties(data); + + mutex_init(&data->lock); + + return 0; +} + +struct regmap_config bmp085_regmap_config = { + .reg_bits = 8, + .val_bits = 8 +}; +EXPORT_SYMBOL_GPL(bmp085_regmap_config); + +int bmp085_probe(struct device *dev, struct regmap *regmap, int irq) +{ + struct bmp085_data *data; + int err = 0; + + data = kzalloc(sizeof(struct bmp085_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + dev_set_drvdata(dev, data); + data->dev = dev; + data->regmap = regmap; + data->irq = irq; + + if (data->irq > 0) { + err = devm_request_irq(dev, data->irq, bmp085_eoc_isr, + IRQF_TRIGGER_RISING, "bmp085", + data); + if (err < 0) + goto exit_free; + } + + /* Initialize the BMP085 chip */ + err = bmp085_init_client(data); + if (err < 0) + goto exit_free; + + err = bmp085_detect(dev); + if (err < 0) { + dev_err(dev, "%s: chip_id failed!\n", BMP085_NAME); + goto exit_free; + } + + /* Register sysfs hooks */ + err = sysfs_create_group(&dev->kobj, &bmp085_attr_group); + if (err) + goto exit_free; + + dev_info(dev, "Successfully initialized %s!\n", BMP085_NAME); + + return 0; + +exit_free: + kfree(data); +exit: + return err; +} +EXPORT_SYMBOL_GPL(bmp085_probe); + +int bmp085_remove(struct device *dev) +{ + struct bmp085_data *data = dev_get_drvdata(dev); + + sysfs_remove_group(&data->dev->kobj, &bmp085_attr_group); + kfree(data); + + return 0; +} +EXPORT_SYMBOL_GPL(bmp085_remove); + +MODULE_AUTHOR("Christoph Mair <christoph.mair@gmail.com>"); +MODULE_DESCRIPTION("BMP085 driver"); +MODULE_LICENSE("GPL"); diff --git a/kernel/drivers/misc/bmp085.h b/kernel/drivers/misc/bmp085.h new file mode 100644 index 000000000..8b8e3b1f5 --- /dev/null +++ b/kernel/drivers/misc/bmp085.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2012 Bosch Sensortec GmbH + * Copyright (c) 2012 Unixphere AB + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _BMP085_H +#define _BMP085_H + +#include <linux/regmap.h> + +#define BMP085_NAME "bmp085" + +extern struct regmap_config bmp085_regmap_config; + +int bmp085_probe(struct device *dev, struct regmap *regmap, int irq); +int bmp085_remove(struct device *dev); +int bmp085_detect(struct device *dev); + +#endif diff --git a/kernel/drivers/misc/c2port/Kconfig b/kernel/drivers/misc/c2port/Kconfig new file mode 100644 index 000000000..0dd690e61 --- /dev/null +++ b/kernel/drivers/misc/c2port/Kconfig @@ -0,0 +1,34 @@ +# +# C2 port devices +# + +menuconfig C2PORT + tristate "Silicon Labs C2 port support" + default n + help + This option enables support for the Silicon Labs C2 port used to + program Silicon Labs microcontroller chips (and other 8051-compatible + chips). + + If your board has no such microcontrollers you don't need this + interface at all. + + To compile this driver as a module, choose M here: the module will + be called c2port_core.
Note that you also need a client module + usually called c2port-*. + + If you are not sure, say N here. + +if C2PORT + +config C2PORT_DURAMAR_2150 + tristate "C2 port support for Eurotech's Duramar 2150" + depends on X86 + default n + help + This option enables C2 support for the Eurotech Duramar 2150 + on-board microcontroller. + + To compile this driver as a module, choose M here: the module will + be called c2port-duramar2150. + +endif # C2PORT diff --git a/kernel/drivers/misc/c2port/Makefile b/kernel/drivers/misc/c2port/Makefile new file mode 100644 index 000000000..3b2cf43d6 --- /dev/null +++ b/kernel/drivers/misc/c2port/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_C2PORT) += core.o + +obj-$(CONFIG_C2PORT_DURAMAR_2150) += c2port-duramar2150.o diff --git a/kernel/drivers/misc/c2port/c2port-duramar2150.c b/kernel/drivers/misc/c2port/c2port-duramar2150.c new file mode 100644 index 000000000..5484301d5 --- /dev/null +++ b/kernel/drivers/misc/c2port/c2port-duramar2150.c @@ -0,0 +1,159 @@ +/* + * Silicon Labs C2 port Linux support for Eurotech Duramar 2150 + * + * Copyright (c) 2008 Rodolfo Giometti <giometti@linux.it> + * Copyright (c) 2008 Eurotech S.p.A. <info@eurotech.it> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation + */ + +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/io.h> +#include <linux/ioport.h> +#include <linux/c2port.h> + +#define DATA_PORT 0x325 +#define DIR_PORT 0x326 +#define C2D (1 << 0) +#define C2CK (1 << 1) + +static DEFINE_MUTEX(update_lock); + +/* + * C2 port operations + */ + +static void duramar2150_c2port_access(struct c2port_device *dev, int status) +{ + u8 v; + + mutex_lock(&update_lock); + + v = inb(DIR_PORT); + + /* 0 = input, 1 = output */ + if (status) + outb(v | (C2D | C2CK), DIR_PORT); + else + /* When access is "off" it is important that both lines are set + * as inputs or hi-impedance */ + outb(v & ~(C2D | C2CK), DIR_PORT); + + mutex_unlock(&update_lock); +} + +static void duramar2150_c2port_c2d_dir(struct c2port_device *dev, int dir) +{ + u8 v; + + mutex_lock(&update_lock); + + v = inb(DIR_PORT); + + if (dir) + outb(v & ~C2D, DIR_PORT); + else + outb(v | C2D, DIR_PORT); + + mutex_unlock(&update_lock); +} + +static int duramar2150_c2port_c2d_get(struct c2port_device *dev) +{ + return inb(DATA_PORT) & C2D; +} + +static void duramar2150_c2port_c2d_set(struct c2port_device *dev, int status) +{ + u8 v; + + mutex_lock(&update_lock); + + v = inb(DATA_PORT); + + if (status) + outb(v | C2D, DATA_PORT); + else + outb(v & ~C2D, DATA_PORT); + + mutex_unlock(&update_lock); +} + +static void duramar2150_c2port_c2ck_set(struct c2port_device *dev, int status) +{ + u8 v; + + mutex_lock(&update_lock); + + v = inb(DATA_PORT); + + if (status) + outb(v | C2CK, DATA_PORT); + else + outb(v & ~C2CK, DATA_PORT); + + mutex_unlock(&update_lock); +} + +static struct c2port_ops duramar2150_c2port_ops = { + .block_size = 512, /* bytes */ + .blocks_num = 30, /* total flash size: 15360 bytes */ + + .access = duramar2150_c2port_access, + .c2d_dir = duramar2150_c2port_c2d_dir, + .c2d_get = duramar2150_c2port_c2d_get, + .c2d_set = duramar2150_c2port_c2d_set, + .c2ck_set = duramar2150_c2port_c2ck_set, +}; + +static struct c2port_device *duramar2150_c2port_dev; + +/* + * Module stuff + */ + +static int __init duramar2150_c2port_init(void) +{ + struct
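+ /*
+  * Added note: the request_region(0x325, 2, ...) below claims DATA_PORT
+  * (0x325) and DIR_PORT (0x326) together; every bit-banged operation above
+  * is then a read-modify-write of one of those bytes under update_lock,
+  * e.g. driving C2D high is outb(inb(DATA_PORT) | C2D, DATA_PORT).
+  */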
resource *res; + int ret = 0; + + res = request_region(0x325, 2, "c2port"); + if (!res) + return -EBUSY; + + duramar2150_c2port_dev = c2port_device_register("uc", + &duramar2150_c2port_ops, NULL); + if (!duramar2150_c2port_dev) { + ret = -ENODEV; + goto free_region; + } + + return 0; + +free_region: + release_region(0x325, 2); + return ret; +} + +static void __exit duramar2150_c2port_exit(void) +{ + /* Setup the GPIOs as input by default (access = 0) */ + duramar2150_c2port_access(duramar2150_c2port_dev, 0); + + c2port_device_unregister(duramar2150_c2port_dev); + + release_region(0x325, 2); +} + +module_init(duramar2150_c2port_init); +module_exit(duramar2150_c2port_exit); + +MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>"); +MODULE_DESCRIPTION("Silicon Labs C2 port Linux support for Duramar 2150"); +MODULE_LICENSE("GPL"); diff --git a/kernel/drivers/misc/c2port/core.c b/kernel/drivers/misc/c2port/core.c new file mode 100644 index 000000000..464419b36 --- /dev/null +++ b/kernel/drivers/misc/c2port/core.c @@ -0,0 +1,1009 @@ +/* + * Silicon Labs C2 port core Linux support + * + * Copyright (c) 2007 Rodolfo Giometti <giometti@linux.it> + * Copyright (c) 2007 Eurotech S.p.A. <info@eurotech.it> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/device.h> +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/kmemcheck.h> +#include <linux/ctype.h> +#include <linux/delay.h> +#include <linux/idr.h> +#include <linux/sched.h> +#include <linux/slab.h> + +#include <linux/c2port.h> + +#define DRIVER_NAME "c2port" +#define DRIVER_VERSION "0.51.0" + +static DEFINE_SPINLOCK(c2port_idr_lock); +static DEFINE_IDR(c2port_idr); + +/* + * Local variables + */ + +static struct class *c2port_class; + +/* + * C2 registers & commands defines + */ + +/* C2 registers */ +#define C2PORT_DEVICEID 0x00 +#define C2PORT_REVID 0x01 +#define C2PORT_FPCTL 0x02 +#define C2PORT_FPDAT 0xB4 + +/* C2 interface commands */ +#define C2PORT_GET_VERSION 0x01 +#define C2PORT_DEVICE_ERASE 0x03 +#define C2PORT_BLOCK_READ 0x06 +#define C2PORT_BLOCK_WRITE 0x07 +#define C2PORT_PAGE_ERASE 0x08 + +/* C2 status return codes */ +#define C2PORT_INVALID_COMMAND 0x00 +#define C2PORT_COMMAND_FAILED 0x02 +#define C2PORT_COMMAND_OK 0x0d + +/* + * C2 port low-level signal management + */ + +static void c2port_reset(struct c2port_device *dev) +{ + struct c2port_ops *ops = dev->ops; + + /* To reset the device we have to keep the clock line low for at least + * 20us. + */ + local_irq_disable(); + ops->c2ck_set(dev, 0); + udelay(25); + ops->c2ck_set(dev, 1); + local_irq_enable(); + + udelay(1); +} + +static void c2port_strobe_ck(struct c2port_device *dev) +{ + struct c2port_ops *ops = dev->ops; + + /* During the hi-low-hi transition we disable local IRQs to avoid + * interruptions, since the C2 port specification says that it must be + * shorter than 5us, otherwise the microcontroller may consider + * it as a reset signal!
+ */ + local_irq_disable(); + ops->c2ck_set(dev, 0); + udelay(1); + ops->c2ck_set(dev, 1); + local_irq_enable(); + + udelay(1); +} + +/* + * C2 port basic functions + */ + +static void c2port_write_ar(struct c2port_device *dev, u8 addr) +{ + struct c2port_ops *ops = dev->ops; + int i; + + /* START field */ + c2port_strobe_ck(dev); + + /* INS field (11b, LSB first) */ + ops->c2d_dir(dev, 0); + ops->c2d_set(dev, 1); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 1); + c2port_strobe_ck(dev); + + /* ADDRESS field */ + for (i = 0; i < 8; i++) { + ops->c2d_set(dev, addr & 0x01); + c2port_strobe_ck(dev); + + addr >>= 1; + } + + /* STOP field */ + ops->c2d_dir(dev, 1); + c2port_strobe_ck(dev); +} + +static int c2port_read_ar(struct c2port_device *dev, u8 *addr) +{ + struct c2port_ops *ops = dev->ops; + int i; + + /* START field */ + c2port_strobe_ck(dev); + + /* INS field (10b, LSB first) */ + ops->c2d_dir(dev, 0); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 1); + c2port_strobe_ck(dev); + + /* ADDRESS field */ + ops->c2d_dir(dev, 1); + *addr = 0; + for (i = 0; i < 8; i++) { + *addr >>= 1; /* shift in 8-bit ADDRESS field LSB first */ + + c2port_strobe_ck(dev); + if (ops->c2d_get(dev)) + *addr |= 0x80; + } + + /* STOP field */ + c2port_strobe_ck(dev); + + return 0; +} + +static int c2port_write_dr(struct c2port_device *dev, u8 data) +{ + struct c2port_ops *ops = dev->ops; + int timeout, i; + + /* START field */ + c2port_strobe_ck(dev); + + /* INS field (01b, LSB first) */ + ops->c2d_dir(dev, 0); + ops->c2d_set(dev, 1); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + + /* LENGTH field (00b, LSB first -> 1 byte) */ + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + + /* DATA field */ + for (i = 0; i < 8; i++) { + ops->c2d_set(dev, data & 0x01); + c2port_strobe_ck(dev); + + data >>= 1; + } + + /* WAIT field */ + ops->c2d_dir(dev, 1); + timeout = 20; + do { + c2port_strobe_ck(dev); + if (ops->c2d_get(dev)) + break; + + udelay(1); + } while (--timeout > 0); + if (timeout == 0) + return -EIO; + + /* STOP field */ + c2port_strobe_ck(dev); + + return 0; +} + +static int c2port_read_dr(struct c2port_device *dev, u8 *data) +{ + struct c2port_ops *ops = dev->ops; + int timeout, i; + + /* START field */ + c2port_strobe_ck(dev); + + /* INS field (00b, LSB first) */ + ops->c2d_dir(dev, 0); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + + /* LENGTH field (00b, LSB first -> 1 byte) */ + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + + /* WAIT field */ + ops->c2d_dir(dev, 1); + timeout = 20; + do { + c2port_strobe_ck(dev); + if (ops->c2d_get(dev)) + break; + + udelay(1); + } while (--timeout > 0); + if (timeout == 0) + return -EIO; + + /* DATA field */ + *data = 0; + for (i = 0; i < 8; i++) { + *data >>= 1; /* shift in 8-bit DATA field LSB first */ + + c2port_strobe_ck(dev); + if (ops->c2d_get(dev)) + *data |= 0x80; + } + + /* STOP field */ + c2port_strobe_ck(dev); + + return 0; +} + +static int c2port_poll_in_busy(struct c2port_device *dev) +{ + u8 addr; + int ret, timeout = 20; + + do { + ret = (c2port_read_ar(dev, &addr)); + if (ret < 0) + return -EIO; + + if (!(addr & 0x02)) + break; + + udelay(1); + } while (--timeout > 0); + if (timeout == 0) + return -EIO; + + return 0; +} + +static int c2port_poll_out_ready(struct c2port_device *dev) +{ + u8 addr; + int ret, timeout = 10000; /* erase flash needs long time... 
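+  * (added note: a device erase can take many milliseconds, so this poll
+  * budgets 10000 iterations of udelay(1) plus strobe and register-read
+  * overhead before giving up with -EIO, whereas c2port_poll_in_busy()
+  * above waits only about 20 iterations)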
*/ + + do { + ret = (c2port_read_ar(dev, &addr)); + if (ret < 0) + return -EIO; + + if (addr & 0x01) + break; + + udelay(1); + } while (--timeout > 0); + if (timeout == 0) + return -EIO; + + return 0; +} + +/* + * sysfs methods + */ + +static ssize_t c2port_show_name(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", c2dev->name); +} +static DEVICE_ATTR(name, 0444, c2port_show_name, NULL); + +static ssize_t c2port_show_flash_blocks_num(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + struct c2port_ops *ops = c2dev->ops; + + return sprintf(buf, "%d\n", ops->blocks_num); +} +static DEVICE_ATTR(flash_blocks_num, 0444, c2port_show_flash_blocks_num, NULL); + +static ssize_t c2port_show_flash_block_size(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + struct c2port_ops *ops = c2dev->ops; + + return sprintf(buf, "%d\n", ops->block_size); +} +static DEVICE_ATTR(flash_block_size, 0444, c2port_show_flash_block_size, NULL); + +static ssize_t c2port_show_flash_size(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + struct c2port_ops *ops = c2dev->ops; + + return sprintf(buf, "%d\n", ops->blocks_num * ops->block_size); +} +static DEVICE_ATTR(flash_size, 0444, c2port_show_flash_size, NULL); + +static ssize_t access_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", c2dev->access); +} + +static ssize_t access_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + struct c2port_ops *ops = c2dev->ops; + int status, ret; + + ret = sscanf(buf, "%d", &status); + if (ret != 1) + return -EINVAL; + + mutex_lock(&c2dev->mutex); + + c2dev->access = !!status; + + /* If access is "on" clock should be HIGH _before_ setting the line + * as output and data line should be set as INPUT anyway */ + if (c2dev->access) + ops->c2ck_set(c2dev, 1); + ops->access(c2dev, c2dev->access); + if (c2dev->access) + ops->c2d_dir(c2dev, 1); + + mutex_unlock(&c2dev->mutex); + + return count; +} +static DEVICE_ATTR_RW(access); + +static ssize_t c2port_store_reset(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + + /* Check the device access status */ + if (!c2dev->access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + + c2port_reset(c2dev); + c2dev->flash_access = 0; + + mutex_unlock(&c2dev->mutex); + + return count; +} +static DEVICE_ATTR(reset, 0200, NULL, c2port_store_reset); + +static ssize_t __c2port_show_dev_id(struct c2port_device *dev, char *buf) +{ + u8 data; + int ret; + + /* Select DEVICEID register for C2 data register accesses */ + c2port_write_ar(dev, C2PORT_DEVICEID); + + /* Read and return the device ID register */ + ret = c2port_read_dr(dev, &data); + if (ret < 0) + return ret; + + return sprintf(buf, "%d\n", data); +} + +static ssize_t c2port_show_dev_id(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + ssize_t ret; + + /* Check the device access status */ + if (!c2dev->access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = 
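+ /*
+  * Added illustration of the transfer performed below: reading the device
+  * ID is c2port_write_ar(dev, C2PORT_DEVICEID) followed by
+  * c2port_read_dr(). On the wire the address write is a START strobe, the
+  * 2-bit INS field (11b, LSB first), eight address bits LSB first, and a
+  * STOP strobe, exactly as implemented in c2port_write_ar() above.
+  */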
__c2port_show_dev_id(c2dev, buf); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) + dev_err(dev, "cannot read from %s\n", c2dev->name); + + return ret; +} +static DEVICE_ATTR(dev_id, 0444, c2port_show_dev_id, NULL); + +static ssize_t __c2port_show_rev_id(struct c2port_device *dev, char *buf) +{ + u8 data; + int ret; + + /* Select REVID register for C2 data register accesses */ + c2port_write_ar(dev, C2PORT_REVID); + + /* Read and return the revision ID register */ + ret = c2port_read_dr(dev, &data); + if (ret < 0) + return ret; + + return sprintf(buf, "%d\n", data); +} + +static ssize_t c2port_show_rev_id(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + ssize_t ret; + + /* Check the device access status */ + if (!c2dev->access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = __c2port_show_rev_id(c2dev, buf); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) + dev_err(c2dev->dev, "cannot read from %s\n", c2dev->name); + + return ret; +} +static DEVICE_ATTR(rev_id, 0444, c2port_show_rev_id, NULL); + +static ssize_t c2port_show_flash_access(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", c2dev->flash_access); +} + +static ssize_t __c2port_store_flash_access(struct c2port_device *dev, + int status) +{ + int ret; + + /* Check the device access status */ + if (!dev->access) + return -EBUSY; + + dev->flash_access = !!status; + + /* If flash_access is off we have nothing to do... */ + if (dev->flash_access == 0) + return 0; + + /* Target the C2 flash programming control register for C2 data + * register access */ + c2port_write_ar(dev, C2PORT_FPCTL); + + /* Write the first keycode to enable C2 Flash programming */ + ret = c2port_write_dr(dev, 0x02); + if (ret < 0) + return ret; + + /* Write the second keycode to enable C2 Flash programming */ + ret = c2port_write_dr(dev, 0x01); + if (ret < 0) + return ret; + + /* Delay for at least 20ms to ensure the target is ready for + * C2 flash programming */ + mdelay(25); + + return 0; +} + +static ssize_t c2port_store_flash_access(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + int status; + ssize_t ret; + + ret = sscanf(buf, "%d", &status); + if (ret != 1) + return -EINVAL; + + mutex_lock(&c2dev->mutex); + ret = __c2port_store_flash_access(c2dev, status); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) { + dev_err(c2dev->dev, "cannot enable %s flash programming\n", + c2dev->name); + return ret; + } + + return count; +} +static DEVICE_ATTR(flash_access, 0644, c2port_show_flash_access, + c2port_store_flash_access); + +static ssize_t __c2port_write_flash_erase(struct c2port_device *dev) +{ + u8 status; + int ret; + + /* Target the C2 flash programming data register for C2 data register + * access. 
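+	 * Every erase command, arming key, and status byte exchanged below
+	 * then flows through this single FPDAT register.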
+ */ + c2port_write_ar(dev, C2PORT_FPDAT); + + /* Send device erase command */ + c2port_write_dr(dev, C2PORT_DEVICE_ERASE); + + /* Wait for input acknowledge */ + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before starting FLASH access sequence */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != C2PORT_COMMAND_OK) + return -EBUSY; + + /* Send a three-byte arming sequence to enable the device erase. + * If the sequence is not received correctly, the command will be + * ignored. + * Sequence is: 0xde, 0xad, 0xa5. + */ + c2port_write_dr(dev, 0xde); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + c2port_write_dr(dev, 0xad); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + c2port_write_dr(dev, 0xa5); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + return 0; +} + +static ssize_t c2port_store_flash_erase(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + int ret; + + /* Check the device and flash access status */ + if (!c2dev->access || !c2dev->flash_access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = __c2port_write_flash_erase(c2dev); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) { + dev_err(c2dev->dev, "cannot erase %s flash\n", c2dev->name); + return ret; + } + + return count; +} +static DEVICE_ATTR(flash_erase, 0200, NULL, c2port_store_flash_erase); + +static ssize_t __c2port_read_flash_data(struct c2port_device *dev, + char *buffer, loff_t offset, size_t count) +{ + struct c2port_ops *ops = dev->ops; + u8 status, nread = 128; + int i, ret; + + /* Check for flash end */ + if (offset >= ops->block_size * ops->blocks_num) + return 0; + + if (ops->block_size * ops->blocks_num - offset < nread) + nread = ops->block_size * ops->blocks_num - offset; + if (count < nread) + nread = count; + if (nread == 0) + return nread; + + /* Target the C2 flash programming data register for C2 data register + * access */ + c2port_write_ar(dev, C2PORT_FPDAT); + + /* Send flash block read command */ + c2port_write_dr(dev, C2PORT_BLOCK_READ); + + /* Wait for input acknowledge */ + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before starting FLASH access sequence */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != C2PORT_COMMAND_OK) + return -EBUSY; + + /* Send address high byte */ + c2port_write_dr(dev, offset >> 8); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Send address low byte */ + c2port_write_dr(dev, offset & 0x00ff); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Send address block size */ + c2port_write_dr(dev, nread); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before reading FLASH block */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != 
C2PORT_COMMAND_OK) + return -EBUSY; + + /* Read flash block */ + for (i = 0; i < nread; i++) { + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + ret = c2port_read_dr(dev, buffer+i); + if (ret < 0) + return ret; + } + + return nread; +} + +static ssize_t c2port_read_flash_data(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buffer, loff_t offset, size_t count) +{ + struct c2port_device *c2dev = + dev_get_drvdata(container_of(kobj, + struct device, kobj)); + ssize_t ret; + + /* Check the device and flash access status */ + if (!c2dev->access || !c2dev->flash_access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = __c2port_read_flash_data(c2dev, buffer, offset, count); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) + dev_err(c2dev->dev, "cannot read %s flash\n", c2dev->name); + + return ret; +} + +static ssize_t __c2port_write_flash_data(struct c2port_device *dev, + char *buffer, loff_t offset, size_t count) +{ + struct c2port_ops *ops = dev->ops; + u8 status, nwrite = 128; + int i, ret; + + if (nwrite > count) + nwrite = count; + if (ops->block_size * ops->blocks_num - offset < nwrite) + nwrite = ops->block_size * ops->blocks_num - offset; + + /* Check for flash end */ + if (offset >= ops->block_size * ops->blocks_num) + return -EINVAL; + + /* Target the C2 flash programming data register for C2 data register + * access */ + c2port_write_ar(dev, C2PORT_FPDAT); + + /* Send flash block write command */ + c2port_write_dr(dev, C2PORT_BLOCK_WRITE); + + /* Wait for input acknowledge */ + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before starting FLASH access sequence */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != C2PORT_COMMAND_OK) + return -EBUSY; + + /* Send address high byte */ + c2port_write_dr(dev, offset >> 8); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Send address low byte */ + c2port_write_dr(dev, offset & 0x00ff); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Send address block size */ + c2port_write_dr(dev, nwrite); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before writing FLASH block */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != C2PORT_COMMAND_OK) + return -EBUSY; + + /* Write flash block */ + for (i = 0; i < nwrite; i++) { + ret = c2port_write_dr(dev, *(buffer+i)); + if (ret < 0) + return ret; + + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + } + + /* Wait for last flash write to complete */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + return nwrite; +} + +static ssize_t c2port_write_flash_data(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buffer, loff_t offset, size_t count) +{ + struct c2port_device *c2dev = + dev_get_drvdata(container_of(kobj, + struct device, kobj)); + int ret; + + /* Check the device access status */ + if (!c2dev->access || !c2dev->flash_access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = __c2port_write_flash_data(c2dev, buffer, offset, count); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) + 
dev_err(c2dev->dev, "cannot write %s flash\n", c2dev->name); + + return ret; +} +/* size is computed at run-time */ +static BIN_ATTR(flash_data, 0644, c2port_read_flash_data, + c2port_write_flash_data, 0); + +/* + * Class attributes + */ +static struct attribute *c2port_attrs[] = { + &dev_attr_name.attr, + &dev_attr_flash_blocks_num.attr, + &dev_attr_flash_block_size.attr, + &dev_attr_flash_size.attr, + &dev_attr_access.attr, + &dev_attr_reset.attr, + &dev_attr_dev_id.attr, + &dev_attr_rev_id.attr, + &dev_attr_flash_access.attr, + &dev_attr_flash_erase.attr, + NULL, +}; + +static struct bin_attribute *c2port_bin_attrs[] = { + &bin_attr_flash_data, + NULL, +}; + +static const struct attribute_group c2port_group = { + .attrs = c2port_attrs, + .bin_attrs = c2port_bin_attrs, +}; + +static const struct attribute_group *c2port_groups[] = { + &c2port_group, + NULL, +}; + +/* + * Exported functions + */ + +struct c2port_device *c2port_device_register(char *name, + struct c2port_ops *ops, void *devdata) +{ + struct c2port_device *c2dev; + int ret; + + if (unlikely(!ops) || unlikely(!ops->access) || \ + unlikely(!ops->c2d_dir) || unlikely(!ops->c2ck_set) || \ + unlikely(!ops->c2d_get) || unlikely(!ops->c2d_set)) + return ERR_PTR(-EINVAL); + + c2dev = kmalloc(sizeof(struct c2port_device), GFP_KERNEL); + kmemcheck_annotate_bitfield(c2dev, flags); + if (unlikely(!c2dev)) + return ERR_PTR(-ENOMEM); + + idr_preload(GFP_KERNEL); + spin_lock_irq(&c2port_idr_lock); + ret = idr_alloc(&c2port_idr, c2dev, 0, 0, GFP_NOWAIT); + spin_unlock_irq(&c2port_idr_lock); + idr_preload_end(); + + if (ret < 0) + goto error_idr_alloc; + c2dev->id = ret; + + bin_attr_flash_data.size = ops->blocks_num * ops->block_size; + + c2dev->dev = device_create(c2port_class, NULL, 0, c2dev, + "c2port%d", c2dev->id); + if (unlikely(IS_ERR(c2dev->dev))) { + ret = PTR_ERR(c2dev->dev); + goto error_device_create; + } + dev_set_drvdata(c2dev->dev, c2dev); + + strncpy(c2dev->name, name, C2PORT_NAME_LEN); + c2dev->ops = ops; + mutex_init(&c2dev->mutex); + + /* By default C2 port access is off */ + c2dev->access = c2dev->flash_access = 0; + ops->access(c2dev, 0); + + dev_info(c2dev->dev, "C2 port %s added\n", name); + dev_info(c2dev->dev, "%s flash has %d blocks x %d bytes " + "(%d bytes total)\n", + name, ops->blocks_num, ops->block_size, + ops->blocks_num * ops->block_size); + + return c2dev; + +error_device_create: + spin_lock_irq(&c2port_idr_lock); + idr_remove(&c2port_idr, c2dev->id); + spin_unlock_irq(&c2port_idr_lock); + +error_idr_alloc: + kfree(c2dev); + + return ERR_PTR(ret); +} +EXPORT_SYMBOL(c2port_device_register); + +void c2port_device_unregister(struct c2port_device *c2dev) +{ + if (!c2dev) + return; + + dev_info(c2dev->dev, "C2 port %s removed\n", c2dev->name); + + spin_lock_irq(&c2port_idr_lock); + idr_remove(&c2port_idr, c2dev->id); + spin_unlock_irq(&c2port_idr_lock); + + device_destroy(c2port_class, c2dev->id); + + kfree(c2dev); +} +EXPORT_SYMBOL(c2port_device_unregister); + +/* + * Module stuff + */ + +static int __init c2port_init(void) +{ + printk(KERN_INFO "Silicon Labs C2 port support v. 
" DRIVER_VERSION + " - (C) 2007 Rodolfo Giometti\n"); + + c2port_class = class_create(THIS_MODULE, "c2port"); + if (IS_ERR(c2port_class)) { + printk(KERN_ERR "c2port: failed to allocate class\n"); + return PTR_ERR(c2port_class); + } + c2port_class->dev_groups = c2port_groups; + + return 0; +} + +static void __exit c2port_exit(void) +{ + class_destroy(c2port_class); +} + +module_init(c2port_init); +module_exit(c2port_exit); + +MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>"); +MODULE_DESCRIPTION("Silicon Labs C2 port support v. " DRIVER_VERSION); +MODULE_LICENSE("GPL"); diff --git a/kernel/drivers/misc/carma/Kconfig b/kernel/drivers/misc/carma/Kconfig new file mode 100644 index 000000000..295882bfb --- /dev/null +++ b/kernel/drivers/misc/carma/Kconfig @@ -0,0 +1,15 @@ +config CARMA_FPGA + tristate "CARMA DATA-FPGA Access Driver" + depends on FSL_SOC && PPC_83xx && HAS_DMA && FSL_DMA + default n + help + Say Y here to include support for communicating with the data + processing FPGAs on the OVRO CARMA board. + +config CARMA_FPGA_PROGRAM + tristate "CARMA DATA-FPGA Programmer" + depends on FSL_SOC && PPC_83xx && HAS_DMA && FSL_DMA + default n + help + Say Y here to include support for programming the data processing + FPGAs on the OVRO CARMA board. diff --git a/kernel/drivers/misc/carma/Makefile b/kernel/drivers/misc/carma/Makefile new file mode 100644 index 000000000..ff36ac2ce --- /dev/null +++ b/kernel/drivers/misc/carma/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_CARMA_FPGA) += carma-fpga.o +obj-$(CONFIG_CARMA_FPGA_PROGRAM) += carma-fpga-program.o diff --git a/kernel/drivers/misc/carma/carma-fpga-program.c b/kernel/drivers/misc/carma/carma-fpga-program.c new file mode 100644 index 000000000..0b1bd85e4 --- /dev/null +++ b/kernel/drivers/misc/carma/carma-fpga-program.c @@ -0,0 +1,1182 @@ +/* + * CARMA Board DATA-FPGA Programmer + * + * Copyright (c) 2009-2011 Ira W. Snyder <iws@ovro.caltech.edu> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include <linux/dma-mapping.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/of_platform.h> +#include <linux/completion.h> +#include <linux/miscdevice.h> +#include <linux/dmaengine.h> +#include <linux/fsldma.h> +#include <linux/interrupt.h> +#include <linux/highmem.h> +#include <linux/vmalloc.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/leds.h> +#include <linux/slab.h> +#include <linux/kref.h> +#include <linux/fs.h> +#include <linux/io.h> + +/* MPC8349EMDS specific get_immrbase() */ +#include <sysdev/fsl_soc.h> + +static const char drv_name[] = "carma-fpga-program"; + +/* + * Firmware images are always this exact size + * + * 12849552 bytes for a CARMA Digitizer Board (EP2S90 FPGAs) + * 18662880 bytes for a CARMA Correlator Board (EP2S130 FPGAs) + */ +#define FW_SIZE_EP2S90 12849552 +#define FW_SIZE_EP2S130 18662880 + +struct fpga_dev { + struct miscdevice miscdev; + + /* Reference count */ + struct kref ref; + + /* Device Registers */ + struct device *dev; + void __iomem *regs; + void __iomem *immr; + + /* Freescale DMA Device */ + struct dma_chan *chan; + + /* Interrupts */ + int irq, status; + struct completion completion; + + /* FPGA Bitfile */ + struct mutex lock; + + void *vaddr; + struct scatterlist *sglist; + int sglen; + int nr_pages; + bool buf_allocated; + + /* max size and written bytes */ + size_t fw_size; + size_t bytes; +}; + +static int fpga_dma_init(struct fpga_dev *priv, int nr_pages) +{ + struct page *pg; + int i; + + priv->vaddr = vmalloc_32(nr_pages << PAGE_SHIFT); + if (NULL == priv->vaddr) { + pr_debug("vmalloc_32(%d pages) failed\n", nr_pages); + return -ENOMEM; + } + + pr_debug("vmalloc is at addr 0x%08lx, size=%d\n", + (unsigned long)priv->vaddr, + nr_pages << PAGE_SHIFT); + + memset(priv->vaddr, 0, nr_pages << PAGE_SHIFT); + priv->nr_pages = nr_pages; + + priv->sglist = vzalloc(priv->nr_pages * sizeof(*priv->sglist)); + if (NULL == priv->sglist) + goto vzalloc_err; + + sg_init_table(priv->sglist, priv->nr_pages); + for (i = 0; i < priv->nr_pages; i++) { + pg = vmalloc_to_page(priv->vaddr + i * PAGE_SIZE); + if (NULL == pg) + goto vmalloc_to_page_err; + sg_set_page(&priv->sglist[i], pg, PAGE_SIZE, 0); + } + return 0; + +vmalloc_to_page_err: + vfree(priv->sglist); + priv->sglist = NULL; +vzalloc_err: + vfree(priv->vaddr); + priv->vaddr = NULL; + return -ENOMEM; +} + +static int fpga_dma_map(struct fpga_dev *priv) +{ + priv->sglen = dma_map_sg(priv->dev, priv->sglist, + priv->nr_pages, DMA_TO_DEVICE); + + if (0 == priv->sglen) { + pr_warn("%s: dma_map_sg failed\n", __func__); + return -ENOMEM; + } + return 0; +} + +static int fpga_dma_unmap(struct fpga_dev *priv) +{ + if (!priv->sglen) + return 0; + + dma_unmap_sg(priv->dev, priv->sglist, priv->sglen, DMA_TO_DEVICE); + priv->sglen = 0; + return 0; +} + +/* + * FPGA Bitfile Helpers + */ + +/** + * fpga_drop_firmware_data() - drop the bitfile image from memory + * @priv: the driver's private data structure + * + * LOCKING: must hold priv->lock + */ +static void fpga_drop_firmware_data(struct fpga_dev *priv) +{ + vfree(priv->sglist); + vfree(priv->vaddr); + priv->buf_allocated = false; + priv->bytes = 0; +} + +/* + * Private Data Reference Count + */ + +static void fpga_dev_remove(struct kref *ref) +{ + struct fpga_dev *priv = container_of(ref, struct fpga_dev, ref); + + /* free any firmware image that was not programmed */ + fpga_drop_firmware_data(priv); + + 
mutex_destroy(&priv->lock);
+	kfree(priv);
+}
+
+/*
+ * LED Trigger (could be a separate module)
+ */
+
+/*
+ * NOTE: this whole thing does have the problem that whenever the LEDs are
+ * first set to use the fpga trigger, they could be in the wrong state
+ */
+
+DEFINE_LED_TRIGGER(ledtrig_fpga);
+
+static void ledtrig_fpga_programmed(bool enabled)
+{
+	if (enabled)
+		led_trigger_event(ledtrig_fpga, LED_FULL);
+	else
+		led_trigger_event(ledtrig_fpga, LED_OFF);
+}
+
+/*
+ * FPGA Register Helpers
+ */
+
+/* Register Definitions */
+#define FPGA_CONFIG_CONTROL		0x40
+#define FPGA_CONFIG_STATUS		0x44
+#define FPGA_CONFIG_FIFO_SIZE		0x48
+#define FPGA_CONFIG_FIFO_USED		0x4C
+#define FPGA_CONFIG_TOTAL_BYTE_COUNT	0x50
+#define FPGA_CONFIG_CUR_BYTE_COUNT	0x54
+
+#define FPGA_FIFO_ADDRESS		0x3000
+
+static int fpga_fifo_size(void __iomem *regs)
+{
+	return ioread32be(regs + FPGA_CONFIG_FIFO_SIZE);
+}
+
+#define CFG_STATUS_ERR_MASK	0xfffe
+
+static int fpga_config_error(void __iomem *regs)
+{
+	return ioread32be(regs + FPGA_CONFIG_STATUS) & CFG_STATUS_ERR_MASK;
+}
+
+static int fpga_fifo_empty(void __iomem *regs)
+{
+	return ioread32be(regs + FPGA_CONFIG_FIFO_USED) == 0;
+}
+
+static void fpga_fifo_write(void __iomem *regs, u32 val)
+{
+	iowrite32be(val, regs + FPGA_FIFO_ADDRESS);
+}
+
+static void fpga_set_byte_count(void __iomem *regs, u32 count)
+{
+	iowrite32be(count, regs + FPGA_CONFIG_TOTAL_BYTE_COUNT);
+}
+
+#define CFG_CTL_ENABLE	(1 << 0)
+#define CFG_CTL_RESET	(1 << 1)
+#define CFG_CTL_DMA	(1 << 2)
+
+static void fpga_programmer_enable(struct fpga_dev *priv, bool dma)
+{
+	u32 val;
+
+	val = (dma) ? (CFG_CTL_ENABLE | CFG_CTL_DMA) : CFG_CTL_ENABLE;
+	iowrite32be(val, priv->regs + FPGA_CONFIG_CONTROL);
+}
+
+static void fpga_programmer_disable(struct fpga_dev *priv)
+{
+	iowrite32be(0x0, priv->regs + FPGA_CONFIG_CONTROL);
+}
+
+static void fpga_dump_registers(struct fpga_dev *priv)
+{
+	u32 control, status, size, used, total, curr;
+
+	/* good status: do nothing */
+	if (priv->status == 0)
+		return;
+
+	/* Dump all status registers */
+	control = ioread32be(priv->regs + FPGA_CONFIG_CONTROL);
+	status = ioread32be(priv->regs + FPGA_CONFIG_STATUS);
+	size = ioread32be(priv->regs + FPGA_CONFIG_FIFO_SIZE);
+	used = ioread32be(priv->regs + FPGA_CONFIG_FIFO_USED);
+	total = ioread32be(priv->regs + FPGA_CONFIG_TOTAL_BYTE_COUNT);
+	curr = ioread32be(priv->regs + FPGA_CONFIG_CUR_BYTE_COUNT);
+
+	dev_err(priv->dev, "Configuration failed, dumping status registers\n");
+	dev_err(priv->dev, "Control: 0x%.8x\n", control);
+	dev_err(priv->dev, "Status: 0x%.8x\n", status);
+	dev_err(priv->dev, "FIFO Size: 0x%.8x\n", size);
+	dev_err(priv->dev, "FIFO Used: 0x%.8x\n", used);
+	dev_err(priv->dev, "FIFO Total: 0x%.8x\n", total);
+	dev_err(priv->dev, "FIFO Curr: 0x%.8x\n", curr);
+}
+
+/*
+ * FPGA Power Supply Code
+ */
+
+#define CTL_PWR_CONTROL		0x2006
+#define CTL_PWR_STATUS		0x200A
+#define CTL_PWR_FAIL		0x200B
+
+#define PWR_CONTROL_ENABLE	0x01
+
+#define PWR_STATUS_ERROR_MASK	0x10
+#define PWR_STATUS_GOOD		0x0f
+
+/*
+ * Determine if the FPGA power is good for all supplies
+ */
+static bool fpga_power_good(struct fpga_dev *priv)
+{
+	u8 val;
+
+	val = ioread8(priv->regs + CTL_PWR_STATUS);
+	if (val & PWR_STATUS_ERROR_MASK)
+		return false;
+
+	return val == PWR_STATUS_GOOD;
+}
+
+/*
+ * Disable the FPGA power supplies
+ */
+static void fpga_disable_power_supplies(struct fpga_dev *priv)
+{
+	unsigned long start;
+	u8 val;
+
+	iowrite8(0x0, priv->regs + CTL_PWR_CONTROL);
+
+	/*
+	 * Wait 500ms for the power rails
to discharge + * + * Without this delay, the CTL-CPLD state machine can get into a + * state where it is waiting for the power-goods to assert, but they + * never do. This only happens when enabling and disabling the + * power sequencer very rapidly. + * + * The loop below will also wait for the power goods to de-assert, + * but testing has shown that they are always disabled by the time + * the sleep completes. However, omitting the sleep and only waiting + * for the power-goods to de-assert was not sufficient to ensure + * that the power sequencer would not wedge itself. + */ + msleep(500); + + start = jiffies; + while (time_before(jiffies, start + HZ)) { + val = ioread8(priv->regs + CTL_PWR_STATUS); + if (!(val & PWR_STATUS_GOOD)) + break; + + usleep_range(5000, 10000); + } + + val = ioread8(priv->regs + CTL_PWR_STATUS); + if (val & PWR_STATUS_GOOD) { + dev_err(priv->dev, "power disable failed: " + "power goods: status 0x%.2x\n", val); + } + + if (val & PWR_STATUS_ERROR_MASK) { + dev_err(priv->dev, "power disable failed: " + "alarm bit set: status 0x%.2x\n", val); + } +} + +/** + * fpga_enable_power_supplies() - enable the DATA-FPGA power supplies + * @priv: the driver's private data structure + * + * Enable the DATA-FPGA power supplies, waiting up to 1 second for + * them to enable successfully. + * + * Returns 0 on success, -ERRNO otherwise + */ +static int fpga_enable_power_supplies(struct fpga_dev *priv) +{ + unsigned long start = jiffies; + + if (fpga_power_good(priv)) { + dev_dbg(priv->dev, "power was already good\n"); + return 0; + } + + iowrite8(PWR_CONTROL_ENABLE, priv->regs + CTL_PWR_CONTROL); + while (time_before(jiffies, start + HZ)) { + if (fpga_power_good(priv)) + return 0; + + usleep_range(5000, 10000); + } + + return fpga_power_good(priv) ? 
0 : -ETIMEDOUT; +} + +/* + * Determine if the FPGA power supplies are all enabled + */ +static bool fpga_power_enabled(struct fpga_dev *priv) +{ + u8 val; + + val = ioread8(priv->regs + CTL_PWR_CONTROL); + if (val & PWR_CONTROL_ENABLE) + return true; + + return false; +} + +/* + * Determine if the FPGA's are programmed and running correctly + */ +static bool fpga_running(struct fpga_dev *priv) +{ + if (!fpga_power_good(priv)) + return false; + + /* Check the config done bit */ + return ioread32be(priv->regs + FPGA_CONFIG_STATUS) & (1 << 18); +} + +/* + * FPGA Programming Code + */ + +/** + * fpga_program_block() - put a block of data into the programmer's FIFO + * @priv: the driver's private data structure + * @buf: the data to program + * @count: the length of data to program (must be a multiple of 4 bytes) + * + * Returns 0 on success, -ERRNO otherwise + */ +static int fpga_program_block(struct fpga_dev *priv, void *buf, size_t count) +{ + u32 *data = buf; + int size = fpga_fifo_size(priv->regs); + int i, len; + unsigned long timeout; + + /* enforce correct data length for the FIFO */ + BUG_ON(count % 4 != 0); + + while (count > 0) { + + /* Get the size of the block to write (maximum is FIFO_SIZE) */ + len = min_t(size_t, count, size); + timeout = jiffies + HZ / 4; + + /* Write the block */ + for (i = 0; i < len / 4; i++) + fpga_fifo_write(priv->regs, data[i]); + + /* Update the amounts left */ + count -= len; + data += len / 4; + + /* Wait for the fifo to empty */ + while (true) { + + if (fpga_fifo_empty(priv->regs)) { + break; + } else { + dev_dbg(priv->dev, "Fifo not empty\n"); + cpu_relax(); + } + + if (fpga_config_error(priv->regs)) { + dev_err(priv->dev, "Error detected\n"); + return -EIO; + } + + if (time_after(jiffies, timeout)) { + dev_err(priv->dev, "Fifo drain timeout\n"); + return -ETIMEDOUT; + } + + usleep_range(5000, 10000); + } + } + + return 0; +} + +/** + * fpga_program_cpu() - program the DATA-FPGA's using the CPU + * @priv: the driver's private data structure + * + * This is useful when the DMA programming method fails. It is possible to + * wedge the Freescale DMA controller such that the DMA programming method + * always fails. This method has always succeeded. 
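+ *
+ * The trade-off is speed: the CPU must wait for the programmer's FIFO to
+ * drain between blocks instead of letting the hardware DMA request lines
+ * pace the transfer.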
+ * + * Returns 0 on success, -ERRNO otherwise + */ +static noinline int fpga_program_cpu(struct fpga_dev *priv) +{ + int ret; + unsigned long timeout; + + /* Disable the programmer */ + fpga_programmer_disable(priv); + + /* Set the total byte count */ + fpga_set_byte_count(priv->regs, priv->bytes); + dev_dbg(priv->dev, "total byte count %u bytes\n", priv->bytes); + + /* Enable the controller for programming */ + fpga_programmer_enable(priv, false); + dev_dbg(priv->dev, "enabled the controller\n"); + + /* Write each chunk of the FPGA bitfile to FPGA programmer */ + ret = fpga_program_block(priv, priv->vaddr, priv->bytes); + if (ret) + goto out_disable_controller; + + /* Wait for the interrupt handler to signal that programming finished */ + timeout = wait_for_completion_timeout(&priv->completion, 2 * HZ); + if (!timeout) { + dev_err(priv->dev, "Timed out waiting for completion\n"); + ret = -ETIMEDOUT; + goto out_disable_controller; + } + + /* Retrieve the status from the interrupt handler */ + ret = priv->status; + +out_disable_controller: + fpga_programmer_disable(priv); + return ret; +} + +#define FIFO_DMA_ADDRESS 0xf0003000 +#define FIFO_MAX_LEN 4096 + +/** + * fpga_program_dma() - program the DATA-FPGA's using the DMA engine + * @priv: the driver's private data structure + * + * Program the DATA-FPGA's using the Freescale DMA engine. This requires that + * the engine is programmed such that the hardware DMA request lines can + * control the entire DMA transaction. The system controller FPGA then + * completely offloads the programming from the CPU. + * + * Returns 0 on success, -ERRNO otherwise + */ +static noinline int fpga_program_dma(struct fpga_dev *priv) +{ + struct dma_chan *chan = priv->chan; + struct dma_async_tx_descriptor *tx; + size_t num_pages, len, avail = 0; + struct dma_slave_config config; + struct scatterlist *sg; + struct sg_table table; + dma_cookie_t cookie; + int ret, i; + unsigned long timeout; + + /* Disable the programmer */ + fpga_programmer_disable(priv); + + /* Allocate a scatterlist for the DMA destination */ + num_pages = DIV_ROUND_UP(priv->bytes, FIFO_MAX_LEN); + ret = sg_alloc_table(&table, num_pages, GFP_KERNEL); + if (ret) { + dev_err(priv->dev, "Unable to allocate dst scatterlist\n"); + ret = -ENOMEM; + goto out_return; + } + + /* + * This is an ugly hack + * + * We fill in a scatterlist as if it were mapped for DMA. This is + * necessary because there exists no better structure for this + * inside the kernel code. + * + * As an added bonus, we can use the DMAEngine API for all of this, + * rather than inventing another extremely similar API. 
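+	 *
+	 * Only the DMA address and length of each entry are filled in, and
+	 * every entry points at the same hardware FIFO address, so there is
+	 * no struct page backing behind this scatterlist.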
+ */ + avail = priv->bytes; + for_each_sg(table.sgl, sg, num_pages, i) { + len = min_t(size_t, avail, FIFO_MAX_LEN); + sg_dma_address(sg) = FIFO_DMA_ADDRESS; + sg_dma_len(sg) = len; + + avail -= len; + } + + /* Map the buffer for DMA */ + ret = fpga_dma_map(priv); + if (ret) { + dev_err(priv->dev, "Unable to map buffer for DMA\n"); + goto out_free_table; + } + + /* + * Configure the DMA channel to transfer FIFO_SIZE / 2 bytes per + * transaction, and then put it under external control + */ + memset(&config, 0, sizeof(config)); + config.direction = DMA_MEM_TO_DEV; + config.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + config.dst_maxburst = fpga_fifo_size(priv->regs) / 2 / 4; + ret = dmaengine_slave_config(chan, &config); + if (ret) { + dev_err(priv->dev, "DMA slave configuration failed\n"); + goto out_dma_unmap; + } + + ret = fsl_dma_external_start(chan, 1); + if (ret) { + dev_err(priv->dev, "DMA external control setup failed\n"); + goto out_dma_unmap; + } + + /* setup and submit the DMA transaction */ + + tx = dmaengine_prep_dma_sg(chan, table.sgl, num_pages, + priv->sglist, priv->sglen, 0); + if (!tx) { + dev_err(priv->dev, "Unable to prep DMA transaction\n"); + ret = -ENOMEM; + goto out_dma_unmap; + } + + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + dev_err(priv->dev, "Unable to submit DMA transaction\n"); + ret = -ENOMEM; + goto out_dma_unmap; + } + + dma_async_issue_pending(chan); + + /* Set the total byte count */ + fpga_set_byte_count(priv->regs, priv->bytes); + dev_dbg(priv->dev, "total byte count %u bytes\n", priv->bytes); + + /* Enable the controller for DMA programming */ + fpga_programmer_enable(priv, true); + dev_dbg(priv->dev, "enabled the controller\n"); + + /* Wait for the interrupt handler to signal that programming finished */ + timeout = wait_for_completion_timeout(&priv->completion, 2 * HZ); + if (!timeout) { + dev_err(priv->dev, "Timed out waiting for completion\n"); + ret = -ETIMEDOUT; + goto out_disable_controller; + } + + /* Retrieve the status from the interrupt handler */ + ret = priv->status; + +out_disable_controller: + fpga_programmer_disable(priv); +out_dma_unmap: + fpga_dma_unmap(priv); +out_free_table: + sg_free_table(&table); +out_return: + return ret; +} + +/* + * Interrupt Handling + */ + +static irqreturn_t fpga_irq(int irq, void *dev_id) +{ + struct fpga_dev *priv = dev_id; + + /* Save the status */ + priv->status = fpga_config_error(priv->regs) ? 
-EIO : 0; + dev_dbg(priv->dev, "INTERRUPT status %d\n", priv->status); + fpga_dump_registers(priv); + + /* Disabling the programmer clears the interrupt */ + fpga_programmer_disable(priv); + + /* Notify any waiters */ + complete(&priv->completion); + + return IRQ_HANDLED; +} + +/* + * SYSFS Helpers + */ + +/** + * fpga_do_stop() - deconfigure (reset) the DATA-FPGA's + * @priv: the driver's private data structure + * + * LOCKING: must hold priv->lock + */ +static int fpga_do_stop(struct fpga_dev *priv) +{ + u32 val; + + /* Set the led to unprogrammed */ + ledtrig_fpga_programmed(false); + + /* Pulse the config line to reset the FPGA's */ + val = CFG_CTL_ENABLE | CFG_CTL_RESET; + iowrite32be(val, priv->regs + FPGA_CONFIG_CONTROL); + iowrite32be(0x0, priv->regs + FPGA_CONFIG_CONTROL); + + return 0; +} + +static noinline int fpga_do_program(struct fpga_dev *priv) +{ + int ret; + + if (priv->bytes != priv->fw_size) { + dev_err(priv->dev, "Incorrect bitfile size: got %zu bytes, " + "should be %zu bytes\n", + priv->bytes, priv->fw_size); + return -EINVAL; + } + + if (!fpga_power_enabled(priv)) { + dev_err(priv->dev, "Power not enabled\n"); + return -EINVAL; + } + + if (!fpga_power_good(priv)) { + dev_err(priv->dev, "Power not good\n"); + return -EINVAL; + } + + /* Set the LED to unprogrammed */ + ledtrig_fpga_programmed(false); + + /* Try to program the FPGA's using DMA */ + ret = fpga_program_dma(priv); + + /* If DMA failed or doesn't exist, try with CPU */ + if (ret) { + dev_warn(priv->dev, "Falling back to CPU programming\n"); + ret = fpga_program_cpu(priv); + } + + if (ret) { + dev_err(priv->dev, "Unable to program FPGA's\n"); + return ret; + } + + /* Drop the firmware bitfile from memory */ + fpga_drop_firmware_data(priv); + + dev_dbg(priv->dev, "FPGA programming successful\n"); + ledtrig_fpga_programmed(true); + + return 0; +} + +/* + * File Operations + */ + +static int fpga_open(struct inode *inode, struct file *filp) +{ + /* + * The miscdevice layer puts our struct miscdevice into the + * filp->private_data field. We use this to find our private + * data and then overwrite it with our own private structure. 
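+	 * After this, every other file operation can use filp->private_data
+	 * directly.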
+ */ + struct fpga_dev *priv = container_of(filp->private_data, + struct fpga_dev, miscdev); + unsigned int nr_pages; + int ret; + + /* We only allow one process at a time */ + ret = mutex_lock_interruptible(&priv->lock); + if (ret) + return ret; + + filp->private_data = priv; + kref_get(&priv->ref); + + /* Truncation: drop any existing data */ + if (filp->f_flags & O_TRUNC) + priv->bytes = 0; + + /* Check if we have already allocated a buffer */ + if (priv->buf_allocated) + return 0; + + /* Allocate a buffer to hold enough data for the bitfile */ + nr_pages = DIV_ROUND_UP(priv->fw_size, PAGE_SIZE); + ret = fpga_dma_init(priv, nr_pages); + if (ret) { + dev_err(priv->dev, "unable to allocate data buffer\n"); + mutex_unlock(&priv->lock); + kref_put(&priv->ref, fpga_dev_remove); + return ret; + } + + priv->buf_allocated = true; + return 0; +} + +static int fpga_release(struct inode *inode, struct file *filp) +{ + struct fpga_dev *priv = filp->private_data; + + mutex_unlock(&priv->lock); + kref_put(&priv->ref, fpga_dev_remove); + return 0; +} + +static ssize_t fpga_write(struct file *filp, const char __user *buf, + size_t count, loff_t *f_pos) +{ + struct fpga_dev *priv = filp->private_data; + + /* FPGA bitfiles have an exact size: disallow anything else */ + if (priv->bytes >= priv->fw_size) + return -ENOSPC; + + count = min_t(size_t, priv->fw_size - priv->bytes, count); + if (copy_from_user(priv->vaddr + priv->bytes, buf, count)) + return -EFAULT; + + priv->bytes += count; + return count; +} + +static ssize_t fpga_read(struct file *filp, char __user *buf, size_t count, + loff_t *f_pos) +{ + struct fpga_dev *priv = filp->private_data; + return simple_read_from_buffer(buf, count, f_pos, + priv->vaddr, priv->bytes); +} + +static loff_t fpga_llseek(struct file *filp, loff_t offset, int origin) +{ + struct fpga_dev *priv = filp->private_data; + + /* only read-only opens are allowed to seek */ + if ((filp->f_flags & O_ACCMODE) != O_RDONLY) + return -EINVAL; + + return fixed_size_llseek(filp, offset, origin, priv->fw_size); +} + +static const struct file_operations fpga_fops = { + .open = fpga_open, + .release = fpga_release, + .write = fpga_write, + .read = fpga_read, + .llseek = fpga_llseek, +}; + +/* + * Device Attributes + */ + +static ssize_t pfail_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct fpga_dev *priv = dev_get_drvdata(dev); + u8 val; + + val = ioread8(priv->regs + CTL_PWR_FAIL); + return snprintf(buf, PAGE_SIZE, "0x%.2x\n", val); +} + +static ssize_t pgood_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct fpga_dev *priv = dev_get_drvdata(dev); + return snprintf(buf, PAGE_SIZE, "%d\n", fpga_power_good(priv)); +} + +static ssize_t penable_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct fpga_dev *priv = dev_get_drvdata(dev); + return snprintf(buf, PAGE_SIZE, "%d\n", fpga_power_enabled(priv)); +} + +static ssize_t penable_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct fpga_dev *priv = dev_get_drvdata(dev); + unsigned long val; + int ret; + + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; + + if (val) { + ret = fpga_enable_power_supplies(priv); + if (ret) + return ret; + } else { + fpga_do_stop(priv); + fpga_disable_power_supplies(priv); + } + + return count; +} + +static ssize_t program_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct fpga_dev *priv = dev_get_drvdata(dev); + return snprintf(buf, 
PAGE_SIZE, "%d\n", fpga_running(priv)); +} + +static ssize_t program_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct fpga_dev *priv = dev_get_drvdata(dev); + unsigned long val; + int ret; + + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; + + /* We can't have an image writer and be programming simultaneously */ + if (mutex_lock_interruptible(&priv->lock)) + return -ERESTARTSYS; + + /* Program or Reset the FPGA's */ + ret = val ? fpga_do_program(priv) : fpga_do_stop(priv); + if (ret) + goto out_unlock; + + /* Success */ + ret = count; + +out_unlock: + mutex_unlock(&priv->lock); + return ret; +} + +static DEVICE_ATTR(power_fail, S_IRUGO, pfail_show, NULL); +static DEVICE_ATTR(power_good, S_IRUGO, pgood_show, NULL); +static DEVICE_ATTR(power_enable, S_IRUGO | S_IWUSR, + penable_show, penable_store); + +static DEVICE_ATTR(program, S_IRUGO | S_IWUSR, + program_show, program_store); + +static struct attribute *fpga_attributes[] = { + &dev_attr_power_fail.attr, + &dev_attr_power_good.attr, + &dev_attr_power_enable.attr, + &dev_attr_program.attr, + NULL, +}; + +static const struct attribute_group fpga_attr_group = { + .attrs = fpga_attributes, +}; + +/* + * OpenFirmware Device Subsystem + */ + +#define SYS_REG_VERSION 0x00 +#define SYS_REG_GEOGRAPHIC 0x10 + +static bool dma_filter(struct dma_chan *chan, void *data) +{ + /* + * DMA Channel #0 is the only acceptable device + * + * This probably won't survive an unload/load cycle of the Freescale + * DMAEngine driver, but that won't be a problem + */ + return chan->chan_id == 0 && chan->device->dev_id == 0; +} + +static int fpga_of_remove(struct platform_device *op) +{ + struct fpga_dev *priv = platform_get_drvdata(op); + struct device *this_device = priv->miscdev.this_device; + + sysfs_remove_group(&this_device->kobj, &fpga_attr_group); + misc_deregister(&priv->miscdev); + + free_irq(priv->irq, priv); + irq_dispose_mapping(priv->irq); + + /* make sure the power supplies are off */ + fpga_disable_power_supplies(priv); + + /* unmap registers */ + iounmap(priv->immr); + iounmap(priv->regs); + + dma_release_channel(priv->chan); + + /* drop our reference to the private data structure */ + kref_put(&priv->ref, fpga_dev_remove); + return 0; +} + +/* CTL-CPLD Version Register */ +#define CTL_CPLD_VERSION 0x2000 + +static int fpga_of_probe(struct platform_device *op) +{ + struct device_node *of_node = op->dev.of_node; + struct device *this_device; + struct fpga_dev *priv; + dma_cap_mask_t mask; + u32 ver; + int ret; + + /* Allocate private data */ + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + dev_err(&op->dev, "Unable to allocate private data\n"); + ret = -ENOMEM; + goto out_return; + } + + /* Setup the miscdevice */ + priv->miscdev.minor = MISC_DYNAMIC_MINOR; + priv->miscdev.name = drv_name; + priv->miscdev.fops = &fpga_fops; + + kref_init(&priv->ref); + + platform_set_drvdata(op, priv); + priv->dev = &op->dev; + mutex_init(&priv->lock); + init_completion(&priv->completion); + + dev_set_drvdata(priv->dev, priv); + dma_cap_zero(mask); + dma_cap_set(DMA_MEMCPY, mask); + dma_cap_set(DMA_SLAVE, mask); + dma_cap_set(DMA_SG, mask); + + /* Get control of DMA channel #0 */ + priv->chan = dma_request_channel(mask, dma_filter, NULL); + if (!priv->chan) { + dev_err(&op->dev, "Unable to acquire DMA channel #0\n"); + ret = -ENODEV; + goto out_free_priv; + } + + /* Remap the registers for use */ + priv->regs = of_iomap(of_node, 0); + if (!priv->regs) { + dev_err(&op->dev, "Unable to ioremap 
registers\n"); + ret = -ENOMEM; + goto out_dma_release_channel; + } + + /* Remap the IMMR for use */ + priv->immr = ioremap(get_immrbase(), 0x100000); + if (!priv->immr) { + dev_err(&op->dev, "Unable to ioremap IMMR\n"); + ret = -ENOMEM; + goto out_unmap_regs; + } + + /* + * Check that external DMA is configured + * + * U-Boot does this for us, but we should check it and bail out if + * there is a problem. Failing to have this register setup correctly + * will cause the DMA controller to transfer a single cacheline + * worth of data, then wedge itself. + */ + if ((ioread32be(priv->immr + 0x114) & 0xE00) != 0xE00) { + dev_err(&op->dev, "External DMA control not configured\n"); + ret = -ENODEV; + goto out_unmap_immr; + } + + /* + * Check the CTL-CPLD version + * + * This driver uses the CTL-CPLD DATA-FPGA power sequencer, and we + * don't want to run on any version of the CTL-CPLD that does not use + * a compatible register layout. + * + * v2: changed register layout, added power sequencer + * v3: added glitch filter on the i2c overcurrent/overtemp outputs + */ + ver = ioread8(priv->regs + CTL_CPLD_VERSION); + if (ver != 0x02 && ver != 0x03) { + dev_err(&op->dev, "CTL-CPLD is not version 0x02 or 0x03!\n"); + ret = -ENODEV; + goto out_unmap_immr; + } + + /* Set the exact size that the firmware image should be */ + ver = ioread32be(priv->regs + SYS_REG_VERSION); + priv->fw_size = (ver & (1 << 18)) ? FW_SIZE_EP2S130 : FW_SIZE_EP2S90; + + /* Find the correct IRQ number */ + priv->irq = irq_of_parse_and_map(of_node, 0); + if (priv->irq == NO_IRQ) { + dev_err(&op->dev, "Unable to find IRQ line\n"); + ret = -ENODEV; + goto out_unmap_immr; + } + + /* Request the IRQ */ + ret = request_irq(priv->irq, fpga_irq, IRQF_SHARED, drv_name, priv); + if (ret) { + dev_err(&op->dev, "Unable to request IRQ %d\n", priv->irq); + ret = -ENODEV; + goto out_irq_dispose_mapping; + } + + /* Reset and stop the FPGA's, just in case */ + fpga_do_stop(priv); + + /* Register the miscdevice */ + ret = misc_register(&priv->miscdev); + if (ret) { + dev_err(&op->dev, "Unable to register miscdevice\n"); + goto out_free_irq; + } + + /* Create the sysfs files */ + this_device = priv->miscdev.this_device; + dev_set_drvdata(this_device, priv); + ret = sysfs_create_group(&this_device->kobj, &fpga_attr_group); + if (ret) { + dev_err(&op->dev, "Unable to create sysfs files\n"); + goto out_misc_deregister; + } + + dev_info(priv->dev, "CARMA FPGA Programmer: %s rev%s with %s FPGAs\n", + (ver & (1 << 17)) ? "Correlator" : "Digitizer", + (ver & (1 << 16)) ? "B" : "A", + (ver & (1 << 18)) ? 
"EP2S130" : "EP2S90"); + + return 0; + +out_misc_deregister: + misc_deregister(&priv->miscdev); +out_free_irq: + free_irq(priv->irq, priv); +out_irq_dispose_mapping: + irq_dispose_mapping(priv->irq); +out_unmap_immr: + iounmap(priv->immr); +out_unmap_regs: + iounmap(priv->regs); +out_dma_release_channel: + dma_release_channel(priv->chan); +out_free_priv: + kref_put(&priv->ref, fpga_dev_remove); +out_return: + return ret; +} + +static const struct of_device_id fpga_of_match[] = { + { .compatible = "carma,fpga-programmer", }, + {}, +}; + +static struct platform_driver fpga_of_driver = { + .probe = fpga_of_probe, + .remove = fpga_of_remove, + .driver = { + .name = drv_name, + .of_match_table = fpga_of_match, + }, +}; + +/* + * Module Init / Exit + */ + +static int __init fpga_init(void) +{ + led_trigger_register_simple("fpga", &ledtrig_fpga); + return platform_driver_register(&fpga_of_driver); +} + +static void __exit fpga_exit(void) +{ + platform_driver_unregister(&fpga_of_driver); + led_trigger_unregister_simple(ledtrig_fpga); +} + +MODULE_AUTHOR("Ira W. Snyder <iws@ovro.caltech.edu>"); +MODULE_DESCRIPTION("CARMA Board DATA-FPGA Programmer"); +MODULE_LICENSE("GPL"); + +module_init(fpga_init); +module_exit(fpga_exit); diff --git a/kernel/drivers/misc/carma/carma-fpga.c b/kernel/drivers/misc/carma/carma-fpga.c new file mode 100644 index 000000000..5aba3fd78 --- /dev/null +++ b/kernel/drivers/misc/carma/carma-fpga.c @@ -0,0 +1,1507 @@ +/* + * CARMA DATA-FPGA Access Driver + * + * Copyright (c) 2009-2011 Ira W. Snyder <iws@ovro.caltech.edu> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +/* + * FPGA Memory Dump Format + * + * FPGA #0 control registers (32 x 32-bit words) + * FPGA #1 control registers (32 x 32-bit words) + * FPGA #2 control registers (32 x 32-bit words) + * FPGA #3 control registers (32 x 32-bit words) + * SYSFPGA control registers (32 x 32-bit words) + * FPGA #0 correlation array (NUM_CORL0 correlation blocks) + * FPGA #1 correlation array (NUM_CORL1 correlation blocks) + * FPGA #2 correlation array (NUM_CORL2 correlation blocks) + * FPGA #3 correlation array (NUM_CORL3 correlation blocks) + * + * Each correlation array consists of: + * + * Correlation Data (2 x NUM_LAGSn x 32-bit words) + * Pipeline Metadata (2 x NUM_METAn x 32-bit words) + * Quantization Counters (2 x NUM_QCNTn x 32-bit words) + * + * The NUM_CORLn, NUM_LAGSn, NUM_METAn, and NUM_QCNTn values come from + * the FPGA configuration registers. They do not change once the FPGA's + * have been programmed, they only change on re-programming. + */ + +/* + * Basic Description: + * + * This driver is used to capture correlation spectra off of the four data + * processing FPGAs. The FPGAs are often reprogrammed at runtime, therefore + * this driver supports dynamic enable/disable of capture while the device + * remains open. + * + * The nominal capture rate is 64Hz (every 15.625ms). To facilitate this fast + * capture rate, all buffers are pre-allocated to avoid any potentially long + * running memory allocations while capturing. + * + * There are two lists and one pointer which are used to keep track of the + * different states of data buffers. + * + * 1) free list + * This list holds all empty data buffers which are ready to receive data. 
+ * + * 2) inflight pointer + * This pointer holds the currently inflight data buffer. This buffer is having + * data copied into it by the DMA engine. + * + * 3) used list + * This list holds data buffers which have been filled, and are waiting to be + * read by userspace. + * + * All buffers start life on the free list, then move successively to the + * inflight pointer, and then to the used list. After they have been read by + * userspace, they are moved back to the free list. The cycle repeats as long + * as necessary. + * + * It should be noted that all buffers are mapped and ready for DMA when they + * are on any of the three lists. They are only unmapped when they are in the + * process of being read by userspace. + */ + +/* + * Notes on the IRQ masking scheme: + * + * The IRQ masking scheme here is different than most other hardware. The only + * way for the DATA-FPGAs to detect if the kernel has taken too long to copy + * the data is if the status registers are not cleared before the next + * correlation data dump is ready. + * + * The interrupt line is connected to the status registers, such that when they + * are cleared, the interrupt is de-asserted. Therein lies our problem. We need + * to schedule a long-running DMA operation and return from the interrupt + * handler quickly, but we cannot clear the status registers. + * + * To handle this, the system controller FPGA has the capability to connect the + * interrupt line to a user-controlled GPIO pin. This pin is driven high + * (unasserted) and left that way. To mask the interrupt, we change the + * interrupt source to the GPIO pin. Tada, we hid the interrupt. :) + */ + +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/of_platform.h> +#include <linux/dma-mapping.h> +#include <linux/miscdevice.h> +#include <linux/interrupt.h> +#include <linux/dmaengine.h> +#include <linux/seq_file.h> +#include <linux/highmem.h> +#include <linux/debugfs.h> +#include <linux/vmalloc.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/poll.h> +#include <linux/slab.h> +#include <linux/kref.h> +#include <linux/io.h> + +/* system controller registers */ +#define SYS_IRQ_SOURCE_CTL 0x24 +#define SYS_IRQ_OUTPUT_EN 0x28 +#define SYS_IRQ_OUTPUT_DATA 0x2C +#define SYS_IRQ_INPUT_DATA 0x30 +#define SYS_FPGA_CONFIG_STATUS 0x44 + +/* GPIO IRQ line assignment */ +#define IRQ_CORL_DONE 0x10 + +/* FPGA registers */ +#define MMAP_REG_VERSION 0x00 +#define MMAP_REG_CORL_CONF1 0x08 +#define MMAP_REG_CORL_CONF2 0x0C +#define MMAP_REG_STATUS 0x48 + +#define SYS_FPGA_BLOCK 0xF0000000 + +#define DATA_FPGA_START 0x400000 +#define DATA_FPGA_SIZE 0x80000 + +static const char drv_name[] = "carma-fpga"; + +#define NUM_FPGA 4 + +#define MIN_DATA_BUFS 8 +#define MAX_DATA_BUFS 64 + +struct fpga_info { + unsigned int num_lag_ram; + unsigned int blk_size; +}; + +struct data_buf { + struct list_head entry; + void *vaddr; + struct scatterlist *sglist; + int sglen; + int nr_pages; + size_t size; +}; + +struct fpga_device { + /* character device */ + struct miscdevice miscdev; + struct device *dev; + struct mutex mutex; + + /* reference count */ + struct kref ref; + + /* FPGA registers and information */ + struct fpga_info info[NUM_FPGA]; + void __iomem *regs; + int irq; + + /* FPGA Physical Address/Size Information */ + resource_size_t phys_addr; + size_t phys_size; + + /* DMA structures */ + struct sg_table corl_table; + unsigned int corl_nents; + struct dma_chan *chan; + + /* Protection for all members below */ + spinlock_t lock; + + 
/* Device enable/disable flag */ + bool enabled; + + /* Correlation data buffers */ + wait_queue_head_t wait; + struct list_head free; + struct list_head used; + struct data_buf *inflight; + + /* Information about data buffers */ + unsigned int num_dropped; + unsigned int num_buffers; + size_t bufsize; + struct dentry *dbg_entry; +}; + +struct fpga_reader { + struct fpga_device *priv; + struct data_buf *buf; + off_t buf_start; +}; + +static void fpga_device_release(struct kref *ref) +{ + struct fpga_device *priv = container_of(ref, struct fpga_device, ref); + + /* the last reader has exited, cleanup the last bits */ + mutex_destroy(&priv->mutex); + kfree(priv); +} + +/* + * Data Buffer Allocation Helpers + */ + +static int carma_dma_init(struct data_buf *buf, int nr_pages) +{ + struct page *pg; + int i; + + buf->vaddr = vmalloc_32(nr_pages << PAGE_SHIFT); + if (NULL == buf->vaddr) { + pr_debug("vmalloc_32(%d pages) failed\n", nr_pages); + return -ENOMEM; + } + + pr_debug("vmalloc is at addr 0x%08lx, size=%d\n", + (unsigned long)buf->vaddr, + nr_pages << PAGE_SHIFT); + + memset(buf->vaddr, 0, nr_pages << PAGE_SHIFT); + buf->nr_pages = nr_pages; + + buf->sglist = vzalloc(buf->nr_pages * sizeof(*buf->sglist)); + if (NULL == buf->sglist) + goto vzalloc_err; + + sg_init_table(buf->sglist, buf->nr_pages); + for (i = 0; i < buf->nr_pages; i++) { + pg = vmalloc_to_page(buf->vaddr + i * PAGE_SIZE); + if (NULL == pg) + goto vmalloc_to_page_err; + sg_set_page(&buf->sglist[i], pg, PAGE_SIZE, 0); + } + return 0; + +vmalloc_to_page_err: + vfree(buf->sglist); + buf->sglist = NULL; +vzalloc_err: + vfree(buf->vaddr); + buf->vaddr = NULL; + return -ENOMEM; +} + +static int carma_dma_map(struct device *dev, struct data_buf *buf) +{ + buf->sglen = dma_map_sg(dev, buf->sglist, + buf->nr_pages, DMA_FROM_DEVICE); + + if (0 == buf->sglen) { + pr_warn("%s: dma_map_sg failed\n", __func__); + return -ENOMEM; + } + return 0; +} + +static int carma_dma_unmap(struct device *dev, struct data_buf *buf) +{ + if (!buf->sglen) + return 0; + + dma_unmap_sg(dev, buf->sglist, buf->sglen, DMA_FROM_DEVICE); + buf->sglen = 0; + return 0; +} + +/** + * data_free_buffer() - free a single data buffer and all allocated memory + * @buf: the buffer to free + * + * This will free all of the pages allocated to the given data buffer, and + * then free the structure itself + */ +static void data_free_buffer(struct data_buf *buf) +{ + /* It is ok to free a NULL buffer */ + if (!buf) + return; + + /* free all memory */ + vfree(buf->sglist); + vfree(buf->vaddr); + kfree(buf); +} + +/** + * data_alloc_buffer() - allocate and fill a data buffer with pages + * @bytes: the number of bytes required + * + * This allocates all space needed for a data buffer. It must be mapped before + * use in a DMA transaction using carma_dma_map(). 
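+ * The backing store comes from vmalloc_32(), so the memory is only
+ * virtually contiguous; carma_dma_init() builds a per-page scatterlist
+ * over it for use by the DMA engine.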
+ * + * Returns NULL on failure + */ +static struct data_buf *data_alloc_buffer(const size_t bytes) +{ + unsigned int nr_pages; + struct data_buf *buf; + int ret; + + /* calculate the number of pages necessary */ + nr_pages = DIV_ROUND_UP(bytes, PAGE_SIZE); + + /* allocate the buffer structure */ + buf = kzalloc(sizeof(*buf), GFP_KERNEL); + if (!buf) + goto out_return; + + /* initialize internal fields */ + INIT_LIST_HEAD(&buf->entry); + buf->size = bytes; + + /* allocate the buffer */ + ret = carma_dma_init(buf, nr_pages); + if (ret) + goto out_free_buf; + + return buf; + +out_free_buf: + kfree(buf); +out_return: + return NULL; +} + +/** + * data_free_buffers() - free all allocated buffers + * @priv: the driver's private data structure + * + * Free all buffers allocated by the driver (except those currently in the + * process of being read by userspace). + * + * LOCKING: must hold dev->mutex + * CONTEXT: user + */ +static void data_free_buffers(struct fpga_device *priv) +{ + struct data_buf *buf, *tmp; + + /* the device should be stopped, no DMA in progress */ + BUG_ON(priv->inflight != NULL); + + list_for_each_entry_safe(buf, tmp, &priv->free, entry) { + list_del_init(&buf->entry); + carma_dma_unmap(priv->dev, buf); + data_free_buffer(buf); + } + + list_for_each_entry_safe(buf, tmp, &priv->used, entry) { + list_del_init(&buf->entry); + carma_dma_unmap(priv->dev, buf); + data_free_buffer(buf); + } + + priv->num_buffers = 0; + priv->bufsize = 0; +} + +/** + * data_alloc_buffers() - allocate 1 seconds worth of data buffers + * @priv: the driver's private data structure + * + * Allocate enough buffers for a whole second worth of data + * + * This routine will attempt to degrade nicely by succeeding even if a full + * second worth of data buffers could not be allocated, as long as a minimum + * number were allocated. In this case, it will print a message to the kernel + * log. + * + * The device must not be modifying any lists when this is called. 
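+ * (MIN_DATA_BUFS and MAX_DATA_BUFS bound the degraded and the ideal
+ * allocation, respectively)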
+ *
+ * CONTEXT: user
+ * LOCKING: must hold dev->mutex
+ *
+ * Returns 0 on success, -ERRNO otherwise
+ */
+static int data_alloc_buffers(struct fpga_device *priv)
+{
+	struct data_buf *buf;
+	int i, ret;
+
+	for (i = 0; i < MAX_DATA_BUFS; i++) {
+
+		/* allocate a buffer */
+		buf = data_alloc_buffer(priv->bufsize);
+		if (!buf)
+			break;
+
+		/* map it for DMA */
+		ret = carma_dma_map(priv->dev, buf);
+		if (ret) {
+			data_free_buffer(buf);
+			break;
+		}
+
+		/* add it to the list of free buffers */
+		list_add_tail(&buf->entry, &priv->free);
+		priv->num_buffers++;
+	}
+
+	/* Make sure we allocated the minimum required number of buffers */
+	if (priv->num_buffers < MIN_DATA_BUFS) {
+		dev_err(priv->dev, "Unable to allocate enough data buffers\n");
+		data_free_buffers(priv);
+		return -ENOMEM;
+	}
+
+	/* Warn if we are running in a degraded state, but do not fail */
+	if (priv->num_buffers < MAX_DATA_BUFS) {
+		dev_warn(priv->dev,
+			 "Unable to allocate %d buffers, using %d buffers instead\n",
+			 MAX_DATA_BUFS, i);
+	}
+
+	return 0;
+}
+
+/*
+ * DMA Operations Helpers
+ */
+
+/**
+ * fpga_start_addr() - get the physical address of a DATA-FPGA
+ * @priv: the driver's private data structure
+ * @fpga: the DATA-FPGA number (zero based)
+ */
+static dma_addr_t fpga_start_addr(struct fpga_device *priv, unsigned int fpga)
+{
+	return priv->phys_addr + 0x400000 + (0x80000 * fpga);
+}
+
+/**
+ * fpga_block_addr() - get the physical address of a correlation data block
+ * @priv: the driver's private data structure
+ * @fpga: the DATA-FPGA number (zero based)
+ * @blknum: the correlation block number (zero based)
+ */
+static dma_addr_t fpga_block_addr(struct fpga_device *priv, unsigned int fpga,
+				  unsigned int blknum)
+{
+	return fpga_start_addr(priv, fpga) + (0x10000 * (1 + blknum));
+}
+
+#define REG_BLOCK_SIZE	(32 * 4)
+
+/**
+ * data_setup_corl_table() - create the scatterlist for correlation dumps
+ * @priv: the driver's private data structure
+ *
+ * Create the scatterlist for transferring a correlation dump from the
+ * DATA FPGAs. This structure will be reused for each buffer that needs
+ * to be filled with correlation data.
+ *
+ * Returns 0 on success, -ERRNO otherwise
+ */
+static int data_setup_corl_table(struct fpga_device *priv)
+{
+	struct sg_table *table = &priv->corl_table;
+	struct scatterlist *sg;
+	struct fpga_info *info;
+	int i, j, ret;
+
+	/* Calculate the number of entries needed */
+	priv->corl_nents = (1 + NUM_FPGA) * REG_BLOCK_SIZE;
+	for (i = 0; i < NUM_FPGA; i++)
+		priv->corl_nents += priv->info[i].num_lag_ram;
+
+	/* Allocate the scatterlist table */
+	ret = sg_alloc_table(table, priv->corl_nents, GFP_KERNEL);
+	if (ret) {
+		dev_err(priv->dev, "unable to allocate DMA table\n");
+		return ret;
+	}
+
+	/* Add the DATA FPGA registers to the scatterlist */
+	sg = table->sgl;
+	for (i = 0; i < NUM_FPGA; i++) {
+		sg_dma_address(sg) = fpga_start_addr(priv, i);
+		sg_dma_len(sg) = REG_BLOCK_SIZE;
+		sg = sg_next(sg);
+	}
+
+	/* Add the SYS-FPGA registers to the scatterlist */
+	sg_dma_address(sg) = SYS_FPGA_BLOCK;
+	sg_dma_len(sg) = REG_BLOCK_SIZE;
+	sg = sg_next(sg);
+
+	/* Add the FPGA correlation data blocks to the scatterlist */
+	for (i = 0; i < NUM_FPGA; i++) {
+		info = &priv->info[i];
+		for (j = 0; j < info->num_lag_ram; j++) {
+			sg_dma_address(sg) = fpga_block_addr(priv, i, j);
+			sg_dma_len(sg) = info->blk_size;
+			sg = sg_next(sg);
+		}
+	}
+
+	/*
+	 * All physical addresses and lengths are present in the structure
+	 * now.
It can be reused for every FPGA DATA interrupt + */ + return 0; +} + +/* + * FPGA Register Access Helpers + */ + +static void fpga_write_reg(struct fpga_device *priv, unsigned int fpga, + unsigned int reg, u32 val) +{ + const int fpga_start = DATA_FPGA_START + (fpga * DATA_FPGA_SIZE); + iowrite32be(val, priv->regs + fpga_start + reg); +} + +static u32 fpga_read_reg(struct fpga_device *priv, unsigned int fpga, + unsigned int reg) +{ + const int fpga_start = DATA_FPGA_START + (fpga * DATA_FPGA_SIZE); + return ioread32be(priv->regs + fpga_start + reg); +} + +/** + * data_calculate_bufsize() - calculate the data buffer size required + * @priv: the driver's private data structure + * + * Calculate the total buffer size needed to hold a single block + * of correlation data + * + * CONTEXT: user + * + * Returns 0 on success, -ERRNO otherwise + */ +static int data_calculate_bufsize(struct fpga_device *priv) +{ + u32 num_corl, num_lags, num_meta, num_qcnt, num_pack; + u32 conf1, conf2, version; + u32 num_lag_ram, blk_size; + int i; + + /* Each buffer starts with the 5 FPGA register areas */ + priv->bufsize = (1 + NUM_FPGA) * REG_BLOCK_SIZE; + + /* Read and store the configuration data for each FPGA */ + for (i = 0; i < NUM_FPGA; i++) { + version = fpga_read_reg(priv, i, MMAP_REG_VERSION); + conf1 = fpga_read_reg(priv, i, MMAP_REG_CORL_CONF1); + conf2 = fpga_read_reg(priv, i, MMAP_REG_CORL_CONF2); + + /* minor version 2 and later */ + if ((version & 0x000000FF) >= 2) { + num_corl = (conf1 & 0x000000F0) >> 4; + num_pack = (conf1 & 0x00000F00) >> 8; + num_lags = (conf1 & 0x00FFF000) >> 12; + num_meta = (conf1 & 0x7F000000) >> 24; + num_qcnt = (conf2 & 0x00000FFF) >> 0; + } else { + num_corl = (conf1 & 0x000000F0) >> 4; + num_pack = 1; /* implied */ + num_lags = (conf1 & 0x000FFF00) >> 8; + num_meta = (conf1 & 0x7FF00000) >> 20; + num_qcnt = (conf2 & 0x00000FFF) >> 0; + } + + num_lag_ram = (num_corl + num_pack - 1) / num_pack; + blk_size = ((num_pack * num_lags) + num_meta + num_qcnt) * 8; + + priv->info[i].num_lag_ram = num_lag_ram; + priv->info[i].blk_size = blk_size; + priv->bufsize += num_lag_ram * blk_size; + + dev_dbg(priv->dev, "FPGA %d NUM_CORL: %d\n", i, num_corl); + dev_dbg(priv->dev, "FPGA %d NUM_PACK: %d\n", i, num_pack); + dev_dbg(priv->dev, "FPGA %d NUM_LAGS: %d\n", i, num_lags); + dev_dbg(priv->dev, "FPGA %d NUM_META: %d\n", i, num_meta); + dev_dbg(priv->dev, "FPGA %d NUM_QCNT: %d\n", i, num_qcnt); + dev_dbg(priv->dev, "FPGA %d BLK_SIZE: %d\n", i, blk_size); + } + + dev_dbg(priv->dev, "TOTAL BUFFER SIZE: %zu bytes\n", priv->bufsize); + return 0; +} + +/* + * Interrupt Handling + */ + +/** + * data_disable_interrupts() - stop the device from generating interrupts + * @priv: the driver's private data structure + * + * Hide interrupts by switching to GPIO interrupt source + * + * LOCKING: must hold dev->lock + */ +static void data_disable_interrupts(struct fpga_device *priv) +{ + /* hide the interrupt by switching the IRQ driver to GPIO */ + iowrite32be(0x2F, priv->regs + SYS_IRQ_SOURCE_CTL); +} + +/** + * data_enable_interrupts() - allow the device to generate interrupts + * @priv: the driver's private data structure + * + * Unhide interrupts by switching to the FPGA interrupt source. At the + * same time, clear the DATA-FPGA status registers. 
+ * + * LOCKING: must hold dev->lock + */ +static void data_enable_interrupts(struct fpga_device *priv) +{ + /* clear the actual FPGA corl_done interrupt */ + fpga_write_reg(priv, 0, MMAP_REG_STATUS, 0x0); + fpga_write_reg(priv, 1, MMAP_REG_STATUS, 0x0); + fpga_write_reg(priv, 2, MMAP_REG_STATUS, 0x0); + fpga_write_reg(priv, 3, MMAP_REG_STATUS, 0x0); + + /* flush the writes */ + fpga_read_reg(priv, 0, MMAP_REG_STATUS); + fpga_read_reg(priv, 1, MMAP_REG_STATUS); + fpga_read_reg(priv, 2, MMAP_REG_STATUS); + fpga_read_reg(priv, 3, MMAP_REG_STATUS); + + /* switch back to the external interrupt source */ + iowrite32be(0x3F, priv->regs + SYS_IRQ_SOURCE_CTL); +} + +/** + * data_dma_cb() - DMAEngine callback for DMA completion + * @data: the driver's private data structure + * + * Complete a DMA transfer from the DATA-FPGAs + * + * This is called via the DMA callback mechanism, and will handle moving the + * completed DMA transaction to the used list, and then wake any processes + * waiting for new data + * + * CONTEXT: any, softirq expected + */ +static void data_dma_cb(void *data) +{ + struct fpga_device *priv = data; + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + /* If there is no inflight buffer, we've got a bug */ + BUG_ON(priv->inflight == NULL); + + /* Move the inflight buffer onto the used list */ + list_move_tail(&priv->inflight->entry, &priv->used); + priv->inflight = NULL; + + /* + * If data dumping is still enabled, then clear the FPGA + * status registers and re-enable FPGA interrupts + */ + if (priv->enabled) + data_enable_interrupts(priv); + + spin_unlock_irqrestore(&priv->lock, flags); + + /* + * We've changed both the inflight and used lists, so we need + * to wake up any processes that are blocking for those events + */ + wake_up(&priv->wait); +} + +/** + * data_submit_dma() - prepare and submit the required DMA to fill a buffer + * @priv: the driver's private data structure + * @buf: the data buffer + * + * Prepare and submit the necessary DMA transactions to fill a correlation + * data buffer. + * + * LOCKING: must hold dev->lock + * CONTEXT: hardirq only + * + * Returns 0 on success, -ERRNO otherwise + */ +static int data_submit_dma(struct fpga_device *priv, struct data_buf *buf) +{ + struct scatterlist *dst_sg, *src_sg; + unsigned int dst_nents, src_nents; + struct dma_chan *chan = priv->chan; + struct dma_async_tx_descriptor *tx; + dma_cookie_t cookie; + dma_addr_t dst, src; + unsigned long dma_flags = 0; + + dst_sg = buf->sglist; + dst_nents = buf->sglen; + + src_sg = priv->corl_table.sgl; + src_nents = priv->corl_nents; + + /* + * All buffers passed to this function should be ready and mapped + * for DMA already.
Therefore, we don't need to do anything except + * submit it to the Freescale DMA Engine for processing + */ + + /* setup the scatterlist to scatterlist transfer */ + tx = chan->device->device_prep_dma_sg(chan, + dst_sg, dst_nents, + src_sg, src_nents, + 0); + if (!tx) { + dev_err(priv->dev, "unable to prep scatterlist DMA\n"); + return -ENOMEM; + } + + /* submit the transaction to the DMA controller */ + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + dev_err(priv->dev, "unable to submit scatterlist DMA\n"); + return -ENOMEM; + } + + /* Prepare the re-read of the SYS-FPGA block */ + dst = sg_dma_address(dst_sg) + (NUM_FPGA * REG_BLOCK_SIZE); + src = SYS_FPGA_BLOCK; + tx = chan->device->device_prep_dma_memcpy(chan, dst, src, + REG_BLOCK_SIZE, + dma_flags); + if (!tx) { + dev_err(priv->dev, "unable to prep SYS-FPGA DMA\n"); + return -ENOMEM; + } + + /* Setup the callback */ + tx->callback = data_dma_cb; + tx->callback_param = priv; + + /* submit the transaction to the DMA controller */ + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + dev_err(priv->dev, "unable to submit SYS-FPGA DMA\n"); + return -ENOMEM; + } + + return 0; +} + +#define CORL_DONE 0x1 +#define CORL_ERR 0x2 + +static irqreturn_t data_irq(int irq, void *dev_id) +{ + struct fpga_device *priv = dev_id; + bool submitted = false; + struct data_buf *buf; + u32 status; + int i; + + /* detect spurious interrupts via FPGA status */ + for (i = 0; i < 4; i++) { + status = fpga_read_reg(priv, i, MMAP_REG_STATUS); + if (!(status & (CORL_DONE | CORL_ERR))) { + dev_err(priv->dev, "spurious irq detected (FPGA)\n"); + return IRQ_NONE; + } + } + + /* detect spurious interrupts via raw IRQ pin readback */ + status = ioread32be(priv->regs + SYS_IRQ_INPUT_DATA); + if (status & IRQ_CORL_DONE) { + dev_err(priv->dev, "spurious irq detected (IRQ)\n"); + return IRQ_NONE; + } + + spin_lock(&priv->lock); + + /* + * This is an error case that should never happen. + * + * If this driver has a bug and manages to re-enable interrupts while + * a DMA is in progress, then we will hit this statement and should + * start paying attention immediately. + */ + BUG_ON(priv->inflight != NULL); + + /* hide the interrupt by switching the IRQ driver to GPIO */ + data_disable_interrupts(priv); + + /* If there are no free buffers, drop this data */ + if (list_empty(&priv->free)) { + priv->num_dropped++; + goto out; + } + + buf = list_first_entry(&priv->free, struct data_buf, entry); + list_del_init(&buf->entry); + BUG_ON(buf->size != priv->bufsize); + + /* Submit a DMA transfer to get the correlation data */ + if (data_submit_dma(priv, buf)) { + dev_err(priv->dev, "Unable to setup DMA transfer\n"); + list_move_tail(&buf->entry, &priv->free); + goto out; + } + + /* Save the buffer for the DMA callback */ + priv->inflight = buf; + submitted = true; + + /* Start the DMA Engine */ + dma_async_issue_pending(priv->chan); + +out: + /* If no DMA was submitted, re-enable interrupts */ + if (!submitted) + data_enable_interrupts(priv); + + spin_unlock(&priv->lock); + return IRQ_HANDLED; +} + +/* + * Realtime Device Enable Helpers + */ + +/** + * data_device_enable() - enable the device for buffered dumping + * @priv: the driver's private data structure + * + * Enable the device for buffered dumping. Allocates buffers and hooks up + * the interrupt handler. When this finishes, data will come pouring in. 
+ * + * LOCKING: must hold dev->mutex + * CONTEXT: user context only + * + * Returns 0 on success, -ERRNO otherwise + */ +static int data_device_enable(struct fpga_device *priv) +{ + bool enabled; + u32 val; + int ret; + + /* multiple enables are safe: they do nothing */ + spin_lock_irq(&priv->lock); + enabled = priv->enabled; + spin_unlock_irq(&priv->lock); + if (enabled) + return 0; + + /* check that the FPGAs are programmed */ + val = ioread32be(priv->regs + SYS_FPGA_CONFIG_STATUS); + if (!(val & (1 << 18))) { + dev_err(priv->dev, "DATA-FPGAs are not enabled\n"); + return -ENODATA; + } + + /* read the FPGAs to calculate the buffer size */ + ret = data_calculate_bufsize(priv); + if (ret) { + dev_err(priv->dev, "unable to calculate buffer size\n"); + goto out_error; + } + + /* allocate the correlation data buffers */ + ret = data_alloc_buffers(priv); + if (ret) { + dev_err(priv->dev, "unable to allocate buffers\n"); + goto out_error; + } + + /* setup the source scatterlist for dumping correlation data */ + ret = data_setup_corl_table(priv); + if (ret) { + dev_err(priv->dev, "unable to setup correlation DMA table\n"); + goto out_error; + } + + /* prevent the FPGAs from generating interrupts */ + data_disable_interrupts(priv); + + /* hookup the irq handler */ + ret = request_irq(priv->irq, data_irq, IRQF_SHARED, drv_name, priv); + if (ret) { + dev_err(priv->dev, "unable to request IRQ handler\n"); + goto out_error; + } + + /* allow the DMA callback to re-enable FPGA interrupts */ + spin_lock_irq(&priv->lock); + priv->enabled = true; + spin_unlock_irq(&priv->lock); + + /* allow the FPGAs to generate interrupts */ + data_enable_interrupts(priv); + return 0; + +out_error: + sg_free_table(&priv->corl_table); + priv->corl_nents = 0; + + data_free_buffers(priv); + return ret; +} + +/** + * data_device_disable() - disable the device for buffered dumping + * @priv: the driver's private data structure + * + * Disable the device for buffered dumping. Stops new DMA transactions from + * being generated, waits for all outstanding DMA to complete, and then frees + * all buffers. 
+ * + * LOCKING: must hold dev->mutex + * CONTEXT: user only + * + * Returns 0 on success, -ERRNO otherwise + */ +static int data_device_disable(struct fpga_device *priv) +{ + spin_lock_irq(&priv->lock); + + /* allow multiple disables */ + if (!priv->enabled) { + spin_unlock_irq(&priv->lock); + return 0; + } + + /* + * Mark the device disabled + * + * This stops DMA callbacks from re-enabling interrupts + */ + priv->enabled = false; + + /* prevent the FPGAs from generating interrupts */ + data_disable_interrupts(priv); + + /* wait until all ongoing DMA has finished */ + while (priv->inflight != NULL) { + spin_unlock_irq(&priv->lock); + wait_event(priv->wait, priv->inflight == NULL); + spin_lock_irq(&priv->lock); + } + + spin_unlock_irq(&priv->lock); + + /* unhook the irq handler */ + free_irq(priv->irq, priv); + + /* free the correlation table */ + sg_free_table(&priv->corl_table); + priv->corl_nents = 0; + + /* free all buffers: the free and used lists are not being changed */ + data_free_buffers(priv); + return 0; +} + +/* + * DEBUGFS Interface + */ +#ifdef CONFIG_DEBUG_FS + +/* + * Count the number of entries in the given list + */ +static unsigned int list_num_entries(struct list_head *list) +{ + struct list_head *entry; + unsigned int ret = 0; + + list_for_each(entry, list) + ret++; + + return ret; +} + +static int data_debug_show(struct seq_file *f, void *offset) +{ + struct fpga_device *priv = f->private; + + spin_lock_irq(&priv->lock); + + seq_printf(f, "enabled: %d\n", priv->enabled); + seq_printf(f, "bufsize: %zu\n", priv->bufsize); + seq_printf(f, "num_buffers: %d\n", priv->num_buffers); + seq_printf(f, "num_free: %d\n", list_num_entries(&priv->free)); + seq_printf(f, "inflight: %d\n", priv->inflight != NULL); + seq_printf(f, "num_used: %d\n", list_num_entries(&priv->used)); + seq_printf(f, "num_dropped: %d\n", priv->num_dropped); + + spin_unlock_irq(&priv->lock); + return 0; +} + +static int data_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, data_debug_show, inode->i_private); +} + +static const struct file_operations data_debug_fops = { + .owner = THIS_MODULE, + .open = data_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int data_debugfs_init(struct fpga_device *priv) +{ + priv->dbg_entry = debugfs_create_file(drv_name, S_IRUGO, NULL, priv, + &data_debug_fops); + return PTR_ERR_OR_ZERO(priv->dbg_entry); +} + +static void data_debugfs_exit(struct fpga_device *priv) +{ + debugfs_remove(priv->dbg_entry); +} + +#else + +static inline int data_debugfs_init(struct fpga_device *priv) +{ + return 0; +} + +static inline void data_debugfs_exit(struct fpga_device *priv) +{ +} + +#endif /* CONFIG_DEBUG_FS */ + +/* + * SYSFS Attributes + */ + +static ssize_t data_en_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct fpga_device *priv = dev_get_drvdata(dev); + int ret; + + spin_lock_irq(&priv->lock); + ret = snprintf(buf, PAGE_SIZE, "%u\n", priv->enabled); + spin_unlock_irq(&priv->lock); + + return ret; +} + +static ssize_t data_en_set(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct fpga_device *priv = dev_get_drvdata(dev); + unsigned long enable; + int ret; + + ret = kstrtoul(buf, 0, &enable); + if (ret) { + dev_err(priv->dev, "unable to parse enable input\n"); + return ret; + } + + /* protect against concurrent enable/disable */ + ret = mutex_lock_interruptible(&priv->mutex); + if (ret) + return ret; + + if (enable) + ret = 
data_device_enable(priv); + else + ret = data_device_disable(priv); + + if (ret) { + dev_err(priv->dev, "device %s failed\n", + enable ? "enable" : "disable"); + count = ret; + goto out_unlock; + } + +out_unlock: + mutex_unlock(&priv->mutex); + return count; +} + +static DEVICE_ATTR(enable, S_IWUSR | S_IRUGO, data_en_show, data_en_set); + +static struct attribute *data_sysfs_attrs[] = { + &dev_attr_enable.attr, + NULL, +}; + +static const struct attribute_group rt_sysfs_attr_group = { + .attrs = data_sysfs_attrs, +}; + +/* + * FPGA Realtime Data Character Device + */ + +static int data_open(struct inode *inode, struct file *filp) +{ + /* + * The miscdevice layer puts our struct miscdevice into the + * filp->private_data field. We use this to find our private + * data and then overwrite it with our own private structure. + */ + struct fpga_device *priv = container_of(filp->private_data, + struct fpga_device, miscdev); + struct fpga_reader *reader; + int ret; + + /* allocate private data */ + reader = kzalloc(sizeof(*reader), GFP_KERNEL); + if (!reader) + return -ENOMEM; + + reader->priv = priv; + reader->buf = NULL; + + filp->private_data = reader; + ret = nonseekable_open(inode, filp); + if (ret) { + dev_err(priv->dev, "nonseekable-open failed\n"); + kfree(reader); + return ret; + } + + /* + * success, increase the reference count of the private data structure + * so that it doesn't disappear if the device is unbound + */ + kref_get(&priv->ref); + return 0; +} + +static int data_release(struct inode *inode, struct file *filp) +{ + struct fpga_reader *reader = filp->private_data; + struct fpga_device *priv = reader->priv; + + /* free the per-reader structure */ + data_free_buffer(reader->buf); + kfree(reader); + filp->private_data = NULL; + + /* decrement our reference count to the private data */ + kref_put(&priv->ref, fpga_device_release); + return 0; +} + +static ssize_t data_read(struct file *filp, char __user *ubuf, size_t count, + loff_t *f_pos) +{ + struct fpga_reader *reader = filp->private_data; + struct fpga_device *priv = reader->priv; + struct list_head *used = &priv->used; + bool drop_buffer = false; + struct data_buf *dbuf; + size_t avail; + void *data; + int ret; + + /* check if we already have a partial buffer */ + if (reader->buf) { + dbuf = reader->buf; + goto have_buffer; + } + + spin_lock_irq(&priv->lock); + + /* Block until there is at least one buffer on the used list */ + while (list_empty(used)) { + spin_unlock_irq(&priv->lock); + + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + ret = wait_event_interruptible(priv->wait, !list_empty(used)); + if (ret) + return ret; + + spin_lock_irq(&priv->lock); + } + + /* Grab the first buffer off of the used list */ + dbuf = list_first_entry(used, struct data_buf, entry); + list_del_init(&dbuf->entry); + + spin_unlock_irq(&priv->lock); + + /* Buffers are always mapped: unmap it */ + carma_dma_unmap(priv->dev, dbuf); + + /* save the buffer for later */ + reader->buf = dbuf; + reader->buf_start = 0; + +have_buffer: + /* Get the number of bytes available */ + avail = dbuf->size - reader->buf_start; + data = dbuf->vaddr + reader->buf_start; + + /* Get the number of bytes we can transfer */ + count = min(count, avail); + + /* Copy the data to the userspace buffer */ + if (copy_to_user(ubuf, data, count)) + return -EFAULT; + + /* Update the amount of available space */ + avail -= count; + + /* + * If there is still some data available, save the buffer for the + * next userspace call to read() and return + */ + if (avail > 0) { + 
reader->buf_start += count; + reader->buf = dbuf; + return count; + } + + /* + * Get the buffer ready to be reused for DMA + * + * If it fails, we pretend that the read never happened and return + * -EFAULT to userspace. The read will be retried. + */ + ret = carma_dma_map(priv->dev, dbuf); + if (ret) { + dev_err(priv->dev, "unable to remap buffer for DMA\n"); + return -EFAULT; + } + + /* Lock against concurrent enable/disable */ + spin_lock_irq(&priv->lock); + + /* the reader is finished with this buffer */ + reader->buf = NULL; + + /* + * One of two things has happened: the device is disabled, or the + * device has been reconfigured underneath us. In either case, we + * should just throw away the buffer. + * + * Lockdep complains if this is done under the spinlock, so we + * handle it during the unlock path. + */ + if (!priv->enabled || dbuf->size != priv->bufsize) { + drop_buffer = true; + goto out_unlock; + } + + /* The buffer is safe to reuse, so add it back to the free list */ + list_add_tail(&dbuf->entry, &priv->free); + +out_unlock: + spin_unlock_irq(&priv->lock); + + if (drop_buffer) { + carma_dma_unmap(priv->dev, dbuf); + data_free_buffer(dbuf); + } + + return count; +} + +static unsigned int data_poll(struct file *filp, struct poll_table_struct *tbl) +{ + struct fpga_reader *reader = filp->private_data; + struct fpga_device *priv = reader->priv; + unsigned int mask = 0; + + poll_wait(filp, &priv->wait, tbl); + + if (!list_empty(&priv->used)) + mask |= POLLIN | POLLRDNORM; + + return mask; +} + +static int data_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct fpga_reader *reader = filp->private_data; + struct fpga_device *priv = reader->priv; + unsigned long offset, vsize, psize, addr; + + /* VMA properties */ + offset = vma->vm_pgoff << PAGE_SHIFT; + vsize = vma->vm_end - vma->vm_start; + psize = priv->phys_size - offset; + addr = (priv->phys_addr + offset) >> PAGE_SHIFT; + + /* Check against the FPGA region's physical memory size */ + if (vsize > psize) { + dev_err(priv->dev, "requested mmap mapping too large\n"); + return -EINVAL; + } + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + return io_remap_pfn_range(vma, vma->vm_start, addr, vsize, + vma->vm_page_prot); +} + +static const struct file_operations data_fops = { + .owner = THIS_MODULE, + .open = data_open, + .release = data_release, + .read = data_read, + .poll = data_poll, + .mmap = data_mmap, + .llseek = no_llseek, +}; + +/* + * OpenFirmware Device Subsystem + */ + +static bool dma_filter(struct dma_chan *chan, void *data) +{ + /* + * DMA Channel #0 is used for the FPGA Programmer, so ignore it + * + * This probably won't survive an unload/load cycle of the Freescale + * DMAEngine driver, but that won't be a problem + */ + if (chan->chan_id == 0 && chan->device->dev_id == 0) + return false; + + return true; +} + +static int data_of_probe(struct platform_device *op) +{ + struct device_node *of_node = op->dev.of_node; + struct device *this_device; + struct fpga_device *priv; + struct resource res; + dma_cap_mask_t mask; + int ret; + + /* Allocate private data */ + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + dev_err(&op->dev, "Unable to allocate device private data\n"); + ret = -ENOMEM; + goto out_return; + } + + platform_set_drvdata(op, priv); + priv->dev = &op->dev; + kref_init(&priv->ref); + mutex_init(&priv->mutex); + + dev_set_drvdata(priv->dev, priv); + spin_lock_init(&priv->lock); + INIT_LIST_HEAD(&priv->free); + INIT_LIST_HEAD(&priv->used); + 
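/* these lists and the wait queue are shared with data_irq(), data_dma_cb() and blocked readers, so set them up before the device can be enabled */ + 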
init_waitqueue_head(&priv->wait); + + /* Setup the misc device */ + priv->miscdev.minor = MISC_DYNAMIC_MINOR; + priv->miscdev.name = drv_name; + priv->miscdev.fops = &data_fops; + + /* Get the physical address of the FPGA registers */ + ret = of_address_to_resource(of_node, 0, &res); + if (ret) { + dev_err(&op->dev, "Unable to find FPGA physical address\n"); + ret = -ENODEV; + goto out_free_priv; + } + + priv->phys_addr = res.start; + priv->phys_size = resource_size(&res); + + /* ioremap the registers for use */ + priv->regs = of_iomap(of_node, 0); + if (!priv->regs) { + dev_err(&op->dev, "Unable to ioremap registers\n"); + ret = -ENOMEM; + goto out_free_priv; + } + + dma_cap_zero(mask); + dma_cap_set(DMA_MEMCPY, mask); + dma_cap_set(DMA_INTERRUPT, mask); + dma_cap_set(DMA_SLAVE, mask); + dma_cap_set(DMA_SG, mask); + + /* Request a DMA channel */ + priv->chan = dma_request_channel(mask, dma_filter, NULL); + if (!priv->chan) { + dev_err(&op->dev, "Unable to request DMA channel\n"); + ret = -ENODEV; + goto out_unmap_regs; + } + + /* Find the correct IRQ number */ + priv->irq = irq_of_parse_and_map(of_node, 0); + if (priv->irq == NO_IRQ) { + dev_err(&op->dev, "Unable to find IRQ line\n"); + ret = -ENODEV; + goto out_release_dma; + } + + /* Drive the GPIO for FPGA IRQ high (no interrupt) */ + iowrite32be(IRQ_CORL_DONE, priv->regs + SYS_IRQ_OUTPUT_DATA); + + /* Register the miscdevice */ + ret = misc_register(&priv->miscdev); + if (ret) { + dev_err(&op->dev, "Unable to register miscdevice\n"); + goto out_irq_dispose_mapping; + } + + /* Create the debugfs files */ + ret = data_debugfs_init(priv); + if (ret) { + dev_err(&op->dev, "Unable to create debugfs files\n"); + goto out_misc_deregister; + } + + /* Create the sysfs files */ + this_device = priv->miscdev.this_device; + dev_set_drvdata(this_device, priv); + ret = sysfs_create_group(&this_device->kobj, &rt_sysfs_attr_group); + if (ret) { + dev_err(&op->dev, "Unable to create sysfs files\n"); + goto out_data_debugfs_exit; + } + + dev_info(&op->dev, "CARMA FPGA Realtime Data Driver Loaded\n"); + return 0; + +out_data_debugfs_exit: + data_debugfs_exit(priv); +out_misc_deregister: + misc_deregister(&priv->miscdev); +out_irq_dispose_mapping: + irq_dispose_mapping(priv->irq); +out_release_dma: + dma_release_channel(priv->chan); +out_unmap_regs: + iounmap(priv->regs); +out_free_priv: + kref_put(&priv->ref, fpga_device_release); +out_return: + return ret; +} + +static int data_of_remove(struct platform_device *op) +{ + struct fpga_device *priv = platform_get_drvdata(op); + struct device *this_device = priv->miscdev.this_device; + + /* remove all sysfs files, now the device cannot be re-enabled */ + sysfs_remove_group(&this_device->kobj, &rt_sysfs_attr_group); + + /* remove all debugfs files */ + data_debugfs_exit(priv); + + /* disable the device from generating data */ + data_device_disable(priv); + + /* remove the character device to stop new readers from appearing */ + misc_deregister(&priv->miscdev); + + /* cleanup everything not needed by readers */ + irq_dispose_mapping(priv->irq); + dma_release_channel(priv->chan); + iounmap(priv->regs); + + /* release our reference */ + kref_put(&priv->ref, fpga_device_release); + return 0; +} + +static const struct of_device_id data_of_match[] = { + { .compatible = "carma,carma-fpga", }, + {}, +}; + +static struct platform_driver data_of_driver = { + .probe = data_of_probe, + .remove = data_of_remove, + .driver = { + .name = drv_name, + .of_match_table = data_of_match, + }, +}; + 
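Editor's aside: taken together, the sysfs "enable" attribute and the read()/poll() file operations above give userspace a simple capture loop. Below is a minimal sketch of that flow; it assumes the miscdevice registers as /dev/carma-fpga with the "enable" attribute under /sys/class/misc/carma-fpga/ (drv_name is defined outside this hunk), so both paths are illustrative rather than taken from the diff.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[65536];
	ssize_t len;
	int en, fd;

	/* start buffered dumping; this lands in data_device_enable() */
	en = open("/sys/class/misc/carma-fpga/enable", O_WRONLY);
	if (en < 0 || write(en, "1", 1) != 1) {
		perror("enable");
		return 1;
	}
	close(en);

	/* each read() returns the next slice of a correlation buffer;
	 * it blocks until data arrives (or fails with EAGAIN under
	 * O_NONBLOCK), matching the data_read() implementation above */
	fd = open("/dev/carma-fpga", O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	while ((len = read(fd, buf, sizeof(buf))) > 0)
		fprintf(stderr, "read %zd bytes of correlation data\n", len);

	close(fd);
	return 0;
}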
+module_platform_driver(data_of_driver); + +MODULE_AUTHOR("Ira W. Snyder <iws@ovro.caltech.edu>"); +MODULE_DESCRIPTION("CARMA DATA-FPGA Access Driver"); +MODULE_LICENSE("GPL"); diff --git a/kernel/drivers/misc/cb710/Kconfig b/kernel/drivers/misc/cb710/Kconfig new file mode 100644 index 000000000..22429b8b1 --- /dev/null +++ b/kernel/drivers/misc/cb710/Kconfig @@ -0,0 +1,25 @@ +config CB710_CORE + tristate "ENE CB710/720 Flash memory card reader support" + depends on PCI + help + This option enables support for PCI ENE CB710/720 Flash memory card + reader found in some laptops (e.g. some versions of HP Compaq nx9500). + + You will also have to select some flash card format drivers (MMC/SD, + MemoryStick). + + This driver can also be built as a module. If so, the module + will be called cb710. + +config CB710_DEBUG + bool "Enable driver debugging" + depends on CB710_CORE != n + default n + help + This is an option for use by developers; most people should + say N here. This adds a lot of debugging output to dmesg. + +config CB710_DEBUG_ASSUMPTIONS + bool + depends on CB710_CORE != n + default y diff --git a/kernel/drivers/misc/cb710/Makefile b/kernel/drivers/misc/cb710/Makefile new file mode 100644 index 000000000..467c8e9ca --- /dev/null +++ b/kernel/drivers/misc/cb710/Makefile @@ -0,0 +1,6 @@ +ccflags-$(CONFIG_CB710_DEBUG) := -DDEBUG + +obj-$(CONFIG_CB710_CORE) += cb710.o + +cb710-y := core.o sgbuf2.o +cb710-$(CONFIG_CB710_DEBUG) += debug.o diff --git a/kernel/drivers/misc/cb710/core.c b/kernel/drivers/misc/cb710/core.c new file mode 100644 index 000000000..fb397e7d1 --- /dev/null +++ b/kernel/drivers/misc/cb710/core.c @@ -0,0 +1,359 @@ +/* + * cb710/core.c + * + * Copyright by Michał Mirosław, 2008-2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation.
+ */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/spinlock.h> +#include <linux/idr.h> +#include <linux/cb710.h> +#include <linux/gfp.h> + +static DEFINE_IDA(cb710_ida); +static DEFINE_SPINLOCK(cb710_ida_lock); + +void cb710_pci_update_config_reg(struct pci_dev *pdev, + int reg, uint32_t mask, uint32_t xor) +{ + u32 rval; + + pci_read_config_dword(pdev, reg, &rval); + rval = (rval & mask) ^ xor; + pci_write_config_dword(pdev, reg, rval); +} +EXPORT_SYMBOL_GPL(cb710_pci_update_config_reg); + +/* Some magic writes based on Windows driver init code */ +static int cb710_pci_configure(struct pci_dev *pdev) +{ + unsigned int devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0); + struct pci_dev *pdev0; + u32 val; + + cb710_pci_update_config_reg(pdev, 0x48, + ~0x000000FF, 0x0000003F); + + pci_read_config_dword(pdev, 0x48, &val); + if (val & 0x80000000) + return 0; + + pdev0 = pci_get_slot(pdev->bus, devfn); + if (!pdev0) + return -ENODEV; + + if (pdev0->vendor == PCI_VENDOR_ID_ENE + && pdev0->device == PCI_DEVICE_ID_ENE_720) { + cb710_pci_update_config_reg(pdev0, 0x8C, + ~0x00F00000, 0x00100000); + cb710_pci_update_config_reg(pdev0, 0xB0, + ~0x08000000, 0x08000000); + } + + cb710_pci_update_config_reg(pdev0, 0x8C, + ~0x00000F00, 0x00000200); + cb710_pci_update_config_reg(pdev0, 0x90, + ~0x00060000, 0x00040000); + + pci_dev_put(pdev0); + + return 0; +} + +static irqreturn_t cb710_irq_handler(int irq, void *data) +{ + struct cb710_chip *chip = data; + struct cb710_slot *slot = &chip->slot[0]; + irqreturn_t handled = IRQ_NONE; + unsigned nr; + + spin_lock(&chip->irq_lock); /* incl. smp_rmb() */ + + for (nr = chip->slots; nr; ++slot, --nr) { + cb710_irq_handler_t handler_func = slot->irq_handler; + if (handler_func && handler_func(slot)) + handled = IRQ_HANDLED; + } + + spin_unlock(&chip->irq_lock); + + return handled; +} + +static void cb710_release_slot(struct device *dev) +{ +#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS + struct cb710_slot *slot = cb710_pdev_to_slot(to_platform_device(dev)); + struct cb710_chip *chip = cb710_slot_to_chip(slot); + + /* slot struct can be freed now */ + atomic_dec(&chip->slot_refs_count); +#endif +} + +static int cb710_register_slot(struct cb710_chip *chip, + unsigned slot_mask, unsigned io_offset, const char *name) +{ + int nr = chip->slots; + struct cb710_slot *slot = &chip->slot[nr]; + int err; + + dev_dbg(cb710_chip_dev(chip), + "register: %s.%d; slot %d; mask %d; IO offset: 0x%02X\n", + name, chip->platform_id, nr, slot_mask, io_offset); + + /* slot->irq_handler == NULL here; this needs to be + * seen before platform_device_register() */ + ++chip->slots; + smp_wmb(); + + slot->iobase = chip->iobase + io_offset; + slot->pdev.name = name; + slot->pdev.id = chip->platform_id; + slot->pdev.dev.parent = &chip->pdev->dev; + slot->pdev.dev.release = cb710_release_slot; + + err = platform_device_register(&slot->pdev); + +#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS + atomic_inc(&chip->slot_refs_count); +#endif + + if (err) { + /* device_initialize() called from platform_device_register() + * wants this on error path */ + platform_device_put(&slot->pdev); + + /* slot->irq_handler == NULL here anyway, so no lock needed */ + --chip->slots; + return err; + } + + chip->slot_mask |= slot_mask; + + return 0; +} + +static void cb710_unregister_slot(struct cb710_chip *chip, + unsigned slot_mask) +{ + int nr = chip->slots - 1; + + if (!(chip->slot_mask & slot_mask)) + return; + + platform_device_unregister(&chip->slot[nr].pdev); + + /* complementary to 
spin_unlock() in cb710_set_irq_handler() */ + smp_rmb(); + BUG_ON(chip->slot[nr].irq_handler != NULL); + + /* slot->irq_handler == NULL here, so no lock needed */ + --chip->slots; + chip->slot_mask &= ~slot_mask; +} + +void cb710_set_irq_handler(struct cb710_slot *slot, + cb710_irq_handler_t handler) +{ + struct cb710_chip *chip = cb710_slot_to_chip(slot); + unsigned long flags; + + spin_lock_irqsave(&chip->irq_lock, flags); + slot->irq_handler = handler; + spin_unlock_irqrestore(&chip->irq_lock, flags); +} +EXPORT_SYMBOL_GPL(cb710_set_irq_handler); + +#ifdef CONFIG_PM + +static int cb710_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct cb710_chip *chip = pci_get_drvdata(pdev); + + devm_free_irq(&pdev->dev, pdev->irq, chip); + pci_save_state(pdev); + pci_disable_device(pdev); + if (state.event & PM_EVENT_SLEEP) + pci_set_power_state(pdev, PCI_D3hot); + return 0; +} + +static int cb710_resume(struct pci_dev *pdev) +{ + struct cb710_chip *chip = pci_get_drvdata(pdev); + int err; + + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + err = pcim_enable_device(pdev); + if (err) + return err; + + return devm_request_irq(&pdev->dev, pdev->irq, + cb710_irq_handler, IRQF_SHARED, KBUILD_MODNAME, chip); +} + +#endif /* CONFIG_PM */ + +static int cb710_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct cb710_chip *chip; + unsigned long flags; + u32 val; + int err; + int n = 0; + + err = cb710_pci_configure(pdev); + if (err) + return err; + + /* this is actually magic... */ + pci_read_config_dword(pdev, 0x48, &val); + if (!(val & 0x80000000)) { + pci_write_config_dword(pdev, 0x48, val|0x71000000); + pci_read_config_dword(pdev, 0x48, &val); + } + + dev_dbg(&pdev->dev, "PCI config[0x48] = 0x%08X\n", val); + if (!(val & 0x70000000)) + return -ENODEV; + val = (val >> 28) & 7; + if (val & CB710_SLOT_MMC) + ++n; + if (val & CB710_SLOT_MS) + ++n; + if (val & CB710_SLOT_SM) + ++n; + + chip = devm_kzalloc(&pdev->dev, + sizeof(*chip) + n * sizeof(*chip->slot), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + err = pcim_enable_device(pdev); + if (err) + return err; + + err = pcim_iomap_regions(pdev, 0x0001, KBUILD_MODNAME); + if (err) + return err; + + spin_lock_init(&chip->irq_lock); + chip->pdev = pdev; + chip->iobase = pcim_iomap_table(pdev)[0]; + + pci_set_drvdata(pdev, chip); + + err = devm_request_irq(&pdev->dev, pdev->irq, + cb710_irq_handler, IRQF_SHARED, KBUILD_MODNAME, chip); + if (err) + return err; + + do { + if (!ida_pre_get(&cb710_ida, GFP_KERNEL)) + return -ENOMEM; + + spin_lock_irqsave(&cb710_ida_lock, flags); + err = ida_get_new(&cb710_ida, &chip->platform_id); + spin_unlock_irqrestore(&cb710_ida_lock, flags); + + if (err && err != -EAGAIN) + return err; + } while (err); + + + dev_info(&pdev->dev, "id %d, IO 0x%p, IRQ %d\n", + chip->platform_id, chip->iobase, pdev->irq); + + if (val & CB710_SLOT_MMC) { /* MMC/SD slot */ + err = cb710_register_slot(chip, + CB710_SLOT_MMC, 0x00, "cb710-mmc"); + if (err) + return err; + } + + if (val & CB710_SLOT_MS) { /* MemoryStick slot */ + err = cb710_register_slot(chip, + CB710_SLOT_MS, 0x40, "cb710-ms"); + if (err) + goto unreg_mmc; + } + + if (val & CB710_SLOT_SM) { /* SmartMedia slot */ + err = cb710_register_slot(chip, + CB710_SLOT_SM, 0x60, "cb710-sm"); + if (err) + goto unreg_ms; + } + + return 0; +unreg_ms: + cb710_unregister_slot(chip, CB710_SLOT_MS); +unreg_mmc: + cb710_unregister_slot(chip, CB710_SLOT_MMC); + +#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS + BUG_ON(atomic_read(&chip->slot_refs_count) != 0); 
+#endif + return err; +} + +static void cb710_remove_one(struct pci_dev *pdev) +{ + struct cb710_chip *chip = pci_get_drvdata(pdev); + unsigned long flags; + + cb710_unregister_slot(chip, CB710_SLOT_SM); + cb710_unregister_slot(chip, CB710_SLOT_MS); + cb710_unregister_slot(chip, CB710_SLOT_MMC); +#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS + BUG_ON(atomic_read(&chip->slot_refs_count) != 0); +#endif + + spin_lock_irqsave(&cb710_ida_lock, flags); + ida_remove(&cb710_ida, chip->platform_id); + spin_unlock_irqrestore(&cb710_ida_lock, flags); +} + +static const struct pci_device_id cb710_pci_tbl[] = { + { PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_CB710_FLASH, + PCI_ANY_ID, PCI_ANY_ID, }, + { 0, } +}; + +static struct pci_driver cb710_driver = { + .name = KBUILD_MODNAME, + .id_table = cb710_pci_tbl, + .probe = cb710_probe, + .remove = cb710_remove_one, +#ifdef CONFIG_PM + .suspend = cb710_suspend, + .resume = cb710_resume, +#endif +}; + +static int __init cb710_init_module(void) +{ + return pci_register_driver(&cb710_driver); +} + +static void __exit cb710_cleanup_module(void) +{ + pci_unregister_driver(&cb710_driver); + ida_destroy(&cb710_ida); +} + +module_init(cb710_init_module); +module_exit(cb710_cleanup_module); + +MODULE_AUTHOR("Michał Mirosław <mirq-linux@rere.qmqm.pl>"); +MODULE_DESCRIPTION("ENE CB710 memory card reader driver"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, cb710_pci_tbl); diff --git a/kernel/drivers/misc/cb710/debug.c b/kernel/drivers/misc/cb710/debug.c new file mode 100644 index 000000000..fcb3b8e30 --- /dev/null +++ b/kernel/drivers/misc/cb710/debug.c @@ -0,0 +1,118 @@ +/* + * cb710/debug.c + * + * Copyright by Michał Mirosław, 2008-2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/cb710.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#define CB710_REG_COUNT 0x80 + +static const u16 allow[CB710_REG_COUNT/16] = { + 0xFFF0, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFF0, 0xFFFF, 0xFFFF, 0xFFFF, +}; +static const char *const prefix[ARRAY_SIZE(allow)] = { + "MMC", "MMC", "MMC", "MMC", + "MS?", "MS?", "SM?", "SM?" 
+}; + +static inline int allow_reg_read(unsigned block, unsigned offset, unsigned bits) +{ + unsigned mask = (1 << bits/8) - 1; + offset *= bits/8; + return ((allow[block] >> offset) & mask) == mask; +} + +#define CB710_READ_REGS_TEMPLATE(t) \ +static void cb710_read_regs_##t(void __iomem *iobase, \ + u##t *reg, unsigned select) \ +{ \ + unsigned i, j; \ + \ + for (i = 0; i < ARRAY_SIZE(allow); ++i, reg += 16/(t/8)) { \ + if (!(select & (1 << i))) \ + continue; \ + \ + for (j = 0; j < 0x10/(t/8); ++j) { \ + if (!allow_reg_read(i, j, t)) \ + continue; \ + reg[j] = ioread##t(iobase \ + + (i << 4) + (j * (t/8))); \ + } \ + } \ +} + +static const char cb710_regf_8[] = "%02X"; +static const char cb710_regf_16[] = "%04X"; +static const char cb710_regf_32[] = "%08X"; +static const char cb710_xes[] = "xxxxxxxx"; + +#define CB710_DUMP_REGS_TEMPLATE(t) \ +static void cb710_dump_regs_##t(struct device *dev, \ + const u##t *reg, unsigned select) \ +{ \ + const char *const xp = &cb710_xes[8 - t/4]; \ + const char *const format = cb710_regf_##t; \ + \ + char msg[100], *p; \ + unsigned i, j; \ + \ + for (i = 0; i < ARRAY_SIZE(allow); ++i, reg += 16/(t/8)) { \ + if (!(select & (1 << i))) \ + continue; \ + p = msg; \ + for (j = 0; j < 0x10/(t/8); ++j) { \ + *p++ = ' '; \ + if (j == 8/(t/8)) \ + *p++ = ' '; \ + if (allow_reg_read(i, j, t)) \ + p += sprintf(p, format, reg[j]); \ + else \ + p += sprintf(p, "%s", xp); \ + } \ + dev_dbg(dev, "%s 0x%02X %s\n", prefix[i], i << 4, msg); \ + } \ +} + +#define CB710_READ_AND_DUMP_REGS_TEMPLATE(t) \ +static void cb710_read_and_dump_regs_##t(struct cb710_chip *chip, \ + unsigned select) \ +{ \ + u##t regs[CB710_REG_COUNT/sizeof(u##t)]; \ + \ + memset(&regs, 0, sizeof(regs)); \ + cb710_read_regs_##t(chip->iobase, regs, select); \ + cb710_dump_regs_##t(cb710_chip_dev(chip), regs, select); \ +} + +#define CB710_REG_ACCESS_TEMPLATES(t) \ + CB710_READ_REGS_TEMPLATE(t) \ + CB710_DUMP_REGS_TEMPLATE(t) \ + CB710_READ_AND_DUMP_REGS_TEMPLATE(t) + +CB710_REG_ACCESS_TEMPLATES(8) +CB710_REG_ACCESS_TEMPLATES(16) +CB710_REG_ACCESS_TEMPLATES(32) + +void cb710_dump_regs(struct cb710_chip *chip, unsigned select) +{ + if (!(select & CB710_DUMP_REGS_MASK)) + select = CB710_DUMP_REGS_ALL; + if (!(select & CB710_DUMP_ACCESS_MASK)) + select |= CB710_DUMP_ACCESS_8; + + if (select & CB710_DUMP_ACCESS_32) + cb710_read_and_dump_regs_32(chip, select); + if (select & CB710_DUMP_ACCESS_16) + cb710_read_and_dump_regs_16(chip, select); + if (select & CB710_DUMP_ACCESS_8) + cb710_read_and_dump_regs_8(chip, select); +} +EXPORT_SYMBOL_GPL(cb710_dump_regs); + diff --git a/kernel/drivers/misc/cb710/sgbuf2.c b/kernel/drivers/misc/cb710/sgbuf2.c new file mode 100644 index 000000000..2a40d0efd --- /dev/null +++ b/kernel/drivers/misc/cb710/sgbuf2.c @@ -0,0 +1,146 @@ +/* + * cb710/sgbuf2.c + * + * Copyright by Michał Mirosław, 2008-2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation.
+ */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/cb710.h> + +static bool sg_dwiter_next(struct sg_mapping_iter *miter) +{ + if (sg_miter_next(miter)) { + miter->consumed = 0; + return true; + } else + return false; +} + +static bool sg_dwiter_is_at_end(struct sg_mapping_iter *miter) +{ + return miter->length == miter->consumed && !sg_dwiter_next(miter); +} + +static uint32_t sg_dwiter_read_buffer(struct sg_mapping_iter *miter) +{ + size_t len, left = 4; + uint32_t data; + void *addr = &data; + + do { + len = min(miter->length - miter->consumed, left); + memcpy(addr, miter->addr + miter->consumed, len); + miter->consumed += len; + left -= len; + if (!left) + return data; + addr += len; + } while (sg_dwiter_next(miter)); + + memset(addr, 0, left); + return data; +} + +static inline bool needs_unaligned_copy(const void *ptr) +{ +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + return false; +#else + return ((ptr - NULL) & 3) != 0; +#endif +} + +static bool sg_dwiter_get_next_block(struct sg_mapping_iter *miter, uint32_t **ptr) +{ + size_t len; + + if (sg_dwiter_is_at_end(miter)) + return true; + + len = miter->length - miter->consumed; + + if (likely(len >= 4 && !needs_unaligned_copy( + miter->addr + miter->consumed))) { + *ptr = miter->addr + miter->consumed; + miter->consumed += 4; + return true; + } + + return false; +} + +/** + * cb710_sg_dwiter_read_next_block() - get next 32-bit word from sg buffer + * @miter: sg mapping iterator used for reading + * + * Description: + * Returns 32-bit word starting at byte pointed to by @miter@ + * handling any alignment issues. Bytes past the buffer's end + * are not accessed (read) but are returned as zeroes. @miter@ + * is advanced by 4 bytes or to the end of buffer whichever is + * closer. + * + * Context: + * Same requirements as in sg_miter_next(). + * + * Returns: + * 32-bit word just read. + */ +uint32_t cb710_sg_dwiter_read_next_block(struct sg_mapping_iter *miter) +{ + uint32_t *ptr = NULL; + + if (likely(sg_dwiter_get_next_block(miter, &ptr))) + return ptr ? *ptr : 0; + + return sg_dwiter_read_buffer(miter); +} +EXPORT_SYMBOL_GPL(cb710_sg_dwiter_read_next_block); + +static void sg_dwiter_write_slow(struct sg_mapping_iter *miter, uint32_t data) +{ + size_t len, left = 4; + void *addr = &data; + + do { + len = min(miter->length - miter->consumed, left); + memcpy(miter->addr, addr, len); + miter->consumed += len; + left -= len; + if (!left) + return; + addr += len; + } while (sg_dwiter_next(miter)); +} + +/** + * cb710_sg_dwiter_write_next_block() - write next 32-bit word to sg buffer + * @miter: sg mapping iterator used for writing + * + * Description: + * Writes 32-bit word starting at byte pointed to by @miter@ + * handling any alignment issues. Bytes which would be written + * past the buffer's end are silently discarded. @miter@ is + * advanced by 4 bytes or to the end of buffer whichever is closer. + * + * Context: + * Same requirements as in sg_miter_next(). 
+ */ +void cb710_sg_dwiter_write_next_block(struct sg_mapping_iter *miter, uint32_t data) +{ + uint32_t *ptr = NULL; + + if (likely(sg_dwiter_get_next_block(miter, &ptr))) { + if (ptr) + *ptr = data; + else + return; + } else + sg_dwiter_write_slow(miter, data); +} +EXPORT_SYMBOL_GPL(cb710_sg_dwiter_write_next_block); + diff --git a/kernel/drivers/misc/cs5535-mfgpt.c b/kernel/drivers/misc/cs5535-mfgpt.c new file mode 100644 index 000000000..347f08f2f --- /dev/null +++ b/kernel/drivers/misc/cs5535-mfgpt.c @@ -0,0 +1,383 @@ +/* + * Driver for the CS5535/CS5536 Multi-Function General Purpose Timers (MFGPT) + * + * Copyright (C) 2006, Advanced Micro Devices, Inc. + * Copyright (C) 2007 Andres Salomon <dilinger@debian.org> + * Copyright (C) 2009 Andres Salomon <dilinger@collabora.co.uk> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * The MFGPTs are documented in AMD Geode CS5536 Companion Device Data Book. + */ + +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/cs5535.h> +#include <linux/slab.h> + +#define DRV_NAME "cs5535-mfgpt" + +static int mfgpt_reset_timers; +module_param_named(mfgptfix, mfgpt_reset_timers, int, 0644); +MODULE_PARM_DESC(mfgptfix, "Try to reset the MFGPT timers during init; " + "required by some broken BIOSes (ie, TinyBIOS < 0.99) or kexec " + "(1 = reset the MFGPT using an undocumented bit, " + "2 = perform a soft reset by unconfiguring all timers); " + "use what works best for you."); + +struct cs5535_mfgpt_timer { + struct cs5535_mfgpt_chip *chip; + int nr; +}; + +static struct cs5535_mfgpt_chip { + DECLARE_BITMAP(avail, MFGPT_MAX_TIMERS); + resource_size_t base; + + struct platform_device *pdev; + spinlock_t lock; + int initialized; +} cs5535_mfgpt_chip; + +int cs5535_mfgpt_toggle_event(struct cs5535_mfgpt_timer *timer, int cmp, + int event, int enable) +{ + uint32_t msr, mask, value, dummy; + int shift = (cmp == MFGPT_CMP1) ? 0 : 8; + + if (!timer) { + WARN_ON(1); + return -EIO; + } + + /* + * The register maps for these are described in sections 6.17.1.x of + * the AMD Geode CS5536 Companion Device Data Book. + */ + switch (event) { + case MFGPT_EVENT_RESET: + /* + * XXX: According to the docs, we cannot reset timers above + * 6; that is, resets for 7 and 8 will be ignored. Is this + * a problem? -dilinger + */ + msr = MSR_MFGPT_NR; + mask = 1 << (timer->nr + 24); + break; + + case MFGPT_EVENT_NMI: + msr = MSR_MFGPT_NR; + mask = 1 << (timer->nr + shift); + break; + + case MFGPT_EVENT_IRQ: + msr = MSR_MFGPT_IRQ; + mask = 1 << (timer->nr + shift); + break; + + default: + return -EIO; + } + + rdmsr(msr, value, dummy); + + if (enable) + value |= mask; + else + value &= ~mask; + + wrmsr(msr, value, dummy); + return 0; +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_toggle_event); + +int cs5535_mfgpt_set_irq(struct cs5535_mfgpt_timer *timer, int cmp, int *irq, + int enable) +{ + uint32_t zsel, lpc, dummy; + int shift; + + if (!timer) { + WARN_ON(1); + return -EIO; + } + + /* + * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA + * is using the same CMP of the timer's Siamese twin, the IRQ is set to + * 2, and we mustn't use nor change it. + * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the + * IRQ of the 1st. 
This can only happen when forcing an IRQ; calling this + * with *irq==0 is safe. Currently there _are_ no 2 drivers. + */ + rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); + shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer->nr % 4) * 4; + if (((zsel >> shift) & 0xF) == 2) + return -EIO; + + /* Choose IRQ: if none supplied, keep IRQ already set or use default */ + if (!*irq) + *irq = (zsel >> shift) & 0xF; + if (!*irq) + *irq = CONFIG_CS5535_MFGPT_DEFAULT_IRQ; + + /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */ + if (*irq < 1 || *irq == 2 || *irq > 15) + return -EIO; + rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy); + if (lpc & (1 << *irq)) + return -EIO; + + /* All chosen and checked - go for it */ + if (cs5535_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) + return -EIO; + if (enable) { + zsel = (zsel & ~(0xF << shift)) | (*irq << shift); + wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); + } + + return 0; +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_set_irq); + +struct cs5535_mfgpt_timer *cs5535_mfgpt_alloc_timer(int timer_nr, int domain) +{ + struct cs5535_mfgpt_chip *mfgpt = &cs5535_mfgpt_chip; + struct cs5535_mfgpt_timer *timer = NULL; + unsigned long flags; + int max; + + if (!mfgpt->initialized) + goto done; + + /* only allocate timers from the working domain if requested */ + if (domain == MFGPT_DOMAIN_WORKING) + max = 6; + else + max = MFGPT_MAX_TIMERS; + + if (timer_nr >= max) { + /* programmer error. silly programmers! */ + WARN_ON(1); + goto done; + } + + spin_lock_irqsave(&mfgpt->lock, flags); + if (timer_nr < 0) { + unsigned long t; + + /* try to find any available timer */ + t = find_first_bit(mfgpt->avail, max); + /* set timer_nr to -1 if no timers available */ + timer_nr = t < max ? (int) t : -1; + } else { + /* check if the requested timer's available */ + if (!test_bit(timer_nr, mfgpt->avail)) + timer_nr = -1; + } + + if (timer_nr >= 0) + /* if timer_nr is not -1, it's an available timer */ + __clear_bit(timer_nr, mfgpt->avail); + spin_unlock_irqrestore(&mfgpt->lock, flags); + + if (timer_nr < 0) + goto done; + + timer = kmalloc(sizeof(*timer), GFP_KERNEL); + if (!timer) { + /* aw hell */ + spin_lock_irqsave(&mfgpt->lock, flags); + __set_bit(timer_nr, mfgpt->avail); + spin_unlock_irqrestore(&mfgpt->lock, flags); + goto done; + } + timer->chip = mfgpt; + timer->nr = timer_nr; + dev_info(&mfgpt->pdev->dev, "registered timer %d\n", timer_nr); + +done: + return timer; +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_alloc_timer); + +/* + * XXX: This frees the timer memory, but never resets the actual hardware + * timer. The old geode_mfgpt code did this; it would be good to figure + * out a way to actually release the hardware timer. See comments below.
+ */ +void cs5535_mfgpt_free_timer(struct cs5535_mfgpt_timer *timer) +{ + unsigned long flags; + uint16_t val; + + /* timer can be made available again only if never set up */ + val = cs5535_mfgpt_read(timer, MFGPT_REG_SETUP); + if (!(val & MFGPT_SETUP_SETUP)) { + spin_lock_irqsave(&timer->chip->lock, flags); + __set_bit(timer->nr, timer->chip->avail); + spin_unlock_irqrestore(&timer->chip->lock, flags); + } + + kfree(timer); +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_free_timer); + +uint16_t cs5535_mfgpt_read(struct cs5535_mfgpt_timer *timer, uint16_t reg) +{ + return inw(timer->chip->base + reg + (timer->nr * 8)); +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_read); + +void cs5535_mfgpt_write(struct cs5535_mfgpt_timer *timer, uint16_t reg, + uint16_t value) +{ + outw(value, timer->chip->base + reg + (timer->nr * 8)); +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_write); + +/* + * This is a sledgehammer that resets all MFGPT timers. This is required by + * some broken BIOSes which leave the system in an unstable state + * (TinyBIOS 0.98, for example; fixed in 0.99). It's uncertain as to + * whether or not this secret MSR can be used to release individual timers. + * Jordan tells me that he and Mitch once played w/ it, but it's unclear + * what the results of that were (and they experienced some instability). + */ +static void reset_all_timers(void) +{ + uint32_t val, dummy; + + /* The following undocumented bit resets the MFGPT timers */ + val = 0xFF; dummy = 0; + wrmsr(MSR_MFGPT_SETUP, val, dummy); +} + +/* + * This is another sledgehammer to reset all MFGPT timers. + * Instead of using the undocumented bit method it clears + * IRQ, NMI and RESET settings. + */ +static void soft_reset(void) +{ + int i; + struct cs5535_mfgpt_timer t; + + for (i = 0; i < MFGPT_MAX_TIMERS; i++) { + t.nr = i; + + cs5535_mfgpt_toggle_event(&t, MFGPT_CMP1, MFGPT_EVENT_RESET, 0); + cs5535_mfgpt_toggle_event(&t, MFGPT_CMP2, MFGPT_EVENT_RESET, 0); + cs5535_mfgpt_toggle_event(&t, MFGPT_CMP1, MFGPT_EVENT_NMI, 0); + cs5535_mfgpt_toggle_event(&t, MFGPT_CMP2, MFGPT_EVENT_NMI, 0); + cs5535_mfgpt_toggle_event(&t, MFGPT_CMP1, MFGPT_EVENT_IRQ, 0); + cs5535_mfgpt_toggle_event(&t, MFGPT_CMP2, MFGPT_EVENT_IRQ, 0); + } +} + +/* + * Check whether any MFGPTs are available for the kernel to use. In most + * cases, firmware that uses AMD's VSA code will claim all timers during + * bootup; we certainly don't want to take them if they're already in use. + * In other cases (such as with VSAless OpenFirmware), the system firmware + * leaves timers available for us to use. 
+ */ +static int scan_timers(struct cs5535_mfgpt_chip *mfgpt) +{ + struct cs5535_mfgpt_timer timer = { .chip = mfgpt }; + unsigned long flags; + int timers = 0; + uint16_t val; + int i; + + /* bios workaround */ + if (mfgpt_reset_timers == 1) + reset_all_timers(); + else if (mfgpt_reset_timers == 2) + soft_reset(); + + /* just to be safe, protect this section w/ lock */ + spin_lock_irqsave(&mfgpt->lock, flags); + for (i = 0; i < MFGPT_MAX_TIMERS; i++) { + timer.nr = i; + val = cs5535_mfgpt_read(&timer, MFGPT_REG_SETUP); + if (!(val & MFGPT_SETUP_SETUP) || mfgpt_reset_timers == 2) { + __set_bit(i, mfgpt->avail); + timers++; + } + } + spin_unlock_irqrestore(&mfgpt->lock, flags); + + return timers; +} + +static int cs5535_mfgpt_probe(struct platform_device *pdev) +{ + struct resource *res; + int err = -EIO, t; + + if (mfgpt_reset_timers < 0 || mfgpt_reset_timers > 2) { + dev_err(&pdev->dev, "Bad mfgpt_reset_timers value: %i\n", + mfgpt_reset_timers); + goto done; + } + + /* There are two ways to get the MFGPT base address; one is by + * fetching it from MSR_LBAR_MFGPT, the other is by reading the + * PCI BAR info. The latter method is easier (especially across + * different architectures), so we'll stick with that for now. If + * it turns out to be unreliable in the face of crappy BIOSes, we + * can always go back to using MSRs.. */ + + res = platform_get_resource(pdev, IORESOURCE_IO, 0); + if (!res) { + dev_err(&pdev->dev, "can't fetch device resource info\n"); + goto done; + } + + if (!request_region(res->start, resource_size(res), pdev->name)) { + dev_err(&pdev->dev, "can't request region\n"); + goto done; + } + + /* set up the driver-specific struct */ + cs5535_mfgpt_chip.base = res->start; + cs5535_mfgpt_chip.pdev = pdev; + spin_lock_init(&cs5535_mfgpt_chip.lock); + + dev_info(&pdev->dev, "reserved resource region %pR\n", res); + + /* detect the available timers */ + t = scan_timers(&cs5535_mfgpt_chip); + dev_info(&pdev->dev, "%d MFGPT timers available\n", t); + cs5535_mfgpt_chip.initialized = 1; + return 0; + +done: + return err; +} + +static struct platform_driver cs5535_mfgpt_driver = { + .driver = { + .name = DRV_NAME, + }, + .probe = cs5535_mfgpt_probe, +}; + + +static int __init cs5535_mfgpt_init(void) +{ + return platform_driver_register(&cs5535_mfgpt_driver); +} + +module_init(cs5535_mfgpt_init); + +MODULE_AUTHOR("Andres Salomon <dilinger@queued.net>"); +MODULE_DESCRIPTION("CS5535/CS5536 MFGPT timer driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:" DRV_NAME); diff --git a/kernel/drivers/misc/cxl/Kconfig b/kernel/drivers/misc/cxl/Kconfig new file mode 100644 index 000000000..a990b39b4 --- /dev/null +++ b/kernel/drivers/misc/cxl/Kconfig @@ -0,0 +1,25 @@ +# +# IBM Coherent Accelerator (CXL) compatible devices +# + +config CXL_BASE + bool + default n + select PPC_COPRO_BASE + +config CXL + tristate "Support for IBM Coherent Accelerators (CXL)" + depends on PPC_POWERNV && PCI_MSI + select CXL_BASE + default m + help + Select this option to enable driver support for IBM Coherent + Accelerators (CXL). CXL is otherwise known as Coherent Accelerator + Processor Interface (CAPI). CAPI allows accelerators in FPGAs to be + coherently attached to a CPU via an MMU. This driver enables + userspace programs to access these accelerators via /dev/cxl/afuM.N + devices. + + CAPI adapters are found in POWER8 based systems. + + If unsure, say N. 
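Editor's aside: the help text above is the whole userspace contract visible in this hunk. As a rough sketch of how a process attaches to an AFU — assuming the CXL_IOCTL_START_WORK ioctl and struct cxl_ioctl_start_work from the full cxl driver's uapi header <misc/cxl.h>, which are not part of this excerpt, and a device name that merely follows the /dev/cxl/afuM.N pattern mentioned above:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <misc/cxl.h>		/* uapi header from the full driver; not in this diff */

int main(void)
{
	struct cxl_ioctl_start_work work;
	void *ps;
	int fd;

	/* illustrative device name following the afuM.N pattern */
	fd = open("/dev/cxl/afu0.0m", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* attach this process to the AFU; the work element descriptor
	 * is AFU-specific, so zero is only a placeholder here */
	memset(&work, 0, sizeof(work));
	if (ioctl(fd, CXL_IOCTL_START_WORK, &work)) {
		perror("CXL_IOCTL_START_WORK");
		return 1;
	}

	/* map the per-process problem state area; the kernel side of
	 * this is cxl_context_iomap()/cxl_mmap_fault() shown in
	 * context.c later in this series of files */
	ps = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (ps == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* ... drive the accelerator through its MMIO registers ... */
	munmap(ps, 4096);
	close(fd);
	return 0;
}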
diff --git a/kernel/drivers/misc/cxl/Makefile b/kernel/drivers/misc/cxl/Makefile new file mode 100644 index 000000000..edb494d3f --- /dev/null +++ b/kernel/drivers/misc/cxl/Makefile @@ -0,0 +1,6 @@ +cxl-y += main.o file.o irq.o fault.o native.o context.o sysfs.o debugfs.o pci.o trace.o +obj-$(CONFIG_CXL) += cxl.o +obj-$(CONFIG_CXL_BASE) += base.o + +# For tracepoints to include our trace.h from tracepoint infrastructure: +CFLAGS_trace.o := -I$(src) diff --git a/kernel/drivers/misc/cxl/base.c b/kernel/drivers/misc/cxl/base.c new file mode 100644 index 000000000..0654ad836 --- /dev/null +++ b/kernel/drivers/misc/cxl/base.c @@ -0,0 +1,86 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/rcupdate.h> +#include <asm/errno.h> +#include <misc/cxl.h> +#include "cxl.h" + +/* protected by rcu */ +static struct cxl_calls *cxl_calls; + +atomic_t cxl_use_count = ATOMIC_INIT(0); +EXPORT_SYMBOL(cxl_use_count); + +#ifdef CONFIG_CXL_MODULE + +static inline struct cxl_calls *cxl_calls_get(void) +{ + struct cxl_calls *calls = NULL; + + rcu_read_lock(); + calls = rcu_dereference(cxl_calls); + if (calls && !try_module_get(calls->owner)) + calls = NULL; + rcu_read_unlock(); + + return calls; +} + +static inline void cxl_calls_put(struct cxl_calls *calls) +{ + BUG_ON(calls != cxl_calls); + + /* we don't need to rcu this, as we hold a reference to the module */ + module_put(cxl_calls->owner); +} + +#else /* !defined CONFIG_CXL_MODULE */ + +static inline struct cxl_calls *cxl_calls_get(void) +{ + return cxl_calls; +} + +static inline void cxl_calls_put(struct cxl_calls *calls) { } + +#endif /* CONFIG_CXL_MODULE */ + +void cxl_slbia(struct mm_struct *mm) +{ + struct cxl_calls *calls; + + calls = cxl_calls_get(); + if (!calls) + return; + + if (cxl_ctx_in_use()) + calls->cxl_slbia(mm); + + cxl_calls_put(calls); +} + +int register_cxl_calls(struct cxl_calls *calls) +{ + if (cxl_calls) + return -EBUSY; + + rcu_assign_pointer(cxl_calls, calls); + return 0; +} +EXPORT_SYMBOL_GPL(register_cxl_calls); + +void unregister_cxl_calls(struct cxl_calls *calls) +{ + BUG_ON(cxl_calls->owner != calls->owner); + RCU_INIT_POINTER(cxl_calls, NULL); + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(unregister_cxl_calls); diff --git a/kernel/drivers/misc/cxl/context.c b/kernel/drivers/misc/cxl/context.c new file mode 100644 index 000000000..d1b55fe62 --- /dev/null +++ b/kernel/drivers/misc/cxl/context.c @@ -0,0 +1,247 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/bitmap.h> +#include <linux/sched.h> +#include <linux/pid.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/debugfs.h> +#include <linux/slab.h> +#include <linux/idr.h> +#include <asm/cputable.h> +#include <asm/current.h> +#include <asm/copro.h> + +#include "cxl.h" + +/* + * Allocates space for a CXL context. + */ +struct cxl_context *cxl_context_alloc(void) +{ + return kzalloc(sizeof(struct cxl_context), GFP_KERNEL); +} + +/* + * Initialises a CXL context. 
+ */
+int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
+		     struct address_space *mapping)
+{
+	int i;
+
+	spin_lock_init(&ctx->sste_lock);
+	ctx->afu = afu;
+	ctx->master = master;
+	ctx->pid = NULL; /* Set in start work ioctl */
+	mutex_init(&ctx->mapping_lock);
+	ctx->mapping = mapping;
+
+	/*
+	 * Allocate the segment table before we put it in the IDR so that we
+	 * can always access it when dereferenced from IDR. For the same
+	 * reason, the segment table is only destroyed after the context is
+	 * removed from the IDR. Access to this in the IOCTL is protected by
+	 * Linux filesystem semantics (can't IOCTL until open is complete).
+	 */
+	i = cxl_alloc_sst(ctx);
+	if (i)
+		return i;
+
+	INIT_WORK(&ctx->fault_work, cxl_handle_fault);
+
+	init_waitqueue_head(&ctx->wq);
+	spin_lock_init(&ctx->lock);
+
+	ctx->irq_bitmap = NULL;
+	ctx->pending_irq = false;
+	ctx->pending_fault = false;
+	ctx->pending_afu_err = false;
+
+	/*
+	 * When we have to destroy all contexts in cxl_context_detach_all() we
+	 * end up with afu_release_irqs() called from inside an
+	 * idr_for_each_entry(). Hence we need to make sure that anything
+	 * dereferenced from this IDR is ok before we allocate the IDR here.
+	 * This clears out the IRQ ranges to ensure this.
+	 */
+	for (i = 0; i < CXL_IRQ_RANGES; i++)
+		ctx->irqs.range[i] = 0;
+
+	mutex_init(&ctx->status_mutex);
+
+	ctx->status = OPENED;
+
+	/*
+	 * We are about to allocate the IDR entry, so make sure everything
+	 * that can be dereferenced from it is set up before this point.
+	 */
+	mutex_lock(&afu->contexts_lock);
+	idr_preload(GFP_KERNEL);
+	i = idr_alloc(&ctx->afu->contexts_idr, ctx, 0,
+		      ctx->afu->num_procs, GFP_NOWAIT);
+	idr_preload_end();
+	mutex_unlock(&afu->contexts_lock);
+	if (i < 0)
+		return i;
+
+	ctx->pe = i;
+	ctx->elem = &ctx->afu->spa[i];
+	ctx->pe_inserted = false;
+	return 0;
+}
+
+static int cxl_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct cxl_context *ctx = vma->vm_file->private_data;
+	unsigned long address = (unsigned long)vmf->virtual_address;
+	u64 area, offset;
+
+	offset = vmf->pgoff << PAGE_SHIFT;
+
+	pr_devel("%s: pe: %i address: 0x%lx offset: 0x%llx\n",
+		 __func__, ctx->pe, address, offset);
+
+	if (ctx->afu->current_mode == CXL_MODE_DEDICATED) {
+		area = ctx->afu->psn_phys;
+		if (offset > ctx->afu->adapter->ps_size)
+			return VM_FAULT_SIGBUS;
+	} else {
+		area = ctx->psn_phys;
+		if (offset > ctx->psn_size)
+			return VM_FAULT_SIGBUS;
+	}
+
+	mutex_lock(&ctx->status_mutex);
+
+	if (ctx->status != STARTED) {
+		mutex_unlock(&ctx->status_mutex);
+		pr_devel("%s: Context not started, failing problem state access\n", __func__);
+		return VM_FAULT_SIGBUS;
+	}
+
+	vm_insert_pfn(vma, address, (area + offset) >> PAGE_SHIFT);
+
+	mutex_unlock(&ctx->status_mutex);
+
+	return VM_FAULT_NOPAGE;
+}
+
+static const struct vm_operations_struct cxl_mmap_vmops = {
+	.fault = cxl_mmap_fault,
+};
+
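The fault handler above populates the problem state mapping one page at a time, and only once the context has reached STARTED. From userspace the required sequence is therefore open, start-work ioctl, then mmap. A rough sketch under stated assumptions: the device path and the zero WED are placeholders, and the uapi header is assumed to be installed as <misc/cxl.h>.

/* Hypothetical userspace sketch; device path and WED value are placeholders. */
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <misc/cxl.h>		/* uapi: struct cxl_ioctl_start_work */

int main(void)
{
	struct cxl_ioctl_start_work work;
	void *ps;
	int fd;

	fd = open("/dev/cxl/afu0.0s", O_RDWR);	/* hypothetical shared AFU */
	if (fd < 0)
		return 1;

	/* Zeroed reserved fields are required; flags 0 = default IRQ count */
	memset(&work, 0, sizeof(work));
	work.work_element_descriptor = 0;	/* placeholder WED */
	if (ioctl(fd, CXL_IOCTL_START_WORK, &work))	/* context -> STARTED */
		return 1;

	/* Only now will cxl_mmap_fault() insert problem state pages */
	ps = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (ps == MAP_FAILED)
		return 1;

	munmap(ps, 4096);
	close(fd);
	return 0;
}

+/*
+ * Map a per-context mmio space into the given vma.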
+ */
+int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma)
+{
+	u64 len = vma->vm_end - vma->vm_start;
+	len = min(len, ctx->psn_size);
+
+	if (ctx->afu->current_mode != CXL_MODE_DEDICATED) {
+		/* make sure there is a valid per process space for this AFU */
+		if ((ctx->master && !ctx->afu->psa) || (!ctx->afu->pp_psa)) {
+			pr_devel("AFU doesn't support mmio space\n");
+			return -EINVAL;
+		}
+
+		/* Can't mmap until the AFU is enabled */
+		if (!ctx->afu->enabled)
+			return -EBUSY;
+	}
+
+	pr_devel("%s: mmio physical: %llx pe: %i master:%i\n", __func__,
+		 ctx->psn_phys, ctx->pe, ctx->master);
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_ops = &cxl_mmap_vmops;
+	return 0;
+}
+
+/*
+ * Detach a context from the hardware. This disables interrupts and doesn't
+ * return until all outstanding interrupts for this context have completed. The
+ * hardware should no longer access *ctx after this has returned.
+ */
+static void __detach_context(struct cxl_context *ctx)
+{
+	enum cxl_context_status status;
+
+	mutex_lock(&ctx->status_mutex);
+	status = ctx->status;
+	ctx->status = CLOSED;
+	mutex_unlock(&ctx->status_mutex);
+	if (status != STARTED)
+		return;
+
+	WARN_ON(cxl_detach_process(ctx));
+	afu_release_irqs(ctx);
+	flush_work(&ctx->fault_work); /* Only needed for dedicated process */
+	wake_up_all(&ctx->wq);
+}
+
+/*
+ * Detach the given context from the AFU. This doesn't actually
+ * free the context but it should stop the context running in hardware
+ * (i.e. prevent this context from generating any further interrupts
+ * so that it can be freed).
+ */
+void cxl_context_detach(struct cxl_context *ctx)
+{
+	__detach_context(ctx);
+}
+
+/*
+ * Detach all contexts on the given AFU.
+ */
+void cxl_context_detach_all(struct cxl_afu *afu)
+{
+	struct cxl_context *ctx;
+	int tmp;
+
+	mutex_lock(&afu->contexts_lock);
+	idr_for_each_entry(&afu->contexts_idr, ctx, tmp) {
+		/*
+		 * Anything done in here needs to be setup before the IDR is
+		 * created and torn down after the IDR is removed
+		 */
+		__detach_context(ctx);
+
+		/*
+		 * We are force detaching - remove any active PSA mappings so
+		 * userspace cannot interfere with the card if it comes back.
+		 * Easiest way to exercise this is to unbind and rebind the
+		 * driver via sysfs while it is in use.
+		 */
+		mutex_lock(&ctx->mapping_lock);
+		if (ctx->mapping)
+			unmap_mapping_range(ctx->mapping, 0, 0, 1);
+		mutex_unlock(&ctx->mapping_lock);
+	}
+	mutex_unlock(&afu->contexts_lock);
+}
+
+void cxl_context_free(struct cxl_context *ctx)
+{
+	mutex_lock(&ctx->afu->contexts_lock);
+	idr_remove(&ctx->afu->contexts_idr, ctx->pe);
+	mutex_unlock(&ctx->afu->contexts_lock);
+	synchronize_rcu();
+
+	free_page((u64)ctx->sstp);
+	ctx->sstp = NULL;
+
+	put_pid(ctx->pid);
+	kfree(ctx);
+}
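The context code above leans on a common IDR lifecycle: preload before taking the lock, allocate with GFP_NOWAIT under a mutex, and synchronize_rcu() after removal so that rcu_read_lock()-protected idr_find() users (the IRQ demultiplexer in irq.c) can never see a freed entry. A condensed, self-contained sketch of that pattern, with all 'item' names invented:

/* Sketch of the IDR lifecycle used for contexts; 'item' names are invented. */
#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

static DEFINE_IDR(item_idr);
static DEFINE_MUTEX(item_lock);

struct item {
	int id;
};

static int item_publish(struct item *it, int max)
{
	int id;

	mutex_lock(&item_lock);
	idr_preload(GFP_KERNEL);	/* sleeping allocation done up front */
	id = idr_alloc(&item_idr, it, 0, max, GFP_NOWAIT);
	idr_preload_end();
	mutex_unlock(&item_lock);
	if (id < 0)
		return id;
	it->id = id;
	return 0;
}

static void item_unpublish(struct item *it)
{
	mutex_lock(&item_lock);
	idr_remove(&item_idr, it->id);
	mutex_unlock(&item_lock);
	synchronize_rcu();	/* rcu_read_lock() + idr_find() users drain */
	kfree(it);
}

diff --git a/kernel/drivers/misc/cxl/cxl.h b/kernel/drivers/misc/cxl/cxl.h
new file mode 100644
index 000000000..a1cee4767
--- /dev/null
+++ b/kernel/drivers/misc/cxl/cxl.h
@@ -0,0 +1,663 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.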
+ */
+
+#ifndef _CXL_H_
+#define _CXL_H_
+
+#include <linux/interrupt.h>
+#include <linux/semaphore.h>
+#include <linux/device.h>
+#include <linux/types.h>
+#include <linux/cdev.h>
+#include <linux/pid.h>
+#include <linux/io.h>
+#include <linux/pci.h>
+#include <asm/cputable.h>
+#include <asm/mmu.h>
+#include <asm/reg.h>
+#include <misc/cxl.h>
+
+#include <uapi/misc/cxl.h>
+
+extern uint cxl_verbose;
+
+#define CXL_TIMEOUT 5
+
+/*
+ * Bump version each time a user API change is made, whether it is
+ * backwards compatible or not.
+ */
+#define CXL_API_VERSION 1
+#define CXL_API_VERSION_COMPATIBLE 1
+
+/*
+ * Opaque types to avoid accidentally passing registers for the wrong MMIO
+ *
+ * At the end of the day, I'm not married to using typedef here, but it might
+ * (and has!) help avoid bugs like mixing up CXL_PSL_CtxTime and
+ * CXL_PSL_CtxTime_An, or calling cxl_p1n_write instead of cxl_p1_write.
+ *
+ * I'm quite happy if these are changed back to #defines before upstreaming, it
+ * should be little more than a regexp search+replace operation in this file.
+ */
+typedef struct {
+	const int x;
+} cxl_p1_reg_t;
+typedef struct {
+	const int x;
+} cxl_p1n_reg_t;
+typedef struct {
+	const int x;
+} cxl_p2n_reg_t;
+#define cxl_reg_off(reg) \
+	(reg.x)
+
+/* Memory maps. Ref CXL Appendix A */
+
+/* PSL Privilege 1 Memory Map */
+/* Configuration and Control area */
+static const cxl_p1_reg_t CXL_PSL_CtxTime = {0x0000};
+static const cxl_p1_reg_t CXL_PSL_ErrIVTE = {0x0008};
+static const cxl_p1_reg_t CXL_PSL_KEY1    = {0x0010};
+static const cxl_p1_reg_t CXL_PSL_KEY2    = {0x0018};
+static const cxl_p1_reg_t CXL_PSL_Control = {0x0020};
+/* Downloading */
+static const cxl_p1_reg_t CXL_PSL_DLCNTL  = {0x0060};
+static const cxl_p1_reg_t CXL_PSL_DLADDR  = {0x0068};
+
+/* PSL Lookaside Buffer Management Area */
+static const cxl_p1_reg_t CXL_PSL_LBISEL  = {0x0080};
+static const cxl_p1_reg_t CXL_PSL_SLBIE   = {0x0088};
+static const cxl_p1_reg_t CXL_PSL_SLBIA   = {0x0090};
+static const cxl_p1_reg_t CXL_PSL_TLBIE   = {0x00A0};
+static const cxl_p1_reg_t CXL_PSL_TLBIA   = {0x00A8};
+static const cxl_p1_reg_t CXL_PSL_AFUSEL  = {0x00B0};
+
+/* 0x00C0:7EFF Implementation dependent area */
+static const cxl_p1_reg_t CXL_PSL_FIR1      = {0x0100};
+static const cxl_p1_reg_t CXL_PSL_FIR2      = {0x0108};
+static const cxl_p1_reg_t CXL_PSL_VERSION   = {0x0118};
+static const cxl_p1_reg_t CXL_PSL_RESLCKTO  = {0x0128};
+static const cxl_p1_reg_t CXL_PSL_FIR_CNTL  = {0x0148};
+static const cxl_p1_reg_t CXL_PSL_DSNDCTL   = {0x0150};
+static const cxl_p1_reg_t CXL_PSL_SNWRALLOC = {0x0158};
+static const cxl_p1_reg_t CXL_PSL_TRACE     = {0x0170};
+/* 0x7F00:7FFF Reserved PCIe MSI-X Pending Bit Array area */
+/* 0x8000:FFFF Reserved PCIe MSI-X Table Area */
+
+/* PSL Slice Privilege 1 Memory Map */
+/* Configuration Area */
+static const cxl_p1n_reg_t CXL_PSL_SR_An       = {0x00};
+static const cxl_p1n_reg_t CXL_PSL_LPID_An     = {0x08};
+static const cxl_p1n_reg_t CXL_PSL_AMBAR_An    = {0x10};
+static const cxl_p1n_reg_t CXL_PSL_SPOffset_An = {0x18};
+static const cxl_p1n_reg_t CXL_PSL_ID_An       = {0x20};
+static const cxl_p1n_reg_t CXL_PSL_SERR_An     = {0x28};
+/* Memory Management and Lookaside Buffer Management */
+static const cxl_p1n_reg_t CXL_PSL_SDR_An      = {0x30};
+static const cxl_p1n_reg_t CXL_PSL_AMOR_An     = {0x38};
+/* Pointer Area */
+static const cxl_p1n_reg_t CXL_HAURP_An        = {0x80};
+static const cxl_p1n_reg_t CXL_PSL_SPAP_An     = {0x88};
+static const cxl_p1n_reg_t CXL_PSL_LLCMD_An    = {0x90};
+/* Control Area */
+static const cxl_p1n_reg_t CXL_PSL_SCNTL_An =
{0xA0}; +static const cxl_p1n_reg_t CXL_PSL_CtxTime_An = {0xA8}; +static const cxl_p1n_reg_t CXL_PSL_IVTE_Offset_An = {0xB0}; +static const cxl_p1n_reg_t CXL_PSL_IVTE_Limit_An = {0xB8}; +/* 0xC0:FF Implementation Dependent Area */ +static const cxl_p1n_reg_t CXL_PSL_FIR_SLICE_An = {0xC0}; +static const cxl_p1n_reg_t CXL_AFU_DEBUG_An = {0xC8}; +static const cxl_p1n_reg_t CXL_PSL_APCALLOC_A = {0xD0}; +static const cxl_p1n_reg_t CXL_PSL_COALLOC_A = {0xD8}; +static const cxl_p1n_reg_t CXL_PSL_RXCTL_A = {0xE0}; +static const cxl_p1n_reg_t CXL_PSL_SLICE_TRACE = {0xE8}; + +/* PSL Slice Privilege 2 Memory Map */ +/* Configuration and Control Area */ +static const cxl_p2n_reg_t CXL_PSL_PID_TID_An = {0x000}; +static const cxl_p2n_reg_t CXL_CSRP_An = {0x008}; +static const cxl_p2n_reg_t CXL_AURP0_An = {0x010}; +static const cxl_p2n_reg_t CXL_AURP1_An = {0x018}; +static const cxl_p2n_reg_t CXL_SSTP0_An = {0x020}; +static const cxl_p2n_reg_t CXL_SSTP1_An = {0x028}; +static const cxl_p2n_reg_t CXL_PSL_AMR_An = {0x030}; +/* Segment Lookaside Buffer Management */ +static const cxl_p2n_reg_t CXL_SLBIE_An = {0x040}; +static const cxl_p2n_reg_t CXL_SLBIA_An = {0x048}; +static const cxl_p2n_reg_t CXL_SLBI_Select_An = {0x050}; +/* Interrupt Registers */ +static const cxl_p2n_reg_t CXL_PSL_DSISR_An = {0x060}; +static const cxl_p2n_reg_t CXL_PSL_DAR_An = {0x068}; +static const cxl_p2n_reg_t CXL_PSL_DSR_An = {0x070}; +static const cxl_p2n_reg_t CXL_PSL_TFC_An = {0x078}; +static const cxl_p2n_reg_t CXL_PSL_PEHandle_An = {0x080}; +static const cxl_p2n_reg_t CXL_PSL_ErrStat_An = {0x088}; +/* AFU Registers */ +static const cxl_p2n_reg_t CXL_AFU_Cntl_An = {0x090}; +static const cxl_p2n_reg_t CXL_AFU_ERR_An = {0x098}; +/* Work Element Descriptor */ +static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0}; +/* 0x0C0:FFF Implementation Dependent Area */ + +#define CXL_PSL_SPAP_Addr 0x0ffffffffffff000ULL +#define CXL_PSL_SPAP_Size 0x0000000000000ff0ULL +#define CXL_PSL_SPAP_Size_Shift 4 +#define CXL_PSL_SPAP_V 0x0000000000000001ULL + +/****** CXL_PSL_DLCNTL *****************************************************/ +#define CXL_PSL_DLCNTL_D (0x1ull << (63-28)) +#define CXL_PSL_DLCNTL_C (0x1ull << (63-29)) +#define CXL_PSL_DLCNTL_E (0x1ull << (63-30)) +#define CXL_PSL_DLCNTL_S (0x1ull << (63-31)) +#define CXL_PSL_DLCNTL_CE (CXL_PSL_DLCNTL_C | CXL_PSL_DLCNTL_E) +#define CXL_PSL_DLCNTL_DCES (CXL_PSL_DLCNTL_D | CXL_PSL_DLCNTL_CE | CXL_PSL_DLCNTL_S) + +/****** CXL_PSL_SR_An ******************************************************/ +#define CXL_PSL_SR_An_SF MSR_SF /* 64bit */ +#define CXL_PSL_SR_An_TA (1ull << (63-1)) /* Tags active, GA1: 0 */ +#define CXL_PSL_SR_An_HV MSR_HV /* Hypervisor, GA1: 0 */ +#define CXL_PSL_SR_An_PR MSR_PR /* Problem state, GA1: 1 */ +#define CXL_PSL_SR_An_ISL (1ull << (63-53)) /* Ignore Segment Large Page */ +#define CXL_PSL_SR_An_TC (1ull << (63-54)) /* Page Table secondary hash */ +#define CXL_PSL_SR_An_US (1ull << (63-56)) /* User state, GA1: X */ +#define CXL_PSL_SR_An_SC (1ull << (63-58)) /* Segment Table secondary hash */ +#define CXL_PSL_SR_An_R MSR_DR /* Relocate, GA1: 1 */ +#define CXL_PSL_SR_An_MP (1ull << (63-62)) /* Master Process */ +#define CXL_PSL_SR_An_LE (1ull << (63-63)) /* Little Endian */ + +/****** CXL_PSL_LLCMD_An ****************************************************/ +#define CXL_LLCMD_TERMINATE 0x0001000000000000ULL +#define CXL_LLCMD_REMOVE 0x0002000000000000ULL +#define CXL_LLCMD_SUSPEND 0x0003000000000000ULL +#define CXL_LLCMD_RESUME 0x0004000000000000ULL +#define CXL_LLCMD_ADD 
0x0005000000000000ULL +#define CXL_LLCMD_UPDATE 0x0006000000000000ULL +#define CXL_LLCMD_HANDLE_MASK 0x000000000000ffffULL + +/****** CXL_PSL_ID_An ****************************************************/ +#define CXL_PSL_ID_An_F (1ull << (63-31)) +#define CXL_PSL_ID_An_L (1ull << (63-30)) + +/****** CXL_PSL_SCNTL_An ****************************************************/ +#define CXL_PSL_SCNTL_An_CR (0x1ull << (63-15)) +/* Programming Modes: */ +#define CXL_PSL_SCNTL_An_PM_MASK (0xffffull << (63-31)) +#define CXL_PSL_SCNTL_An_PM_Shared (0x0000ull << (63-31)) +#define CXL_PSL_SCNTL_An_PM_OS (0x0001ull << (63-31)) +#define CXL_PSL_SCNTL_An_PM_Process (0x0002ull << (63-31)) +#define CXL_PSL_SCNTL_An_PM_AFU (0x0004ull << (63-31)) +#define CXL_PSL_SCNTL_An_PM_AFU_PBT (0x0104ull << (63-31)) +/* Purge Status (ro) */ +#define CXL_PSL_SCNTL_An_Ps_MASK (0x3ull << (63-39)) +#define CXL_PSL_SCNTL_An_Ps_Pending (0x1ull << (63-39)) +#define CXL_PSL_SCNTL_An_Ps_Complete (0x3ull << (63-39)) +/* Purge */ +#define CXL_PSL_SCNTL_An_Pc (0x1ull << (63-48)) +/* Suspend Status (ro) */ +#define CXL_PSL_SCNTL_An_Ss_MASK (0x3ull << (63-55)) +#define CXL_PSL_SCNTL_An_Ss_Pending (0x1ull << (63-55)) +#define CXL_PSL_SCNTL_An_Ss_Complete (0x3ull << (63-55)) +/* Suspend Control */ +#define CXL_PSL_SCNTL_An_Sc (0x1ull << (63-63)) + +/* AFU Slice Enable Status (ro) */ +#define CXL_AFU_Cntl_An_ES_MASK (0x7ull << (63-2)) +#define CXL_AFU_Cntl_An_ES_Disabled (0x0ull << (63-2)) +#define CXL_AFU_Cntl_An_ES_Enabled (0x4ull << (63-2)) +/* AFU Slice Enable */ +#define CXL_AFU_Cntl_An_E (0x1ull << (63-3)) +/* AFU Slice Reset status (ro) */ +#define CXL_AFU_Cntl_An_RS_MASK (0x3ull << (63-5)) +#define CXL_AFU_Cntl_An_RS_Pending (0x1ull << (63-5)) +#define CXL_AFU_Cntl_An_RS_Complete (0x2ull << (63-5)) +/* AFU Slice Reset */ +#define CXL_AFU_Cntl_An_RA (0x1ull << (63-7)) + +/****** CXL_SSTP0/1_An ******************************************************/ +/* These top bits are for the segment that CONTAINS the segment table */ +#define CXL_SSTP0_An_B_SHIFT SLB_VSID_SSIZE_SHIFT +#define CXL_SSTP0_An_KS (1ull << (63-2)) +#define CXL_SSTP0_An_KP (1ull << (63-3)) +#define CXL_SSTP0_An_N (1ull << (63-4)) +#define CXL_SSTP0_An_L (1ull << (63-5)) +#define CXL_SSTP0_An_C (1ull << (63-6)) +#define CXL_SSTP0_An_TA (1ull << (63-7)) +#define CXL_SSTP0_An_LP_SHIFT (63-9) /* 2 Bits */ +/* And finally, the virtual address & size of the segment table: */ +#define CXL_SSTP0_An_SegTableSize_SHIFT (63-31) /* 12 Bits */ +#define CXL_SSTP0_An_SegTableSize_MASK \ + (((1ull << 12) - 1) << CXL_SSTP0_An_SegTableSize_SHIFT) +#define CXL_SSTP0_An_STVA_U_MASK ((1ull << (63-49))-1) +#define CXL_SSTP1_An_STVA_L_MASK (~((1ull << (63-55))-1)) +#define CXL_SSTP1_An_V (1ull << (63-63)) + +/****** CXL_PSL_SLBIE_[An] **************************************************/ +/* write: */ +#define CXL_SLBIE_C PPC_BIT(36) /* Class */ +#define CXL_SLBIE_SS PPC_BITMASK(37, 38) /* Segment Size */ +#define CXL_SLBIE_SS_SHIFT PPC_BITLSHIFT(38) +#define CXL_SLBIE_TA PPC_BIT(38) /* Tags Active */ +/* read: */ +#define CXL_SLBIE_MAX PPC_BITMASK(24, 31) +#define CXL_SLBIE_PENDING PPC_BITMASK(56, 63) + +/****** Common to all CXL_TLBIA/SLBIA_[An] **********************************/ +#define CXL_TLB_SLB_P (1ull) /* Pending (read) */ + +/****** Common to all CXL_TLB/SLB_IA/IE_[An] registers **********************/ +#define CXL_TLB_SLB_IQ_ALL (0ull) /* Inv qualifier */ +#define CXL_TLB_SLB_IQ_LPID (1ull) /* Inv qualifier */ +#define CXL_TLB_SLB_IQ_LPIDPID (3ull) /* Inv qualifier */ + +/****** 
CXL_PSL_AFUSEL ******************************************************/ +#define CXL_PSL_AFUSEL_A (1ull << (63-55)) /* Adapter wide invalidates affect all AFUs */ + +/****** CXL_PSL_DSISR_An ****************************************************/ +#define CXL_PSL_DSISR_An_DS (1ull << (63-0)) /* Segment not found */ +#define CXL_PSL_DSISR_An_DM (1ull << (63-1)) /* PTE not found (See also: M) or protection fault */ +#define CXL_PSL_DSISR_An_ST (1ull << (63-2)) /* Segment Table PTE not found */ +#define CXL_PSL_DSISR_An_UR (1ull << (63-3)) /* AURP PTE not found */ +#define CXL_PSL_DSISR_TRANS (CXL_PSL_DSISR_An_DS | CXL_PSL_DSISR_An_DM | CXL_PSL_DSISR_An_ST | CXL_PSL_DSISR_An_UR) +#define CXL_PSL_DSISR_An_PE (1ull << (63-4)) /* PSL Error (implementation specific) */ +#define CXL_PSL_DSISR_An_AE (1ull << (63-5)) /* AFU Error */ +#define CXL_PSL_DSISR_An_OC (1ull << (63-6)) /* OS Context Warning */ +/* NOTE: Bits 32:63 are undefined if DSISR[DS] = 1 */ +#define CXL_PSL_DSISR_An_M DSISR_NOHPTE /* PTE not found */ +#define CXL_PSL_DSISR_An_P DSISR_PROTFAULT /* Storage protection violation */ +#define CXL_PSL_DSISR_An_A (1ull << (63-37)) /* AFU lock access to write through or cache inhibited storage */ +#define CXL_PSL_DSISR_An_S DSISR_ISSTORE /* Access was afu_wr or afu_zero */ +#define CXL_PSL_DSISR_An_K DSISR_KEYFAULT /* Access not permitted by virtual page class key protection */ + +/****** CXL_PSL_TFC_An ******************************************************/ +#define CXL_PSL_TFC_An_A (1ull << (63-28)) /* Acknowledge non-translation fault */ +#define CXL_PSL_TFC_An_C (1ull << (63-29)) /* Continue (abort transaction) */ +#define CXL_PSL_TFC_An_AE (1ull << (63-30)) /* Restart PSL with address error */ +#define CXL_PSL_TFC_An_R (1ull << (63-31)) /* Restart PSL transaction */ + +/* cxl_process_element->software_status */ +#define CXL_PE_SOFTWARE_STATE_V (1ul << (31 - 0)) /* Valid */ +#define CXL_PE_SOFTWARE_STATE_C (1ul << (31 - 29)) /* Complete */ +#define CXL_PE_SOFTWARE_STATE_S (1ul << (31 - 30)) /* Suspend */ +#define CXL_PE_SOFTWARE_STATE_T (1ul << (31 - 31)) /* Terminate */ + +/****** CXL_PSL_RXCTL_An (Implementation Specific) ************************** + * Controls AFU Hang Pulse, which sets the timeout for the AFU to respond to + * the PSL for any response (except MMIO). Timeouts will occur between 1x to 2x + * of the hang pulse frequency. 
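The (1ull << (63-n)) idiom used throughout these register definitions encodes the Power ISA's big-endian bit numbering, in which bit 0 is the most significant bit of a 64-bit register. A stand-alone userspace check of the convention against a few of the masks above:

/* Power big-endian bit numbering: bit 0 is the MSB of a 64-bit register. */
#include <assert.h>
#include <stdio.h>

#define BE_BIT(n)	(1ull << (63 - (n)))	/* same idiom as PPC_BIT(n) */

int main(void)
{
	/* CXL_PSL_DSISR_An_DS is bit 0, i.e. the top bit */
	assert(BE_BIT(0) == 0x8000000000000000ull);
	/* CXL_PSL_TFC_An_R is bit 31 */
	assert(BE_BIT(31) == 0x0000000100000000ull);
	/* CXL_PSL_SCNTL_An_Sc is bit 63, the least significant bit */
	assert(BE_BIT(63) == 0x1ull);
	printf("bit 28 (CXL_PSL_TFC_An_A): %#018llx\n",
	       (unsigned long long)BE_BIT(28));
	return 0;
}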
+ */ +#define CXL_PSL_RXCTL_AFUHP_4S 0x7000000000000000ULL + +/* SPA->sw_command_status */ +#define CXL_SPA_SW_CMD_MASK 0xffff000000000000ULL +#define CXL_SPA_SW_CMD_TERMINATE 0x0001000000000000ULL +#define CXL_SPA_SW_CMD_REMOVE 0x0002000000000000ULL +#define CXL_SPA_SW_CMD_SUSPEND 0x0003000000000000ULL +#define CXL_SPA_SW_CMD_RESUME 0x0004000000000000ULL +#define CXL_SPA_SW_CMD_ADD 0x0005000000000000ULL +#define CXL_SPA_SW_CMD_UPDATE 0x0006000000000000ULL +#define CXL_SPA_SW_STATE_MASK 0x0000ffff00000000ULL +#define CXL_SPA_SW_STATE_TERMINATED 0x0000000100000000ULL +#define CXL_SPA_SW_STATE_REMOVED 0x0000000200000000ULL +#define CXL_SPA_SW_STATE_SUSPENDED 0x0000000300000000ULL +#define CXL_SPA_SW_STATE_RESUMED 0x0000000400000000ULL +#define CXL_SPA_SW_STATE_ADDED 0x0000000500000000ULL +#define CXL_SPA_SW_STATE_UPDATED 0x0000000600000000ULL +#define CXL_SPA_SW_PSL_ID_MASK 0x00000000ffff0000ULL +#define CXL_SPA_SW_LINK_MASK 0x000000000000ffffULL + +#define CXL_MAX_SLICES 4 +#define MAX_AFU_MMIO_REGS 3 + +#define CXL_MODE_DEDICATED 0x1 +#define CXL_MODE_DIRECTED 0x2 +#define CXL_MODE_TIME_SLICED 0x4 +#define CXL_SUPPORTED_MODES (CXL_MODE_DEDICATED | CXL_MODE_DIRECTED) + +enum cxl_context_status { + CLOSED, + OPENED, + STARTED +}; + +enum prefault_modes { + CXL_PREFAULT_NONE, + CXL_PREFAULT_WED, + CXL_PREFAULT_ALL, +}; + +struct cxl_sste { + __be64 esid_data; + __be64 vsid_data; +}; + +#define to_cxl_adapter(d) container_of(d, struct cxl, dev) +#define to_cxl_afu(d) container_of(d, struct cxl_afu, dev) + +struct cxl_afu { + irq_hw_number_t psl_hwirq; + irq_hw_number_t serr_hwirq; + char *err_irq_name; + char *psl_irq_name; + unsigned int serr_virq; + void __iomem *p1n_mmio; + void __iomem *p2n_mmio; + phys_addr_t psn_phys; + u64 pp_offset; + u64 pp_size; + void __iomem *afu_desc_mmio; + struct cxl *adapter; + struct device dev; + struct cdev afu_cdev_s, afu_cdev_m, afu_cdev_d; + struct device *chardev_s, *chardev_m, *chardev_d; + struct idr contexts_idr; + struct dentry *debugfs; + struct mutex contexts_lock; + struct mutex spa_mutex; + spinlock_t afu_cntl_lock; + + /* + * Only the first part of the SPA is used for the process element + * linked list. The only other part that software needs to worry about + * is sw_command_status, which we store a separate pointer to. + * Everything else in the SPA is only used by hardware + */ + struct cxl_process_element *spa; + __be64 *sw_command_status; + unsigned int spa_size; + int spa_order; + int spa_max_procs; + unsigned int psl_virq; + + int pp_irqs; + int irqs_max; + int num_procs; + int max_procs_virtualised; + int slice; + int modes_supported; + int current_mode; + int crs_num; + u64 crs_len; + u64 crs_offset; + struct list_head crs; + enum prefault_modes prefault_mode; + bool psa; + bool pp_psa; + bool enabled; +}; + + +struct cxl_irq_name { + struct list_head list; + char *name; +}; + +/* + * This is a cxl context. If the PSL is in dedicated mode, there will be one + * of these per AFU. If in AFU directed there can be lots of these. 
+ */
+struct cxl_context {
+	struct cxl_afu *afu;
+
+	/* Problem state MMIO */
+	phys_addr_t psn_phys;
+	u64 psn_size;
+
+	/* Used to unmap any mmaps when force detaching */
+	struct address_space *mapping;
+	struct mutex mapping_lock;
+
+	spinlock_t sste_lock; /* Protects segment table entries */
+	struct cxl_sste *sstp;
+	u64 sstp0, sstp1;
+	unsigned int sst_size, sst_lru;
+
+	wait_queue_head_t wq;
+	struct pid *pid;
+	spinlock_t lock; /* Protects pending_irq_mask, pending_fault and fault_addr */
+	/* Only used in PR mode */
+	u64 process_token;
+
+	unsigned long *irq_bitmap; /* Accessed from IRQ context */
+	struct cxl_irq_ranges irqs;
+	struct list_head irq_names;
+	u64 fault_addr;
+	u64 fault_dsisr;
+	u64 afu_err;
+
+	/*
+	 * This status and its lock protect start and detach context
+	 * from racing. It also prevents detach from racing with
+	 * itself.
+	 */
+	enum cxl_context_status status;
+	struct mutex status_mutex;
+
+	/* XXX: Is it possible to need multiple work items at once? */
+	struct work_struct fault_work;
+	u64 dsisr;
+	u64 dar;
+
+	struct cxl_process_element *elem;
+
+	int pe; /* process element handle */
+	u32 irq_count;
+	bool pe_inserted;
+	bool master;
+	bool kernel;
+	bool pending_irq;
+	bool pending_fault;
+	bool pending_afu_err;
+};
+
+struct cxl {
+	void __iomem *p1_mmio;
+	void __iomem *p2_mmio;
+	irq_hw_number_t err_hwirq;
+	unsigned int err_virq;
+	spinlock_t afu_list_lock;
+	struct cxl_afu *afu[CXL_MAX_SLICES];
+	struct device dev;
+	struct dentry *trace;
+	struct dentry *psl_err_chk;
+	struct dentry *debugfs;
+	char *irq_name;
+	struct bin_attribute cxl_attr;
+	int adapter_num;
+	int user_irqs;
+	u64 afu_desc_off;
+	u64 afu_desc_size;
+	u64 ps_off;
+	u64 ps_size;
+	u16 psl_rev;
+	u16 base_image;
+	u8 vsec_status;
+	u8 caia_major;
+	u8 caia_minor;
+	u8 slices;
+	bool user_image_loaded;
+	bool perst_loads_image;
+	bool perst_select_user;
+};
+
+int cxl_alloc_one_irq(struct cxl *adapter);
+void cxl_release_one_irq(struct cxl *adapter, int hwirq);
+int cxl_alloc_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter, unsigned int num);
+void cxl_release_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter);
+int cxl_setup_irq(struct cxl *adapter, unsigned int hwirq, unsigned int virq);
+int cxl_update_image_control(struct cxl *adapter);
+int cxl_reset(struct cxl *adapter);
+
+/* common == phyp + powernv */
+struct cxl_process_element_common {
+	__be32 tid;
+	__be32 pid;
+	__be64 csrp;
+	__be64 aurp0;
+	__be64 aurp1;
+	__be64 sstp0;
+	__be64 sstp1;
+	__be64 amr;
+	u8 reserved3[4];
+	__be64 wed;
+} __packed;
+
+/* just powernv */
+struct cxl_process_element {
+	__be64 sr;
+	__be64 SPOffset;
+	__be64 sdr;
+	__be64 haurp;
+	__be32 ctxtime;
+	__be16 ivte_offsets[4];
+	__be16 ivte_ranges[4];
+	__be32 lpid;
+	struct cxl_process_element_common common;
+	__be32 software_state;
+} __packed;
+
+static inline void __iomem *_cxl_p1_addr(struct cxl *cxl, cxl_p1_reg_t reg)
+{
+	WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE));
+	return cxl->p1_mmio + cxl_reg_off(reg);
+}
+
+#define cxl_p1_write(cxl, reg, val) \
+	out_be64(_cxl_p1_addr(cxl, reg), val)
+#define cxl_p1_read(cxl, reg) \
+	in_be64(_cxl_p1_addr(cxl, reg))
+
+static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t reg)
+{
+	WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE));
+	return afu->p1n_mmio + cxl_reg_off(reg);
+}
+
+#define cxl_p1n_write(afu, reg, val) \
+	out_be64(_cxl_p1n_addr(afu, reg), val)
+#define cxl_p1n_read(afu, reg) \
+	in_be64(_cxl_p1n_addr(afu, reg))
+
+static inline void __iomem
*_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t reg) +{ + return afu->p2n_mmio + cxl_reg_off(reg); +} + +#define cxl_p2n_write(afu, reg, val) \ + out_be64(_cxl_p2n_addr(afu, reg), val) +#define cxl_p2n_read(afu, reg) \ + in_be64(_cxl_p2n_addr(afu, reg)) + + +#define cxl_afu_cr_read64(afu, cr, off) \ + in_le64((afu)->afu_desc_mmio + (afu)->crs_offset + ((cr) * (afu)->crs_len) + (off)) +#define cxl_afu_cr_read32(afu, cr, off) \ + in_le32((afu)->afu_desc_mmio + (afu)->crs_offset + ((cr) * (afu)->crs_len) + (off)) +u16 cxl_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off); +u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off); + + +struct cxl_calls { + void (*cxl_slbia)(struct mm_struct *mm); + struct module *owner; +}; +int register_cxl_calls(struct cxl_calls *calls); +void unregister_cxl_calls(struct cxl_calls *calls); + +int cxl_alloc_adapter_nr(struct cxl *adapter); +void cxl_remove_adapter_nr(struct cxl *adapter); + +int cxl_file_init(void); +void cxl_file_exit(void); +int cxl_register_adapter(struct cxl *adapter); +int cxl_register_afu(struct cxl_afu *afu); +int cxl_chardev_d_afu_add(struct cxl_afu *afu); +int cxl_chardev_m_afu_add(struct cxl_afu *afu); +int cxl_chardev_s_afu_add(struct cxl_afu *afu); +void cxl_chardev_afu_remove(struct cxl_afu *afu); + +void cxl_context_detach_all(struct cxl_afu *afu); +void cxl_context_free(struct cxl_context *ctx); +void cxl_context_detach(struct cxl_context *ctx); + +int cxl_sysfs_adapter_add(struct cxl *adapter); +void cxl_sysfs_adapter_remove(struct cxl *adapter); +int cxl_sysfs_afu_add(struct cxl_afu *afu); +void cxl_sysfs_afu_remove(struct cxl_afu *afu); +int cxl_sysfs_afu_m_add(struct cxl_afu *afu); +void cxl_sysfs_afu_m_remove(struct cxl_afu *afu); + +int cxl_afu_activate_mode(struct cxl_afu *afu, int mode); +int _cxl_afu_deactivate_mode(struct cxl_afu *afu, int mode); +int cxl_afu_deactivate_mode(struct cxl_afu *afu); +int cxl_afu_select_best_mode(struct cxl_afu *afu); + +int cxl_register_psl_irq(struct cxl_afu *afu); +void cxl_release_psl_irq(struct cxl_afu *afu); +int cxl_register_psl_err_irq(struct cxl *adapter); +void cxl_release_psl_err_irq(struct cxl *adapter); +int cxl_register_serr_irq(struct cxl_afu *afu); +void cxl_release_serr_irq(struct cxl_afu *afu); +int afu_register_irqs(struct cxl_context *ctx, u32 count); +void afu_release_irqs(struct cxl_context *ctx); +irqreturn_t cxl_slice_irq_err(int irq, void *data); + +int cxl_debugfs_init(void); +void cxl_debugfs_exit(void); +int cxl_debugfs_adapter_add(struct cxl *adapter); +void cxl_debugfs_adapter_remove(struct cxl *adapter); +int cxl_debugfs_afu_add(struct cxl_afu *afu); +void cxl_debugfs_afu_remove(struct cxl_afu *afu); + +void cxl_handle_fault(struct work_struct *work); +void cxl_prefault(struct cxl_context *ctx, u64 wed); + +struct cxl *get_cxl_adapter(int num); +int cxl_alloc_sst(struct cxl_context *ctx); + +void init_cxl_native(void); + +struct cxl_context *cxl_context_alloc(void); +int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master, + struct address_space *mapping); +void cxl_context_free(struct cxl_context *ctx); +int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma); + +/* This matches the layout of the H_COLLECT_CA_INT_INFO retbuf */ +struct cxl_irq_info { + u64 dsisr; + u64 dar; + u64 dsr; + u32 pid; + u32 tid; + u64 afu_err; + u64 errstat; + u64 padding[3]; /* to match the expected retbuf size for plpar_hcall9 */ +}; + +int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, + u64 amr); +int 
cxl_detach_process(struct cxl_context *ctx); + +int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info); +int cxl_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask); + +int cxl_check_error(struct cxl_afu *afu); +int cxl_afu_slbia(struct cxl_afu *afu); +int cxl_tlb_slb_invalidate(struct cxl *adapter); +int cxl_afu_disable(struct cxl_afu *afu); +int cxl_afu_reset(struct cxl_afu *afu); +int cxl_psl_purge(struct cxl_afu *afu); + +void cxl_stop_trace(struct cxl *cxl); + +extern struct pci_driver cxl_pci_driver; + +#endif diff --git a/kernel/drivers/misc/cxl/debugfs.c b/kernel/drivers/misc/cxl/debugfs.c new file mode 100644 index 000000000..825c41258 --- /dev/null +++ b/kernel/drivers/misc/cxl/debugfs.c @@ -0,0 +1,132 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/debugfs.h> +#include <linux/kernel.h> +#include <linux/slab.h> + +#include "cxl.h" + +static struct dentry *cxl_debugfs; + +void cxl_stop_trace(struct cxl *adapter) +{ + int slice; + + /* Stop the trace */ + cxl_p1_write(adapter, CXL_PSL_TRACE, 0x8000000000000017LL); + + /* Stop the slice traces */ + spin_lock(&adapter->afu_list_lock); + for (slice = 0; slice < adapter->slices; slice++) { + if (adapter->afu[slice]) + cxl_p1n_write(adapter->afu[slice], CXL_PSL_SLICE_TRACE, 0x8000000000000000LL); + } + spin_unlock(&adapter->afu_list_lock); +} + +/* Helpers to export CXL mmaped IO registers via debugfs */ +static int debugfs_io_u64_get(void *data, u64 *val) +{ + *val = in_be64((u64 __iomem *)data); + return 0; +} + +static int debugfs_io_u64_set(void *data, u64 val) +{ + out_be64((u64 __iomem *)data, val); + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_io_x64, debugfs_io_u64_get, debugfs_io_u64_set, "0x%016llx\n"); + +static struct dentry *debugfs_create_io_x64(const char *name, umode_t mode, + struct dentry *parent, u64 __iomem *value) +{ + return debugfs_create_file(name, mode, parent, (void *)value, &fops_io_x64); +} + +int cxl_debugfs_adapter_add(struct cxl *adapter) +{ + struct dentry *dir; + char buf[32]; + + if (!cxl_debugfs) + return -ENODEV; + + snprintf(buf, 32, "card%i", adapter->adapter_num); + dir = debugfs_create_dir(buf, cxl_debugfs); + if (IS_ERR(dir)) + return PTR_ERR(dir); + adapter->debugfs = dir; + + debugfs_create_io_x64("fir1", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR1)); + debugfs_create_io_x64("fir2", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR2)); + debugfs_create_io_x64("fir_cntl", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR_CNTL)); + debugfs_create_io_x64("err_ivte", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_ErrIVTE)); + + debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_TRACE)); + + return 0; +} + +void cxl_debugfs_adapter_remove(struct cxl *adapter) +{ + debugfs_remove_recursive(adapter->debugfs); +} + +int cxl_debugfs_afu_add(struct cxl_afu *afu) +{ + struct dentry *dir; + char buf[32]; + + if (!afu->adapter->debugfs) + return -ENODEV; + + snprintf(buf, 32, "psl%i.%i", afu->adapter->adapter_num, afu->slice); + dir = debugfs_create_dir(buf, afu->adapter->debugfs); + if (IS_ERR(dir)) + return PTR_ERR(dir); + afu->debugfs = dir; + + debugfs_create_io_x64("fir", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_FIR_SLICE_An)); + debugfs_create_io_x64("serr", S_IRUSR, dir, 
_cxl_p1n_addr(afu, CXL_PSL_SERR_An)); + debugfs_create_io_x64("afu_debug", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_AFU_DEBUG_An)); + debugfs_create_io_x64("sr", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SR_An)); + + debugfs_create_io_x64("dsisr", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_DSISR_An)); + debugfs_create_io_x64("dar", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_DAR_An)); + debugfs_create_io_x64("sstp0", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_SSTP0_An)); + debugfs_create_io_x64("sstp1", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_SSTP1_An)); + debugfs_create_io_x64("err_status", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_ErrStat_An)); + + debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SLICE_TRACE)); + + return 0; +} + +void cxl_debugfs_afu_remove(struct cxl_afu *afu) +{ + debugfs_remove_recursive(afu->debugfs); +} + +int __init cxl_debugfs_init(void) +{ + struct dentry *ent; + ent = debugfs_create_dir("cxl", NULL); + if (IS_ERR(ent)) + return PTR_ERR(ent); + cxl_debugfs = ent; + + return 0; +} + +void cxl_debugfs_exit(void) +{ + debugfs_remove_recursive(cxl_debugfs); +} diff --git a/kernel/drivers/misc/cxl/fault.c b/kernel/drivers/misc/cxl/fault.c new file mode 100644 index 000000000..5286b8b70 --- /dev/null +++ b/kernel/drivers/misc/cxl/fault.c @@ -0,0 +1,310 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/workqueue.h> +#include <linux/sched.h> +#include <linux/pid.h> +#include <linux/mm.h> +#include <linux/moduleparam.h> + +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "cxl" "." +#include <asm/current.h> +#include <asm/copro.h> +#include <asm/mmu.h> + +#include "cxl.h" +#include "trace.h" + +static bool sste_matches(struct cxl_sste *sste, struct copro_slb *slb) +{ + return ((sste->vsid_data == cpu_to_be64(slb->vsid)) && + (sste->esid_data == cpu_to_be64(slb->esid))); +} + +/* + * This finds a free SSTE for the given SLB, or returns NULL if it's already in + * the segment table. + */ +static struct cxl_sste* find_free_sste(struct cxl_context *ctx, + struct copro_slb *slb) +{ + struct cxl_sste *primary, *sste, *ret = NULL; + unsigned int mask = (ctx->sst_size >> 7) - 1; /* SSTP0[SegTableSize] */ + unsigned int entry; + unsigned int hash; + + if (slb->vsid & SLB_VSID_B_1T) + hash = (slb->esid >> SID_SHIFT_1T) & mask; + else /* 256M */ + hash = (slb->esid >> SID_SHIFT) & mask; + + primary = ctx->sstp + (hash << 3); + + for (entry = 0, sste = primary; entry < 8; entry++, sste++) { + if (!ret && !(be64_to_cpu(sste->esid_data) & SLB_ESID_V)) + ret = sste; + if (sste_matches(sste, slb)) + return NULL; + } + if (ret) + return ret; + + /* Nothing free, select an entry to cast out */ + ret = primary + ctx->sst_lru; + ctx->sst_lru = (ctx->sst_lru + 1) & 0x7; + + return ret; +} + +static void cxl_load_segment(struct cxl_context *ctx, struct copro_slb *slb) +{ + /* mask is the group index, we search primary and secondary here. 
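+	 * A worked example (sizes assumed for illustration): with a 4K
+	 * segment table, mask = (4096 >> 7) - 1 = 31, since each group
+	 * is 8 SSTEs of 16 bytes (128 bytes, hence the shift by 7). A
+	 * 256M-segment EA of 0x200000000 then hashes to group
+	 * (0x200000000 >> SID_SHIFT) & 31 = 0, and find_free_sste()
+	 * scans that group's 8 slots before falling back to LRU cast-out.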
*/ + struct cxl_sste *sste; + unsigned long flags; + + spin_lock_irqsave(&ctx->sste_lock, flags); + sste = find_free_sste(ctx, slb); + if (!sste) + goto out_unlock; + + pr_devel("CXL Populating SST[%li]: %#llx %#llx\n", + sste - ctx->sstp, slb->vsid, slb->esid); + trace_cxl_ste_write(ctx, sste - ctx->sstp, slb->esid, slb->vsid); + + sste->vsid_data = cpu_to_be64(slb->vsid); + sste->esid_data = cpu_to_be64(slb->esid); +out_unlock: + spin_unlock_irqrestore(&ctx->sste_lock, flags); +} + +static int cxl_fault_segment(struct cxl_context *ctx, struct mm_struct *mm, + u64 ea) +{ + struct copro_slb slb = {0,0}; + int rc; + + if (!(rc = copro_calculate_slb(mm, ea, &slb))) { + cxl_load_segment(ctx, &slb); + } + + return rc; +} + +static void cxl_ack_ae(struct cxl_context *ctx) +{ + unsigned long flags; + + cxl_ack_irq(ctx, CXL_PSL_TFC_An_AE, 0); + + spin_lock_irqsave(&ctx->lock, flags); + ctx->pending_fault = true; + ctx->fault_addr = ctx->dar; + ctx->fault_dsisr = ctx->dsisr; + spin_unlock_irqrestore(&ctx->lock, flags); + + wake_up_all(&ctx->wq); +} + +static int cxl_handle_segment_miss(struct cxl_context *ctx, + struct mm_struct *mm, u64 ea) +{ + int rc; + + pr_devel("CXL interrupt: Segment fault pe: %i ea: %#llx\n", ctx->pe, ea); + trace_cxl_ste_miss(ctx, ea); + + if ((rc = cxl_fault_segment(ctx, mm, ea))) + cxl_ack_ae(ctx); + else { + + mb(); /* Order seg table write to TFC MMIO write */ + cxl_ack_irq(ctx, CXL_PSL_TFC_An_R, 0); + } + + return IRQ_HANDLED; +} + +static void cxl_handle_page_fault(struct cxl_context *ctx, + struct mm_struct *mm, u64 dsisr, u64 dar) +{ + unsigned flt = 0; + int result; + unsigned long access, flags, inv_flags = 0; + + trace_cxl_pte_miss(ctx, dsisr, dar); + + if ((result = copro_handle_mm_fault(mm, dar, dsisr, &flt))) { + pr_devel("copro_handle_mm_fault failed: %#x\n", result); + return cxl_ack_ae(ctx); + } + + /* + * update_mmu_cache() will not have loaded the hash since current->trap + * is not a 0x400 or 0x300, so just call hash_page_mm() here. + */ + access = _PAGE_PRESENT; + if (dsisr & CXL_PSL_DSISR_An_S) + access |= _PAGE_RW; + if ((!ctx->kernel) || ~(dar & (1ULL << 63))) + access |= _PAGE_USER; + + if (dsisr & DSISR_NOHPTE) + inv_flags |= HPTE_NOHPTE_UPDATE; + + local_irq_save(flags); + hash_page_mm(mm, dar, access, 0x300, inv_flags); + local_irq_restore(flags); + + pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe); + cxl_ack_irq(ctx, CXL_PSL_TFC_An_R, 0); +} + +void cxl_handle_fault(struct work_struct *fault_work) +{ + struct cxl_context *ctx = + container_of(fault_work, struct cxl_context, fault_work); + u64 dsisr = ctx->dsisr; + u64 dar = ctx->dar; + struct task_struct *task; + struct mm_struct *mm; + + if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr || + cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar || + cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) != ctx->pe) { + /* Most likely explanation is harmless - a dedicated process + * has detached and these were cleared by the PSL purge, but + * warn about it just in case */ + dev_notice(&ctx->afu->dev, "cxl_handle_fault: Translation fault regs changed\n"); + return; + } + + /* Early return if the context is being / has been detached */ + if (ctx->status == CLOSED) { + cxl_ack_ae(ctx); + return; + } + + pr_devel("CXL BOTTOM HALF handling fault for afu pe: %i. 
" + "DSISR: %#llx DAR: %#llx\n", ctx->pe, dsisr, dar); + + if (!(task = get_pid_task(ctx->pid, PIDTYPE_PID))) { + pr_devel("cxl_handle_fault unable to get task %i\n", + pid_nr(ctx->pid)); + cxl_ack_ae(ctx); + return; + } + if (!(mm = get_task_mm(task))) { + pr_devel("cxl_handle_fault unable to get mm %i\n", + pid_nr(ctx->pid)); + cxl_ack_ae(ctx); + goto out; + } + + if (dsisr & CXL_PSL_DSISR_An_DS) + cxl_handle_segment_miss(ctx, mm, dar); + else if (dsisr & CXL_PSL_DSISR_An_DM) + cxl_handle_page_fault(ctx, mm, dsisr, dar); + else + WARN(1, "cxl_handle_fault has nothing to handle\n"); + + mmput(mm); +out: + put_task_struct(task); +} + +static void cxl_prefault_one(struct cxl_context *ctx, u64 ea) +{ + int rc; + struct task_struct *task; + struct mm_struct *mm; + + if (!(task = get_pid_task(ctx->pid, PIDTYPE_PID))) { + pr_devel("cxl_prefault_one unable to get task %i\n", + pid_nr(ctx->pid)); + return; + } + if (!(mm = get_task_mm(task))) { + pr_devel("cxl_prefault_one unable to get mm %i\n", + pid_nr(ctx->pid)); + put_task_struct(task); + return; + } + + rc = cxl_fault_segment(ctx, mm, ea); + + mmput(mm); + put_task_struct(task); +} + +static u64 next_segment(u64 ea, u64 vsid) +{ + if (vsid & SLB_VSID_B_1T) + ea |= (1ULL << 40) - 1; + else + ea |= (1ULL << 28) - 1; + + return ea + 1; +} + +static void cxl_prefault_vma(struct cxl_context *ctx) +{ + u64 ea, last_esid = 0; + struct copro_slb slb; + struct vm_area_struct *vma; + int rc; + struct task_struct *task; + struct mm_struct *mm; + + if (!(task = get_pid_task(ctx->pid, PIDTYPE_PID))) { + pr_devel("cxl_prefault_vma unable to get task %i\n", + pid_nr(ctx->pid)); + return; + } + if (!(mm = get_task_mm(task))) { + pr_devel("cxl_prefault_vm unable to get mm %i\n", + pid_nr(ctx->pid)); + goto out1; + } + + down_read(&mm->mmap_sem); + for (vma = mm->mmap; vma; vma = vma->vm_next) { + for (ea = vma->vm_start; ea < vma->vm_end; + ea = next_segment(ea, slb.vsid)) { + rc = copro_calculate_slb(mm, ea, &slb); + if (rc) + continue; + + if (last_esid == slb.esid) + continue; + + cxl_load_segment(ctx, &slb); + last_esid = slb.esid; + } + } + up_read(&mm->mmap_sem); + + mmput(mm); +out1: + put_task_struct(task); +} + +void cxl_prefault(struct cxl_context *ctx, u64 wed) +{ + switch (ctx->afu->prefault_mode) { + case CXL_PREFAULT_WED: + cxl_prefault_one(ctx, wed); + break; + case CXL_PREFAULT_ALL: + cxl_prefault_vma(ctx); + break; + default: + break; + } +} diff --git a/kernel/drivers/misc/cxl/file.c b/kernel/drivers/misc/cxl/file.c new file mode 100644 index 000000000..2364bcadb --- /dev/null +++ b/kernel/drivers/misc/cxl/file.c @@ -0,0 +1,529 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */
+
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/bitmap.h>
+#include <linux/sched.h>
+#include <linux/poll.h>
+#include <linux/pid.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/cputable.h>
+#include <asm/current.h>
+#include <asm/copro.h>
+
+#include "cxl.h"
+#include "trace.h"
+
+#define CXL_NUM_MINORS 256 /* Total to reserve */
+#define CXL_DEV_MINORS 13  /* 1 control + 4 AFUs * 3 (dedicated/master/shared) */
+
+#define CXL_CARD_MINOR(adapter) (adapter->adapter_num * CXL_DEV_MINORS)
+#define CXL_AFU_MINOR_D(afu) (CXL_CARD_MINOR(afu->adapter) + 1 + (3 * afu->slice))
+#define CXL_AFU_MINOR_M(afu) (CXL_AFU_MINOR_D(afu) + 1)
+#define CXL_AFU_MINOR_S(afu) (CXL_AFU_MINOR_D(afu) + 2)
+#define CXL_AFU_MKDEV_D(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_D(afu))
+#define CXL_AFU_MKDEV_M(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_M(afu))
+#define CXL_AFU_MKDEV_S(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_S(afu))
+
+#define CXL_DEVT_ADAPTER(dev) (MINOR(dev) / CXL_DEV_MINORS)
+#define CXL_DEVT_AFU(dev) ((MINOR(dev) % CXL_DEV_MINORS - 1) / 3)
+
+#define CXL_DEVT_IS_CARD(dev) (MINOR(dev) % CXL_DEV_MINORS == 0)
+
+static dev_t cxl_dev;
+
+static struct class *cxl_class;
+
+static int __afu_open(struct inode *inode, struct file *file, bool master)
+{
+	struct cxl *adapter;
+	struct cxl_afu *afu;
+	struct cxl_context *ctx;
+	int adapter_num = CXL_DEVT_ADAPTER(inode->i_rdev);
+	int slice = CXL_DEVT_AFU(inode->i_rdev);
+	int rc = -ENODEV;
+
+	pr_devel("afu_open afu%i.%i\n", adapter_num, slice);
+
+	if (!(adapter = get_cxl_adapter(adapter_num)))
+		return -ENODEV;
+
+	if (slice > adapter->slices)
+		goto err_put_adapter;
+
+	spin_lock(&adapter->afu_list_lock);
+	if (!(afu = adapter->afu[slice])) {
+		spin_unlock(&adapter->afu_list_lock);
+		goto err_put_adapter;
+	}
+	get_device(&afu->dev);
+	spin_unlock(&adapter->afu_list_lock);
+
+	if (!afu->current_mode)
+		goto err_put_afu;
+
+	if (!(ctx = cxl_context_alloc())) {
+		rc = -ENOMEM;
+		goto err_put_afu;
+	}
+
+	if ((rc = cxl_context_init(ctx, afu, master, inode->i_mapping)))
+		goto err_put_afu;
+
+	pr_devel("afu_open pe: %i\n", ctx->pe);
+	file->private_data = ctx;
+	cxl_ctx_get();
+
+	/* Our ref on the AFU will now hold the adapter */
+	put_device(&adapter->dev);
+
+	return 0;
+
+err_put_afu:
+	put_device(&afu->dev);
+err_put_adapter:
+	put_device(&adapter->dev);
+	return rc;
+}
+static int afu_open(struct inode *inode, struct file *file)
+{
+	return __afu_open(inode, file, false);
+}
+
+static int afu_master_open(struct inode *inode, struct file *file)
+{
+	return __afu_open(inode, file, true);
+}
+
+static int afu_release(struct inode *inode, struct file *file)
+{
+	struct cxl_context *ctx = file->private_data;
+
+	pr_devel("%s: closing cxl file descriptor. pe: %i\n",
+		 __func__, ctx->pe);
+	cxl_context_detach(ctx);
+
+	mutex_lock(&ctx->mapping_lock);
+	ctx->mapping = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
+	put_device(&ctx->afu->dev);
+
+	/*
+	 * At this point all bottom halves have finished and we should be
+	 * getting no more IRQs from the hardware for this context. Once it's
+	 * removed from the IDR (and RCU synchronised) it's safe to free the
+	 * sstp and context.
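+	 *
+	 * (The RCU synchronisation matters because cxl_irq_multiplexed()
+	 * in irq.c looks contexts up with idr_find() under rcu_read_lock();
+	 * the grace period in cxl_context_free() guarantees no interrupt
+	 * path still holds a pointer to the context once it is freed.)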
+	 */
+	cxl_context_free(ctx);
+
+	cxl_ctx_put();
+	return 0;
+}
+
+static long afu_ioctl_start_work(struct cxl_context *ctx,
+				 struct cxl_ioctl_start_work __user *uwork)
+{
+	struct cxl_ioctl_start_work work;
+	u64 amr = 0;
+	int rc;
+
+	pr_devel("%s: pe: %i\n", __func__, ctx->pe);
+
+	/* Do this outside the status_mutex to avoid a circular dependency with
+	 * the locking in cxl_mmap_fault() (and return directly here rather
+	 * than via the out label, which unlocks the not-yet-taken mutex) */
+	if (copy_from_user(&work, uwork,
+			   sizeof(struct cxl_ioctl_start_work)))
+		return -EFAULT;
+
+	mutex_lock(&ctx->status_mutex);
+	if (ctx->status != OPENED) {
+		rc = -EIO;
+		goto out;
+	}
+
+	/*
+	 * If any of the reserved fields are set or any of the unused
+	 * flags are set it's invalid
+	 */
+	if (work.reserved1 || work.reserved2 || work.reserved3 ||
+	    work.reserved4 || work.reserved5 || work.reserved6 ||
+	    (work.flags & ~CXL_START_WORK_ALL)) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	if (!(work.flags & CXL_START_WORK_NUM_IRQS))
+		work.num_interrupts = ctx->afu->pp_irqs;
+	else if ((work.num_interrupts < ctx->afu->pp_irqs) ||
+		 (work.num_interrupts > ctx->afu->irqs_max)) {
+		rc = -EINVAL;
+		goto out;
+	}
+	if ((rc = afu_register_irqs(ctx, work.num_interrupts)))
+		goto out;
+
+	if (work.flags & CXL_START_WORK_AMR)
+		amr = work.amr & mfspr(SPRN_UAMOR);
+
+	/*
+	 * We grab the PID here and not in the file open to allow for the case
+	 * where a process (master, some daemon, etc) has opened the chardev on
+	 * behalf of another process, so the AFU's mm gets bound to the process
+	 * that performs this ioctl and not the process that opened the file.
+	 */
+	ctx->pid = get_pid(get_task_pid(current, PIDTYPE_PID));
+
+	trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr);
+
+	if ((rc = cxl_attach_process(ctx, false, work.work_element_descriptor,
+				     amr))) {
+		afu_release_irqs(ctx);
+		goto out;
+	}
+
+	ctx->status = STARTED;
+	rc = 0;
+out:
+	mutex_unlock(&ctx->status_mutex);
+	return rc;
+}
+static long afu_ioctl_process_element(struct cxl_context *ctx,
+				      int __user *upe)
+{
+	pr_devel("%s: pe: %i\n", __func__, ctx->pe);
+
+	if (copy_to_user(upe, &ctx->pe, sizeof(__u32)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct cxl_context *ctx = file->private_data;
+
+	if (ctx->status == CLOSED)
+		return -EIO;
+
+	pr_devel("afu_ioctl\n");
+	switch (cmd) {
+	case CXL_IOCTL_START_WORK:
+		return afu_ioctl_start_work(ctx, (struct cxl_ioctl_start_work __user *)arg);
+	case CXL_IOCTL_GET_PROCESS_ELEMENT:
+		return afu_ioctl_process_element(ctx, (__u32 __user *)arg);
+	}
+	return -EINVAL;
+}
+
+static long afu_compat_ioctl(struct file *file, unsigned int cmd,
+			     unsigned long arg)
+{
+	return afu_ioctl(file, cmd, arg);
+}
+
+static int afu_mmap(struct file *file, struct vm_area_struct *vm)
+{
+	struct cxl_context *ctx = file->private_data;
+
+	/* AFU must be started before we can MMIO */
+	if (ctx->status != STARTED)
+		return -EIO;
+
+	return cxl_context_iomap(ctx, vm);
+}
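The poll and read handlers below deliver AFU interrupts, translation faults and AFU errors to userspace as fixed-format events. A hypothetical client loop, assuming an already-opened and started context fd and the uapi header installed as <misc/cxl.h>:

/* Hypothetical userspace event loop for an already-started AFU context. */
#include <poll.h>
#include <stdio.h>
#include <unistd.h>
#include <misc/cxl.h>	/* uapi: struct cxl_event, CXL_READ_MIN_SIZE */

static int wait_for_event(int fd)
{
	char buf[CXL_READ_MIN_SIZE];	/* reads must be at least this big */
	struct cxl_event *ev = (struct cxl_event *)buf;
	struct pollfd pfd = { .fd = fd, .events = POLLIN };

	if (poll(&pfd, 1, -1) < 0)	/* afu_poll: sleeps on ctx->wq */
		return -1;
	if (read(fd, buf, sizeof(buf)) < 0)	/* one event per read */
		return -1;

	switch (ev->header.type) {
	case CXL_EVENT_AFU_INTERRUPT:
		printf("AFU interrupt %u\n", ev->irq.irq);
		break;
	case CXL_EVENT_DATA_STORAGE:
		printf("translation fault at %#llx\n",
		       (unsigned long long)ev->fault.addr);
		break;
	case CXL_EVENT_AFU_ERROR:
		printf("AFU error %#llx\n",
		       (unsigned long long)ev->afu_error.error);
		break;
	}
	return 0;
}

+static unsigned int afu_poll(struct file *file, struct poll_table_struct *poll)
+{
+	struct cxl_context *ctx = file->private_data;
+	int mask = 0;
+	unsigned long flags;
+
+	poll_wait(file, &ctx->wq, poll);
+
+	pr_devel("afu_poll wait done pe: %i\n", ctx->pe);
+
+	spin_lock_irqsave(&ctx->lock, flags);
+	if (ctx->pending_irq || ctx->pending_fault ||
+	    ctx->pending_afu_err)
+		mask |= POLLIN | POLLRDNORM;
+	else if (ctx->status == CLOSED)
+		/* Only error on closed when there are no further events
+		 * pending */
+		mask |= POLLERR;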
spin_unlock_irqrestore(&ctx->lock, flags); + + pr_devel("afu_poll pe: %i returning %#x\n", ctx->pe, mask); + + return mask; +} + +static inline int ctx_event_pending(struct cxl_context *ctx) +{ + return (ctx->pending_irq || ctx->pending_fault || + ctx->pending_afu_err || (ctx->status == CLOSED)); +} + +static ssize_t afu_read(struct file *file, char __user *buf, size_t count, + loff_t *off) +{ + struct cxl_context *ctx = file->private_data; + struct cxl_event event; + unsigned long flags; + int rc; + DEFINE_WAIT(wait); + + if (count < CXL_READ_MIN_SIZE) + return -EINVAL; + + spin_lock_irqsave(&ctx->lock, flags); + + for (;;) { + prepare_to_wait(&ctx->wq, &wait, TASK_INTERRUPTIBLE); + if (ctx_event_pending(ctx)) + break; + + if (file->f_flags & O_NONBLOCK) { + rc = -EAGAIN; + goto out; + } + + if (signal_pending(current)) { + rc = -ERESTARTSYS; + goto out; + } + + spin_unlock_irqrestore(&ctx->lock, flags); + pr_devel("afu_read going to sleep...\n"); + schedule(); + pr_devel("afu_read woken up\n"); + spin_lock_irqsave(&ctx->lock, flags); + } + + finish_wait(&ctx->wq, &wait); + + memset(&event, 0, sizeof(event)); + event.header.process_element = ctx->pe; + event.header.size = sizeof(struct cxl_event_header); + if (ctx->pending_irq) { + pr_devel("afu_read delivering AFU interrupt\n"); + event.header.size += sizeof(struct cxl_event_afu_interrupt); + event.header.type = CXL_EVENT_AFU_INTERRUPT; + event.irq.irq = find_first_bit(ctx->irq_bitmap, ctx->irq_count) + 1; + clear_bit(event.irq.irq - 1, ctx->irq_bitmap); + if (bitmap_empty(ctx->irq_bitmap, ctx->irq_count)) + ctx->pending_irq = false; + } else if (ctx->pending_fault) { + pr_devel("afu_read delivering data storage fault\n"); + event.header.size += sizeof(struct cxl_event_data_storage); + event.header.type = CXL_EVENT_DATA_STORAGE; + event.fault.addr = ctx->fault_addr; + event.fault.dsisr = ctx->fault_dsisr; + ctx->pending_fault = false; + } else if (ctx->pending_afu_err) { + pr_devel("afu_read delivering afu error\n"); + event.header.size += sizeof(struct cxl_event_afu_error); + event.header.type = CXL_EVENT_AFU_ERROR; + event.afu_error.error = ctx->afu_err; + ctx->pending_afu_err = false; + } else if (ctx->status == CLOSED) { + pr_devel("afu_read fatal error\n"); + spin_unlock_irqrestore(&ctx->lock, flags); + return -EIO; + } else + WARN(1, "afu_read must be buggy\n"); + + spin_unlock_irqrestore(&ctx->lock, flags); + + if (copy_to_user(buf, &event, event.header.size)) + return -EFAULT; + return event.header.size; + +out: + finish_wait(&ctx->wq, &wait); + spin_unlock_irqrestore(&ctx->lock, flags); + return rc; +} + +static const struct file_operations afu_fops = { + .owner = THIS_MODULE, + .open = afu_open, + .poll = afu_poll, + .read = afu_read, + .release = afu_release, + .unlocked_ioctl = afu_ioctl, + .compat_ioctl = afu_compat_ioctl, + .mmap = afu_mmap, +}; + +static const struct file_operations afu_master_fops = { + .owner = THIS_MODULE, + .open = afu_master_open, + .poll = afu_poll, + .read = afu_read, + .release = afu_release, + .unlocked_ioctl = afu_ioctl, + .compat_ioctl = afu_compat_ioctl, + .mmap = afu_mmap, +}; + + +static char *cxl_devnode(struct device *dev, umode_t *mode) +{ + if (CXL_DEVT_IS_CARD(dev->devt)) { + /* + * These minor numbers will eventually be used to program the + * PSL and AFUs once we have dynamic reprogramming support + */ + return NULL; + } + return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev)); +} + +extern struct class *cxl_class; + +static int cxl_add_chardev(struct cxl_afu *afu, dev_t devt, struct 
cdev *cdev, + struct device **chardev, char *postfix, char *desc, + const struct file_operations *fops) +{ + struct device *dev; + int rc; + + cdev_init(cdev, fops); + if ((rc = cdev_add(cdev, devt, 1))) { + dev_err(&afu->dev, "Unable to add %s chardev: %i\n", desc, rc); + return rc; + } + + dev = device_create(cxl_class, &afu->dev, devt, afu, + "afu%i.%i%s", afu->adapter->adapter_num, afu->slice, postfix); + if (IS_ERR(dev)) { + dev_err(&afu->dev, "Unable to create %s chardev in sysfs: %i\n", desc, rc); + rc = PTR_ERR(dev); + goto err; + } + + *chardev = dev; + + return 0; +err: + cdev_del(cdev); + return rc; +} + +int cxl_chardev_d_afu_add(struct cxl_afu *afu) +{ + return cxl_add_chardev(afu, CXL_AFU_MKDEV_D(afu), &afu->afu_cdev_d, + &afu->chardev_d, "d", "dedicated", + &afu_master_fops); /* Uses master fops */ +} + +int cxl_chardev_m_afu_add(struct cxl_afu *afu) +{ + return cxl_add_chardev(afu, CXL_AFU_MKDEV_M(afu), &afu->afu_cdev_m, + &afu->chardev_m, "m", "master", + &afu_master_fops); +} + +int cxl_chardev_s_afu_add(struct cxl_afu *afu) +{ + return cxl_add_chardev(afu, CXL_AFU_MKDEV_S(afu), &afu->afu_cdev_s, + &afu->chardev_s, "s", "shared", + &afu_fops); +} + +void cxl_chardev_afu_remove(struct cxl_afu *afu) +{ + if (afu->chardev_d) { + cdev_del(&afu->afu_cdev_d); + device_unregister(afu->chardev_d); + afu->chardev_d = NULL; + } + if (afu->chardev_m) { + cdev_del(&afu->afu_cdev_m); + device_unregister(afu->chardev_m); + afu->chardev_m = NULL; + } + if (afu->chardev_s) { + cdev_del(&afu->afu_cdev_s); + device_unregister(afu->chardev_s); + afu->chardev_s = NULL; + } +} + +int cxl_register_afu(struct cxl_afu *afu) +{ + afu->dev.class = cxl_class; + + return device_register(&afu->dev); +} + +int cxl_register_adapter(struct cxl *adapter) +{ + adapter->dev.class = cxl_class; + + /* + * Future: When we support dynamically reprogramming the PSL & AFU we + * will expose the interface to do that via a chardev: + * adapter->dev.devt = CXL_CARD_MKDEV(adapter); + */ + + return device_register(&adapter->dev); +} + +int __init cxl_file_init(void) +{ + int rc; + + /* + * If these change we really need to update API. Either change some + * flags or update API version number CXL_API_VERSION. + */ + BUILD_BUG_ON(CXL_API_VERSION != 1); + BUILD_BUG_ON(sizeof(struct cxl_ioctl_start_work) != 64); + BUILD_BUG_ON(sizeof(struct cxl_event_header) != 8); + BUILD_BUG_ON(sizeof(struct cxl_event_afu_interrupt) != 8); + BUILD_BUG_ON(sizeof(struct cxl_event_data_storage) != 32); + BUILD_BUG_ON(sizeof(struct cxl_event_afu_error) != 16); + + if ((rc = alloc_chrdev_region(&cxl_dev, 0, CXL_NUM_MINORS, "cxl"))) { + pr_err("Unable to allocate CXL major number: %i\n", rc); + return rc; + } + + pr_devel("CXL device allocated, MAJOR %i\n", MAJOR(cxl_dev)); + + cxl_class = class_create(THIS_MODULE, "cxl"); + if (IS_ERR(cxl_class)) { + pr_err("Unable to create CXL class\n"); + rc = PTR_ERR(cxl_class); + goto err; + } + cxl_class->devnode = cxl_devnode; + + return 0; + +err: + unregister_chrdev_region(cxl_dev, CXL_NUM_MINORS); + return rc; +} + +void cxl_file_exit(void) +{ + unregister_chrdev_region(cxl_dev, CXL_NUM_MINORS); + class_destroy(cxl_class); +} diff --git a/kernel/drivers/misc/cxl/irq.c b/kernel/drivers/misc/cxl/irq.c new file mode 100644 index 000000000..c8929c526 --- /dev/null +++ b/kernel/drivers/misc/cxl/irq.c @@ -0,0 +1,497 @@ +/* + * Copyright 2014 IBM Corp. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/interrupt.h> +#include <linux/workqueue.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/slab.h> +#include <linux/pid.h> +#include <asm/cputable.h> +#include <misc/cxl.h> + +#include "cxl.h" +#include "trace.h" + +/* XXX: This is implementation specific */ +static irqreturn_t handle_psl_slice_error(struct cxl_context *ctx, u64 dsisr, u64 errstat) +{ + u64 fir1, fir2, fir_slice, serr, afu_debug; + + fir1 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR1); + fir2 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR2); + fir_slice = cxl_p1n_read(ctx->afu, CXL_PSL_FIR_SLICE_An); + serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An); + afu_debug = cxl_p1n_read(ctx->afu, CXL_AFU_DEBUG_An); + + dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%.16llx\n", errstat); + dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%.16llx\n", fir1); + dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%.16llx\n", fir2); + dev_crit(&ctx->afu->dev, "PSL_SERR_An: 0x%.16llx\n", serr); + dev_crit(&ctx->afu->dev, "PSL_FIR_SLICE_An: 0x%.16llx\n", fir_slice); + dev_crit(&ctx->afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%.16llx\n", afu_debug); + + dev_crit(&ctx->afu->dev, "STOPPING CXL TRACE\n"); + cxl_stop_trace(ctx->afu->adapter); + + return cxl_ack_irq(ctx, 0, errstat); +} + +irqreturn_t cxl_slice_irq_err(int irq, void *data) +{ + struct cxl_afu *afu = data; + u64 fir_slice, errstat, serr, afu_debug; + + WARN(irq, "CXL SLICE ERROR interrupt %i\n", irq); + + serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); + fir_slice = cxl_p1n_read(afu, CXL_PSL_FIR_SLICE_An); + errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); + afu_debug = cxl_p1n_read(afu, CXL_AFU_DEBUG_An); + dev_crit(&afu->dev, "PSL_SERR_An: 0x%.16llx\n", serr); + dev_crit(&afu->dev, "PSL_FIR_SLICE_An: 0x%.16llx\n", fir_slice); + dev_crit(&afu->dev, "CXL_PSL_ErrStat_An: 0x%.16llx\n", errstat); + dev_crit(&afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%.16llx\n", afu_debug); + + cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); + + return IRQ_HANDLED; +} + +static irqreturn_t cxl_irq_err(int irq, void *data) +{ + struct cxl *adapter = data; + u64 fir1, fir2, err_ivte; + + WARN(1, "CXL ERROR interrupt %i\n", irq); + + err_ivte = cxl_p1_read(adapter, CXL_PSL_ErrIVTE); + dev_crit(&adapter->dev, "PSL_ErrIVTE: 0x%.16llx\n", err_ivte); + + dev_crit(&adapter->dev, "STOPPING CXL TRACE\n"); + cxl_stop_trace(adapter); + + fir1 = cxl_p1_read(adapter, CXL_PSL_FIR1); + fir2 = cxl_p1_read(adapter, CXL_PSL_FIR2); + + dev_crit(&adapter->dev, "PSL_FIR1: 0x%.16llx\nPSL_FIR2: 0x%.16llx\n", fir1, fir2); + + return IRQ_HANDLED; +} + +static irqreturn_t schedule_cxl_fault(struct cxl_context *ctx, u64 dsisr, u64 dar) +{ + ctx->dsisr = dsisr; + ctx->dar = dar; + schedule_work(&ctx->fault_work); + return IRQ_HANDLED; +} + +static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info) +{ + struct cxl_context *ctx = data; + u64 dsisr, dar; + + dsisr = irq_info->dsisr; + dar = irq_info->dar; + + trace_cxl_psl_irq(ctx, irq, dsisr, dar); + + pr_devel("CXL interrupt %i for afu pe: %i DSISR: %#llx DAR: %#llx\n", irq, ctx->pe, dsisr, dar); + + if (dsisr & CXL_PSL_DSISR_An_DS) { + /* + * We don't inherently need to sleep to handle this, but we do + * need to get a ref to the task's mm, which we can't do from + * irq context without the potential for a 
deadlock since it
+		 * takes the task_lock. An alternate option would be to keep a
+		 * reference to the task's mm the entire time it has cxl open,
+		 * but to do that we need to solve the issue where we hold a
+		 * ref to the mm, but the mm can hold a ref to the fd after an
+		 * mmap preventing anything from being cleaned up.
+		 */
+		pr_devel("Scheduling segment miss handling for later pe: %i\n", ctx->pe);
+		return schedule_cxl_fault(ctx, dsisr, dar);
+	}
+
+	if (dsisr & CXL_PSL_DSISR_An_M)
+		pr_devel("CXL interrupt: PTE not found\n");
+	if (dsisr & CXL_PSL_DSISR_An_P)
+		pr_devel("CXL interrupt: Storage protection violation\n");
+	if (dsisr & CXL_PSL_DSISR_An_A)
+		pr_devel("CXL interrupt: AFU lock access to write through or cache inhibited storage\n");
+	if (dsisr & CXL_PSL_DSISR_An_S)
+		pr_devel("CXL interrupt: Access was afu_wr or afu_zero\n");
+	if (dsisr & CXL_PSL_DSISR_An_K)
+		pr_devel("CXL interrupt: Access not permitted by virtual page class key protection\n");
+
+	if (dsisr & CXL_PSL_DSISR_An_DM) {
+		/*
+		 * In some cases we might be able to handle the fault
+		 * immediately if hash_page would succeed, but we still need
+		 * the task's mm, which as above we can't get without a lock
+		 */
+		pr_devel("Scheduling page fault handling for later pe: %i\n", ctx->pe);
+		return schedule_cxl_fault(ctx, dsisr, dar);
+	}
+	if (dsisr & CXL_PSL_DSISR_An_ST)
+		WARN(1, "CXL interrupt: Segment Table PTE not found\n");
+	if (dsisr & CXL_PSL_DSISR_An_UR)
+		pr_devel("CXL interrupt: AURP PTE not found\n");
+	if (dsisr & CXL_PSL_DSISR_An_PE)
+		return handle_psl_slice_error(ctx, dsisr, irq_info->errstat);
+	if (dsisr & CXL_PSL_DSISR_An_AE) {
+		pr_devel("CXL interrupt: AFU Error %#llx\n", irq_info->afu_err);
+
+		if (ctx->pending_afu_err) {
+			/*
+			 * This shouldn't happen - the PSL treats these errors
+			 * as fatal and will have reset the AFU, so there's not
+			 * much point buffering multiple AFU errors.
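+			 * (For example - purely illustrative - an AFU that
+			 * raises a second error before userspace has consumed
+			 * the first one through the event interface would end
+			 * up in this branch.)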
+			 * OTOH if we DO ever see a storm of these come in it's
+			 * probably best that we log them somewhere:
+			 */
+			dev_err_ratelimited(&ctx->afu->dev, "CXL AFU Error "
+					    "undelivered to pe %i: %#llx\n",
+					    ctx->pe, irq_info->afu_err);
+		} else {
+			spin_lock(&ctx->lock);
+			ctx->afu_err = irq_info->afu_err;
+			ctx->pending_afu_err = 1;
+			spin_unlock(&ctx->lock);
+
+			wake_up_all(&ctx->wq);
+		}
+
+		cxl_ack_irq(ctx, CXL_PSL_TFC_An_A, 0);
+		return IRQ_HANDLED;
+	}
+	if (dsisr & CXL_PSL_DSISR_An_OC)
+		pr_devel("CXL interrupt: OS Context Warning\n");
+
+	WARN(1, "Unhandled CXL PSL IRQ\n");
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t fail_psl_irq(struct cxl_afu *afu, struct cxl_irq_info *irq_info)
+{
+	if (irq_info->dsisr & CXL_PSL_DSISR_TRANS)
+		cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE);
+	else
+		cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t cxl_irq_multiplexed(int irq, void *data)
+{
+	struct cxl_afu *afu = data;
+	struct cxl_context *ctx;
+	struct cxl_irq_info irq_info;
+	int ph = cxl_p2n_read(afu, CXL_PSL_PEHandle_An) & 0xffff;
+	int ret;
+
+	if ((ret = cxl_get_irq(afu, &irq_info))) {
+		WARN(1, "Unable to get CXL IRQ Info: %i\n", ret);
+		return fail_psl_irq(afu, &irq_info);
+	}
+
+	rcu_read_lock();
+	ctx = idr_find(&afu->contexts_idr, ph);
+	if (ctx) {
+		ret = cxl_irq(irq, ctx, &irq_info);
+		rcu_read_unlock();
+		return ret;
+	}
+	rcu_read_unlock();
+
+	WARN(1, "Unable to demultiplex CXL PSL IRQ for PE %i DSISR %.16llx DAR"
+		" %.16llx\n(Possible AFU HW issue - was a term/remove acked"
+		" with outstanding transactions?)\n", ph, irq_info.dsisr,
+		irq_info.dar);
+	return fail_psl_irq(afu, &irq_info);
+}
+
+static irqreturn_t cxl_irq_afu(int irq, void *data)
+{
+	struct cxl_context *ctx = data;
+	irq_hw_number_t hwirq = irqd_to_hwirq(irq_get_irq_data(irq));
+	int irq_off, afu_irq = 1;
+	__u16 range;
+	int r;
+
+	for (r = 1; r < CXL_IRQ_RANGES; r++) {
+		irq_off = hwirq - ctx->irqs.offset[r];
+		range = ctx->irqs.range[r];
+		if (irq_off >= 0 && irq_off < range) {
+			afu_irq += irq_off;
+			break;
+		}
+		afu_irq += range;
+	}
+	if (unlikely(r >= CXL_IRQ_RANGES)) {
+		WARN(1, "Received AFU IRQ out of range for pe %i (virq %i hwirq %lx)\n",
+		     ctx->pe, irq, hwirq);
+		return IRQ_HANDLED;
+	}
+
+	trace_cxl_afu_irq(ctx, afu_irq, irq, hwirq);
+	pr_devel("Received AFU interrupt %i for pe: %i (virq %i hwirq %lx)\n",
+	       afu_irq, ctx->pe, irq, hwirq);
+
+	if (unlikely(!ctx->irq_bitmap)) {
+		WARN(1, "Received AFU IRQ for context with no IRQ bitmap\n");
+		return IRQ_HANDLED;
+	}
+	spin_lock(&ctx->lock);
+	set_bit(afu_irq - 1, ctx->irq_bitmap);
+	ctx->pending_irq = true;
+	spin_unlock(&ctx->lock);
+
+	wake_up_all(&ctx->wq);
+
+	return IRQ_HANDLED;
+}
+
+unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq,
+			 irq_handler_t handler, void *cookie, const char *name)
+{
+	unsigned int virq;
+	int result;
+
+	/* IRQ Domain?
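(NULL below selects the kernel's default IRQ domain)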
*/
+	virq = irq_create_mapping(NULL, hwirq);
+	if (!virq) {
+		dev_warn(&adapter->dev, "cxl_map_irq: irq_create_mapping failed\n");
+		return 0;
+	}
+
+	cxl_setup_irq(adapter, hwirq, virq);
+
+	pr_devel("hwirq %#lx mapped to virq %u\n", hwirq, virq);
+
+	result = request_irq(virq, handler, 0, name, cookie);
+	if (result) {
+		dev_warn(&adapter->dev, "cxl_map_irq: request_irq failed: %i\n", result);
+		irq_dispose_mapping(virq);
+		return 0;
+	}
+
+	return virq;
+}
+
+void cxl_unmap_irq(unsigned int virq, void *cookie)
+{
+	free_irq(virq, cookie);
+	irq_dispose_mapping(virq);
+}
+
+static int cxl_register_one_irq(struct cxl *adapter,
+				irq_handler_t handler,
+				void *cookie,
+				irq_hw_number_t *dest_hwirq,
+				unsigned int *dest_virq,
+				const char *name)
+{
+	int hwirq, virq;
+
+	if ((hwirq = cxl_alloc_one_irq(adapter)) < 0)
+		return hwirq;
+
+	if (!(virq = cxl_map_irq(adapter, hwirq, handler, cookie, name)))
+		goto err;
+
+	*dest_hwirq = hwirq;
+	*dest_virq = virq;
+
+	return 0;
+
+err:
+	cxl_release_one_irq(adapter, hwirq);
+	return -ENOMEM;
+}
+
+int cxl_register_psl_err_irq(struct cxl *adapter)
+{
+	int rc;
+
+	adapter->irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err",
+				      dev_name(&adapter->dev));
+	if (!adapter->irq_name)
+		return -ENOMEM;
+
+	if ((rc = cxl_register_one_irq(adapter, cxl_irq_err, adapter,
+				       &adapter->err_hwirq,
+				       &adapter->err_virq,
+				       adapter->irq_name))) {
+		kfree(adapter->irq_name);
+		adapter->irq_name = NULL;
+		return rc;
+	}
+
+	cxl_p1_write(adapter, CXL_PSL_ErrIVTE, adapter->err_hwirq & 0xffff);
+
+	return 0;
+}
+
+void cxl_release_psl_err_irq(struct cxl *adapter)
+{
+	cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000);
+	cxl_unmap_irq(adapter->err_virq, adapter);
+	cxl_release_one_irq(adapter, adapter->err_hwirq);
+	kfree(adapter->irq_name);
+}
+
+int cxl_register_serr_irq(struct cxl_afu *afu)
+{
+	u64 serr;
+	int rc;
+
+	afu->err_irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err",
+				      dev_name(&afu->dev));
+	if (!afu->err_irq_name)
+		return -ENOMEM;
+
+	if ((rc = cxl_register_one_irq(afu->adapter, cxl_slice_irq_err, afu,
+				       &afu->serr_hwirq,
+				       &afu->serr_virq, afu->err_irq_name))) {
+		kfree(afu->err_irq_name);
+		afu->err_irq_name = NULL;
+		return rc;
+	}
+
+	serr = cxl_p1n_read(afu, CXL_PSL_SERR_An);
+	serr = (serr & 0x00ffffffffff0000ULL) | (afu->serr_hwirq & 0xffff);
+	cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
+
+	return 0;
+}
+
+void cxl_release_serr_irq(struct cxl_afu *afu)
+{
+	cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000);
+	cxl_unmap_irq(afu->serr_virq, afu);
+	cxl_release_one_irq(afu->adapter, afu->serr_hwirq);
+	kfree(afu->err_irq_name);
+}
+
+int cxl_register_psl_irq(struct cxl_afu *afu)
+{
+	int rc;
+
+	afu->psl_irq_name = kasprintf(GFP_KERNEL, "cxl-%s",
+				      dev_name(&afu->dev));
+	if (!afu->psl_irq_name)
+		return -ENOMEM;
+
+	if ((rc = cxl_register_one_irq(afu->adapter, cxl_irq_multiplexed, afu,
+				       &afu->psl_hwirq, &afu->psl_virq,
+				       afu->psl_irq_name))) {
+		kfree(afu->psl_irq_name);
+		afu->psl_irq_name = NULL;
+	}
+	return rc;
+}
+
+void cxl_release_psl_irq(struct cxl_afu *afu)
+{
+	cxl_unmap_irq(afu->psl_virq, afu);
+	cxl_release_one_irq(afu->adapter, afu->psl_hwirq);
+	kfree(afu->psl_irq_name);
+}
+
+void afu_irq_name_free(struct cxl_context *ctx)
+{
+	struct cxl_irq_name *irq_name, *tmp;
+
+	list_for_each_entry_safe(irq_name, tmp, &ctx->irq_names, list) {
+		kfree(irq_name->name);
+		list_del(&irq_name->list);
+		kfree(irq_name);
+	}
+}
+
+int afu_register_irqs(struct cxl_context *ctx, u32 count)
+{
+	irq_hw_number_t hwirq;
+	int rc, r, i, j = 1;
+	struct cxl_irq_name *irq_name;
+
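+	/*
+	 * Layout sketch (hwirq numbers here are illustrative only): range 0
+	 * always holds the single multiplexed PSL interrupt set up below,
+	 * and AFU IRQs are handed out 1-based from ranges 1..CXL_IRQ_RANGES-1
+	 * in order.  E.g. count = 4 satisfied by two hardware ranges,
+	 * {hwirq 100, len 1} and {hwirq 200, len 3}, gives AFU IRQ 1 from
+	 * the first range and AFU IRQs 2-4 from the second; cxl_irq_afu()
+	 * above walks the same ranges to map a hwirq back to its AFU IRQ
+	 * number.
+	 */
+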
if ((rc = cxl_alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter, count)))
+		return rc;
+
+	/* Multiplexed PSL Interrupt */
+	ctx->irqs.offset[0] = ctx->afu->psl_hwirq;
+	ctx->irqs.range[0] = 1;
+
+	ctx->irq_count = count;
+	ctx->irq_bitmap = kcalloc(BITS_TO_LONGS(count),
+				  sizeof(*ctx->irq_bitmap), GFP_KERNEL);
+	if (!ctx->irq_bitmap)
+		return -ENOMEM;
+
+	/*
+	 * Allocate names first.  If any fail, bail out before allocating
+	 * actual hardware IRQs.
+	 */
+	INIT_LIST_HEAD(&ctx->irq_names);
+	for (r = 1; r < CXL_IRQ_RANGES; r++) {
+		for (i = 0; i < ctx->irqs.range[r]; i++) {
+			irq_name = kmalloc(sizeof(struct cxl_irq_name),
+					   GFP_KERNEL);
+			if (!irq_name)
+				goto out;
+			irq_name->name = kasprintf(GFP_KERNEL, "cxl-%s-pe%i-%i",
+						   dev_name(&ctx->afu->dev),
+						   ctx->pe, j);
+			if (!irq_name->name) {
+				kfree(irq_name);
+				goto out;
+			}
+			/* Add to tail so the next loop gets the correct order */
+			list_add_tail(&irq_name->list, &ctx->irq_names);
+			j++;
+		}
+	}
+
+	/* We've allocated all memory now, so let's do the irq allocations */
+	irq_name = list_first_entry(&ctx->irq_names, struct cxl_irq_name, list);
+	for (r = 1; r < CXL_IRQ_RANGES; r++) {
+		hwirq = ctx->irqs.offset[r];
+		for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) {
+			cxl_map_irq(ctx->afu->adapter, hwirq,
+				    cxl_irq_afu, ctx, irq_name->name);
+			irq_name = list_next_entry(irq_name, list);
+		}
+	}
+
+	return 0;
+
+out:
+	afu_irq_name_free(ctx);
+	return -ENOMEM;
+}
+
+void afu_release_irqs(struct cxl_context *ctx)
+{
+	irq_hw_number_t hwirq;
+	unsigned int virq;
+	int r, i;
+
+	for (r = 1; r < CXL_IRQ_RANGES; r++) {
+		hwirq = ctx->irqs.offset[r];
+		for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) {
+			virq = irq_find_mapping(NULL, hwirq);
+			if (virq)
+				cxl_unmap_irq(virq, ctx);
+		}
+	}
+
+	afu_irq_name_free(ctx);
+	cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
+}
diff --git a/kernel/drivers/misc/cxl/main.c b/kernel/drivers/misc/cxl/main.c
new file mode 100644
index 000000000..8ccddceea
--- /dev/null
+++ b/kernel/drivers/misc/cxl/main.c
@@ -0,0 +1,232 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */ + +#include <linux/spinlock.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/mutex.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/mm.h> +#include <linux/of.h> +#include <linux/slab.h> +#include <linux/idr.h> +#include <linux/pci.h> +#include <asm/cputable.h> +#include <misc/cxl.h> + +#include "cxl.h" +#include "trace.h" + +static DEFINE_SPINLOCK(adapter_idr_lock); +static DEFINE_IDR(cxl_adapter_idr); + +uint cxl_verbose; +module_param_named(verbose, cxl_verbose, uint, 0600); +MODULE_PARM_DESC(verbose, "Enable verbose dmesg output"); + +static inline void _cxl_slbia(struct cxl_context *ctx, struct mm_struct *mm) +{ + struct task_struct *task; + unsigned long flags; + if (!(task = get_pid_task(ctx->pid, PIDTYPE_PID))) { + pr_devel("%s unable to get task %i\n", + __func__, pid_nr(ctx->pid)); + return; + } + + if (task->mm != mm) + goto out_put; + + pr_devel("%s matched mm - card: %i afu: %i pe: %i\n", __func__, + ctx->afu->adapter->adapter_num, ctx->afu->slice, ctx->pe); + + spin_lock_irqsave(&ctx->sste_lock, flags); + trace_cxl_slbia(ctx); + memset(ctx->sstp, 0, ctx->sst_size); + spin_unlock_irqrestore(&ctx->sste_lock, flags); + mb(); + cxl_afu_slbia(ctx->afu); +out_put: + put_task_struct(task); +} + +static inline void cxl_slbia_core(struct mm_struct *mm) +{ + struct cxl *adapter; + struct cxl_afu *afu; + struct cxl_context *ctx; + int card, slice, id; + + pr_devel("%s called\n", __func__); + + spin_lock(&adapter_idr_lock); + idr_for_each_entry(&cxl_adapter_idr, adapter, card) { + /* XXX: Make this lookup faster with link from mm to ctx */ + spin_lock(&adapter->afu_list_lock); + for (slice = 0; slice < adapter->slices; slice++) { + afu = adapter->afu[slice]; + if (!afu->enabled) + continue; + rcu_read_lock(); + idr_for_each_entry(&afu->contexts_idr, ctx, id) + _cxl_slbia(ctx, mm); + rcu_read_unlock(); + } + spin_unlock(&adapter->afu_list_lock); + } + spin_unlock(&adapter_idr_lock); +} + +static struct cxl_calls cxl_calls = { + .cxl_slbia = cxl_slbia_core, + .owner = THIS_MODULE, +}; + +int cxl_alloc_sst(struct cxl_context *ctx) +{ + unsigned long vsid; + u64 ea_mask, size, sstp0, sstp1; + + sstp0 = 0; + sstp1 = 0; + + ctx->sst_size = PAGE_SIZE; + ctx->sst_lru = 0; + ctx->sstp = (struct cxl_sste *)get_zeroed_page(GFP_KERNEL); + if (!ctx->sstp) { + pr_err("cxl_alloc_sst: Unable to allocate segment table\n"); + return -ENOMEM; + } + pr_devel("SSTP allocated at 0x%p\n", ctx->sstp); + + vsid = get_kernel_vsid((u64)ctx->sstp, mmu_kernel_ssize) << 12; + + sstp0 |= (u64)mmu_kernel_ssize << CXL_SSTP0_An_B_SHIFT; + sstp0 |= (SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp) << 50; + + size = (((u64)ctx->sst_size >> 8) - 1) << CXL_SSTP0_An_SegTableSize_SHIFT; + if (unlikely(size & ~CXL_SSTP0_An_SegTableSize_MASK)) { + WARN(1, "Impossible segment table size\n"); + return -EINVAL; + } + sstp0 |= size; + + if (mmu_kernel_ssize == MMU_SEGSIZE_256M) + ea_mask = 0xfffff00ULL; + else + ea_mask = 0xffffffff00ULL; + + sstp0 |= vsid >> (50-14); /* Top 14 bits of VSID */ + sstp1 |= (vsid << (64-(50-14))) & ~ea_mask; + sstp1 |= (u64)ctx->sstp & ea_mask; + sstp1 |= CXL_SSTP1_An_V; + + pr_devel("Looked up %#llx: slbfee. 
%#llx (ssize: %x, vsid: %#lx), copied to SSTP0: %#llx, SSTP1: %#llx\n",
+		 (u64)ctx->sstp, (u64)ctx->sstp & ESID_MASK, mmu_kernel_ssize, vsid, sstp0, sstp1);
+
+	/* Store calculated sstp hardware pointers for use later */
+	ctx->sstp0 = sstp0;
+	ctx->sstp1 = sstp1;
+
+	return 0;
+}
+
+/* Find a CXL adapter by its number and increase its refcount */
+struct cxl *get_cxl_adapter(int num)
+{
+	struct cxl *adapter;
+
+	spin_lock(&adapter_idr_lock);
+	if ((adapter = idr_find(&cxl_adapter_idr, num)))
+		get_device(&adapter->dev);
+	spin_unlock(&adapter_idr_lock);
+
+	return adapter;
+}
+
+int cxl_alloc_adapter_nr(struct cxl *adapter)
+{
+	int i;
+
+	idr_preload(GFP_KERNEL);
+	spin_lock(&adapter_idr_lock);
+	i = idr_alloc(&cxl_adapter_idr, adapter, 0, 0, GFP_NOWAIT);
+	spin_unlock(&adapter_idr_lock);
+	idr_preload_end();
+	if (i < 0)
+		return i;
+
+	adapter->adapter_num = i;
+
+	return 0;
+}
+
+void cxl_remove_adapter_nr(struct cxl *adapter)
+{
+	idr_remove(&cxl_adapter_idr, adapter->adapter_num);
+}
+
+int cxl_afu_select_best_mode(struct cxl_afu *afu)
+{
+	if (afu->modes_supported & CXL_MODE_DIRECTED)
+		return cxl_afu_activate_mode(afu, CXL_MODE_DIRECTED);
+
+	if (afu->modes_supported & CXL_MODE_DEDICATED)
+		return cxl_afu_activate_mode(afu, CXL_MODE_DEDICATED);
+
+	dev_warn(&afu->dev, "No supported programming modes available\n");
+	/* We don't fail this so the user can inspect sysfs */
+	return 0;
+}
+
+static int __init init_cxl(void)
+{
+	int rc = 0;
+
+	if (!cpu_has_feature(CPU_FTR_HVMODE))
+		return -EPERM;
+
+	if ((rc = cxl_file_init()))
+		return rc;
+
+	cxl_debugfs_init();
+
+	if ((rc = register_cxl_calls(&cxl_calls)))
+		goto err;
+
+	if ((rc = pci_register_driver(&cxl_pci_driver)))
+		goto err1;
+
+	return 0;
+err1:
+	unregister_cxl_calls(&cxl_calls);
+err:
+	cxl_debugfs_exit();
+	cxl_file_exit();
+
+	return rc;
+}
+
+static void exit_cxl(void)
+{
+	pci_unregister_driver(&cxl_pci_driver);
+
+	cxl_debugfs_exit();
+	cxl_file_exit();
+	unregister_cxl_calls(&cxl_calls);
+}
+
+module_init(init_cxl);
+module_exit(exit_cxl);
+
+MODULE_DESCRIPTION("IBM Coherent Accelerator");
+MODULE_AUTHOR("Ian Munsie <imunsie@au1.ibm.com>");
+MODULE_LICENSE("GPL");
diff --git a/kernel/drivers/misc/cxl/native.c b/kernel/drivers/misc/cxl/native.c
new file mode 100644
index 000000000..29185fc61
--- /dev/null
+++ b/kernel/drivers/misc/cxl/native.c
@@ -0,0 +1,704 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <asm/synch.h>
+#include <misc/cxl.h>
+
+#include "cxl.h"
+#include "trace.h"
+
+static int afu_control(struct cxl_afu *afu, u64 command,
+		       u64 result, u64 mask, bool enabled)
+{
+	u64 AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
+	unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
+	int rc = 0;
+
+	spin_lock(&afu->afu_cntl_lock);
+	pr_devel("AFU command starting: %llx\n", command);
+
+	trace_cxl_afu_ctrl(afu, command);
+
+	cxl_p2n_write(afu, CXL_AFU_Cntl_An, AFU_Cntl | command);
+
+	AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
+	while ((AFU_Cntl & mask) != result) {
+		if (time_after_eq(jiffies, timeout)) {
+			dev_warn(&afu->dev, "WARNING: AFU control timed out!\n");
+			rc = -EBUSY;
+			goto out;
+		}
+		pr_devel_ratelimited("AFU control... (0x%.16llx)\n",
+				     AFU_Cntl | command);
+		cpu_relax();
+		AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
+	}
+	pr_devel("AFU command complete: %llx\n", command);
+	afu->enabled = enabled;
+out:
+	trace_cxl_afu_ctrl_done(afu, command, rc);
+	spin_unlock(&afu->afu_cntl_lock);
+
+	return rc;
+}
+
+static int afu_enable(struct cxl_afu *afu)
+{
+	pr_devel("AFU enable request\n");
+
+	return afu_control(afu, CXL_AFU_Cntl_An_E,
+			   CXL_AFU_Cntl_An_ES_Enabled,
+			   CXL_AFU_Cntl_An_ES_MASK, true);
+}
+
+int cxl_afu_disable(struct cxl_afu *afu)
+{
+	pr_devel("AFU disable request\n");
+
+	return afu_control(afu, 0, CXL_AFU_Cntl_An_ES_Disabled,
+			   CXL_AFU_Cntl_An_ES_MASK, false);
+}
+
+/* This will disable as well as reset */
+int cxl_afu_reset(struct cxl_afu *afu)
+{
+	pr_devel("AFU reset request\n");
+
+	return afu_control(afu, CXL_AFU_Cntl_An_RA,
+			   CXL_AFU_Cntl_An_RS_Complete | CXL_AFU_Cntl_An_ES_Disabled,
+			   CXL_AFU_Cntl_An_RS_MASK | CXL_AFU_Cntl_An_ES_MASK,
+			   false);
+}
+
+static int afu_check_and_enable(struct cxl_afu *afu)
+{
+	if (afu->enabled)
+		return 0;
+	return afu_enable(afu);
+}
+
+int cxl_psl_purge(struct cxl_afu *afu)
+{
+	u64 PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An);
+	u64 AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
+	u64 dsisr, dar;
+	u64 start, end;
+	unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
+	int rc = 0;
+
+	trace_cxl_psl_ctrl(afu, CXL_PSL_SCNTL_An_Pc);
+
+	pr_devel("PSL purge request\n");
+
+	if ((AFU_Cntl & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) {
+		WARN(1, "psl_purge request while AFU not disabled!\n");
+		cxl_afu_disable(afu);
+	}
+
+	cxl_p1n_write(afu, CXL_PSL_SCNTL_An,
+		      PSL_CNTL | CXL_PSL_SCNTL_An_Pc);
+	start = local_clock();
+	PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An);
+	while ((PSL_CNTL & CXL_PSL_SCNTL_An_Ps_MASK)
+			== CXL_PSL_SCNTL_An_Ps_Pending) {
+		if (time_after_eq(jiffies, timeout)) {
+			dev_warn(&afu->dev, "WARNING: PSL Purge timed out!\n");
+			rc = -EBUSY;
+			goto out;
+		}
+		dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
+		pr_devel_ratelimited("PSL purging...
PSL_CNTL: 0x%.16llx PSL_DSISR: 0x%.16llx\n", PSL_CNTL, dsisr);
+		if (dsisr & CXL_PSL_DSISR_TRANS) {
+			dar = cxl_p2n_read(afu, CXL_PSL_DAR_An);
+			dev_notice(&afu->dev, "PSL purge terminating pending translation, DSISR: 0x%.16llx, DAR: 0x%.16llx\n", dsisr, dar);
+			cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE);
+		} else if (dsisr) {
+			dev_notice(&afu->dev, "PSL purge acknowledging pending non-translation fault, DSISR: 0x%.16llx\n", dsisr);
+			cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A);
+		} else {
+			cpu_relax();
+		}
+		PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An);
+	}
+	end = local_clock();
+	pr_devel("PSL purged in %lld ns\n", end - start);
+
+	cxl_p1n_write(afu, CXL_PSL_SCNTL_An,
+		      PSL_CNTL & ~CXL_PSL_SCNTL_An_Pc);
+out:
+	trace_cxl_psl_ctrl_done(afu, CXL_PSL_SCNTL_An_Pc, rc);
+	return rc;
+}
+
+static int spa_max_procs(int spa_size)
+{
+	/*
+	 * From the CAIA:
+	 * end_of_SPA_area = SPA_Base + ((n+4) * 128) + (( ((n*8) + 127) >> 7) * 128) + 255
+	 * Most of that junk is really just an overly-complicated way of saying
+	 * the last 256 bytes are __aligned(128), so it's really:
+	 * end_of_SPA_area = end_of_PSL_queue_area + __aligned(128) 255
+	 * and
+	 * end_of_PSL_queue_area = SPA_Base + ((n+4) * 128) + (n*8) - 1
+	 * so
+	 * sizeof(SPA) = ((n+4) * 128) + (n*8) + __aligned(128) 256
+	 * Ignore the alignment (which is safe in this case as long as we are
+	 * careful with our rounding) and solve for n:
+	 * e.g. a single 4K page gives ((4096 / 8) - 96) / 17 = 24 processes.
+	 */
+	return ((spa_size / 8) - 96) / 17;
+}
+
+static int alloc_spa(struct cxl_afu *afu)
+{
+	u64 spap;
+
+	/* Work out how many pages to allocate */
+	afu->spa_order = 0;
+	do {
+		afu->spa_order++;
+		afu->spa_size = (1 << afu->spa_order) * PAGE_SIZE;
+		afu->spa_max_procs = spa_max_procs(afu->spa_size);
+	} while (afu->spa_max_procs < afu->num_procs);
+
+	WARN_ON(afu->spa_size > 0x100000); /* Max size supported by the hardware */
+
+	if (!(afu->spa = (struct cxl_process_element *)
+	      __get_free_pages(GFP_KERNEL | __GFP_ZERO, afu->spa_order))) {
+		pr_err("cxl_alloc_spa: Unable to allocate scheduled process area\n");
+		return -ENOMEM;
+	}
+	pr_devel("spa pages: %i afu->spa_max_procs: %i   afu->num_procs: %i\n",
+		 1<<afu->spa_order, afu->spa_max_procs, afu->num_procs);
+
+	afu->sw_command_status = (__be64 *)((char *)afu->spa +
+					    ((afu->spa_max_procs + 3) * 128));
+
+	spap = virt_to_phys(afu->spa) & CXL_PSL_SPAP_Addr;
+	spap |= ((afu->spa_size >> (12 - CXL_PSL_SPAP_Size_Shift)) - 1) & CXL_PSL_SPAP_Size;
+	spap |= CXL_PSL_SPAP_V;
+	pr_devel("cxl: SPA allocated at 0x%p.
Max processes: %i, sw_command_status: 0x%p CXL_PSL_SPAP_An=0x%016llx\n", afu->spa, afu->spa_max_procs, afu->sw_command_status, spap); + cxl_p1n_write(afu, CXL_PSL_SPAP_An, spap); + + return 0; +} + +static void release_spa(struct cxl_afu *afu) +{ + cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0); + free_pages((unsigned long) afu->spa, afu->spa_order); +} + +int cxl_tlb_slb_invalidate(struct cxl *adapter) +{ + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + + pr_devel("CXL adapter wide TLBIA & SLBIA\n"); + + cxl_p1_write(adapter, CXL_PSL_AFUSEL, CXL_PSL_AFUSEL_A); + + cxl_p1_write(adapter, CXL_PSL_TLBIA, CXL_TLB_SLB_IQ_ALL); + while (cxl_p1_read(adapter, CXL_PSL_TLBIA) & CXL_TLB_SLB_P) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&adapter->dev, "WARNING: CXL adapter wide TLBIA timed out!\n"); + return -EBUSY; + } + cpu_relax(); + } + + cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_ALL); + while (cxl_p1_read(adapter, CXL_PSL_SLBIA) & CXL_TLB_SLB_P) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&adapter->dev, "WARNING: CXL adapter wide SLBIA timed out!\n"); + return -EBUSY; + } + cpu_relax(); + } + return 0; +} + +int cxl_afu_slbia(struct cxl_afu *afu) +{ + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + + pr_devel("cxl_afu_slbia issuing SLBIA command\n"); + cxl_p2n_write(afu, CXL_SLBIA_An, CXL_TLB_SLB_IQ_ALL); + while (cxl_p2n_read(afu, CXL_SLBIA_An) & CXL_TLB_SLB_P) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&afu->dev, "WARNING: CXL AFU SLBIA timed out!\n"); + return -EBUSY; + } + cpu_relax(); + } + return 0; +} + +static int cxl_write_sstp(struct cxl_afu *afu, u64 sstp0, u64 sstp1) +{ + int rc; + + /* 1. Disable SSTP by writing 0 to SSTP1[V] */ + cxl_p2n_write(afu, CXL_SSTP1_An, 0); + + /* 2. Invalidate all SLB entries */ + if ((rc = cxl_afu_slbia(afu))) + return rc; + + /* 3. Set SSTP0_An */ + cxl_p2n_write(afu, CXL_SSTP0_An, sstp0); + + /* 4. Set SSTP1_An */ + cxl_p2n_write(afu, CXL_SSTP1_An, sstp1); + + return 0; +} + +/* Using per slice version may improve performance here. (ie. SLBIA_An) */ +static void slb_invalid(struct cxl_context *ctx) +{ + struct cxl *adapter = ctx->afu->adapter; + u64 slbia; + + WARN_ON(!mutex_is_locked(&ctx->afu->spa_mutex)); + + cxl_p1_write(adapter, CXL_PSL_LBISEL, + ((u64)be32_to_cpu(ctx->elem->common.pid) << 32) | + be32_to_cpu(ctx->elem->lpid)); + cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_LPIDPID); + + while (1) { + slbia = cxl_p1_read(adapter, CXL_PSL_SLBIA); + if (!(slbia & CXL_TLB_SLB_P)) + break; + cpu_relax(); + } +} + +static int do_process_element_cmd(struct cxl_context *ctx, + u64 cmd, u64 pe_state) +{ + u64 state; + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + int rc = 0; + + trace_cxl_llcmd(ctx, cmd); + + WARN_ON(!ctx->afu->enabled); + + ctx->elem->software_state = cpu_to_be32(pe_state); + smp_wmb(); + *(ctx->afu->sw_command_status) = cpu_to_be64(cmd | 0 | ctx->pe); + smp_mb(); + cxl_p1n_write(ctx->afu, CXL_PSL_LLCMD_An, cmd | ctx->pe); + while (1) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&ctx->afu->dev, "WARNING: Process Element Command timed out!\n"); + rc = -EBUSY; + goto out; + } + state = be64_to_cpup(ctx->afu->sw_command_status); + if (state == ~0ULL) { + pr_err("cxl: Error adding process element to AFU\n"); + rc = -1; + goto out; + } + if ((state & (CXL_SPA_SW_CMD_MASK | CXL_SPA_SW_STATE_MASK | CXL_SPA_SW_LINK_MASK)) == + (cmd | (cmd >> 16) | ctx->pe)) + break; + /* + * The command won't finish in the PSL if there are + * outstanding DSIs. 
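+	 * (A DSI here is a data storage interrupt: a translation fault
+	 * raised on the AFU's behalf that the driver still has to service
+	 * before the PSL will retire the command.)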
Hence we need to yield here in
+	 * case there are outstanding DSIs that we need to
+	 * service. Tuning possibility: we could wait for a
+	 * while before calling schedule()
+	 */
+		schedule();
+
+	}
+out:
+	trace_cxl_llcmd_done(ctx, cmd, rc);
+	return rc;
+}
+
+static int add_process_element(struct cxl_context *ctx)
+{
+	int rc = 0;
+
+	mutex_lock(&ctx->afu->spa_mutex);
+	pr_devel("%s Adding pe: %i started\n", __func__, ctx->pe);
+	if (!(rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_ADD, CXL_PE_SOFTWARE_STATE_V)))
+		ctx->pe_inserted = true;
+	pr_devel("%s Adding pe: %i finished\n", __func__, ctx->pe);
+	mutex_unlock(&ctx->afu->spa_mutex);
+	return rc;
+}
+
+static int terminate_process_element(struct cxl_context *ctx)
+{
+	int rc = 0;
+
+	/* fast path terminate if it's already invalid */
+	if (!(ctx->elem->software_state & cpu_to_be32(CXL_PE_SOFTWARE_STATE_V)))
+		return rc;
+
+	mutex_lock(&ctx->afu->spa_mutex);
+	pr_devel("%s Terminate pe: %i started\n", __func__, ctx->pe);
+	rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE,
+				    CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T);
+	ctx->elem->software_state = 0;	/* Remove Valid bit */
+	pr_devel("%s Terminate pe: %i finished\n", __func__, ctx->pe);
+	mutex_unlock(&ctx->afu->spa_mutex);
+	return rc;
+}
+
+static int remove_process_element(struct cxl_context *ctx)
+{
+	int rc = 0;
+
+	mutex_lock(&ctx->afu->spa_mutex);
+	pr_devel("%s Remove pe: %i started\n", __func__, ctx->pe);
+	if (!(rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0)))
+		ctx->pe_inserted = false;
+	slb_invalid(ctx);
+	pr_devel("%s Remove pe: %i finished\n", __func__, ctx->pe);
+	mutex_unlock(&ctx->afu->spa_mutex);
+
+	return rc;
+}
+
+
+static void assign_psn_space(struct cxl_context *ctx)
+{
+	if (!ctx->afu->pp_size || ctx->master) {
+		ctx->psn_phys = ctx->afu->psn_phys;
+		ctx->psn_size = ctx->afu->adapter->ps_size;
+	} else {
+		ctx->psn_phys = ctx->afu->psn_phys +
+			(ctx->afu->pp_offset + ctx->afu->pp_size * ctx->pe);
+		ctx->psn_size = ctx->afu->pp_size;
+	}
+}
+
+static int activate_afu_directed(struct cxl_afu *afu)
+{
+	int rc;
+
+	dev_info(&afu->dev, "Activating AFU directed mode\n");
+
+	if (alloc_spa(afu))
+		return -ENOMEM;
+
+	cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_AFU);
+	cxl_p1n_write(afu, CXL_PSL_AMOR_An, 0xFFFFFFFFFFFFFFFFULL);
+	cxl_p1n_write(afu, CXL_PSL_ID_An, CXL_PSL_ID_An_F | CXL_PSL_ID_An_L);
+
+	afu->current_mode = CXL_MODE_DIRECTED;
+	afu->num_procs = afu->max_procs_virtualised;
+
+	if ((rc = cxl_chardev_m_afu_add(afu)))
+		return rc;
+
+	if ((rc = cxl_sysfs_afu_m_add(afu)))
+		goto err;
+
+	if ((rc = cxl_chardev_s_afu_add(afu)))
+		goto err1;
+
+	return 0;
+err1:
+	cxl_sysfs_afu_m_remove(afu);
+err:
+	cxl_chardev_afu_remove(afu);
+	return rc;
+}
+
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+#define set_endian(sr) ((sr) |= CXL_PSL_SR_An_LE)
+#else
+#define set_endian(sr) ((sr) &= ~(CXL_PSL_SR_An_LE))
+#endif
+
+static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr)
+{
+	u64 sr;
+	int r, result;
+
+	assign_psn_space(ctx);
+
+	ctx->elem->ctxtime = 0; /* disable */
+	ctx->elem->lpid = cpu_to_be32(mfspr(SPRN_LPID));
+	ctx->elem->haurp = 0; /* disable */
+	ctx->elem->sdr = cpu_to_be64(mfspr(SPRN_SDR1));
+
+	sr = 0;
+	if (ctx->master)
+		sr |= CXL_PSL_SR_An_MP;
+	if (mfspr(SPRN_LPCR) & LPCR_TC)
+		sr |= CXL_PSL_SR_An_TC;
+	/* HV=0, PR=1, R=1 for userspace
+	 * For kernel contexts: this would need to change
+	 */
+	sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R;
+	set_endian(sr);
+	sr &= ~(CXL_PSL_SR_An_HV);
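+	/*
+	 * SF is the 64-bit mode bit (by analogy with MSR[SF]): set it for
+	 * 64-bit tasks, leave it clear for 32-bit compat tasks.
+	 */
+	if (!test_tsk_thread_flag(current,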
TIF_32BIT)) + sr |= CXL_PSL_SR_An_SF; + ctx->elem->common.pid = cpu_to_be32(current->pid); + ctx->elem->common.tid = 0; + ctx->elem->sr = cpu_to_be64(sr); + + ctx->elem->common.csrp = 0; /* disable */ + ctx->elem->common.aurp0 = 0; /* disable */ + ctx->elem->common.aurp1 = 0; /* disable */ + + cxl_prefault(ctx, wed); + + ctx->elem->common.sstp0 = cpu_to_be64(ctx->sstp0); + ctx->elem->common.sstp1 = cpu_to_be64(ctx->sstp1); + + for (r = 0; r < CXL_IRQ_RANGES; r++) { + ctx->elem->ivte_offsets[r] = cpu_to_be16(ctx->irqs.offset[r]); + ctx->elem->ivte_ranges[r] = cpu_to_be16(ctx->irqs.range[r]); + } + + ctx->elem->common.amr = cpu_to_be64(amr); + ctx->elem->common.wed = cpu_to_be64(wed); + + /* first guy needs to enable */ + if ((result = afu_check_and_enable(ctx->afu))) + return result; + + add_process_element(ctx); + + return 0; +} + +static int deactivate_afu_directed(struct cxl_afu *afu) +{ + dev_info(&afu->dev, "Deactivating AFU directed mode\n"); + + afu->current_mode = 0; + afu->num_procs = 0; + + cxl_sysfs_afu_m_remove(afu); + cxl_chardev_afu_remove(afu); + + cxl_afu_reset(afu); + cxl_afu_disable(afu); + cxl_psl_purge(afu); + + release_spa(afu); + + return 0; +} + +static int activate_dedicated_process(struct cxl_afu *afu) +{ + dev_info(&afu->dev, "Activating dedicated process mode\n"); + + cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_Process); + + cxl_p1n_write(afu, CXL_PSL_CtxTime_An, 0); /* disable */ + cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0); /* disable */ + cxl_p1n_write(afu, CXL_PSL_AMOR_An, 0xFFFFFFFFFFFFFFFFULL); + cxl_p1n_write(afu, CXL_PSL_LPID_An, mfspr(SPRN_LPID)); + cxl_p1n_write(afu, CXL_HAURP_An, 0); /* disable */ + cxl_p1n_write(afu, CXL_PSL_SDR_An, mfspr(SPRN_SDR1)); + + cxl_p2n_write(afu, CXL_CSRP_An, 0); /* disable */ + cxl_p2n_write(afu, CXL_AURP0_An, 0); /* disable */ + cxl_p2n_write(afu, CXL_AURP1_An, 0); /* disable */ + + afu->current_mode = CXL_MODE_DEDICATED; + afu->num_procs = 1; + + return cxl_chardev_d_afu_add(afu); +} + +static int attach_dedicated(struct cxl_context *ctx, u64 wed, u64 amr) +{ + struct cxl_afu *afu = ctx->afu; + u64 sr; + int rc; + + sr = 0; + set_endian(sr); + if (ctx->master) + sr |= CXL_PSL_SR_An_MP; + if (mfspr(SPRN_LPCR) & LPCR_TC) + sr |= CXL_PSL_SR_An_TC; + sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R; + if (!test_tsk_thread_flag(current, TIF_32BIT)) + sr |= CXL_PSL_SR_An_SF; + cxl_p2n_write(afu, CXL_PSL_PID_TID_An, (u64)current->pid << 32); + cxl_p1n_write(afu, CXL_PSL_SR_An, sr); + + if ((rc = cxl_write_sstp(afu, ctx->sstp0, ctx->sstp1))) + return rc; + + cxl_prefault(ctx, wed); + + cxl_p1n_write(afu, CXL_PSL_IVTE_Offset_An, + (((u64)ctx->irqs.offset[0] & 0xffff) << 48) | + (((u64)ctx->irqs.offset[1] & 0xffff) << 32) | + (((u64)ctx->irqs.offset[2] & 0xffff) << 16) | + ((u64)ctx->irqs.offset[3] & 0xffff)); + cxl_p1n_write(afu, CXL_PSL_IVTE_Limit_An, (u64) + (((u64)ctx->irqs.range[0] & 0xffff) << 48) | + (((u64)ctx->irqs.range[1] & 0xffff) << 32) | + (((u64)ctx->irqs.range[2] & 0xffff) << 16) | + ((u64)ctx->irqs.range[3] & 0xffff)); + + cxl_p2n_write(afu, CXL_PSL_AMR_An, amr); + + /* master only context for dedicated */ + assign_psn_space(ctx); + + if ((rc = cxl_afu_reset(afu))) + return rc; + + cxl_p2n_write(afu, CXL_PSL_WED_An, wed); + + return afu_enable(afu); +} + +static int deactivate_dedicated_process(struct cxl_afu *afu) +{ + dev_info(&afu->dev, "Deactivating dedicated process mode\n"); + + afu->current_mode = 0; + afu->num_procs = 0; + + cxl_chardev_afu_remove(afu); + + return 0; +} + +int 
_cxl_afu_deactivate_mode(struct cxl_afu *afu, int mode) +{ + if (mode == CXL_MODE_DIRECTED) + return deactivate_afu_directed(afu); + if (mode == CXL_MODE_DEDICATED) + return deactivate_dedicated_process(afu); + return 0; +} + +int cxl_afu_deactivate_mode(struct cxl_afu *afu) +{ + return _cxl_afu_deactivate_mode(afu, afu->current_mode); +} + +int cxl_afu_activate_mode(struct cxl_afu *afu, int mode) +{ + if (!mode) + return 0; + if (!(mode & afu->modes_supported)) + return -EINVAL; + + if (mode == CXL_MODE_DIRECTED) + return activate_afu_directed(afu); + if (mode == CXL_MODE_DEDICATED) + return activate_dedicated_process(afu); + + return -EINVAL; +} + +int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr) +{ + ctx->kernel = kernel; + if (ctx->afu->current_mode == CXL_MODE_DIRECTED) + return attach_afu_directed(ctx, wed, amr); + + if (ctx->afu->current_mode == CXL_MODE_DEDICATED) + return attach_dedicated(ctx, wed, amr); + + return -EINVAL; +} + +static inline int detach_process_native_dedicated(struct cxl_context *ctx) +{ + cxl_afu_reset(ctx->afu); + cxl_afu_disable(ctx->afu); + cxl_psl_purge(ctx->afu); + return 0; +} + +static inline int detach_process_native_afu_directed(struct cxl_context *ctx) +{ + if (!ctx->pe_inserted) + return 0; + if (terminate_process_element(ctx)) + return -1; + if (remove_process_element(ctx)) + return -1; + + return 0; +} + +int cxl_detach_process(struct cxl_context *ctx) +{ + trace_cxl_detach(ctx); + + if (ctx->afu->current_mode == CXL_MODE_DEDICATED) + return detach_process_native_dedicated(ctx); + + return detach_process_native_afu_directed(ctx); +} + +int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info) +{ + u64 pidtid; + + info->dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + info->dar = cxl_p2n_read(afu, CXL_PSL_DAR_An); + info->dsr = cxl_p2n_read(afu, CXL_PSL_DSR_An); + pidtid = cxl_p2n_read(afu, CXL_PSL_PID_TID_An); + info->pid = pidtid >> 32; + info->tid = pidtid & 0xffffffff; + info->afu_err = cxl_p2n_read(afu, CXL_AFU_ERR_An); + info->errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); + + return 0; +} + +static void recover_psl_err(struct cxl_afu *afu, u64 errstat) +{ + u64 dsisr; + + pr_devel("RECOVERING FROM PSL ERROR... (0x%.16llx)\n", errstat); + + /* Clear PSL_DSISR[PE] */ + dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + cxl_p2n_write(afu, CXL_PSL_DSISR_An, dsisr & ~CXL_PSL_DSISR_An_PE); + + /* Write 1s to clear error status bits */ + cxl_p2n_write(afu, CXL_PSL_ErrStat_An, errstat); +} + +int cxl_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask) +{ + trace_cxl_psl_irq_ack(ctx, tfc); + if (tfc) + cxl_p2n_write(ctx->afu, CXL_PSL_TFC_An, tfc); + if (psl_reset_mask) + recover_psl_err(ctx->afu, psl_reset_mask); + + return 0; +} + +int cxl_check_error(struct cxl_afu *afu) +{ + return (cxl_p1n_read(afu, CXL_PSL_SCNTL_An) == ~0ULL); +} diff --git a/kernel/drivers/misc/cxl/pci.c b/kernel/drivers/misc/cxl/pci.c new file mode 100644 index 000000000..1ef016472 --- /dev/null +++ b/kernel/drivers/misc/cxl/pci.c @@ -0,0 +1,1113 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */
+
+#include <linux/pci_regs.h>
+#include <linux/pci_ids.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/sort.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/delay.h>
+#include <asm/opal.h>
+#include <asm/msi_bitmap.h>
+#include <asm/pci-bridge.h> /* for struct pci_controller */
+#include <asm/pnv-pci.h>
+#include <asm/io.h>
+
+#include "cxl.h"
+
+
+#define CXL_PCI_VSEC_ID	0x1280
+#define CXL_VSEC_MIN_SIZE 0x80
+
+#define CXL_READ_VSEC_LENGTH(dev, vsec, dest)			\
+	{							\
+		pci_read_config_word(dev, vsec + 0x6, dest);	\
+		*dest >>= 4;					\
+	}
+#define CXL_READ_VSEC_NAFUS(dev, vsec, dest) \
+	pci_read_config_byte(dev, vsec + 0x8, dest)
+
+#define CXL_READ_VSEC_STATUS(dev, vsec, dest) \
+	pci_read_config_byte(dev, vsec + 0x9, dest)
+#define CXL_STATUS_SECOND_PORT  0x80
+#define CXL_STATUS_MSI_X_FULL   0x40
+#define CXL_STATUS_MSI_X_SINGLE 0x20
+#define CXL_STATUS_FLASH_RW     0x08
+#define CXL_STATUS_FLASH_RO     0x04
+#define CXL_STATUS_LOADABLE_AFU 0x02
+#define CXL_STATUS_LOADABLE_PSL 0x01
+/* If we see these features we won't try to use the card */
+#define CXL_UNSUPPORTED_FEATURES \
+	(CXL_STATUS_MSI_X_FULL | CXL_STATUS_MSI_X_SINGLE)
+
+#define CXL_READ_VSEC_MODE_CONTROL(dev, vsec, dest) \
+	pci_read_config_byte(dev, vsec + 0xa, dest)
+#define CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val) \
+	pci_write_config_byte(dev, vsec + 0xa, val)
+#define CXL_VSEC_PROTOCOL_MASK   0xe0
+#define CXL_VSEC_PROTOCOL_1024TB 0x80
+#define CXL_VSEC_PROTOCOL_512TB  0x40
+#define CXL_VSEC_PROTOCOL_256TB  0x20 /* Power 8 uses this */
+#define CXL_VSEC_PROTOCOL_ENABLE 0x01
+
+#define CXL_READ_VSEC_PSL_REVISION(dev, vsec, dest) \
+	pci_read_config_word(dev, vsec + 0xc, dest)
+#define CXL_READ_VSEC_CAIA_MINOR(dev, vsec, dest) \
+	pci_read_config_byte(dev, vsec + 0xe, dest)
+#define CXL_READ_VSEC_CAIA_MAJOR(dev, vsec, dest) \
+	pci_read_config_byte(dev, vsec + 0xf, dest)
+#define CXL_READ_VSEC_BASE_IMAGE(dev, vsec, dest) \
+	pci_read_config_word(dev, vsec + 0x10, dest)
+
+#define CXL_READ_VSEC_IMAGE_STATE(dev, vsec, dest) \
+	pci_read_config_byte(dev, vsec + 0x13, dest)
+#define CXL_WRITE_VSEC_IMAGE_STATE(dev, vsec, val) \
+	pci_write_config_byte(dev, vsec + 0x13, val)
+#define CXL_VSEC_USER_IMAGE_LOADED 0x80 /* RO */
+#define CXL_VSEC_PERST_LOADS_IMAGE 0x20 /* RW */
+#define CXL_VSEC_PERST_SELECT_USER 0x10 /* RW */
+
+#define CXL_READ_VSEC_AFU_DESC_OFF(dev, vsec, dest) \
+	pci_read_config_dword(dev, vsec + 0x20, dest)
+#define CXL_READ_VSEC_AFU_DESC_SIZE(dev, vsec, dest) \
+	pci_read_config_dword(dev, vsec + 0x24, dest)
+#define CXL_READ_VSEC_PS_OFF(dev, vsec, dest) \
+	pci_read_config_dword(dev, vsec + 0x28, dest)
+#define CXL_READ_VSEC_PS_SIZE(dev, vsec, dest) \
+	pci_read_config_dword(dev, vsec + 0x2c, dest)
+
+
+/* This works a little differently from the p1/p2 register accesses, to make
+ * it easier to pull out individual fields */
+#define AFUD_READ(afu, off)		in_be64(afu->afu_desc_mmio + off)
+#define EXTRACT_PPC_BIT(val, bit)	(!!(val & PPC_BIT(bit)))
+#define EXTRACT_PPC_BITS(val, bs, be)	((val & PPC_BITMASK(bs, be)) >> PPC_BITLSHIFT(be))
+
+#define AFUD_READ_INFO(afu)		AFUD_READ(afu, 0x0)
+#define   AFUD_NUM_INTS_PER_PROC(val)	EXTRACT_PPC_BITS(val, 0, 15)
+#define   AFUD_NUM_PROCS(val)		EXTRACT_PPC_BITS(val, 16, 31)
+#define   AFUD_NUM_CRS(val)		EXTRACT_PPC_BITS(val, 32, 47)
+#define   AFUD_MULTIMODE(val)		EXTRACT_PPC_BIT(val, 48)
+#define   AFUD_PUSH_BLOCK_TRANSFER(val)	EXTRACT_PPC_BIT(val, 55)
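+/*
+ * Note: PPC_BIT()/PPC_BITMASK() use IBM bit numbering, i.e. bit 0 is the
+ * most significant bit of the 64-bit word, so e.g. AFUD_NUM_PROCS() above
+ * extracts bits 16..31 counting from the top.
+ */
+#define   AFUD_DEDICATED_PROCESS(val)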
EXTRACT_PPC_BIT(val, 59)
+#define   AFUD_AFU_DIRECTED(val)	EXTRACT_PPC_BIT(val, 61)
+#define   AFUD_TIME_SLICED(val)		EXTRACT_PPC_BIT(val, 63)
+#define AFUD_READ_CR(afu)		AFUD_READ(afu, 0x20)
+#define   AFUD_CR_LEN(val)		EXTRACT_PPC_BITS(val, 8, 63)
+#define AFUD_READ_CR_OFF(afu)		AFUD_READ(afu, 0x28)
+#define AFUD_READ_PPPSA(afu)		AFUD_READ(afu, 0x30)
+#define   AFUD_PPPSA_PP(val)		EXTRACT_PPC_BIT(val, 6)
+#define   AFUD_PPPSA_PSA(val)		EXTRACT_PPC_BIT(val, 7)
+#define   AFUD_PPPSA_LEN(val)		EXTRACT_PPC_BITS(val, 8, 63)
+#define AFUD_READ_PPPSA_OFF(afu)	AFUD_READ(afu, 0x38)
+#define AFUD_READ_EB(afu)		AFUD_READ(afu, 0x40)
+#define   AFUD_EB_LEN(val)		EXTRACT_PPC_BITS(val, 8, 63)
+#define AFUD_READ_EB_OFF(afu)		AFUD_READ(afu, 0x48)
+
+u16 cxl_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off)
+{
+	u64 aligned_off = off & ~0x3L;
+	u32 val;
+
+	val = cxl_afu_cr_read32(afu, cr, aligned_off);
+	return (val >> ((off & 0x2) * 8)) & 0xffff;
+}
+
+u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off)
+{
+	u64 aligned_off = off & ~0x3L;
+	u32 val;
+
+	val = cxl_afu_cr_read32(afu, cr, aligned_off);
+	return (val >> ((off & 0x3) * 8)) & 0xff;
+}
+
+static DEFINE_PCI_DEVICE_TABLE(cxl_pci_tbl) = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0477), },
+	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x044b), },
+	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x04cf), },
+	{ PCI_DEVICE_CLASS(0x120000, ~0), },
+
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, cxl_pci_tbl);
+
+
+/*
+ * Mostly using these wrappers to avoid confusion:
+ * priv 1 is BAR2, while priv 2 is BAR0
+ */
+static inline resource_size_t p1_base(struct pci_dev *dev)
+{
+	return pci_resource_start(dev, 2);
+}
+
+static inline resource_size_t p1_size(struct pci_dev *dev)
+{
+	return pci_resource_len(dev, 2);
+}
+
+static inline resource_size_t p2_base(struct pci_dev *dev)
+{
+	return pci_resource_start(dev, 0);
+}
+
+static inline resource_size_t p2_size(struct pci_dev *dev)
+{
+	return pci_resource_len(dev, 0);
+}
+
+static int find_cxl_vsec(struct pci_dev *dev)
+{
+	int vsec = 0;
+	u16 val;
+
+	while ((vsec = pci_find_next_ext_capability(dev, vsec, PCI_EXT_CAP_ID_VNDR))) {
+		pci_read_config_word(dev, vsec + 0x4, &val);
+		if (val == CXL_PCI_VSEC_ID)
+			return vsec;
+	}
+	return 0;
+
+}
+
+static void dump_cxl_config_space(struct pci_dev *dev)
+{
+	int vsec;
+	u32 val;
+
+	dev_info(&dev->dev, "dump_cxl_config_space\n");
+
+	pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, &val);
+	dev_info(&dev->dev, "BAR0: %#.8x\n", val);
+	pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, &val);
+	dev_info(&dev->dev, "BAR1: %#.8x\n", val);
+	pci_read_config_dword(dev, PCI_BASE_ADDRESS_2, &val);
+	dev_info(&dev->dev, "BAR2: %#.8x\n", val);
+	pci_read_config_dword(dev, PCI_BASE_ADDRESS_3, &val);
+	dev_info(&dev->dev, "BAR3: %#.8x\n", val);
+	pci_read_config_dword(dev, PCI_BASE_ADDRESS_4, &val);
+	dev_info(&dev->dev, "BAR4: %#.8x\n", val);
+	pci_read_config_dword(dev, PCI_BASE_ADDRESS_5, &val);
+	dev_info(&dev->dev, "BAR5: %#.8x\n", val);
+
+	dev_info(&dev->dev, "p1 regs: %#llx, len: %#llx\n",
+		p1_base(dev), p1_size(dev));
+	dev_info(&dev->dev, "p2 regs: %#llx, len: %#llx\n",
+		p2_base(dev), p2_size(dev));
+	dev_info(&dev->dev, "BAR 4/5: %#llx, len: %#llx\n",
+		pci_resource_start(dev, 4), pci_resource_len(dev, 4));
+
+	if (!(vsec = find_cxl_vsec(dev)))
+		return;
+
+#define show_reg(name, what) \
+	dev_info(&dev->dev, "cxl vsec: %30s: %#x\n", name, what)
+
+	pci_read_config_dword(dev, vsec + 0x0, &val);
+	show_reg("Cap ID", (val >> 0) & 0xffff);
+	show_reg("Cap Ver", (val >> 16) & 0xf);
+	show_reg("Next Cap Ptr", (val >> 20) &
0xfff);
+	pci_read_config_dword(dev, vsec + 0x4, &val);
+	show_reg("VSEC ID", (val >> 0) & 0xffff);
+	show_reg("VSEC Rev", (val >> 16) & 0xf);
+	show_reg("VSEC Length", (val >> 20) & 0xfff);
+	pci_read_config_dword(dev, vsec + 0x8, &val);
+	show_reg("Num AFUs", (val >> 0) & 0xff);
+	show_reg("Status", (val >> 8) & 0xff);
+	show_reg("Mode Control", (val >> 16) & 0xff);
+	show_reg("Reserved", (val >> 24) & 0xff);
+	pci_read_config_dword(dev, vsec + 0xc, &val);
+	show_reg("PSL Rev", (val >> 0) & 0xffff);
+	show_reg("CAIA Ver", (val >> 16) & 0xffff);
+	pci_read_config_dword(dev, vsec + 0x10, &val);
+	show_reg("Base Image Rev", (val >> 0) & 0xffff);
+	show_reg("Reserved", (val >> 16) & 0x0fff);
+	show_reg("Image Control", (val >> 28) & 0x3);
+	show_reg("Reserved", (val >> 30) & 0x1);
+	show_reg("Image Loaded", (val >> 31) & 0x1);
+
+	pci_read_config_dword(dev, vsec + 0x14, &val);
+	show_reg("Reserved", val);
+	pci_read_config_dword(dev, vsec + 0x18, &val);
+	show_reg("Reserved", val);
+	pci_read_config_dword(dev, vsec + 0x1c, &val);
+	show_reg("Reserved", val);
+
+	pci_read_config_dword(dev, vsec + 0x20, &val);
+	show_reg("AFU Descriptor Offset", val);
+	pci_read_config_dword(dev, vsec + 0x24, &val);
+	show_reg("AFU Descriptor Size", val);
+	pci_read_config_dword(dev, vsec + 0x28, &val);
+	show_reg("Problem State Offset", val);
+	pci_read_config_dword(dev, vsec + 0x2c, &val);
+	show_reg("Problem State Size", val);
+
+	pci_read_config_dword(dev, vsec + 0x30, &val);
+	show_reg("Reserved", val);
+	pci_read_config_dword(dev, vsec + 0x34, &val);
+	show_reg("Reserved", val);
+	pci_read_config_dword(dev, vsec + 0x38, &val);
+	show_reg("Reserved", val);
+	pci_read_config_dword(dev, vsec + 0x3c, &val);
+	show_reg("Reserved", val);
+
+	pci_read_config_dword(dev, vsec + 0x40, &val);
+	show_reg("PSL Programming Port", val);
+	pci_read_config_dword(dev, vsec + 0x44, &val);
+	show_reg("PSL Programming Control", val);
+
+	pci_read_config_dword(dev, vsec + 0x48, &val);
+	show_reg("Reserved", val);
+	pci_read_config_dword(dev, vsec + 0x4c, &val);
+	show_reg("Reserved", val);
+
+	pci_read_config_dword(dev, vsec + 0x50, &val);
+	show_reg("Flash Address Register", val);
+	pci_read_config_dword(dev, vsec + 0x54, &val);
+	show_reg("Flash Size Register", val);
+	pci_read_config_dword(dev, vsec + 0x58, &val);
+	show_reg("Flash Status/Control Register", val);
+	pci_read_config_dword(dev, vsec + 0x5c, &val);
+	show_reg("Flash Data Port", val);
+
+#undef show_reg
+}
+
+static void dump_afu_descriptor(struct cxl_afu *afu)
+{
+	u64 val;
+
+#define show_reg(name, what) \
+	dev_info(&afu->dev, "afu desc: %30s: %#llx\n", name, what)
+
+	val = AFUD_READ_INFO(afu);
+	show_reg("num_ints_per_process", AFUD_NUM_INTS_PER_PROC(val));
+	show_reg("num_of_processes", AFUD_NUM_PROCS(val));
+	show_reg("num_of_afu_CRs", AFUD_NUM_CRS(val));
+	show_reg("req_prog_mode", val & 0xffffULL);
+
+	val = AFUD_READ(afu, 0x8);
+	show_reg("Reserved", val);
+	val = AFUD_READ(afu, 0x10);
+	show_reg("Reserved", val);
+	val = AFUD_READ(afu, 0x18);
+	show_reg("Reserved", val);
+
+	val = AFUD_READ_CR(afu);
+	show_reg("Reserved", (val >> (63-7)) & 0xff);
+	show_reg("AFU_CR_len", AFUD_CR_LEN(val));
+
+	val = AFUD_READ_CR_OFF(afu);
+	show_reg("AFU_CR_offset", val);
+
+	val = AFUD_READ_PPPSA(afu);
+	show_reg("PerProcessPSA_control", (val >> (63-7)) & 0xff);
+	show_reg("PerProcessPSA Length", AFUD_PPPSA_LEN(val));
+
+	val = AFUD_READ_PPPSA_OFF(afu);
+	show_reg("PerProcessPSA_offset", val);
+
+	val = AFUD_READ_EB(afu);
+	show_reg("Reserved", (val >> (63-7)) &
0xff); + show_reg("AFU_EB_len", AFUD_EB_LEN(val)); + + val = AFUD_READ_EB_OFF(afu); + show_reg("AFU_EB_offset", val); + +#undef show_reg +} + +static int init_implementation_adapter_regs(struct cxl *adapter, struct pci_dev *dev) +{ + struct device_node *np; + const __be32 *prop; + u64 psl_dsnctl; + u64 chipid; + + if (!(np = pnv_pci_get_phb_node(dev))) + return -ENODEV; + + while (np && !(prop = of_get_property(np, "ibm,chip-id", NULL))) + np = of_get_next_parent(np); + if (!np) + return -ENODEV; + chipid = be32_to_cpup(prop); + of_node_put(np); + + /* Tell PSL where to route data to */ + psl_dsnctl = 0x02E8900002000000ULL | (chipid << (63-5)); + cxl_p1_write(adapter, CXL_PSL_DSNDCTL, psl_dsnctl); + cxl_p1_write(adapter, CXL_PSL_RESLCKTO, 0x20000000200ULL); + /* snoop write mask */ + cxl_p1_write(adapter, CXL_PSL_SNWRALLOC, 0x00000000FFFFFFFFULL); + /* set fir_accum */ + cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, 0x0800000000000000ULL); + /* for debugging with trace arrays */ + cxl_p1_write(adapter, CXL_PSL_TRACE, 0x0000FF7C00000000ULL); + + return 0; +} + +static int init_implementation_afu_regs(struct cxl_afu *afu) +{ + /* read/write masks for this slice */ + cxl_p1n_write(afu, CXL_PSL_APCALLOC_A, 0xFFFFFFFEFEFEFEFEULL); + /* APC read/write masks for this slice */ + cxl_p1n_write(afu, CXL_PSL_COALLOC_A, 0xFF000000FEFEFEFEULL); + /* for debugging with trace arrays */ + cxl_p1n_write(afu, CXL_PSL_SLICE_TRACE, 0x0000FFFF00000000ULL); + cxl_p1n_write(afu, CXL_PSL_RXCTL_A, CXL_PSL_RXCTL_AFUHP_4S); + + return 0; +} + +int cxl_setup_irq(struct cxl *adapter, unsigned int hwirq, + unsigned int virq) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + + return pnv_cxl_ioda_msi_setup(dev, hwirq, virq); +} + +int cxl_update_image_control(struct cxl *adapter) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + int rc; + int vsec; + u8 image_state; + + if (!(vsec = find_cxl_vsec(dev))) { + dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n"); + return -ENODEV; + } + + if ((rc = CXL_READ_VSEC_IMAGE_STATE(dev, vsec, &image_state))) { + dev_err(&dev->dev, "failed to read image state: %i\n", rc); + return rc; + } + + if (adapter->perst_loads_image) + image_state |= CXL_VSEC_PERST_LOADS_IMAGE; + else + image_state &= ~CXL_VSEC_PERST_LOADS_IMAGE; + + if (adapter->perst_select_user) + image_state |= CXL_VSEC_PERST_SELECT_USER; + else + image_state &= ~CXL_VSEC_PERST_SELECT_USER; + + if ((rc = CXL_WRITE_VSEC_IMAGE_STATE(dev, vsec, image_state))) { + dev_err(&dev->dev, "failed to update image control: %i\n", rc); + return rc; + } + + return 0; +} + +int cxl_alloc_one_irq(struct cxl *adapter) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + + return pnv_cxl_alloc_hwirqs(dev, 1); +} + +void cxl_release_one_irq(struct cxl *adapter, int hwirq) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + + return pnv_cxl_release_hwirqs(dev, hwirq, 1); +} + +int cxl_alloc_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter, unsigned int num) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + + return pnv_cxl_alloc_hwirq_ranges(irqs, dev, num); +} + +void cxl_release_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + + pnv_cxl_release_hwirq_ranges(irqs, dev); +} + +static int setup_cxl_bars(struct pci_dev *dev) +{ + /* Safety check in case we get backported to < 3.17 without M64 */ + if ((p1_base(dev) < 0x100000000ULL) || + (p2_base(dev) < 0x100000000ULL)) { + dev_err(&dev->dev, "ABORTING: M32 
BAR assignment incompatible with CXL\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * BAR 4/5 has a special meaning for CXL and must be programmed with a
+	 * special value corresponding to the CXL protocol address range.
+	 * For POWER 8 that means bits 48:49 must be set to 10
+	 */
+	pci_write_config_dword(dev, PCI_BASE_ADDRESS_4, 0x00000000);
+	pci_write_config_dword(dev, PCI_BASE_ADDRESS_5, 0x00020000);
+
+	return 0;
+}
+
+/* pciex node: ibm,opal-m64-window = <0x3d058 0x0 0x3d058 0x0 0x8 0x0>; */
+static int switch_card_to_cxl(struct pci_dev *dev)
+{
+	int vsec;
+	u8 val;
+	int rc;
+
+	dev_info(&dev->dev, "switch card to CXL\n");
+
+	if (!(vsec = find_cxl_vsec(dev))) {
+		dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n");
+		return -ENODEV;
+	}
+
+	if ((rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val))) {
+		dev_err(&dev->dev, "failed to read current mode control: %i", rc);
+		return rc;
+	}
+	val &= ~CXL_VSEC_PROTOCOL_MASK;
+	val |= CXL_VSEC_PROTOCOL_256TB | CXL_VSEC_PROTOCOL_ENABLE;
+	if ((rc = CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val))) {
+		dev_err(&dev->dev, "failed to enable CXL protocol: %i", rc);
+		return rc;
+	}
+	/*
+	 * The CAIA spec (v0.12 11.6 Bi-modal Device Support) states
+	 * we must wait 100ms after this mode switch before touching
+	 * PCIe config space.
+	 */
+	msleep(100);
+
+	return 0;
+}
+
+static int cxl_map_slice_regs(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev)
+{
+	u64 p1n_base, p2n_base, afu_desc;
+	const u64 p1n_size = 0x100;
+	const u64 p2n_size = 0x1000;
+
+	p1n_base = p1_base(dev) + 0x10000 + (afu->slice * p1n_size);
+	p2n_base = p2_base(dev) + (afu->slice * p2n_size);
+	afu->psn_phys = p2_base(dev) + (adapter->ps_off + (afu->slice * adapter->ps_size));
+	afu_desc = p2_base(dev) + adapter->afu_desc_off + (afu->slice * adapter->afu_desc_size);
+
+	if (!(afu->p1n_mmio = ioremap(p1n_base, p1n_size)))
+		goto err;
+	if (!(afu->p2n_mmio = ioremap(p2n_base, p2n_size)))
+		goto err1;
+	if (afu_desc) {
+		if (!(afu->afu_desc_mmio = ioremap(afu_desc, adapter->afu_desc_size)))
+			goto err2;
+	}
+
+	return 0;
+err2:
+	iounmap(afu->p2n_mmio);
+err1:
+	iounmap(afu->p1n_mmio);
+err:
+	dev_err(&afu->dev, "Error mapping AFU MMIO regions\n");
+	return -ENOMEM;
+}
+
+static void cxl_unmap_slice_regs(struct cxl_afu *afu)
+{
+	if (afu->p2n_mmio)
+		iounmap(afu->p2n_mmio);
+	if (afu->p1n_mmio)
+		iounmap(afu->p1n_mmio);
+}
+
+static void cxl_release_afu(struct device *dev)
+{
+	struct cxl_afu *afu = to_cxl_afu(dev);
+
+	pr_devel("cxl_release_afu\n");
+
+	kfree(afu);
+}
+
+static struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice)
+{
+	struct cxl_afu *afu;
+
+	if (!(afu = kzalloc(sizeof(struct cxl_afu), GFP_KERNEL)))
+		return NULL;
+
+	afu->adapter = adapter;
+	afu->dev.parent = &adapter->dev;
+	afu->dev.release = cxl_release_afu;
+	afu->slice = slice;
+	idr_init(&afu->contexts_idr);
+	mutex_init(&afu->contexts_lock);
+	spin_lock_init(&afu->afu_cntl_lock);
+	mutex_init(&afu->spa_mutex);
+
+	afu->prefault_mode = CXL_PREFAULT_NONE;
+	afu->irqs_max = afu->adapter->user_irqs;
+
+	return afu;
+}
+
+/* Expects AFU struct to have recently been zeroed out */
+static int cxl_read_afu_descriptor(struct cxl_afu *afu)
+{
+	u64 val;
+
+	val = AFUD_READ_INFO(afu);
+	afu->pp_irqs = AFUD_NUM_INTS_PER_PROC(val);
+	afu->max_procs_virtualised = AFUD_NUM_PROCS(val);
+	afu->crs_num = AFUD_NUM_CRS(val);
+
+	if (AFUD_AFU_DIRECTED(val))
+		afu->modes_supported |= CXL_MODE_DIRECTED;
+	if (AFUD_DEDICATED_PROCESS(val))
+		afu->modes_supported |= CXL_MODE_DEDICATED;
+	if (AFUD_TIME_SLICED(val))
+
afu->modes_supported |= CXL_MODE_TIME_SLICED; + + val = AFUD_READ_PPPSA(afu); + afu->pp_size = AFUD_PPPSA_LEN(val) * 4096; + afu->psa = AFUD_PPPSA_PSA(val); + if ((afu->pp_psa = AFUD_PPPSA_PP(val))) + afu->pp_offset = AFUD_READ_PPPSA_OFF(afu); + + val = AFUD_READ_CR(afu); + afu->crs_len = AFUD_CR_LEN(val) * 256; + afu->crs_offset = AFUD_READ_CR_OFF(afu); + + return 0; +} + +static int cxl_afu_descriptor_looks_ok(struct cxl_afu *afu) +{ + int i; + + if (afu->psa && afu->adapter->ps_size < + (afu->pp_offset + afu->pp_size*afu->max_procs_virtualised)) { + dev_err(&afu->dev, "per-process PSA can't fit inside the PSA!\n"); + return -ENODEV; + } + + if (afu->pp_psa && (afu->pp_size < PAGE_SIZE)) + dev_warn(&afu->dev, "AFU uses < PAGE_SIZE per-process PSA!"); + + for (i = 0; i < afu->crs_num; i++) { + if ((cxl_afu_cr_read32(afu, i, 0) == 0)) { + dev_err(&afu->dev, "ABORTING: AFU configuration record %i is invalid\n", i); + return -EINVAL; + } + } + + return 0; +} + +static int sanitise_afu_regs(struct cxl_afu *afu) +{ + u64 reg; + + /* + * Clear out any regs that contain either an IVTE or address or may be + * waiting on an acknowledgement to try to be a bit safer as we bring + * it online + */ + reg = cxl_p2n_read(afu, CXL_AFU_Cntl_An); + if ((reg & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) { + dev_warn(&afu->dev, "WARNING: AFU was not disabled: %#.16llx\n", reg); + if (cxl_afu_reset(afu)) + return -EIO; + if (cxl_afu_disable(afu)) + return -EIO; + if (cxl_psl_purge(afu)) + return -EIO; + } + cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0x0000000000000000); + cxl_p1n_write(afu, CXL_PSL_IVTE_Limit_An, 0x0000000000000000); + cxl_p1n_write(afu, CXL_PSL_IVTE_Offset_An, 0x0000000000000000); + cxl_p1n_write(afu, CXL_PSL_AMBAR_An, 0x0000000000000000); + cxl_p1n_write(afu, CXL_PSL_SPOffset_An, 0x0000000000000000); + cxl_p1n_write(afu, CXL_HAURP_An, 0x0000000000000000); + cxl_p2n_write(afu, CXL_CSRP_An, 0x0000000000000000); + cxl_p2n_write(afu, CXL_AURP1_An, 0x0000000000000000); + cxl_p2n_write(afu, CXL_AURP0_An, 0x0000000000000000); + cxl_p2n_write(afu, CXL_SSTP1_An, 0x0000000000000000); + cxl_p2n_write(afu, CXL_SSTP0_An, 0x0000000000000000); + reg = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + if (reg) { + dev_warn(&afu->dev, "AFU had pending DSISR: %#.16llx\n", reg); + if (reg & CXL_PSL_DSISR_TRANS) + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); + else + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); + } + reg = cxl_p1n_read(afu, CXL_PSL_SERR_An); + if (reg) { + if (reg & ~0xffff) + dev_warn(&afu->dev, "AFU had pending SERR: %#.16llx\n", reg); + cxl_p1n_write(afu, CXL_PSL_SERR_An, reg & ~0xffff); + } + reg = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); + if (reg) { + dev_warn(&afu->dev, "AFU had pending error status: %#.16llx\n", reg); + cxl_p2n_write(afu, CXL_PSL_ErrStat_An, reg); + } + + return 0; +} + +static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) +{ + struct cxl_afu *afu; + bool free = true; + int rc; + + if (!(afu = cxl_alloc_afu(adapter, slice))) + return -ENOMEM; + + if ((rc = dev_set_name(&afu->dev, "afu%i.%i", adapter->adapter_num, slice))) + goto err1; + + if ((rc = cxl_map_slice_regs(afu, adapter, dev))) + goto err1; + + if ((rc = sanitise_afu_regs(afu))) + goto err2; + + /* We need to reset the AFU before we can read the AFU descriptor */ + if ((rc = cxl_afu_reset(afu))) + goto err2; + + if (cxl_verbose) + dump_afu_descriptor(afu); + + if ((rc = cxl_read_afu_descriptor(afu))) + goto err2; + + if ((rc = cxl_afu_descriptor_looks_ok(afu))) + goto 
err2; + + if ((rc = init_implementation_afu_regs(afu))) + goto err2; + + if ((rc = cxl_register_serr_irq(afu))) + goto err2; + + if ((rc = cxl_register_psl_irq(afu))) + goto err3; + + /* Don't care if this fails */ + cxl_debugfs_afu_add(afu); + + /* + * After we call this function we must not free the afu directly, even + * if it returns an error! + */ + if ((rc = cxl_register_afu(afu))) + goto err_put1; + + if ((rc = cxl_sysfs_afu_add(afu))) + goto err_put1; + + if ((rc = cxl_afu_select_best_mode(afu))) + goto err_put2; + + adapter->afu[afu->slice] = afu; + + return 0; + +err_put2: + cxl_sysfs_afu_remove(afu); +err_put1: + device_unregister(&afu->dev); + free = false; + cxl_debugfs_afu_remove(afu); + cxl_release_psl_irq(afu); +err3: + cxl_release_serr_irq(afu); +err2: + cxl_unmap_slice_regs(afu); +err1: + if (free) + kfree(afu); + return rc; +} + +static void cxl_remove_afu(struct cxl_afu *afu) +{ + pr_devel("cxl_remove_afu\n"); + + if (!afu) + return; + + cxl_sysfs_afu_remove(afu); + cxl_debugfs_afu_remove(afu); + + spin_lock(&afu->adapter->afu_list_lock); + afu->adapter->afu[afu->slice] = NULL; + spin_unlock(&afu->adapter->afu_list_lock); + + cxl_context_detach_all(afu); + cxl_afu_deactivate_mode(afu); + + cxl_release_psl_irq(afu); + cxl_release_serr_irq(afu); + cxl_unmap_slice_regs(afu); + + device_unregister(&afu->dev); +} + +int cxl_reset(struct cxl *adapter) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + int rc; + int i; + u32 val; + + dev_info(&dev->dev, "CXL reset\n"); + + for (i = 0; i < adapter->slices; i++) + cxl_remove_afu(adapter->afu[i]); + + /* pcie_warm_reset requests a fundamental pci reset which includes a + * PERST assert/deassert. PERST triggers a loading of the image + * if "user" or "factory" is selected in sysfs */ + if ((rc = pci_set_pcie_reset_state(dev, pcie_warm_reset))) { + dev_err(&dev->dev, "cxl: pcie_warm_reset failed\n"); + return rc; + } + + /* the PERST done above fences the PHB. So, reset depends on EEH + * to unbind the driver, tell Sapphire to reinit the PHB, and rebind + * the driver. Do an mmio read explicitly to ensure EEH notices the + * fenced PHB. Retry for a few seconds before giving up.
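+	 * A fenced PHB returns all-ones on MMIO loads, which is what the
+	 * loop below polls for: it re-reads the P1 region until the value
+	 * 0xffffffff comes back, sleeping 500ms between attempts.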
*/ + i = 0; + while (((val = mmio_read32be(adapter->p1_mmio)) != 0xffffffff) && + (i < 5)) { + msleep(500); + i++; + } + + if (val != 0xffffffff) + dev_err(&dev->dev, "cxl: PERST failed to trigger EEH\n"); + + return rc; +} + +static int cxl_map_adapter_regs(struct cxl *adapter, struct pci_dev *dev) +{ + if (pci_request_region(dev, 2, "priv 2 regs")) + goto err1; + if (pci_request_region(dev, 0, "priv 1 regs")) + goto err2; + + pr_devel("cxl_map_adapter_regs: p1: %#.16llx %#llx, p2: %#.16llx %#llx", + p1_base(dev), p1_size(dev), p2_base(dev), p2_size(dev)); + + if (!(adapter->p1_mmio = ioremap(p1_base(dev), p1_size(dev)))) + goto err3; + + if (!(adapter->p2_mmio = ioremap(p2_base(dev), p2_size(dev)))) + goto err4; + + return 0; + +err4: + iounmap(adapter->p1_mmio); + adapter->p1_mmio = NULL; +err3: + pci_release_region(dev, 0); +err2: + pci_release_region(dev, 2); +err1: + return -ENOMEM; +} + +static void cxl_unmap_adapter_regs(struct cxl *adapter) +{ + if (adapter->p1_mmio) + iounmap(adapter->p1_mmio); + if (adapter->p2_mmio) + iounmap(adapter->p2_mmio); +} + +static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev) +{ + int vsec; + u32 afu_desc_off, afu_desc_size; + u32 ps_off, ps_size; + u16 vseclen; + u8 image_state; + + if (!(vsec = find_cxl_vsec(dev))) { + dev_err(&adapter->dev, "ABORTING: CXL VSEC not found!\n"); + return -ENODEV; + } + + CXL_READ_VSEC_LENGTH(dev, vsec, &vseclen); + if (vseclen < CXL_VSEC_MIN_SIZE) { + pr_err("ABORTING: CXL VSEC too short\n"); + return -EINVAL; + } + + CXL_READ_VSEC_STATUS(dev, vsec, &adapter->vsec_status); + CXL_READ_VSEC_PSL_REVISION(dev, vsec, &adapter->psl_rev); + CXL_READ_VSEC_CAIA_MAJOR(dev, vsec, &adapter->caia_major); + CXL_READ_VSEC_CAIA_MINOR(dev, vsec, &adapter->caia_minor); + CXL_READ_VSEC_BASE_IMAGE(dev, vsec, &adapter->base_image); + CXL_READ_VSEC_IMAGE_STATE(dev, vsec, &image_state); + adapter->user_image_loaded = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED); + adapter->perst_loads_image = true; + adapter->perst_select_user = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED); + + CXL_READ_VSEC_NAFUS(dev, vsec, &adapter->slices); + CXL_READ_VSEC_AFU_DESC_OFF(dev, vsec, &afu_desc_off); + CXL_READ_VSEC_AFU_DESC_SIZE(dev, vsec, &afu_desc_size); + CXL_READ_VSEC_PS_OFF(dev, vsec, &ps_off); + CXL_READ_VSEC_PS_SIZE(dev, vsec, &ps_size); + + /* Convert everything to bytes, because there is NO WAY I'd look at the + * code a month later and forget what units these are in ;-) */ + adapter->ps_off = ps_off * 64 * 1024; + adapter->ps_size = ps_size * 64 * 1024; + adapter->afu_desc_off = afu_desc_off * 64 * 1024; + adapter->afu_desc_size = afu_desc_size *64 * 1024; + + /* Total IRQs - 1 PSL ERROR - #AFU*(1 slice error + 1 DSI) */ + adapter->user_irqs = pnv_cxl_get_irq_count(dev) - 1 - 2*adapter->slices; + + return 0; +} + +static int cxl_vsec_looks_ok(struct cxl *adapter, struct pci_dev *dev) +{ + if (adapter->vsec_status & CXL_STATUS_SECOND_PORT) + return -EBUSY; + + if (adapter->vsec_status & CXL_UNSUPPORTED_FEATURES) { + dev_err(&adapter->dev, "ABORTING: CXL requires unsupported features\n"); + return -EINVAL; + } + + if (!adapter->slices) { + /* Once we support dynamic reprogramming we can use the card if + * it supports loadable AFUs */ + dev_err(&adapter->dev, "ABORTING: Device has no AFUs\n"); + return -EINVAL; + } + + if (!adapter->afu_desc_off || !adapter->afu_desc_size) { + dev_err(&adapter->dev, "ABORTING: VSEC shows no AFU descriptors\n"); + return -EINVAL; + } + + if (adapter->ps_size > p2_size(dev) - adapter->ps_off) { + 
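+		/*
+		 * The problem state area is carved out of BAR2 starting at
+		 * ps_off (cxl_map_slice_regs() computes psn_phys as
+		 * p2_base(dev) + ps_off + slice * ps_size), so a ps_size
+		 * running past the end of the BAR could never be mapped.
+		 */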
dev_err(&adapter->dev, "ABORTING: Problem state size larger than " + "available in BAR2: 0x%llx > 0x%llx\n", + adapter->ps_size, p2_size(dev) - adapter->ps_off); + return -EINVAL; + } + + return 0; +} + +static void cxl_release_adapter(struct device *dev) +{ + struct cxl *adapter = to_cxl_adapter(dev); + + pr_devel("cxl_release_adapter\n"); + + kfree(adapter); +} + +static struct cxl *cxl_alloc_adapter(struct pci_dev *dev) +{ + struct cxl *adapter; + + if (!(adapter = kzalloc(sizeof(struct cxl), GFP_KERNEL))) + return NULL; + + adapter->dev.parent = &dev->dev; + adapter->dev.release = cxl_release_adapter; + pci_set_drvdata(dev, adapter); + spin_lock_init(&adapter->afu_list_lock); + + return adapter; +} + +static int sanitise_adapter_regs(struct cxl *adapter) +{ + cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000); + return cxl_tlb_slb_invalidate(adapter); +} + +static struct cxl *cxl_init_adapter(struct pci_dev *dev) +{ + struct cxl *adapter; + bool free = true; + int rc; + + + if (!(adapter = cxl_alloc_adapter(dev))) + return ERR_PTR(-ENOMEM); + + if ((rc = switch_card_to_cxl(dev))) + goto err1; + + if ((rc = cxl_alloc_adapter_nr(adapter))) + goto err1; + + if ((rc = dev_set_name(&adapter->dev, "card%i", adapter->adapter_num))) + goto err2; + + if ((rc = cxl_read_vsec(adapter, dev))) + goto err2; + + if ((rc = cxl_vsec_looks_ok(adapter, dev))) + goto err2; + + if ((rc = cxl_update_image_control(adapter))) + goto err2; + + if ((rc = cxl_map_adapter_regs(adapter, dev))) + goto err2; + + if ((rc = sanitise_adapter_regs(adapter))) + goto err2; + + if ((rc = init_implementation_adapter_regs(adapter, dev))) + goto err3; + + if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_CAPI))) + goto err3; + + /* If recovery happened, the last step is to turn on snooping. + * In the non-recovery case this has no effect */ + if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON))) { + goto err3; + } + + if ((rc = cxl_register_psl_err_irq(adapter))) + goto err3; + + /* Don't care if this one fails: */ + cxl_debugfs_adapter_add(adapter); + + /* + * After we call this function we must not free the adapter directly, + * even if it returns an error! 
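+	 * The struct device's release callback (cxl_release_adapter) will
+	 * kfree() the adapter once the last reference goes away, so the
+	 * error paths from here on use device_unregister() and clear the
+	 * 'free' flag instead of freeing it directly.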
+ */ + if ((rc = cxl_register_adapter(adapter))) + goto err_put1; + + if ((rc = cxl_sysfs_adapter_add(adapter))) + goto err_put1; + + return adapter; + +err_put1: + device_unregister(&adapter->dev); + free = false; + cxl_debugfs_adapter_remove(adapter); + cxl_release_psl_err_irq(adapter); +err3: + cxl_unmap_adapter_regs(adapter); +err2: + cxl_remove_adapter_nr(adapter); +err1: + if (free) + kfree(adapter); + return ERR_PTR(rc); +} + +static void cxl_remove_adapter(struct cxl *adapter) +{ + struct pci_dev *pdev = to_pci_dev(adapter->dev.parent); + + pr_devel("cxl_release_adapter\n"); + + cxl_sysfs_adapter_remove(adapter); + cxl_debugfs_adapter_remove(adapter); + cxl_release_psl_err_irq(adapter); + cxl_unmap_adapter_regs(adapter); + cxl_remove_adapter_nr(adapter); + + device_unregister(&adapter->dev); + + pci_release_region(pdev, 0); + pci_release_region(pdev, 2); + pci_disable_device(pdev); +} + +static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) +{ + struct cxl *adapter; + int slice; + int rc; + + pci_dev_get(dev); + + if (cxl_verbose) + dump_cxl_config_space(dev); + + if ((rc = setup_cxl_bars(dev))) + return rc; + + if ((rc = pci_enable_device(dev))) { + dev_err(&dev->dev, "pci_enable_device failed: %i\n", rc); + return rc; + } + + adapter = cxl_init_adapter(dev); + if (IS_ERR(adapter)) { + dev_err(&dev->dev, "cxl_init_adapter failed: %li\n", PTR_ERR(adapter)); + return PTR_ERR(adapter); + } + + for (slice = 0; slice < adapter->slices; slice++) { + if ((rc = cxl_init_afu(adapter, slice, dev))) + dev_err(&dev->dev, "AFU %i failed to initialise: %i\n", slice, rc); + } + + return 0; +} + +static void cxl_remove(struct pci_dev *dev) +{ + struct cxl *adapter = pci_get_drvdata(dev); + int afu; + + dev_warn(&dev->dev, "pci remove\n"); + + /* + * Lock to prevent someone grabbing a ref through the adapter list as + * we are removing it + */ + for (afu = 0; afu < adapter->slices; afu++) + cxl_remove_afu(adapter->afu[afu]); + cxl_remove_adapter(adapter); +} + +struct pci_driver cxl_pci_driver = { + .name = "cxl-pci", + .id_table = cxl_pci_tbl, + .probe = cxl_probe, + .remove = cxl_remove, +}; diff --git a/kernel/drivers/misc/cxl/sysfs.c b/kernel/drivers/misc/cxl/sysfs.c new file mode 100644 index 000000000..d0c38c7bc --- /dev/null +++ b/kernel/drivers/misc/cxl/sysfs.c @@ -0,0 +1,601 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/sysfs.h> +#include <linux/pci_regs.h> + +#include "cxl.h" + +#define to_afu_chardev_m(d) dev_get_drvdata(d) + +/********* Adapter attributes **********************************************/ + +static ssize_t caia_version_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + return scnprintf(buf, PAGE_SIZE, "%i.%i\n", adapter->caia_major, + adapter->caia_minor); +} + +static ssize_t psl_revision_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->psl_rev); +} + +static ssize_t base_image_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->base_image); +} + +static ssize_t image_loaded_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + if (adapter->user_image_loaded) + return scnprintf(buf, PAGE_SIZE, "user\n"); + return scnprintf(buf, PAGE_SIZE, "factory\n"); +} + +static ssize_t reset_adapter_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl *adapter = to_cxl_adapter(device); + int rc; + int val; + + rc = sscanf(buf, "%i", &val); + if ((rc != 1) || (val != 1)) + return -EINVAL; + + if ((rc = cxl_reset(adapter))) + return rc; + return count; +} + +static ssize_t load_image_on_perst_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + if (!adapter->perst_loads_image) + return scnprintf(buf, PAGE_SIZE, "none\n"); + + if (adapter->perst_select_user) + return scnprintf(buf, PAGE_SIZE, "user\n"); + return scnprintf(buf, PAGE_SIZE, "factory\n"); +} + +static ssize_t load_image_on_perst_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl *adapter = to_cxl_adapter(device); + int rc; + + if (!strncmp(buf, "none", 4)) + adapter->perst_loads_image = false; + else if (!strncmp(buf, "user", 4)) { + adapter->perst_select_user = true; + adapter->perst_loads_image = true; + } else if (!strncmp(buf, "factory", 7)) { + adapter->perst_select_user = false; + adapter->perst_loads_image = true; + } else + return -EINVAL; + + if ((rc = cxl_update_image_control(adapter))) + return rc; + + return count; +} + +static struct device_attribute adapter_attrs[] = { + __ATTR_RO(caia_version), + __ATTR_RO(psl_revision), + __ATTR_RO(base_image), + __ATTR_RO(image_loaded), + __ATTR_RW(load_image_on_perst), + __ATTR(reset, S_IWUSR, NULL, reset_adapter_store), +}; + + +/********* AFU master specific attributes **********************************/ + +static ssize_t mmio_size_show_master(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_afu_chardev_m(device); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->adapter->ps_size); +} + +static ssize_t pp_mmio_off_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_afu_chardev_m(device); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->pp_offset); +} + +static ssize_t pp_mmio_len_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_afu_chardev_m(device); + + 
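+	/*
+	 * pp_size was filled in from the PPPSA field of the AFU descriptor
+	 * by cxl_read_afu_descriptor() (already converted from 4K pages to
+	 * bytes there), so it can be reported to userspace as-is.
+	 */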
return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->pp_size); +} + +static struct device_attribute afu_master_attrs[] = { + __ATTR(mmio_size, S_IRUGO, mmio_size_show_master, NULL), + __ATTR_RO(pp_mmio_off), + __ATTR_RO(pp_mmio_len), +}; + + +/********* AFU attributes **************************************************/ + +static ssize_t mmio_size_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + + if (afu->pp_size) + return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->pp_size); + return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->adapter->ps_size); +} + +static ssize_t reset_store_afu(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl_afu *afu = to_cxl_afu(device); + int rc; + + /* Not safe to reset if it is currently in use */ + mutex_lock(&afu->contexts_lock); + if (!idr_is_empty(&afu->contexts_idr)) { + rc = -EBUSY; + goto err; + } + + if ((rc = cxl_afu_reset(afu))) + goto err; + + rc = count; +err: + mutex_unlock(&afu->contexts_lock); + return rc; +} + +static ssize_t irqs_min_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", afu->pp_irqs); +} + +static ssize_t irqs_max_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", afu->irqs_max); +} + +static ssize_t irqs_max_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl_afu *afu = to_cxl_afu(device); + ssize_t ret; + int irqs_max; + + ret = sscanf(buf, "%i", &irqs_max); + if (ret != 1) + return -EINVAL; + + if (irqs_max < afu->pp_irqs) + return -EINVAL; + + if (irqs_max > afu->adapter->user_irqs) + return -EINVAL; + + afu->irqs_max = irqs_max; + return count; +} + +static ssize_t modes_supported_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + char *p = buf, *end = buf + PAGE_SIZE; + + if (afu->modes_supported & CXL_MODE_DEDICATED) + p += scnprintf(p, end - p, "dedicated_process\n"); + if (afu->modes_supported & CXL_MODE_DIRECTED) + p += scnprintf(p, end - p, "afu_directed\n"); + return (p - buf); +} + +static ssize_t prefault_mode_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + + switch (afu->prefault_mode) { + case CXL_PREFAULT_WED: + return scnprintf(buf, PAGE_SIZE, "work_element_descriptor\n"); + case CXL_PREFAULT_ALL: + return scnprintf(buf, PAGE_SIZE, "all\n"); + default: + return scnprintf(buf, PAGE_SIZE, "none\n"); + } +} + +static ssize_t prefault_mode_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl_afu *afu = to_cxl_afu(device); + enum prefault_modes mode = -1; + + if (!strncmp(buf, "work_element_descriptor", 23)) + mode = CXL_PREFAULT_WED; + if (!strncmp(buf, "all", 3)) + mode = CXL_PREFAULT_ALL; + if (!strncmp(buf, "none", 4)) + mode = CXL_PREFAULT_NONE; + + if (mode == -1) + return -EINVAL; + + afu->prefault_mode = mode; + return count; +} + +static ssize_t mode_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + + if (afu->current_mode == CXL_MODE_DEDICATED) + return scnprintf(buf, PAGE_SIZE, "dedicated_process\n"); + if (afu->current_mode == 
CXL_MODE_DIRECTED) + return scnprintf(buf, PAGE_SIZE, "afu_directed\n"); + return scnprintf(buf, PAGE_SIZE, "none\n"); +} + +static ssize_t mode_store(struct device *device, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl_afu *afu = to_cxl_afu(device); + int old_mode, mode = -1; + int rc = -EBUSY; + + /* can't change this if we have a user */ + mutex_lock(&afu->contexts_lock); + if (!idr_is_empty(&afu->contexts_idr)) + goto err; + + if (!strncmp(buf, "dedicated_process", 17)) + mode = CXL_MODE_DEDICATED; + if (!strncmp(buf, "afu_directed", 12)) + mode = CXL_MODE_DIRECTED; + if (!strncmp(buf, "none", 4)) + mode = 0; + + if (mode == -1) { + rc = -EINVAL; + goto err; + } + + /* + * cxl_afu_deactivate_mode needs to be done outside the lock, prevent + * other contexts coming in before we are ready: + */ + old_mode = afu->current_mode; + afu->current_mode = 0; + afu->num_procs = 0; + + mutex_unlock(&afu->contexts_lock); + + if ((rc = _cxl_afu_deactivate_mode(afu, old_mode))) + return rc; + if ((rc = cxl_afu_activate_mode(afu, mode))) + return rc; + + return count; +err: + mutex_unlock(&afu->contexts_lock); + return rc; +} + +static ssize_t api_version_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%i\n", CXL_API_VERSION); +} + +static ssize_t api_version_compatible_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%i\n", CXL_API_VERSION_COMPATIBLE); +} + +static struct device_attribute afu_attrs[] = { + __ATTR_RO(mmio_size), + __ATTR_RO(irqs_min), + __ATTR_RW(irqs_max), + __ATTR_RO(modes_supported), + __ATTR_RW(mode), + __ATTR_RW(prefault_mode), + __ATTR_RO(api_version), + __ATTR_RO(api_version_compatible), + __ATTR(reset, S_IWUSR, NULL, reset_store_afu), +}; + +int cxl_sysfs_adapter_add(struct cxl *adapter) +{ + int i, rc; + + for (i = 0; i < ARRAY_SIZE(adapter_attrs); i++) { + if ((rc = device_create_file(&adapter->dev, &adapter_attrs[i]))) + goto err; + } + return 0; +err: + for (i--; i >= 0; i--) + device_remove_file(&adapter->dev, &adapter_attrs[i]); + return rc; +} +void cxl_sysfs_adapter_remove(struct cxl *adapter) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(adapter_attrs); i++) + device_remove_file(&adapter->dev, &adapter_attrs[i]); +} + +struct afu_config_record { + struct kobject kobj; + struct bin_attribute config_attr; + struct list_head list; + int cr; + u16 device; + u16 vendor; + u32 class; +}; + +#define to_cr(obj) container_of(obj, struct afu_config_record, kobj) + +static ssize_t vendor_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct afu_config_record *cr = to_cr(kobj); + + return scnprintf(buf, PAGE_SIZE, "0x%.4x\n", cr->vendor); +} + +static ssize_t device_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct afu_config_record *cr = to_cr(kobj); + + return scnprintf(buf, PAGE_SIZE, "0x%.4x\n", cr->device); +} + +static ssize_t class_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct afu_config_record *cr = to_cr(kobj); + + return scnprintf(buf, PAGE_SIZE, "0x%.6x\n", cr->class); +} + +static ssize_t afu_read_config(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t off, size_t count) +{ + struct afu_config_record *cr = to_cr(kobj); + struct cxl_afu *afu = to_cxl_afu(container_of(kobj->parent, struct device, kobj)); + + u64 i, j, val, size = afu->crs_len; + + if (off > size) + return 0; + 
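+	/*
+	 * Clamp the request to the record length, then copy out via 64-bit
+	 * reads: each pass of the loop below fetches the aligned doubleword
+	 * containing 'off' and extracts the bytes that fall inside the
+	 * requested window, so unaligned offsets and lengths work even
+	 * though the underlying access is always 8 bytes wide.
+	 */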
if (off + count > size) + count = size - off; + + for (i = 0; i < count;) { + val = cxl_afu_cr_read64(afu, cr->cr, off & ~0x7); + for (j = off & 0x7; j < 8 && i < count; i++, j++, off++) + buf[i] = (val >> (j * 8)) & 0xff; + } + + return count; +} + +static struct kobj_attribute vendor_attribute = + __ATTR_RO(vendor); +static struct kobj_attribute device_attribute = + __ATTR_RO(device); +static struct kobj_attribute class_attribute = + __ATTR_RO(class); + +static struct attribute *afu_cr_attrs[] = { + &vendor_attribute.attr, + &device_attribute.attr, + &class_attribute.attr, + NULL, +}; + +static void release_afu_config_record(struct kobject *kobj) +{ + struct afu_config_record *cr = to_cr(kobj); + + kfree(cr); +} + +static struct kobj_type afu_config_record_type = { + .sysfs_ops = &kobj_sysfs_ops, + .release = release_afu_config_record, + .default_attrs = afu_cr_attrs, +}; + +static struct afu_config_record *cxl_sysfs_afu_new_cr(struct cxl_afu *afu, int cr_idx) +{ + struct afu_config_record *cr; + int rc; + + cr = kzalloc(sizeof(struct afu_config_record), GFP_KERNEL); + if (!cr) + return ERR_PTR(-ENOMEM); + + cr->cr = cr_idx; + cr->device = cxl_afu_cr_read16(afu, cr_idx, PCI_DEVICE_ID); + cr->vendor = cxl_afu_cr_read16(afu, cr_idx, PCI_VENDOR_ID); + cr->class = cxl_afu_cr_read32(afu, cr_idx, PCI_CLASS_REVISION) >> 8; + + /* + * Export raw AFU PCIe like config record. For now this is read only by + * root - we can expand that later to be readable by non-root and maybe + * even writable provided we have a good use-case. Once we suport + * exposing AFUs through a virtual PHB they will get that for free from + * Linux' PCI infrastructure, but until then it's not clear that we + * need it for anything since the main use case is just identifying + * AFUs, which can be done via the vendor, device and class attributes. 
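+	 *
+	 * From userspace the record then reads like raw config space, along
+	 * the lines of (the exact path depends on how the cxl class
+	 * registers the AFU device):
+	 *
+	 *   hexdump /sys/class/cxl/afu0.0/cr0/config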
+ */ + sysfs_bin_attr_init(&cr->config_attr); + cr->config_attr.attr.name = "config"; + cr->config_attr.attr.mode = S_IRUSR; + cr->config_attr.size = afu->crs_len; + cr->config_attr.read = afu_read_config; + + rc = kobject_init_and_add(&cr->kobj, &afu_config_record_type, + &afu->dev.kobj, "cr%i", cr->cr); + if (rc) + goto err; + + rc = sysfs_create_bin_file(&cr->kobj, &cr->config_attr); + if (rc) + goto err1; + + rc = kobject_uevent(&cr->kobj, KOBJ_ADD); + if (rc) + goto err2; + + return cr; +err2: + sysfs_remove_bin_file(&cr->kobj, &cr->config_attr); +err1: + kobject_put(&cr->kobj); + return ERR_PTR(rc); +err: + kfree(cr); + return ERR_PTR(rc); +} + +void cxl_sysfs_afu_remove(struct cxl_afu *afu) +{ + struct afu_config_record *cr, *tmp; + int i; + + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) + device_remove_file(&afu->dev, &afu_attrs[i]); + + list_for_each_entry_safe(cr, tmp, &afu->crs, list) { + sysfs_remove_bin_file(&cr->kobj, &cr->config_attr); + kobject_put(&cr->kobj); + } +} + +int cxl_sysfs_afu_add(struct cxl_afu *afu) +{ + struct afu_config_record *cr; + int i, rc; + + INIT_LIST_HEAD(&afu->crs); + + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) { + if ((rc = device_create_file(&afu->dev, &afu_attrs[i]))) + goto err; + } + + for (i = 0; i < afu->crs_num; i++) { + cr = cxl_sysfs_afu_new_cr(afu, i); + if (IS_ERR(cr)) { + rc = PTR_ERR(cr); + goto err1; + } + list_add(&cr->list, &afu->crs); + } + + return 0; + +err1: + cxl_sysfs_afu_remove(afu); + return rc; +err: + for (i--; i >= 0; i--) + device_remove_file(&afu->dev, &afu_attrs[i]); + return rc; +} + +int cxl_sysfs_afu_m_add(struct cxl_afu *afu) +{ + int i, rc; + + for (i = 0; i < ARRAY_SIZE(afu_master_attrs); i++) { + if ((rc = device_create_file(afu->chardev_m, &afu_master_attrs[i]))) + goto err; + } + + return 0; + +err: + for (i--; i >= 0; i--) + device_remove_file(afu->chardev_m, &afu_master_attrs[i]); + return rc; +} + +void cxl_sysfs_afu_m_remove(struct cxl_afu *afu) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(afu_master_attrs); i++) + device_remove_file(afu->chardev_m, &afu_master_attrs[i]); +} diff --git a/kernel/drivers/misc/cxl/trace.c b/kernel/drivers/misc/cxl/trace.c new file mode 100644 index 000000000..c2b06d319 --- /dev/null +++ b/kernel/drivers/misc/cxl/trace.c @@ -0,0 +1,13 @@ +/* + * Copyright 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "trace.h" +#endif diff --git a/kernel/drivers/misc/cxl/trace.h b/kernel/drivers/misc/cxl/trace.h new file mode 100644 index 000000000..ae434d878 --- /dev/null +++ b/kernel/drivers/misc/cxl/trace.h @@ -0,0 +1,459 @@ +/* + * Copyright 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM cxl + +#if !defined(_CXL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _CXL_TRACE_H + +#include <linux/tracepoint.h> + +#include "cxl.h" + +#define DSISR_FLAGS \ + { CXL_PSL_DSISR_An_DS, "DS" }, \ + { CXL_PSL_DSISR_An_DM, "DM" }, \ + { CXL_PSL_DSISR_An_ST, "ST" }, \ + { CXL_PSL_DSISR_An_UR, "UR" }, \ + { CXL_PSL_DSISR_An_PE, "PE" }, \ + { CXL_PSL_DSISR_An_AE, "AE" }, \ + { CXL_PSL_DSISR_An_OC, "OC" }, \ + { CXL_PSL_DSISR_An_M, "M" }, \ + { CXL_PSL_DSISR_An_P, "P" }, \ + { CXL_PSL_DSISR_An_A, "A" }, \ + { CXL_PSL_DSISR_An_S, "S" }, \ + { CXL_PSL_DSISR_An_K, "K" } + +#define TFC_FLAGS \ + { CXL_PSL_TFC_An_A, "A" }, \ + { CXL_PSL_TFC_An_C, "C" }, \ + { CXL_PSL_TFC_An_AE, "AE" }, \ + { CXL_PSL_TFC_An_R, "R" } + +#define LLCMD_NAMES \ + { CXL_SPA_SW_CMD_TERMINATE, "TERMINATE" }, \ + { CXL_SPA_SW_CMD_REMOVE, "REMOVE" }, \ + { CXL_SPA_SW_CMD_SUSPEND, "SUSPEND" }, \ + { CXL_SPA_SW_CMD_RESUME, "RESUME" }, \ + { CXL_SPA_SW_CMD_ADD, "ADD" }, \ + { CXL_SPA_SW_CMD_UPDATE, "UPDATE" } + +#define AFU_COMMANDS \ + { 0, "DISABLE" }, \ + { CXL_AFU_Cntl_An_E, "ENABLE" }, \ + { CXL_AFU_Cntl_An_RA, "RESET" } + +#define PSL_COMMANDS \ + { CXL_PSL_SCNTL_An_Pc, "PURGE" }, \ + { CXL_PSL_SCNTL_An_Sc, "SUSPEND" } + + +DECLARE_EVENT_CLASS(cxl_pe_class, + TP_PROTO(struct cxl_context *ctx), + + TP_ARGS(ctx), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + ), + + TP_printk("afu%i.%i pe=%i", + __entry->card, + __entry->afu, + __entry->pe + ) +); + + +TRACE_EVENT(cxl_attach, + TP_PROTO(struct cxl_context *ctx, u64 wed, s16 num_interrupts, u64 amr), + + TP_ARGS(ctx, wed, num_interrupts, amr), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(pid_t, pid) + __field(u64, wed) + __field(u64, amr) + __field(s16, num_interrupts) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->pid = pid_nr(ctx->pid); + __entry->wed = wed; + __entry->amr = amr; + __entry->num_interrupts = num_interrupts; + ), + + TP_printk("afu%i.%i pid=%i pe=%i wed=0x%.16llx irqs=%i amr=0x%llx", + __entry->card, + __entry->afu, + __entry->pid, + __entry->pe, + __entry->wed, + __entry->num_interrupts, + __entry->amr + ) +); + +DEFINE_EVENT(cxl_pe_class, cxl_detach, + TP_PROTO(struct cxl_context *ctx), + TP_ARGS(ctx) +); + +TRACE_EVENT(cxl_afu_irq, + TP_PROTO(struct cxl_context *ctx, int afu_irq, int virq, irq_hw_number_t hwirq), + + TP_ARGS(ctx, afu_irq, virq, hwirq), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u16, afu_irq) + __field(int, virq) + __field(irq_hw_number_t, hwirq) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->afu_irq = afu_irq; + __entry->virq = virq; + __entry->hwirq = hwirq; + ), + + TP_printk("afu%i.%i pe=%i afu_irq=%i virq=%i hwirq=0x%lx", + __entry->card, + __entry->afu, + __entry->pe, + __entry->afu_irq, + __entry->virq, + __entry->hwirq + ) +); + +TRACE_EVENT(cxl_psl_irq, + TP_PROTO(struct cxl_context *ctx, int irq, u64 dsisr, u64 dar), + + TP_ARGS(ctx, irq, dsisr, dar), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(int, irq) + __field(u64, dsisr) + __field(u64, dar) + ), + + TP_fast_assign( + 
__entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->irq = irq; + __entry->dsisr = dsisr; + __entry->dar = dar; + ), + + TP_printk("afu%i.%i pe=%i irq=%i dsisr=%s dar=0x%.16llx", + __entry->card, + __entry->afu, + __entry->pe, + __entry->irq, + __print_flags(__entry->dsisr, "|", DSISR_FLAGS), + __entry->dar + ) +); + +TRACE_EVENT(cxl_psl_irq_ack, + TP_PROTO(struct cxl_context *ctx, u64 tfc), + + TP_ARGS(ctx, tfc), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u64, tfc) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->tfc = tfc; + ), + + TP_printk("afu%i.%i pe=%i tfc=%s", + __entry->card, + __entry->afu, + __entry->pe, + __print_flags(__entry->tfc, "|", TFC_FLAGS) + ) +); + +TRACE_EVENT(cxl_ste_miss, + TP_PROTO(struct cxl_context *ctx, u64 dar), + + TP_ARGS(ctx, dar), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u64, dar) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->dar = dar; + ), + + TP_printk("afu%i.%i pe=%i dar=0x%.16llx", + __entry->card, + __entry->afu, + __entry->pe, + __entry->dar + ) +); + +TRACE_EVENT(cxl_ste_write, + TP_PROTO(struct cxl_context *ctx, unsigned int idx, u64 e, u64 v), + + TP_ARGS(ctx, idx, e, v), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(unsigned int, idx) + __field(u64, e) + __field(u64, v) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->idx = idx; + __entry->e = e; + __entry->v = v; + ), + + TP_printk("afu%i.%i pe=%i SSTE[%i] E=0x%.16llx V=0x%.16llx", + __entry->card, + __entry->afu, + __entry->pe, + __entry->idx, + __entry->e, + __entry->v + ) +); + +TRACE_EVENT(cxl_pte_miss, + TP_PROTO(struct cxl_context *ctx, u64 dsisr, u64 dar), + + TP_ARGS(ctx, dsisr, dar), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u64, dsisr) + __field(u64, dar) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->dsisr = dsisr; + __entry->dar = dar; + ), + + TP_printk("afu%i.%i pe=%i dsisr=%s dar=0x%.16llx", + __entry->card, + __entry->afu, + __entry->pe, + __print_flags(__entry->dsisr, "|", DSISR_FLAGS), + __entry->dar + ) +); + +TRACE_EVENT(cxl_llcmd, + TP_PROTO(struct cxl_context *ctx, u64 cmd), + + TP_ARGS(ctx, cmd), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u64, cmd) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->cmd = cmd; + ), + + TP_printk("afu%i.%i pe=%i cmd=%s", + __entry->card, + __entry->afu, + __entry->pe, + __print_symbolic_u64(__entry->cmd, LLCMD_NAMES) + ) +); + +TRACE_EVENT(cxl_llcmd_done, + TP_PROTO(struct cxl_context *ctx, u64 cmd, int rc), + + TP_ARGS(ctx, cmd, rc), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u64, cmd) + __field(int, rc) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->rc = rc; + __entry->cmd = cmd; + ), + + TP_printk("afu%i.%i pe=%i cmd=%s rc=%i", 
+ __entry->card, + __entry->afu, + __entry->pe, + __print_symbolic_u64(__entry->cmd, LLCMD_NAMES), + __entry->rc + ) +); + +DECLARE_EVENT_CLASS(cxl_afu_psl_ctrl, + TP_PROTO(struct cxl_afu *afu, u64 cmd), + + TP_ARGS(afu, cmd), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u64, cmd) + ), + + TP_fast_assign( + __entry->card = afu->adapter->adapter_num; + __entry->afu = afu->slice; + __entry->cmd = cmd; + ), + + TP_printk("afu%i.%i cmd=%s", + __entry->card, + __entry->afu, + __print_symbolic_u64(__entry->cmd, AFU_COMMANDS) + ) +); + +DECLARE_EVENT_CLASS(cxl_afu_psl_ctrl_done, + TP_PROTO(struct cxl_afu *afu, u64 cmd, int rc), + + TP_ARGS(afu, cmd, rc), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u64, cmd) + __field(int, rc) + ), + + TP_fast_assign( + __entry->card = afu->adapter->adapter_num; + __entry->afu = afu->slice; + __entry->rc = rc; + __entry->cmd = cmd; + ), + + TP_printk("afu%i.%i cmd=%s rc=%i", + __entry->card, + __entry->afu, + __print_symbolic_u64(__entry->cmd, AFU_COMMANDS), + __entry->rc + ) +); + +DEFINE_EVENT(cxl_afu_psl_ctrl, cxl_afu_ctrl, + TP_PROTO(struct cxl_afu *afu, u64 cmd), + TP_ARGS(afu, cmd) +); + +DEFINE_EVENT(cxl_afu_psl_ctrl_done, cxl_afu_ctrl_done, + TP_PROTO(struct cxl_afu *afu, u64 cmd, int rc), + TP_ARGS(afu, cmd, rc) +); + +DEFINE_EVENT_PRINT(cxl_afu_psl_ctrl, cxl_psl_ctrl, + TP_PROTO(struct cxl_afu *afu, u64 cmd), + TP_ARGS(afu, cmd), + + TP_printk("psl%i.%i cmd=%s", + __entry->card, + __entry->afu, + __print_symbolic_u64(__entry->cmd, PSL_COMMANDS) + ) +); + +DEFINE_EVENT_PRINT(cxl_afu_psl_ctrl_done, cxl_psl_ctrl_done, + TP_PROTO(struct cxl_afu *afu, u64 cmd, int rc), + TP_ARGS(afu, cmd, rc), + + TP_printk("psl%i.%i cmd=%s rc=%i", + __entry->card, + __entry->afu, + __print_symbolic_u64(__entry->cmd, PSL_COMMANDS), + __entry->rc + ) +); + +DEFINE_EVENT(cxl_pe_class, cxl_slbia, + TP_PROTO(struct cxl_context *ctx), + TP_ARGS(ctx) +); + +#endif /* _CXL_TRACE_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace +#include <trace/define_trace.h> diff --git a/kernel/drivers/misc/ds1682.c b/kernel/drivers/misc/ds1682.c new file mode 100644 index 000000000..b909fb302 --- /dev/null +++ b/kernel/drivers/misc/ds1682.c @@ -0,0 +1,255 @@ +/* + * Dallas Semiconductor DS1682 Elapsed Time Recorder device driver + * + * Written by: Grant Likely <grant.likely@secretlab.ca> + * + * Copyright (C) 2007 Secret Lab Technologies Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * The DS1682 elapsed timer recorder is a simple device that implements + * one elapsed time counter, one event counter, an alarm signal and 10 + * bytes of general purpose EEPROM. + * + * This driver provides access to the DS1682 counters and user data via + * the sysfs. The following attributes are added to the device node: + * elapsed_time (u32): Total elapsed event time in ms resolution + * alarm_time (u32): When elapsed time exceeds the value in alarm_time, + * then the alarm pin is asserted. + * event_count (u16): number of times the event pin has gone low. + * eeprom (u8[10]): general purpose EEPROM + * + * Counter registers and user data are both read/write unless the device + * has been write protected. This driver does not support turning off write + * protection. 
Once write protection is turned on, it is impossible to + * turn it off again, so I have left the feature out of this driver to avoid + * accidental enabling, but it is trivial to add write protect support. + * + */ + +#include <linux/module.h> +#include <linux/i2c.h> +#include <linux/string.h> +#include <linux/list.h> +#include <linux/sysfs.h> +#include <linux/ctype.h> +#include <linux/hwmon-sysfs.h> + +/* Device registers */ +#define DS1682_REG_CONFIG 0x00 +#define DS1682_REG_ALARM 0x01 +#define DS1682_REG_ELAPSED 0x05 +#define DS1682_REG_EVT_CNTR 0x09 +#define DS1682_REG_EEPROM 0x0b +#define DS1682_REG_RESET 0x1d +#define DS1682_REG_WRITE_DISABLE 0x1e +#define DS1682_REG_WRITE_MEM_DISABLE 0x1f + +#define DS1682_EEPROM_SIZE 10 + +/* + * Generic counter attributes + */ +static ssize_t ds1682_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + struct i2c_client *client = to_i2c_client(dev); + __le32 val = 0; + int rc; + + dev_dbg(dev, "ds1682_show() called on %s\n", attr->attr.name); + + /* Read the register */ + rc = i2c_smbus_read_i2c_block_data(client, sattr->index, sattr->nr, + (u8 *) & val); + if (rc < 0) + return -EIO; + + /* Special case: the 32 bit regs are time values with 1/4s + * resolution, scale them up to milliseconds */ + if (sattr->nr == 4) + return sprintf(buf, "%llu\n", + ((unsigned long long)le32_to_cpu(val)) * 250); + + /* Format the output string and return # of bytes */ + return sprintf(buf, "%li\n", (long)le32_to_cpu(val)); +} + +static ssize_t ds1682_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + struct i2c_client *client = to_i2c_client(dev); + u64 val; + __le32 val_le; + int rc; + + dev_dbg(dev, "ds1682_store() called on %s\n", attr->attr.name); + + /* Decode input */ + rc = kstrtoull(buf, 0, &val); + if (rc < 0) { + dev_dbg(dev, "input string not a number\n"); + return -EINVAL; + } + + /* Special case: the 32 bit regs are time values with 1/4s + * resolution, scale input down to quarter-seconds */ + if (sattr->nr == 4) + do_div(val, 250); + + /* write out the value */ + val_le = cpu_to_le32(val); + rc = i2c_smbus_write_i2c_block_data(client, sattr->index, sattr->nr, + (u8 *) & val_le); + if (rc < 0) { + dev_err(dev, "register write failed; reg=0x%x, size=%i\n", + sattr->index, sattr->nr); + return -EIO; + } + + return count; +} + +/* + * Simple register attributes + */ +static SENSOR_DEVICE_ATTR_2(elapsed_time, S_IRUGO | S_IWUSR, ds1682_show, + ds1682_store, 4, DS1682_REG_ELAPSED); +static SENSOR_DEVICE_ATTR_2(alarm_time, S_IRUGO | S_IWUSR, ds1682_show, + ds1682_store, 4, DS1682_REG_ALARM); +static SENSOR_DEVICE_ATTR_2(event_count, S_IRUGO | S_IWUSR, ds1682_show, + ds1682_store, 2, DS1682_REG_EVT_CNTR); + +static const struct attribute_group ds1682_group = { + .attrs = (struct attribute *[]) { + &sensor_dev_attr_elapsed_time.dev_attr.attr, + &sensor_dev_attr_alarm_time.dev_attr.attr, + &sensor_dev_attr_event_count.dev_attr.attr, + NULL, + }, +}; + +/* + * User data attribute + */ +static ssize_t ds1682_eeprom_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + struct i2c_client *client = kobj_to_i2c_client(kobj); + int rc; + + dev_dbg(&client->dev, "ds1682_eeprom_read(p=%p, off=%lli, c=%zi)\n", + buf, off, count); + + if (off >= DS1682_EEPROM_SIZE) + return 0; + + if (off + count > 
DS1682_EEPROM_SIZE) + count = DS1682_EEPROM_SIZE - off; + + rc = i2c_smbus_read_i2c_block_data(client, DS1682_REG_EEPROM + off, + count, buf); + if (rc < 0) + return -EIO; + + return count; +} + +static ssize_t ds1682_eeprom_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + struct i2c_client *client = kobj_to_i2c_client(kobj); + + dev_dbg(&client->dev, "ds1682_eeprom_write(p=%p, off=%lli, c=%zi)\n", + buf, off, count); + + if (off >= DS1682_EEPROM_SIZE) + return -ENOSPC; + + if (off + count > DS1682_EEPROM_SIZE) + count = DS1682_EEPROM_SIZE - off; + + /* Write out to the device */ + if (i2c_smbus_write_i2c_block_data(client, DS1682_REG_EEPROM + off, + count, buf) < 0) + return -EIO; + + return count; +} + +static struct bin_attribute ds1682_eeprom_attr = { + .attr = { + .name = "eeprom", + .mode = S_IRUGO | S_IWUSR, + }, + .size = DS1682_EEPROM_SIZE, + .read = ds1682_eeprom_read, + .write = ds1682_eeprom_write, +}; + +/* + * Called when a ds1682 device is matched with this driver + */ +static int ds1682_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int rc; + + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_I2C_BLOCK)) { + dev_err(&client->dev, "i2c bus does not support the ds1682\n"); + rc = -ENODEV; + goto exit; + } + + rc = sysfs_create_group(&client->dev.kobj, &ds1682_group); + if (rc) + goto exit; + + rc = sysfs_create_bin_file(&client->dev.kobj, &ds1682_eeprom_attr); + if (rc) + goto exit_bin_attr; + + return 0; + + exit_bin_attr: + sysfs_remove_group(&client->dev.kobj, &ds1682_group); + exit: + return rc; +} + +static int ds1682_remove(struct i2c_client *client) +{ + sysfs_remove_bin_file(&client->dev.kobj, &ds1682_eeprom_attr); + sysfs_remove_group(&client->dev.kobj, &ds1682_group); + return 0; +} + +static const struct i2c_device_id ds1682_id[] = { + { "ds1682", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, ds1682_id); + +static struct i2c_driver ds1682_driver = { + .driver = { + .name = "ds1682", + }, + .probe = ds1682_probe, + .remove = ds1682_remove, + .id_table = ds1682_id, +}; + +module_i2c_driver(ds1682_driver); + +MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>"); +MODULE_DESCRIPTION("DS1682 Elapsed Time Indicator driver"); +MODULE_LICENSE("GPL"); diff --git a/kernel/drivers/misc/dummy-irq.c b/kernel/drivers/misc/dummy-irq.c new file mode 100644 index 000000000..acbbe0390 --- /dev/null +++ b/kernel/drivers/misc/dummy-irq.c @@ -0,0 +1,64 @@ +/* + * Dummy IRQ handler driver. + * + * This module only registers itself as a handler that is specified to it + * by the 'irq' parameter. + * + * The sole purpose of this module is to help with debugging of systems on + * which spurious IRQs would happen on disabled IRQ vector. + * + * Copyright (C) 2013 Jiri Kosina + */ + +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/irq.h> +#include <linux/interrupt.h> + +static int irq = -1; + +static irqreturn_t dummy_interrupt(int irq, void *dev_id) +{ + static int count = 0; + + if (count == 0) { + printk(KERN_INFO "dummy-irq: interrupt occurred on IRQ %d\n", + irq); + count++; + } + + return IRQ_NONE; +} + +static int __init dummy_irq_init(void) +{ + if (irq < 0) { + printk(KERN_ERR "dummy-irq: no IRQ given. 
Use irq=N\n"); + return -EIO; + } + if (request_irq(irq, &dummy_interrupt, IRQF_SHARED, "dummy_irq", &irq)) { + printk(KERN_ERR "dummy-irq: cannot register IRQ %d\n", irq); + return -EIO; + } + printk(KERN_INFO "dummy-irq: registered for IRQ %d\n", irq); + return 0; +} + +static void __exit dummy_irq_exit(void) +{ + printk(KERN_INFO "dummy-irq unloaded\n"); + free_irq(irq, &irq); +} + +module_init(dummy_irq_init); +module_exit(dummy_irq_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jiri Kosina"); +module_param(irq, uint, 0444); +MODULE_PARM_DESC(irq, "The IRQ to register for"); +MODULE_DESCRIPTION("Dummy IRQ handler driver"); diff --git a/kernel/drivers/misc/echo/Kconfig b/kernel/drivers/misc/echo/Kconfig new file mode 100644 index 000000000..f1d41ea9c --- /dev/null +++ b/kernel/drivers/misc/echo/Kconfig @@ -0,0 +1,9 @@ +config ECHO + tristate "Line Echo Canceller support" + default n + ---help--- + This driver provides line echo cancelling support for mISDN and + Zaptel drivers. + + To compile this driver as a module, choose M here. The module + will be called echo. diff --git a/kernel/drivers/misc/echo/Makefile b/kernel/drivers/misc/echo/Makefile new file mode 100644 index 000000000..7d4caac12 --- /dev/null +++ b/kernel/drivers/misc/echo/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_ECHO) += echo.o diff --git a/kernel/drivers/misc/echo/echo.c b/kernel/drivers/misc/echo/echo.c new file mode 100644 index 000000000..9597e9523 --- /dev/null +++ b/kernel/drivers/misc/echo/echo.c @@ -0,0 +1,674 @@ +/* + * SpanDSP - a series of DSP components for telephony + * + * echo.c - A line echo canceller. This code is being developed + * against and partially complies with G168. + * + * Written by Steve Underwood <steveu@coppice.org> + * and David Rowe <david_at_rowetel_dot_com> + * + * Copyright (C) 2001, 2003 Steve Underwood, 2007 David Rowe + * + * Based on a bit from here, a bit from there, eye of toad, ear of + * bat, 15 years of failed attempts by David and a few fried brain + * cells. + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/*! \file */ + +/* Implementation Notes + David Rowe + April 2007 + + This code started life as Steve's NLMS algorithm with a tap + rotation algorithm to handle divergence during double talk. I + added a Geigel Double Talk Detector (DTD) [2] and performed some + G168 tests. However I had trouble meeting the G168 requirements, + especially for double talk - there were always cases where my DTD + failed, for example where near end speech was under the 6dB + threshold required for declaring double talk. + + So I tried a two path algorithm [1], which has so far given better + results. The original tap rotation/Geigel algorithm is available + in SVN http://svn.rowetel.com/software/oslec/tags/before_16bit. + It's probably possible to make it work if some one wants to put some + serious work into it. 
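+
+   The two path idea, in short: a foreground filter does the actual
+   echo subtraction while a background filter adapts continuously.
+   The background taps are only copied across to the foreground
+   filter once the background filter has produced a clearly better
+   error signal for several consecutive samples (see the transfer
+   logic in oslec_update() below), so divergence during double talk
+   can only ever corrupt the background filter.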
+ + At present no special treatment is provided for tones, which + generally cause NLMS algorithms to diverge. Initial runs of a + subset of the G168 tests for tones (e.g. ./echo_test 6) show the + current algorithm is passing OK, which is kind of surprising. The + full set of tests needs to be performed to confirm this result. + + One other interesting change is that I have managed to get the NLMS + code to work with 16 bit coefficients, rather than the original 32 + bit coefficients. This reduces the MIPS and storage required. + I evaluated the 16 bit port using g168_tests.sh and listening tests + on 4 real-world samples. + + I also attempted the implementation of a block based NLMS update + [2] but although this passes g168_tests.sh it didn't converge well + on the real-world samples. I have no idea why, perhaps a scaling + problem. The block based code is also available in SVN + http://svn.rowetel.com/software/oslec/tags/before_16bit. If this + code can be debugged, it will lead to further reduction in MIPS, as + the block update code maps nicely onto DSP instruction sets (it's a + dot product) compared to the current sample-by-sample update. + + Steve also has some nice notes on echo cancellers in echo.h + + References: + + [1] Ochiai, Araseki, and Ogihara, "Echo Canceller with Two Echo + Path Models", IEEE Transactions on Communications, COM-25, + No. 6, June 1977. + http://www.rowetel.com/images/echo/dual_path_paper.pdf + + [2] The classic, very useful paper that tells you how to + actually build a real world echo canceller: + Messerschmitt, Hedberg, Cole, Haoui, Winship, "Digital Voice + Echo Canceller with a TMS320020", + http://www.rowetel.com/images/echo/spra129.pdf + + [3] I have written a series of blog posts on this work, here is + Part 1: http://www.rowetel.com/blog/?p=18 + + [4] The source code http://svn.rowetel.com/software/oslec/ + + [5] A nice reference on LMS filters: + http://en.wikipedia.org/wiki/Least_mean_squares_filter + + Credits: + + Thanks to Steve Underwood, Jean-Marc Valin, and Ramakrishnan + Muthukrishnan for their suggestions and email discussions. Thanks + also to those people who collected echo samples for me such as + Mark, Pawel, and Pavel.
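+
+   Typical usage from a driver, as a sketch (the flag combination and
+   tap count here are only an example; 256 taps is 32ms at the 8kHz
+   sample rate this canceller assumes, and oslec_create() returns
+   NULL on allocation failure):
+
+      struct oslec_state *ec;
+      int16_t clean;
+
+      ec = oslec_create(256, ECHO_CAN_USE_ADAPTION | ECHO_CAN_USE_NLP);
+      ...
+      tx = oslec_hpf_tx(ec, tx);        // DC block tx before the hybrid
+      clean = oslec_update(ec, tx, rx); // once per 16 bit sample pair
+      ...
+      oslec_free(ec);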
+*/ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/slab.h> + +#include "echo.h" + +#define MIN_TX_POWER_FOR_ADAPTION 64 +#define MIN_RX_POWER_FOR_ADAPTION 64 +#define DTD_HANGOVER 600 /* 600 samples, or 75ms */ +#define DC_LOG2BETA 3 /* log2() of DC filter Beta */ + +/* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */ + +#ifdef __bfin__ +static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift) +{ + int i; + int offset1; + int offset2; + int factor; + int exp; + int16_t *phist; + int n; + + if (shift > 0) + factor = clean << shift; + else + factor = clean >> -shift; + + /* Update the FIR taps */ + + offset2 = ec->curr_pos; + offset1 = ec->taps - offset2; + phist = &ec->fir_state_bg.history[offset2]; + + /* st: and en: help us locate the assembler in echo.s */ + + /* asm("st:"); */ + n = ec->taps; + for (i = 0; i < n; i++) { + exp = *phist++ * factor; + ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); + } + /* asm("en:"); */ + + /* Note the asm for the inner loop above generated by Blackfin gcc + 4.1.1 is pretty good (note even parallel instructions used): + + R0 = W [P0++] (X); + R0 *= R2; + R0 = R0 + R3 (NS) || + R1 = W [P1] (X) || + nop; + R0 >>>= 15; + R0 = R0 + R1; + W [P1++] = R0; + + A block based update algorithm would be much faster but the + above can't be improved on much. Every instruction saved in + the loop above is 2 MIPs/ch! The for loop above is where the + Blackfin spends most of it's time - about 17 MIPs/ch measured + with speedtest.c with 256 taps (32ms). Write-back and + Write-through cache gave about the same performance. + */ +} + +/* + IDEAS for further optimisation of lms_adapt_bg(): + + 1/ The rounding is quite costly. Could we keep as 32 bit coeffs + then make filter pluck the MS 16-bits of the coeffs when filtering? + However this would lower potential optimisation of filter, as I + think the dual-MAC architecture requires packed 16 bit coeffs. + + 2/ Block based update would be more efficient, as per comments above, + could use dual MAC architecture. + + 3/ Look for same sample Blackfin LMS code, see if we can get dual-MAC + packing. + + 4/ Execute the whole e/c in a block of say 20ms rather than sample + by sample. Processing a few samples every ms is inefficient. 
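+
+   (For reference, the portable C version of lms_adapt_bg() below is
+   the baseline being discussed: it splits the circular history buffer
+   into two linear runs so the inner loops need no modulo arithmetic -
+   each tap update is a single multiply, round and add.)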
+*/ + +#else +static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift) +{ + int i; + + int offset1; + int offset2; + int factor; + int exp; + + if (shift > 0) + factor = clean << shift; + else + factor = clean >> -shift; + + /* Update the FIR taps */ + + offset2 = ec->curr_pos; + offset1 = ec->taps - offset2; + + for (i = ec->taps - 1; i >= offset1; i--) { + exp = (ec->fir_state_bg.history[i - offset1] * factor); + ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); + } + for (; i >= 0; i--) { + exp = (ec->fir_state_bg.history[i + offset2] * factor); + ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); + } +} +#endif + +static inline int top_bit(unsigned int bits) +{ + if (bits == 0) + return -1; + else + return (int)fls((int32_t) bits) - 1; +} + +struct oslec_state *oslec_create(int len, int adaption_mode) +{ + struct oslec_state *ec; + int i; + const int16_t *history; + + ec = kzalloc(sizeof(*ec), GFP_KERNEL); + if (!ec) + return NULL; + + ec->taps = len; + ec->log2taps = top_bit(len); + ec->curr_pos = ec->taps - 1; + + ec->fir_taps16[0] = + kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL); + if (!ec->fir_taps16[0]) + goto error_oom_0; + + ec->fir_taps16[1] = + kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL); + if (!ec->fir_taps16[1]) + goto error_oom_1; + + history = fir16_create(&ec->fir_state, ec->fir_taps16[0], ec->taps); + if (!history) + goto error_state; + history = fir16_create(&ec->fir_state_bg, ec->fir_taps16[1], ec->taps); + if (!history) + goto error_state_bg; + + for (i = 0; i < 5; i++) + ec->xvtx[i] = ec->yvtx[i] = ec->xvrx[i] = ec->yvrx[i] = 0; + + ec->cng_level = 1000; + oslec_adaption_mode(ec, adaption_mode); + + ec->snapshot = kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL); + if (!ec->snapshot) + goto error_snap; + + ec->cond_met = 0; + ec->pstates = 0; + ec->ltxacc = ec->lrxacc = ec->lcleanacc = ec->lclean_bgacc = 0; + ec->ltx = ec->lrx = ec->lclean = ec->lclean_bg = 0; + ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0; + ec->lbgn = ec->lbgn_acc = 0; + ec->lbgn_upper = 200; + ec->lbgn_upper_acc = ec->lbgn_upper << 13; + + return ec; + +error_snap: + fir16_free(&ec->fir_state_bg); +error_state_bg: + fir16_free(&ec->fir_state); +error_state: + kfree(ec->fir_taps16[1]); +error_oom_1: + kfree(ec->fir_taps16[0]); +error_oom_0: + kfree(ec); + return NULL; +} +EXPORT_SYMBOL_GPL(oslec_create); + +void oslec_free(struct oslec_state *ec) +{ + int i; + + fir16_free(&ec->fir_state); + fir16_free(&ec->fir_state_bg); + for (i = 0; i < 2; i++) + kfree(ec->fir_taps16[i]); + kfree(ec->snapshot); + kfree(ec); +} +EXPORT_SYMBOL_GPL(oslec_free); + +void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode) +{ + ec->adaption_mode = adaption_mode; +} +EXPORT_SYMBOL_GPL(oslec_adaption_mode); + +void oslec_flush(struct oslec_state *ec) +{ + int i; + + ec->ltxacc = ec->lrxacc = ec->lcleanacc = ec->lclean_bgacc = 0; + ec->ltx = ec->lrx = ec->lclean = ec->lclean_bg = 0; + ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0; + + ec->lbgn = ec->lbgn_acc = 0; + ec->lbgn_upper = 200; + ec->lbgn_upper_acc = ec->lbgn_upper << 13; + + ec->nonupdate_dwell = 0; + + fir16_flush(&ec->fir_state); + fir16_flush(&ec->fir_state_bg); + ec->fir_state.curr_pos = ec->taps - 1; + ec->fir_state_bg.curr_pos = ec->taps - 1; + for (i = 0; i < 2; i++) + memset(ec->fir_taps16[i], 0, ec->taps * sizeof(int16_t)); + + ec->curr_pos = ec->taps - 1; + ec->pstates = 0; +} +EXPORT_SYMBOL_GPL(oslec_flush); + +void oslec_snapshot(struct oslec_state *ec) +{ + memcpy(ec->snapshot, 
ec->fir_taps16[0], ec->taps * sizeof(int16_t)); +} +EXPORT_SYMBOL_GPL(oslec_snapshot); + +/* Dual Path Echo Canceller */ + +int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx) +{ + int32_t echo_value; + int clean_bg; + int tmp; + int tmp1; + + /* + * Input scaling was found be required to prevent problems when tx + * starts clipping. Another possible way to handle this would be the + * filter coefficent scaling. + */ + + ec->tx = tx; + ec->rx = rx; + tx >>= 1; + rx >>= 1; + + /* + * Filter DC, 3dB point is 160Hz (I think), note 32 bit precision + * required otherwise values do not track down to 0. Zero at DC, Pole + * at (1-Beta) on real axis. Some chip sets (like Si labs) don't + * need this, but something like a $10 X100P card does. Any DC really + * slows down convergence. + * + * Note: removes some low frequency from the signal, this reduces the + * speech quality when listening to samples through headphones but may + * not be obvious through a telephone handset. + * + * Note that the 3dB frequency in radians is approx Beta, e.g. for Beta + * = 2^(-3) = 0.125, 3dB freq is 0.125 rads = 159Hz. + */ + + if (ec->adaption_mode & ECHO_CAN_USE_RX_HPF) { + tmp = rx << 15; + + /* + * Make sure the gain of the HPF is 1.0. This can still + * saturate a little under impulse conditions, and it might + * roll to 32768 and need clipping on sustained peak level + * signals. However, the scale of such clipping is small, and + * the error due to any saturation should not markedly affect + * the downstream processing. + */ + tmp -= (tmp >> 4); + + ec->rx_1 += -(ec->rx_1 >> DC_LOG2BETA) + tmp - ec->rx_2; + + /* + * hard limit filter to prevent clipping. Note that at this + * stage rx should be limited to +/- 16383 due to right shift + * above + */ + tmp1 = ec->rx_1 >> 15; + if (tmp1 > 16383) + tmp1 = 16383; + if (tmp1 < -16383) + tmp1 = -16383; + rx = tmp1; + ec->rx_2 = tmp; + } + + /* Block average of power in the filter states. Used for + adaption power calculation. */ + + { + int new, old; + + /* efficient "out with the old and in with the new" algorithm so + we don't have to recalculate over the whole block of + samples. */ + new = (int)tx * (int)tx; + old = (int)ec->fir_state.history[ec->fir_state.curr_pos] * + (int)ec->fir_state.history[ec->fir_state.curr_pos]; + ec->pstates += + ((new - old) + (1 << (ec->log2taps - 1))) >> ec->log2taps; + if (ec->pstates < 0) + ec->pstates = 0; + } + + /* Calculate short term average levels using simple single pole IIRs */ + + ec->ltxacc += abs(tx) - ec->ltx; + ec->ltx = (ec->ltxacc + (1 << 4)) >> 5; + ec->lrxacc += abs(rx) - ec->lrx; + ec->lrx = (ec->lrxacc + (1 << 4)) >> 5; + + /* Foreground filter */ + + ec->fir_state.coeffs = ec->fir_taps16[0]; + echo_value = fir16(&ec->fir_state, tx); + ec->clean = rx - echo_value; + ec->lcleanacc += abs(ec->clean) - ec->lclean; + ec->lclean = (ec->lcleanacc + (1 << 4)) >> 5; + + /* Background filter */ + + echo_value = fir16(&ec->fir_state_bg, tx); + clean_bg = rx - echo_value; + ec->lclean_bgacc += abs(clean_bg) - ec->lclean_bg; + ec->lclean_bg = (ec->lclean_bgacc + (1 << 4)) >> 5; + + /* Background Filter adaption */ + + /* Almost always adap bg filter, just simple DT and energy + detection to minimise adaption in cases of strong double talk. + However this is not critical for the dual path algorithm. + */ + ec->factor = 0; + ec->shift = 0; + if ((ec->nonupdate_dwell == 0)) { + int p, logp, shift; + + /* Determine: + + f = Beta * clean_bg_rx/P ------ (1) + + where P is the total power in the filter states. 
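+
+	   As a worked example of (1) and the fixed point scaling derived
+	   below: with 256 taps (log2taps = 8) and pstates = 4096,
+	   top_bit(p) = 12, so P is roughly 2^20 and
+	   shift = 30 - 2 - (12 + 8) = 8; the background taps are then
+	   updated with factor = clean_bg_rx << 8, which is exactly
+	   (2^30) * 0.25 * clean_bg_rx / P.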
+
+		   The Boffins have shown that if we obey (1) we converge
+		   quickly and avoid instability.
+
+		   The correct factor f must be in Q30, as this is the fixed
+		   point format required by the lms_adapt_bg() function,
+		   therefore the scaled version of (1) is:
+
+		   (2^30) * f = (2^30) * Beta * clean_bg_rx/P
+		   factor     = (2^30) * Beta * clean_bg_rx/P        ----- (2)
+
+		   We have chosen Beta = 0.25 by experiment, so:
+
+		   factor = (2^30) * (2^-2) * clean_bg_rx/P
+		   factor = clean_bg_rx * 2^(30 - 2 - log2(P))       ----- (3)
+
+		   To avoid a divide we approximate log2(P) as top_bit(P),
+		   which returns the position of the highest non-zero bit in
+		   P.  This approximation introduces an error as large as a
+		   factor of 2, but the algorithm seems to handle it OK.
+
+		   Come to think of it, a divide may not be a big deal on a
+		   modern DSP, so it's probably worth checking out the cycles
+		   for a divide versus a top_bit() implementation.
+		 */
+
+		p = MIN_TX_POWER_FOR_ADAPTION + ec->pstates;
+		logp = top_bit(p) + ec->log2taps;
+		shift = 30 - 2 - logp;
+		ec->shift = shift;
+
+		lms_adapt_bg(ec, clean_bg, shift);
+	}
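[Editor's note: to make the top_bit() shift derivation above concrete, here is a minimal stand-alone user-space sketch. It is not driver code; top_bit() is reimplemented here for illustration, and the power and tap values are made-up examples.]

	#include <stdio.h>

	static int top_bit(unsigned int p)
	{
		int b = -1;

		while (p) {		/* position of highest non-zero bit */
			b++;
			p >>= 1;
		}
		return b;
	}

	int main(void)
	{
		unsigned int p = 5000;	/* example: filter-state power + floor */
		int log2taps = 8;	/* a 256-tap canceller */
		int shift = 30 - 2 - (top_bit(p) + log2taps);

		/* factor = clean_bg_rx << shift then approximates
		 * (2^30) * 0.25 * clean_bg_rx / (p << log2taps), i.e. (3) */
		printf("top_bit(%u) = %d, shift = %d\n", p, top_bit(p), shift);
		return 0;
	}
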
+
+	/* Very simple DTD to make sure we don't try to adapt with strong
+	   near end speech */
+
+	ec->adapt = 0;
+	if ((ec->lrx > MIN_RX_POWER_FOR_ADAPTION) && (ec->lrx > ec->ltx))
+		ec->nonupdate_dwell = DTD_HANGOVER;
+	if (ec->nonupdate_dwell)
+		ec->nonupdate_dwell--;
+
+	/* Transfer logic */
+
+	/* These conditions are from the dual path paper [1], I messed with
+	   them a bit to improve performance. */
+
+	if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) &&
+	    (ec->nonupdate_dwell == 0) &&
+	    /* (ec->Lclean_bg < 0.875*ec->Lclean) */
+	    (8 * ec->lclean_bg < 7 * ec->lclean) &&
+	    /* (ec->Lclean_bg < 0.125*ec->Ltx) */
+	    (8 * ec->lclean_bg < ec->ltx)) {
+		if (ec->cond_met == 6) {
+			/*
+			 * BG filter has had better results for 6 consecutive
+			 * samples
+			 */
+			ec->adapt = 1;
+			memcpy(ec->fir_taps16[0], ec->fir_taps16[1],
+			       ec->taps * sizeof(int16_t));
+		} else
+			ec->cond_met++;
+	} else
+		ec->cond_met = 0;
+
+	/* Non-Linear Processing */
+
+	ec->clean_nlp = ec->clean;
+	if (ec->adaption_mode & ECHO_CAN_USE_NLP) {
+		/*
+		 * Non-linear processor - a fancy way to say "zap small
+		 * signals, to avoid residual echo due to (uLaw/ALaw)
+		 * non-linearity in the channel".
+		 */
+
+		if (16 * ec->lclean < ec->ltx) {
+			/*
+			 * Our e/c has improved echo by at least 24 dB (each
+			 * factor of 2 is 6dB, so 2*2*2*2=16 is the same as
+			 * 6+6+6+6=24dB)
+			 */
+			if (ec->adaption_mode & ECHO_CAN_USE_CNG) {
+				ec->cng_level = ec->lbgn;
+
+				/*
+				 * Very elementary comfort noise generation.
+				 * Just random numbers rolled off very vaguely
+				 * Hoth-like.  DR: This noise doesn't sound
+				 * quite right to me - I suspect there are some
+				 * overflow issues in the filtering as it's too
+				 * "crackly".
+				 * TODO: debug this, maybe just play noise at
+				 * high level or look at spectrum.
+				 */
+
+				ec->cng_rndnum =
+				    1664525U * ec->cng_rndnum + 1013904223U;
+				ec->cng_filter =
+				    ((ec->cng_rndnum & 0xFFFF) - 32768 +
+				     5 * ec->cng_filter) >> 3;
+				ec->clean_nlp =
+				    (ec->cng_filter * ec->cng_level * 8) >> 14;
+
+			} else if (ec->adaption_mode & ECHO_CAN_USE_CLIP) {
+				/* This sounds much better than CNG */
+				if (ec->clean_nlp > ec->lbgn)
+					ec->clean_nlp = ec->lbgn;
+				if (ec->clean_nlp < -ec->lbgn)
+					ec->clean_nlp = -ec->lbgn;
+			} else {
+				/*
+				 * Just mute the residual; doesn't sound very
+				 * good, used mainly in G168 tests.
+				 */
+				ec->clean_nlp = 0;
+			}
+		} else {
+			/*
+			 * Background noise estimator.  I tried a few
+			 * algorithms here without much luck.  This very simple
+			 * one seems to work best: we just average the level
+			 * using a slow (1 sec time const) filter if the
+			 * current level is less than an (experimentally
+			 * derived) constant.  This means we don't include high
+			 * level signals like near end speech.  When combined
+			 * with CNG or especially CLIP it seems to work OK.
+			 */
+			if (ec->lclean < 40) {
+				ec->lbgn_acc += abs(ec->clean) - ec->lbgn;
+				ec->lbgn = (ec->lbgn_acc + (1 << 11)) >> 12;
+			}
+		}
+	}
+
+	/* Roll around the taps buffer */
+	if (ec->curr_pos <= 0)
+		ec->curr_pos = ec->taps;
+	ec->curr_pos--;
+
+	if (ec->adaption_mode & ECHO_CAN_DISABLE)
+		ec->clean_nlp = rx;
+
+	/* Output scaled back up again to match input scaling */
+
+	return (int16_t) ec->clean_nlp << 1;
+}
+EXPORT_SYMBOL_GPL(oslec_update);
+
+/* This function is separated from the echo canceller as it is usually called
+   as part of the tx process.  See the rx HP (DC blocking) filter above; it's
+   the same design.
+
+   Some soft phones send speech signals with a lot of low frequency
+   energy, e.g. down to 20Hz.  This can make the hybrid non-linear
+   which causes the echo canceller to fall over.  This filter can help
+   by removing any low frequency before it gets to the tx port of the
+   hybrid.
+
+   It can also help by removing any DC in the tx signal.  DC is bad
+   for LMS algorithms.
+
+   This is one of the classic DC removal filters, adjusted to provide
+   sufficient bass rolloff to meet the above requirement to protect hybrids
+   from things that upset them.  The difference between successive samples
+   produces a lousy HPF, and then a suitably placed pole flattens things out.
+   The final result is a nicely rolled off bass end.  The filtering is
+   implemented with extended fractional precision, which noise shapes things,
+   giving very clean DC removal.
+*/
+
+int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx)
+{
+	int tmp;
+	int tmp1;
+
+	if (ec->adaption_mode & ECHO_CAN_USE_TX_HPF) {
+		tmp = tx << 15;
+
+		/*
+		 * Make sure the gain of the HPF is 1.0.  This can still
+		 * saturate a little under impulse conditions, and it might
+		 * roll to 32768 and need clipping on sustained peak level
+		 * signals.  However, the scale of such clipping is small, and
+		 * the error due to any saturation should not markedly affect
+		 * the downstream processing.
+		 */
+		tmp -= (tmp >> 4);
+
+		ec->tx_1 += -(ec->tx_1 >> DC_LOG2BETA) + tmp - ec->tx_2;
+		tmp1 = ec->tx_1 >> 15;
+		if (tmp1 > 32767)
+			tmp1 = 32767;
+		if (tmp1 < -32767)
+			tmp1 = -32767;
+		tx = tmp1;
+		ec->tx_2 = tmp;
+	}
+
+	return tx;
+}
+EXPORT_SYMBOL_GPL(oslec_hpf_tx);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("David Rowe");
+MODULE_DESCRIPTION("Open Source Line Echo Canceller");
+MODULE_VERSION("0.3.0");
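[Editor's note: the DC blocker used by oslec_hpf_tx() above is easier to see in floating point. This stand-alone sketch models the same structure, y[n] = x[n] - x[n-1] + (1 - 2^-DC_LOG2BETA) * y[n-1], and shows a DC step decaying towards zero; the driver does the same thing in fixed point, with the extra fractional bits providing the noise shaping the comment mentions. The step value is an arbitrary example.]

	#include <stdio.h>

	int main(void)
	{
		const double a = 1.0 - 1.0 / 8.0;	/* pole at 1 - 2^-3 */
		double x1 = 0.0, y1 = 0.0;
		int n;

		for (n = 0; n < 8; n++) {
			double x = 1000.0;		/* DC step input */
			double y = x - x1 + a * y1;	/* diff zero + pole */

			x1 = x;
			y1 = y;
			printf("%d: %.1f\n", n, y);	/* 1000, 875, 765.6, ... */
		}
		return 0;
	}
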
diff --git a/kernel/drivers/misc/echo/echo.h b/kernel/drivers/misc/echo/echo.h
new file mode 100644
index 000000000..9b08c63e6
--- /dev/null
+++ b/kernel/drivers/misc/echo/echo.h
@@ -0,0 +1,187 @@
+/*
+ * SpanDSP - a series of DSP components for telephony
+ *
+ * echo.c - A line echo canceller.  This code is being developed
+ *          against and partially complies with G168.
+ *
+ * Written by Steve Underwood <steveu@coppice.org>
+ *          and David Rowe <david_at_rowetel_dot_com>
+ *
+ * Copyright (C) 2001 Steve Underwood and 2007 David Rowe
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __ECHO_H
+#define __ECHO_H
+
+/*
+Line echo cancellation for voice
+
+What does it do?
+
+This module aims to provide G.168-2002 compliant echo cancellation, to remove
+electrical echoes (e.g. from 2-4 wire hybrids) from voice calls.
+
+How does it work?
+
+The heart of the echo canceller is a FIR filter.  This is adapted to match the
+echo impulse response of the telephone line.  It must be long enough to
+adequately cover the duration of that impulse response.  The signal transmitted
+to the telephone line is passed through the FIR filter.  Once the FIR is
+properly adapted, the resulting output is an estimate of the echo signal
+received from the line.  This is subtracted from the received signal.  The result
+is an estimate of the signal which originated at the far end of the line, free
+from echoes of our own transmitted signal.
+
+The least mean squares (LMS) algorithm is attributed to Widrow and Hoff, and
+was introduced in 1960.  It is the commonest form of filter adaption used in
+things like modem line equalisers and line echo cancellers.  There it works very
+well.  However, it only works well for signals of constant amplitude.  It works
+very poorly for things like speech echo cancellation, where the signal level
+varies widely.  This is quite easy to fix.  If the signal level is normalised -
+similar to applying AGC - LMS can work as well for a signal of varying
+amplitude as it does for a modem signal.  This normalised least mean squares
+(NLMS) algorithm is the commonest one used for speech echo cancellation.  Many
+other algorithms exist - e.g. RLS (essentially the same as Kalman filtering),
+FAP, etc.  Some perform significantly better than NLMS.  However, factors such
+as computational complexity and patents favour the use of NLMS.
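[Editor's note: the normalisation step that turns LMS into NLMS amounts to dividing the tap update by the recent input power. A minimal floating-point sketch for illustration only; the driver's lms_adapt_bg() is a fixed-point variant of the same idea.]

	/* err is the residual (rx minus echo estimate), x is the recent tx
	 * history, mu is the adaption step size. */
	static void nlms_adapt(float *taps, const float *x, int ntaps,
			       float err, float mu)
	{
		float power = 1e-6f;	/* eps: avoids divide-by-zero on silence */
		int i;

		for (i = 0; i < ntaps; i++)
			power += x[i] * x[i];

		/* plain LMS would add mu * err * x[i]; NLMS divides by the
		 * input power so the step tracks the signal level */
		for (i = 0; i < ntaps; i++)
			taps[i] += (mu * err * x[i]) / power;
	}
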
+
+A simple refinement to NLMS can improve its performance with speech.  NLMS tends
+to adapt best to the strongest parts of a signal.  If the signal is white noise,
+the NLMS algorithm works very well.  However, speech has more low frequency than
+high frequency content.  Pre-whitening (i.e. filtering the signal to flatten its
+spectrum) the echo signal improves the adapt rate for speech, and ensures the
+final residual signal is not heavily biased towards high frequencies.  A very
+low complexity filter is adequate for this, so pre-whitening adds little to the
+compute requirements of the echo canceller.
+
+An FIR filter adapted using pre-whitened NLMS performs well, provided certain
+conditions are met:
+
+    - The transmitted signal has poor self-correlation.
+    - There is no signal being generated within the environment being
+      cancelled.
+
+The difficulty is that neither of these can be guaranteed.
+
+If the adaption is performed while transmitting noise (or something fairly
+noise-like, such as voice) the adaption works very well.  If the adaption is
+performed while transmitting something highly correlative (typically narrow
+band energy such as signalling tones or DTMF), the adaption can go seriously
+wrong.  The reason is there is only one solution for the adaption on a near
+random signal - the impulse response of the line.  For a repetitive signal,
+there are any number of solutions which converge the adaption, and nothing
+guides the adaption to choose the generalised one.  Allowing an untrained
+canceller to converge on this kind of narrowband energy is probably a good
+thing, since at least it cancels the tones.  Allowing a well converged
+canceller to continue converging on such energy is just a way to ruin its
+generalised adaption.  A narrowband detector is needed, so adaption can be
+suspended at appropriate times.
+
+The adaption process is based on trying to eliminate the received signal.  When
+there is any signal from within the environment being cancelled it may upset
+the adaption process.  Similarly, if the signal we are transmitting is small,
+noise may dominate and disturb the adaption process.  If we can ensure that the
+adaption is only performed when we are transmitting a significant signal level,
+and the environment is not, things will be OK.  Clearly, it is easy to tell when
+we are sending a significant signal.  Telling if the environment is generating
+a significant signal, and doing so quickly enough that the adaption has not
+diverged too much before we stop it, is a little harder.
+
+The key problem in detecting when the environment is sourcing significant
+energy is that we must do this very quickly.  Given a reasonably long sample of
+the received signal, there are a number of strategies which may be used to
+assess whether that signal contains a strong far end component.  However, by the
+time that assessment is complete the far end signal will have already caused
+major mis-convergence in the adaption process.  An assessment algorithm is
+needed which produces a fairly accurate result from a very short burst of far
+end energy.
+
+How do I use it?
+
+The echo canceller processes both the transmit and receive streams sample by
+sample.  The processing function is not declared inline.  Unfortunately,
+cancellation requires many operations per sample, so the call overhead is only
+a minor burden.
+*/
+
+#include "fir.h"
+#include "oslec.h"
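[Editor's note: a sketch of the per-sample flow described under "How do I use it?", written as a hypothetical caller. The chan_* names and the 128-tap choice are invented for illustration; 128 taps covers a 16 ms echo tail at 8 kHz. The API and flag names come from oslec.h, added later in this patch.]

	#include <linux/errno.h>
	#include <linux/types.h>
	#include "oslec.h"

	static struct oslec_state *ec;

	static int chan_setup(void)
	{
		ec = oslec_create(128, ECHO_CAN_USE_ADAPTION |
				  ECHO_CAN_USE_NLP | ECHO_CAN_USE_CLIP |
				  ECHO_CAN_USE_TX_HPF | ECHO_CAN_USE_RX_HPF);
		return ec ? 0 : -ENOMEM;
	}

	static void chan_process(int16_t tx, int16_t rx,
				 int16_t *clean_rx, int16_t *clean_tx)
	{
		*clean_rx = oslec_update(ec, tx, rx);	/* echo-cancelled rx */
		*clean_tx = oslec_hpf_tx(ec, tx);	/* DC-blocked tx */
	}

	static void chan_teardown(void)
	{
		oslec_free(ec);
	}
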
+
+/*
+   G.168 echo canceller descriptor.  This defines the working state for a
+   line echo canceller.
+*/
+struct oslec_state {
+	int16_t tx;
+	int16_t rx;
+	int16_t clean;
+	int16_t clean_nlp;
+
+	int nonupdate_dwell;
+	int curr_pos;
+	int taps;
+	int log2taps;
+	int adaption_mode;
+
+	int cond_met;
+	int32_t pstates;
+	int16_t adapt;
+	int32_t factor;
+	int16_t shift;
+
+	/* Average levels and averaging filter states */
+	int ltxacc;
+	int lrxacc;
+	int lcleanacc;
+	int lclean_bgacc;
+	int ltx;
+	int lrx;
+	int lclean;
+	int lclean_bg;
+	int lbgn;
+	int lbgn_acc;
+	int lbgn_upper;
+	int lbgn_upper_acc;
+
+	/* foreground and background filter states */
+	struct fir16_state_t fir_state;
+	struct fir16_state_t fir_state_bg;
+	int16_t *fir_taps16[2];
+
+	/* DC blocking filter states */
+	int tx_1;
+	int tx_2;
+	int rx_1;
+	int rx_2;
+
+	/* optional High Pass Filter states */
+	int32_t xvtx[5];
+	int32_t yvtx[5];
+	int32_t xvrx[5];
+	int32_t yvrx[5];
+
+	/* Parameters for the optional Hoth noise generator */
+	int cng_level;
+	int cng_rndnum;
+	int cng_filter;
+
+	/* snapshot sample of coeffs used for development */
+	int16_t *snapshot;
+};
+
+#endif /* __ECHO_H */
diff --git a/kernel/drivers/misc/echo/fir.h b/kernel/drivers/misc/echo/fir.h
new file mode 100644
index 000000000..7b9fabf1f
--- /dev/null
+++ b/kernel/drivers/misc/echo/fir.h
@@ -0,0 +1,216 @@
+/*
+ * SpanDSP - a series of DSP components for telephony
+ *
+ * fir.h - General telephony FIR routines
+ *
+ * Written by Steve Underwood <steveu@coppice.org>
+ *
+ * Copyright (C) 2002 Steve Underwood
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#if !defined(_FIR_H_)
+#define _FIR_H_
+
+/*
+   Blackfin NOTES & IDEAS:
+
+   A simple dot product function is used to implement the filter.  This performs
+   just one MAC/cycle which is inefficient but was easy to implement as a first
+   pass.  The current Blackfin code also uses an unrolled form of the filter
+   history to avoid 0 length hardware loop issues.  This is wasteful of
+   memory.
+
+   Ideas for improvement:
+
+   1/ Rewrite filter for dual MAC inner loop.  The issue here is handling
+   history sample offsets that are 16 bit aligned - the dual MAC needs
+   32 bit alignment.  There are some good examples in libbfdsp.
+
+   2/ Use the hardware circular buffer facility to halve memory usage.
+
+   3/ Consider using internal memory.
+
+   Using less memory might also improve speed as cache misses will be
+   reduced. A drop in MIPs and memory approaching 50% should be
+   possible.
+
+   The foreground and background filters currently use a total of
+   about 10 MIPs/ch as measured with speedtest.c on a 256 TAP echo
+   can.
+*/
+
+/*
+ * 16 bit integer FIR descriptor.  This defines the working state for a single
+ * instance of an FIR filter using 16 bit integer coefficients.
+ */
+struct fir16_state_t {
+	int taps;
+	int curr_pos;
+	const int16_t *coeffs;
+	int16_t *history;
+};
+
+/*
+ * 32 bit integer FIR descriptor.
This defines the working state for a single + * instance of an FIR filter using 32 bit integer coefficients, and filtering + * 16 bit integer data. + */ +struct fir32_state_t { + int taps; + int curr_pos; + const int32_t *coeffs; + int16_t *history; +}; + +/* + * Floating point FIR descriptor. This defines the working state for a single + * instance of an FIR filter using floating point coefficients and data. + */ +struct fir_float_state_t { + int taps; + int curr_pos; + const float *coeffs; + float *history; +}; + +static inline const int16_t *fir16_create(struct fir16_state_t *fir, + const int16_t *coeffs, int taps) +{ + fir->taps = taps; + fir->curr_pos = taps - 1; + fir->coeffs = coeffs; +#if defined(__bfin__) + fir->history = kcalloc(2 * taps, sizeof(int16_t), GFP_KERNEL); +#else + fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL); +#endif + return fir->history; +} + +static inline void fir16_flush(struct fir16_state_t *fir) +{ +#if defined(__bfin__) + memset(fir->history, 0, 2 * fir->taps * sizeof(int16_t)); +#else + memset(fir->history, 0, fir->taps * sizeof(int16_t)); +#endif +} + +static inline void fir16_free(struct fir16_state_t *fir) +{ + kfree(fir->history); +} + +#ifdef __bfin__ +static inline int32_t dot_asm(short *x, short *y, int len) +{ + int dot; + + len--; + + __asm__("I0 = %1;\n\t" + "I1 = %2;\n\t" + "A0 = 0;\n\t" + "R0.L = W[I0++] || R1.L = W[I1++];\n\t" + "LOOP dot%= LC0 = %3;\n\t" + "LOOP_BEGIN dot%=;\n\t" + "A0 += R0.L * R1.L (IS) || R0.L = W[I0++] || R1.L = W[I1++];\n\t" + "LOOP_END dot%=;\n\t" + "A0 += R0.L*R1.L (IS);\n\t" + "R0 = A0;\n\t" + "%0 = R0;\n\t" + : "=&d"(dot) + : "a"(x), "a"(y), "a"(len) + : "I0", "I1", "A1", "A0", "R0", "R1" + ); + + return dot; +} +#endif + +static inline int16_t fir16(struct fir16_state_t *fir, int16_t sample) +{ + int32_t y; +#if defined(__bfin__) + fir->history[fir->curr_pos] = sample; + fir->history[fir->curr_pos + fir->taps] = sample; + y = dot_asm((int16_t *) fir->coeffs, &fir->history[fir->curr_pos], + fir->taps); +#else + int i; + int offset1; + int offset2; + + fir->history[fir->curr_pos] = sample; + + offset2 = fir->curr_pos; + offset1 = fir->taps - offset2; + y = 0; + for (i = fir->taps - 1; i >= offset1; i--) + y += fir->coeffs[i] * fir->history[i - offset1]; + for (; i >= 0; i--) + y += fir->coeffs[i] * fir->history[i + offset2]; +#endif + if (fir->curr_pos <= 0) + fir->curr_pos = fir->taps; + fir->curr_pos--; + return (int16_t) (y >> 15); +} + +static inline const int16_t *fir32_create(struct fir32_state_t *fir, + const int32_t *coeffs, int taps) +{ + fir->taps = taps; + fir->curr_pos = taps - 1; + fir->coeffs = coeffs; + fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL); + return fir->history; +} + +static inline void fir32_flush(struct fir32_state_t *fir) +{ + memset(fir->history, 0, fir->taps * sizeof(int16_t)); +} + +static inline void fir32_free(struct fir32_state_t *fir) +{ + kfree(fir->history); +} + +static inline int16_t fir32(struct fir32_state_t *fir, int16_t sample) +{ + int i; + int32_t y; + int offset1; + int offset2; + + fir->history[fir->curr_pos] = sample; + offset2 = fir->curr_pos; + offset1 = fir->taps - offset2; + y = 0; + for (i = fir->taps - 1; i >= offset1; i--) + y += fir->coeffs[i] * fir->history[i - offset1]; + for (; i >= 0; i--) + y += fir->coeffs[i] * fir->history[i + offset2]; + if (fir->curr_pos <= 0) + fir->curr_pos = fir->taps; + fir->curr_pos--; + return (int16_t) (y >> 15); +} + +#endif diff --git a/kernel/drivers/misc/echo/oslec.h b/kernel/drivers/misc/echo/oslec.h 
new file mode 100644
index 000000000..f4175360c
--- /dev/null
+++ b/kernel/drivers/misc/echo/oslec.h
@@ -0,0 +1,94 @@
+/*
+ *  OSLEC - A line echo canceller.  This code is being developed
+ *          against and partially complies with G168.  Using code from SpanDSP
+ *
+ * Written by Steve Underwood <steveu@coppice.org>
+ *          and David Rowe <david_at_rowetel_dot_com>
+ *
+ * Copyright (C) 2001 Steve Underwood and 2007-2008 David Rowe
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __OSLEC_H
+#define __OSLEC_H
+
+/* Mask bits for the adaption mode */
+#define ECHO_CAN_USE_ADAPTION	0x01
+#define ECHO_CAN_USE_NLP	0x02
+#define ECHO_CAN_USE_CNG	0x04
+#define ECHO_CAN_USE_CLIP	0x08
+#define ECHO_CAN_USE_TX_HPF	0x10
+#define ECHO_CAN_USE_RX_HPF	0x20
+#define ECHO_CAN_DISABLE	0x40
+
+/**
+ * oslec_state: G.168 echo canceller descriptor.
+ *
+ * This defines the working state for a line echo canceller.
+ */
+struct oslec_state;
+
+/**
+ * oslec_create - Create a voice echo canceller context.
+ * @len: The length of the canceller, in samples.
+ * @adaption_mode: The adaption mode mask (ECHO_CAN_* bits above).
+ * @return: The new canceller context, or NULL if the canceller could not be
+ * created.
+ */
+struct oslec_state *oslec_create(int len, int adaption_mode);
+
+/**
+ * oslec_free - Free a voice echo canceller context.
+ * @ec: The echo canceller context.
+ */
+void oslec_free(struct oslec_state *ec);
+
+/**
+ * oslec_flush - Flush (reinitialise) a voice echo canceller context.
+ * @ec: The echo canceller context.
+ */
+void oslec_flush(struct oslec_state *ec);
+
+/**
+ * oslec_adaption_mode - Set the adaption mode of a voice echo canceller
+ * context.
+ * @ec: The echo canceller context.
+ * @adaption_mode: The mode.
+ */
+void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode);
+
+void oslec_snapshot(struct oslec_state *ec);
+
+/**
+ * oslec_update: Process a sample through a voice echo canceller.
+ * @ec: The echo canceller context.
+ * @tx: The transmitted audio sample.
+ * @rx: The received audio sample.
+ *
+ * The return value is the clean (echo cancelled) received sample.
+ */
+int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx);
+
+/**
+ * oslec_hpf_tx: High pass filter the tx signal.
+ * @ec: The echo canceller context.
+ * @tx: The transmitted audio sample.
+ *
+ * The return value is the HP filtered transmit sample; send this to your D/A.
+ */
+int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx);
+
+#endif /* __OSLEC_H */
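[Editor's note: the adaption-mode mask bits above can also be changed on a live context via oslec_adaption_mode(). A one-line illustration, assuming an already created ec as in the usage sketch earlier; this keeps cancellation running but switches the NLP from comfort noise to clipping, as discussed in echo.c above.]

	oslec_adaption_mode(ec, ECHO_CAN_USE_ADAPTION | ECHO_CAN_USE_NLP |
				ECHO_CAN_USE_CLIP);
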
diff --git a/kernel/drivers/misc/eeprom/Kconfig b/kernel/drivers/misc/eeprom/Kconfig
new file mode 100644
index 000000000..9536852fd
--- /dev/null
+++ b/kernel/drivers/misc/eeprom/Kconfig
@@ -0,0 +1,112 @@
+menu "EEPROM support"
+
+config EEPROM_AT24
+	tristate "I2C EEPROMs / RAMs / ROMs from most vendors"
+	depends on I2C && SYSFS
+	help
+	  Enable this driver to get read/write support to most I2C EEPROMs
+	  and compatible devices like FRAMs, SRAMs, ROMs etc.  After you
+	  configure the driver to know about each chip on your target
+	  board, use these generic chip names instead of vendor-specific
+	  ones like at24c64, 24lc02 or fm24c04:
+
+	     24c00, 24c01, 24c02, spd (readonly 24c02), 24c04, 24c08,
+	     24c16, 24c32, 24c64, 24c128, 24c256, 24c512, 24c1024
+
+	  Unless you like data loss puzzles, always be sure that any chip
+	  you configure as a 24c32 (32 kbit) or larger is NOT really a
+	  24c16 (16 kbit) or smaller, and vice versa.  Marking the chip
+	  as read-only won't help recover from this.  Also, if your chip
+	  has any software write-protect mechanism you may want to review the
+	  code to make sure this driver won't turn it on by accident.
+
+	  If you use this with an SMBus adapter instead of an I2C adapter,
+	  full functionality is not available.  Only smaller devices are
+	  supported (24c16 and below, max 4 kByte).
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called at24.
+
+config EEPROM_AT25
+	tristate "SPI EEPROMs from most vendors"
+	depends on SPI && SYSFS
+	help
+	  Enable this driver to get read/write support to most SPI EEPROMs,
+	  after you configure the board init code to know about each eeprom
+	  on your target board.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called at25.
+
+config EEPROM_LEGACY
+	tristate "Old I2C EEPROM reader"
+	depends on I2C && SYSFS
+	help
+	  If you say yes here you get read-only access to the EEPROM data
+	  available on modern memory DIMMs and Sony Vaio laptops via I2C. Such
+	  EEPROMs could theoretically be available on other devices as well.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called eeprom.
+
+config EEPROM_MAX6875
+	tristate "Maxim MAX6874/5 power supply supervisor"
+	depends on I2C
+	help
+	  If you say yes here you get read-only support for the user EEPROM of
+	  the Maxim MAX6874/5 EEPROM-programmable, quad power-supply
+	  sequencer/supervisor.
+
+	  All other features of this chip should be accessed via i2c-dev.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called max6875.
+
+
+config EEPROM_93CX6
+	tristate "EEPROM 93CX6 support"
+	help
+	  This is a driver for the EEPROM chipsets 93c46 and 93c66.
+	  The driver supports both read as well as write commands.
+
+	  If unsure, say N.
+
+config EEPROM_93XX46
+	tristate "Microwire EEPROM 93XX46 support"
+	depends on SPI && SYSFS
+	help
+	  Driver for the microwire EEPROM chipsets 93xx46x.  The driver
+	  supports both read and write commands and also the command to
+	  erase the whole EEPROM.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called eeprom_93xx46.
+
+	  If unsure, say N.
+
+config EEPROM_DIGSY_MTC_CFG
+	bool "DigsyMTC display configuration EEPROMs device"
+	depends on GPIO_MPC5200 && SPI_GPIO
+	help
+	  This option enables access to the display configuration EEPROMs
+	  on the digsy_mtc board.  You have to additionally select the
+	  Microwire EEPROM 93XX46 driver.  sysfs entries will be created
+	  for that EEPROM, allowing you to read/write the configuration
+	  data or to erase the whole EEPROM.
+
+	  If unsure, say N.
+
+config EEPROM_SUNXI_SID
+	tristate "Allwinner sunxi security ID support"
+	depends on ARCH_SUNXI && SYSFS
+	help
+	  This is a driver for the 'security ID' available on various Allwinner
+	  devices.
+
+	  Due to the potential risks involved with changing e-fuses,
+	  this driver is read-only.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called sunxi_sid.
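[Editor's note: as the at24 driver later in this patch explains, these drivers use "new style" I2C binding and must be told which chips exist, typically from board code or a bootloader. A hypothetical board-file declaration for a 32 kbit part; the board_* names are invented for illustration, while byte_len, page_size and flags follow the AT24C32 datasheet and the at24_platform_data definition used by this driver.]

	#include <linux/i2c.h>
	#include <linux/platform_data/at24.h>

	static struct at24_platform_data board_eeprom_pdata = {
		.byte_len  = 32768 / 8,		/* 32 kbit = 4 KiB */
		.page_size = 32,		/* AT24C32 write page */
		.flags     = AT24_FLAG_ADDR16,	/* >16 kbit parts need 2-byte offsets */
	};

	static struct i2c_board_info board_i2c_devices[] __initdata = {
		{
			I2C_BOARD_INFO("24c32", 0x50),
			.platform_data = &board_eeprom_pdata,
		},
	};

	/* registered from the board's init code, e.g.:
	 * i2c_register_board_info(0, board_i2c_devices,
	 *			   ARRAY_SIZE(board_i2c_devices));
	 */
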
+ +endmenu diff --git a/kernel/drivers/misc/eeprom/Makefile b/kernel/drivers/misc/eeprom/Makefile new file mode 100644 index 000000000..9507aec95 --- /dev/null +++ b/kernel/drivers/misc/eeprom/Makefile @@ -0,0 +1,8 @@ +obj-$(CONFIG_EEPROM_AT24) += at24.o +obj-$(CONFIG_EEPROM_AT25) += at25.o +obj-$(CONFIG_EEPROM_LEGACY) += eeprom.o +obj-$(CONFIG_EEPROM_MAX6875) += max6875.o +obj-$(CONFIG_EEPROM_93CX6) += eeprom_93cx6.o +obj-$(CONFIG_EEPROM_93XX46) += eeprom_93xx46.o +obj-$(CONFIG_EEPROM_SUNXI_SID) += sunxi_sid.o +obj-$(CONFIG_EEPROM_DIGSY_MTC_CFG) += digsy_mtc_eeprom.o diff --git a/kernel/drivers/misc/eeprom/at24.c b/kernel/drivers/misc/eeprom/at24.c new file mode 100644 index 000000000..2d3db81be --- /dev/null +++ b/kernel/drivers/misc/eeprom/at24.c @@ -0,0 +1,719 @@ +/* + * at24.c - handle most I2C EEPROMs + * + * Copyright (C) 2005-2007 David Brownell + * Copyright (C) 2008 Wolfram Sang, Pengutronix + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/mutex.h> +#include <linux/sysfs.h> +#include <linux/mod_devicetable.h> +#include <linux/log2.h> +#include <linux/bitops.h> +#include <linux/jiffies.h> +#include <linux/of.h> +#include <linux/i2c.h> +#include <linux/platform_data/at24.h> + +/* + * I2C EEPROMs from most vendors are inexpensive and mostly interchangeable. + * Differences between different vendor product lines (like Atmel AT24C or + * MicroChip 24LC, etc) won't much matter for typical read/write access. + * There are also I2C RAM chips, likewise interchangeable. One example + * would be the PCF8570, which acts like a 24c02 EEPROM (256 bytes). + * + * However, misconfiguration can lose data. "Set 16-bit memory address" + * to a part with 8-bit addressing will overwrite data. Writing with too + * big a page size also loses data. And it's not safe to assume that the + * conventional addresses 0x50..0x57 only hold eeproms; a PCF8563 RTC + * uses 0x51, for just one example. + * + * Accordingly, explicit board-specific configuration data should be used + * in almost all cases. (One partial exception is an SMBus used to access + * "SPD" data for DRAM sticks. Those only use 24c02 EEPROMs.) + * + * So this driver uses "new style" I2C driver binding, expecting to be + * told what devices exist. That may be in arch/X/mach-Y/board-Z.c or + * similar kernel-resident tables; or, configuration data coming from + * a bootloader. + * + * Other than binding model, current differences from "eeprom" driver are + * that this one handles write access and isn't restricted to 24c02 devices. + * It also handles larger devices (32 kbit and up) with two-byte addresses, + * which won't work on pure SMBus systems. + */ + +struct at24_data { + struct at24_platform_data chip; + struct memory_accessor macc; + int use_smbus; + int use_smbus_write; + + /* + * Lock protects against activities from other Linux tasks, + * but not from changes by other I2C masters. + */ + struct mutex lock; + struct bin_attribute bin; + + u8 *writebuf; + unsigned write_max; + unsigned num_addresses; + + /* + * Some chips tie up multiple I2C addresses; dummy devices reserve + * them for us, and we'll use them with SMBus calls. 
+ */ + struct i2c_client *client[]; +}; + +/* + * This parameter is to help this driver avoid blocking other drivers out + * of I2C for potentially troublesome amounts of time. With a 100 kHz I2C + * clock, one 256 byte read takes about 1/43 second which is excessive; + * but the 1/170 second it takes at 400 kHz may be quite reasonable; and + * at 1 MHz (Fm+) a 1/430 second delay could easily be invisible. + * + * This value is forced to be a power of two so that writes align on pages. + */ +static unsigned io_limit = 128; +module_param(io_limit, uint, 0); +MODULE_PARM_DESC(io_limit, "Maximum bytes per I/O (default 128)"); + +/* + * Specs often allow 5 msec for a page write, sometimes 20 msec; + * it's important to recover from write timeouts. + */ +static unsigned write_timeout = 25; +module_param(write_timeout, uint, 0); +MODULE_PARM_DESC(write_timeout, "Time (in ms) to try writes (default 25)"); + +#define AT24_SIZE_BYTELEN 5 +#define AT24_SIZE_FLAGS 8 + +#define AT24_BITMASK(x) (BIT(x) - 1) + +/* create non-zero magic value for given eeprom parameters */ +#define AT24_DEVICE_MAGIC(_len, _flags) \ + ((1 << AT24_SIZE_FLAGS | (_flags)) \ + << AT24_SIZE_BYTELEN | ilog2(_len)) + +static const struct i2c_device_id at24_ids[] = { + /* needs 8 addresses as A0-A2 are ignored */ + { "24c00", AT24_DEVICE_MAGIC(128 / 8, AT24_FLAG_TAKE8ADDR) }, + /* old variants can't be handled with this generic entry! */ + { "24c01", AT24_DEVICE_MAGIC(1024 / 8, 0) }, + { "24c02", AT24_DEVICE_MAGIC(2048 / 8, 0) }, + /* spd is a 24c02 in memory DIMMs */ + { "spd", AT24_DEVICE_MAGIC(2048 / 8, + AT24_FLAG_READONLY | AT24_FLAG_IRUGO) }, + { "24c04", AT24_DEVICE_MAGIC(4096 / 8, 0) }, + /* 24rf08 quirk is handled at i2c-core */ + { "24c08", AT24_DEVICE_MAGIC(8192 / 8, 0) }, + { "24c16", AT24_DEVICE_MAGIC(16384 / 8, 0) }, + { "24c32", AT24_DEVICE_MAGIC(32768 / 8, AT24_FLAG_ADDR16) }, + { "24c64", AT24_DEVICE_MAGIC(65536 / 8, AT24_FLAG_ADDR16) }, + { "24c128", AT24_DEVICE_MAGIC(131072 / 8, AT24_FLAG_ADDR16) }, + { "24c256", AT24_DEVICE_MAGIC(262144 / 8, AT24_FLAG_ADDR16) }, + { "24c512", AT24_DEVICE_MAGIC(524288 / 8, AT24_FLAG_ADDR16) }, + { "24c1024", AT24_DEVICE_MAGIC(1048576 / 8, AT24_FLAG_ADDR16) }, + { "at24", 0 }, + { /* END OF LIST */ } +}; +MODULE_DEVICE_TABLE(i2c, at24_ids); + +/*-------------------------------------------------------------------------*/ + +/* + * This routine supports chips which consume multiple I2C addresses. It + * computes the addressing information to be used for a given r/w request. + * Assumes that sanity checks for offset happened at sysfs-layer. + */ +static struct i2c_client *at24_translate_offset(struct at24_data *at24, + unsigned *offset) +{ + unsigned i; + + if (at24->chip.flags & AT24_FLAG_ADDR16) { + i = *offset >> 16; + *offset &= 0xffff; + } else { + i = *offset >> 8; + *offset &= 0xff; + } + + return at24->client[i]; +} + +static ssize_t at24_eeprom_read(struct at24_data *at24, char *buf, + unsigned offset, size_t count) +{ + struct i2c_msg msg[2]; + u8 msgbuf[2]; + struct i2c_client *client; + unsigned long timeout, read_time; + int status, i; + + memset(msg, 0, sizeof(msg)); + + /* + * REVISIT some multi-address chips don't rollover page reads to + * the next slave address, so we may need to truncate the count. + * Those chips might need another quirk flag. 
+ * + * If the real hardware used four adjacent 24c02 chips and that + * were misconfigured as one 24c08, that would be a similar effect: + * one "eeprom" file not four, but larger reads would fail when + * they crossed certain pages. + */ + + /* + * Slave address and byte offset derive from the offset. Always + * set the byte address; on a multi-master board, another master + * may have changed the chip's "current" address pointer. + */ + client = at24_translate_offset(at24, &offset); + + if (count > io_limit) + count = io_limit; + + switch (at24->use_smbus) { + case I2C_SMBUS_I2C_BLOCK_DATA: + /* Smaller eeproms can work given some SMBus extension calls */ + if (count > I2C_SMBUS_BLOCK_MAX) + count = I2C_SMBUS_BLOCK_MAX; + break; + case I2C_SMBUS_WORD_DATA: + count = 2; + break; + case I2C_SMBUS_BYTE_DATA: + count = 1; + break; + default: + /* + * When we have a better choice than SMBus calls, use a + * combined I2C message. Write address; then read up to + * io_limit data bytes. Note that read page rollover helps us + * here (unlike writes). msgbuf is u8 and will cast to our + * needs. + */ + i = 0; + if (at24->chip.flags & AT24_FLAG_ADDR16) + msgbuf[i++] = offset >> 8; + msgbuf[i++] = offset; + + msg[0].addr = client->addr; + msg[0].buf = msgbuf; + msg[0].len = i; + + msg[1].addr = client->addr; + msg[1].flags = I2C_M_RD; + msg[1].buf = buf; + msg[1].len = count; + } + + /* + * Reads fail if the previous write didn't complete yet. We may + * loop a few times until this one succeeds, waiting at least + * long enough for one entire page write to work. + */ + timeout = jiffies + msecs_to_jiffies(write_timeout); + do { + read_time = jiffies; + switch (at24->use_smbus) { + case I2C_SMBUS_I2C_BLOCK_DATA: + status = i2c_smbus_read_i2c_block_data(client, offset, + count, buf); + break; + case I2C_SMBUS_WORD_DATA: + status = i2c_smbus_read_word_data(client, offset); + if (status >= 0) { + buf[0] = status & 0xff; + buf[1] = status >> 8; + status = count; + } + break; + case I2C_SMBUS_BYTE_DATA: + status = i2c_smbus_read_byte_data(client, offset); + if (status >= 0) { + buf[0] = status; + status = count; + } + break; + default: + status = i2c_transfer(client->adapter, msg, 2); + if (status == 2) + status = count; + } + dev_dbg(&client->dev, "read %zu@%d --> %d (%ld)\n", + count, offset, status, jiffies); + + if (status == count) + return count; + + /* REVISIT: at HZ=100, this is sloooow */ + msleep(1); + } while (time_before(read_time, timeout)); + + return -ETIMEDOUT; +} + +static ssize_t at24_read(struct at24_data *at24, + char *buf, loff_t off, size_t count) +{ + ssize_t retval = 0; + + if (unlikely(!count)) + return count; + + /* + * Read data from chip, protecting against concurrent updates + * from this host, but not from other I2C masters. + */ + mutex_lock(&at24->lock); + + while (count) { + ssize_t status; + + status = at24_eeprom_read(at24, buf, off, count); + if (status <= 0) { + if (retval == 0) + retval = status; + break; + } + buf += status; + off += status; + count -= status; + retval += status; + } + + mutex_unlock(&at24->lock); + + return retval; +} + +static ssize_t at24_bin_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + struct at24_data *at24; + + at24 = dev_get_drvdata(container_of(kobj, struct device, kobj)); + return at24_read(at24, buf, off, count); +} + + +/* + * Note that if the hardware write-protect pin is pulled high, the whole + * chip is normally write protected. 
But there are plenty of product + * variants here, including OTP fuses and partial chip protect. + * + * We only use page mode writes; the alternative is sloooow. This routine + * writes at most one page. + */ +static ssize_t at24_eeprom_write(struct at24_data *at24, const char *buf, + unsigned offset, size_t count) +{ + struct i2c_client *client; + struct i2c_msg msg; + ssize_t status = 0; + unsigned long timeout, write_time; + unsigned next_page; + + /* Get corresponding I2C address and adjust offset */ + client = at24_translate_offset(at24, &offset); + + /* write_max is at most a page */ + if (count > at24->write_max) + count = at24->write_max; + + /* Never roll over backwards, to the start of this page */ + next_page = roundup(offset + 1, at24->chip.page_size); + if (offset + count > next_page) + count = next_page - offset; + + /* If we'll use I2C calls for I/O, set up the message */ + if (!at24->use_smbus) { + int i = 0; + + msg.addr = client->addr; + msg.flags = 0; + + /* msg.buf is u8 and casts will mask the values */ + msg.buf = at24->writebuf; + if (at24->chip.flags & AT24_FLAG_ADDR16) + msg.buf[i++] = offset >> 8; + + msg.buf[i++] = offset; + memcpy(&msg.buf[i], buf, count); + msg.len = i + count; + } + + /* + * Writes fail if the previous one didn't complete yet. We may + * loop a few times until this one succeeds, waiting at least + * long enough for one entire page write to work. + */ + timeout = jiffies + msecs_to_jiffies(write_timeout); + do { + write_time = jiffies; + if (at24->use_smbus_write) { + switch (at24->use_smbus_write) { + case I2C_SMBUS_I2C_BLOCK_DATA: + status = i2c_smbus_write_i2c_block_data(client, + offset, count, buf); + break; + case I2C_SMBUS_BYTE_DATA: + status = i2c_smbus_write_byte_data(client, + offset, buf[0]); + break; + } + + if (status == 0) + status = count; + } else { + status = i2c_transfer(client->adapter, &msg, 1); + if (status == 1) + status = count; + } + dev_dbg(&client->dev, "write %zu@%d --> %zd (%ld)\n", + count, offset, status, jiffies); + + if (status == count) + return count; + + /* REVISIT: at HZ=100, this is sloooow */ + msleep(1); + } while (time_before(write_time, timeout)); + + return -ETIMEDOUT; +} + +static ssize_t at24_write(struct at24_data *at24, const char *buf, loff_t off, + size_t count) +{ + ssize_t retval = 0; + + if (unlikely(!count)) + return count; + + /* + * Write data to chip, protecting against concurrent updates + * from this host, but not from other I2C masters. + */ + mutex_lock(&at24->lock); + + while (count) { + ssize_t status; + + status = at24_eeprom_write(at24, buf, off, count); + if (status <= 0) { + if (retval == 0) + retval = status; + break; + } + buf += status; + off += status; + count -= status; + retval += status; + } + + mutex_unlock(&at24->lock); + + return retval; +} + +static ssize_t at24_bin_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + struct at24_data *at24; + + if (unlikely(off >= attr->size)) + return -EFBIG; + + at24 = dev_get_drvdata(container_of(kobj, struct device, kobj)); + return at24_write(at24, buf, off, count); +} + +/*-------------------------------------------------------------------------*/ + +/* + * This lets other kernel code access the eeprom data. For example, it + * might hold a board's Ethernet address, or board-specific calibration + * data generated on the manufacturing floor. 
+ */ + +static ssize_t at24_macc_read(struct memory_accessor *macc, char *buf, + off_t offset, size_t count) +{ + struct at24_data *at24 = container_of(macc, struct at24_data, macc); + + return at24_read(at24, buf, offset, count); +} + +static ssize_t at24_macc_write(struct memory_accessor *macc, const char *buf, + off_t offset, size_t count) +{ + struct at24_data *at24 = container_of(macc, struct at24_data, macc); + + return at24_write(at24, buf, offset, count); +} + +/*-------------------------------------------------------------------------*/ + +#ifdef CONFIG_OF +static void at24_get_ofdata(struct i2c_client *client, + struct at24_platform_data *chip) +{ + const __be32 *val; + struct device_node *node = client->dev.of_node; + + if (node) { + if (of_get_property(node, "read-only", NULL)) + chip->flags |= AT24_FLAG_READONLY; + val = of_get_property(node, "pagesize", NULL); + if (val) + chip->page_size = be32_to_cpup(val); + } +} +#else +static void at24_get_ofdata(struct i2c_client *client, + struct at24_platform_data *chip) +{ } +#endif /* CONFIG_OF */ + +static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id) +{ + struct at24_platform_data chip; + bool writable; + int use_smbus = 0; + int use_smbus_write = 0; + struct at24_data *at24; + int err; + unsigned i, num_addresses; + kernel_ulong_t magic; + + if (client->dev.platform_data) { + chip = *(struct at24_platform_data *)client->dev.platform_data; + } else { + if (!id->driver_data) + return -ENODEV; + + magic = id->driver_data; + chip.byte_len = BIT(magic & AT24_BITMASK(AT24_SIZE_BYTELEN)); + magic >>= AT24_SIZE_BYTELEN; + chip.flags = magic & AT24_BITMASK(AT24_SIZE_FLAGS); + /* + * This is slow, but we can't know all eeproms, so we better + * play safe. Specifying custom eeprom-types via platform_data + * is recommended anyhow. + */ + chip.page_size = 1; + + /* update chipdata if OF is present */ + at24_get_ofdata(client, &chip); + + chip.setup = NULL; + chip.context = NULL; + } + + if (!is_power_of_2(chip.byte_len)) + dev_warn(&client->dev, + "byte_len looks suspicious (no power of 2)!\n"); + if (!chip.page_size) { + dev_err(&client->dev, "page_size must not be 0!\n"); + return -EINVAL; + } + if (!is_power_of_2(chip.page_size)) + dev_warn(&client->dev, + "page_size looks suspicious (no power of 2)!\n"); + + /* Use I2C operations unless we're stuck with SMBus extensions. */ + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { + if (chip.flags & AT24_FLAG_ADDR16) + return -EPFNOSUPPORT; + + if (i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_READ_I2C_BLOCK)) { + use_smbus = I2C_SMBUS_I2C_BLOCK_DATA; + } else if (i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_READ_WORD_DATA)) { + use_smbus = I2C_SMBUS_WORD_DATA; + } else if (i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_READ_BYTE_DATA)) { + use_smbus = I2C_SMBUS_BYTE_DATA; + } else { + return -EPFNOSUPPORT; + } + } + + /* Use I2C operations unless we're stuck with SMBus extensions. */ + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { + if (i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_WRITE_I2C_BLOCK)) { + use_smbus_write = I2C_SMBUS_I2C_BLOCK_DATA; + } else if (i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_WRITE_BYTE_DATA)) { + use_smbus_write = I2C_SMBUS_BYTE_DATA; + chip.page_size = 1; + } + } + + if (chip.flags & AT24_FLAG_TAKE8ADDR) + num_addresses = 8; + else + num_addresses = DIV_ROUND_UP(chip.byte_len, + (chip.flags & AT24_FLAG_ADDR16) ? 
65536 : 256); + + at24 = devm_kzalloc(&client->dev, sizeof(struct at24_data) + + num_addresses * sizeof(struct i2c_client *), GFP_KERNEL); + if (!at24) + return -ENOMEM; + + mutex_init(&at24->lock); + at24->use_smbus = use_smbus; + at24->use_smbus_write = use_smbus_write; + at24->chip = chip; + at24->num_addresses = num_addresses; + + /* + * Export the EEPROM bytes through sysfs, since that's convenient. + * By default, only root should see the data (maybe passwords etc) + */ + sysfs_bin_attr_init(&at24->bin); + at24->bin.attr.name = "eeprom"; + at24->bin.attr.mode = chip.flags & AT24_FLAG_IRUGO ? S_IRUGO : S_IRUSR; + at24->bin.read = at24_bin_read; + at24->bin.size = chip.byte_len; + + at24->macc.read = at24_macc_read; + + writable = !(chip.flags & AT24_FLAG_READONLY); + if (writable) { + if (!use_smbus || use_smbus_write) { + + unsigned write_max = chip.page_size; + + at24->macc.write = at24_macc_write; + + at24->bin.write = at24_bin_write; + at24->bin.attr.mode |= S_IWUSR; + + if (write_max > io_limit) + write_max = io_limit; + if (use_smbus && write_max > I2C_SMBUS_BLOCK_MAX) + write_max = I2C_SMBUS_BLOCK_MAX; + at24->write_max = write_max; + + /* buffer (data + address at the beginning) */ + at24->writebuf = devm_kzalloc(&client->dev, + write_max + 2, GFP_KERNEL); + if (!at24->writebuf) + return -ENOMEM; + } else { + dev_warn(&client->dev, + "cannot write due to controller restrictions."); + } + } + + at24->client[0] = client; + + /* use dummy devices for multiple-address chips */ + for (i = 1; i < num_addresses; i++) { + at24->client[i] = i2c_new_dummy(client->adapter, + client->addr + i); + if (!at24->client[i]) { + dev_err(&client->dev, "address 0x%02x unavailable\n", + client->addr + i); + err = -EADDRINUSE; + goto err_clients; + } + } + + err = sysfs_create_bin_file(&client->dev.kobj, &at24->bin); + if (err) + goto err_clients; + + i2c_set_clientdata(client, at24); + + dev_info(&client->dev, "%zu byte %s EEPROM, %s, %u bytes/write\n", + at24->bin.size, client->name, + writable ? "writable" : "read-only", at24->write_max); + if (use_smbus == I2C_SMBUS_WORD_DATA || + use_smbus == I2C_SMBUS_BYTE_DATA) { + dev_notice(&client->dev, "Falling back to %s reads, " + "performance will suffer\n", use_smbus == + I2C_SMBUS_WORD_DATA ? 
"word" : "byte"); + } + + /* export data to kernel code */ + if (chip.setup) + chip.setup(&at24->macc, chip.context); + + return 0; + +err_clients: + for (i = 1; i < num_addresses; i++) + if (at24->client[i]) + i2c_unregister_device(at24->client[i]); + + return err; +} + +static int at24_remove(struct i2c_client *client) +{ + struct at24_data *at24; + int i; + + at24 = i2c_get_clientdata(client); + sysfs_remove_bin_file(&client->dev.kobj, &at24->bin); + + for (i = 1; i < at24->num_addresses; i++) + i2c_unregister_device(at24->client[i]); + + return 0; +} + +/*-------------------------------------------------------------------------*/ + +static struct i2c_driver at24_driver = { + .driver = { + .name = "at24", + .owner = THIS_MODULE, + }, + .probe = at24_probe, + .remove = at24_remove, + .id_table = at24_ids, +}; + +static int __init at24_init(void) +{ + if (!io_limit) { + pr_err("at24: io_limit must not be 0!\n"); + return -EINVAL; + } + + io_limit = rounddown_pow_of_two(io_limit); + return i2c_add_driver(&at24_driver); +} +module_init(at24_init); + +static void __exit at24_exit(void) +{ + i2c_del_driver(&at24_driver); +} +module_exit(at24_exit); + +MODULE_DESCRIPTION("Driver for most I2C EEPROMs"); +MODULE_AUTHOR("David Brownell and Wolfram Sang"); +MODULE_LICENSE("GPL"); diff --git a/kernel/drivers/misc/eeprom/at25.c b/kernel/drivers/misc/eeprom/at25.c new file mode 100644 index 000000000..0a1af93ec --- /dev/null +++ b/kernel/drivers/misc/eeprom/at25.c @@ -0,0 +1,477 @@ +/* + * at25.c -- support most SPI EEPROMs, such as Atmel AT25 models + * + * Copyright (C) 2006 David Brownell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/sched.h> + +#include <linux/spi/spi.h> +#include <linux/spi/eeprom.h> +#include <linux/property.h> + +/* + * NOTE: this is an *EEPROM* driver. The vagaries of product naming + * mean that some AT25 products are EEPROMs, and others are FLASH. + * Handle FLASH chips with the drivers/mtd/devices/m25p80.c driver, + * not this one! + */ + +struct at25_data { + struct spi_device *spi; + struct memory_accessor mem; + struct mutex lock; + struct spi_eeprom chip; + struct bin_attribute bin; + unsigned addrlen; +}; + +#define AT25_WREN 0x06 /* latch the write enable */ +#define AT25_WRDI 0x04 /* reset the write enable */ +#define AT25_RDSR 0x05 /* read status register */ +#define AT25_WRSR 0x01 /* write status register */ +#define AT25_READ 0x03 /* read byte(s) */ +#define AT25_WRITE 0x02 /* write byte(s)/sector */ + +#define AT25_SR_nRDY 0x01 /* nRDY = write-in-progress */ +#define AT25_SR_WEN 0x02 /* write enable (latched) */ +#define AT25_SR_BP0 0x04 /* BP for software writeprotect */ +#define AT25_SR_BP1 0x08 +#define AT25_SR_WPEN 0x80 /* writeprotect enable */ + +#define AT25_INSTR_BIT3 0x08 /* Additional address bit in instr */ + +#define EE_MAXADDRLEN 3 /* 24 bit addresses, up to 2 MBytes */ + +/* Specs often allow 5 msec for a page write, sometimes 20 msec; + * it's important to recover from write timeouts. 
+ */ +#define EE_TIMEOUT 25 + +/*-------------------------------------------------------------------------*/ + +#define io_limit PAGE_SIZE /* bytes */ + +static ssize_t +at25_ee_read( + struct at25_data *at25, + char *buf, + unsigned offset, + size_t count +) +{ + u8 command[EE_MAXADDRLEN + 1]; + u8 *cp; + ssize_t status; + struct spi_transfer t[2]; + struct spi_message m; + u8 instr; + + if (unlikely(offset >= at25->bin.size)) + return 0; + if ((offset + count) > at25->bin.size) + count = at25->bin.size - offset; + if (unlikely(!count)) + return count; + + cp = command; + + instr = AT25_READ; + if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR) + if (offset >= (1U << (at25->addrlen * 8))) + instr |= AT25_INSTR_BIT3; + *cp++ = instr; + + /* 8/16/24-bit address is written MSB first */ + switch (at25->addrlen) { + default: /* case 3 */ + *cp++ = offset >> 16; + case 2: + *cp++ = offset >> 8; + case 1: + case 0: /* can't happen: for better codegen */ + *cp++ = offset >> 0; + } + + spi_message_init(&m); + memset(t, 0, sizeof t); + + t[0].tx_buf = command; + t[0].len = at25->addrlen + 1; + spi_message_add_tail(&t[0], &m); + + t[1].rx_buf = buf; + t[1].len = count; + spi_message_add_tail(&t[1], &m); + + mutex_lock(&at25->lock); + + /* Read it all at once. + * + * REVISIT that's potentially a problem with large chips, if + * other devices on the bus need to be accessed regularly or + * this chip is clocked very slowly + */ + status = spi_sync(at25->spi, &m); + dev_dbg(&at25->spi->dev, + "read %Zd bytes at %d --> %d\n", + count, offset, (int) status); + + mutex_unlock(&at25->lock); + return status ? status : count; +} + +static ssize_t +at25_bin_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + struct device *dev; + struct at25_data *at25; + + dev = container_of(kobj, struct device, kobj); + at25 = dev_get_drvdata(dev); + + return at25_ee_read(at25, buf, off, count); +} + + +static ssize_t +at25_ee_write(struct at25_data *at25, const char *buf, loff_t off, + size_t count) +{ + ssize_t status = 0; + unsigned written = 0; + unsigned buf_size; + u8 *bounce; + + if (unlikely(off >= at25->bin.size)) + return -EFBIG; + if ((off + count) > at25->bin.size) + count = at25->bin.size - off; + if (unlikely(!count)) + return count; + + /* Temp buffer starts with command and address */ + buf_size = at25->chip.page_size; + if (buf_size > io_limit) + buf_size = io_limit; + bounce = kmalloc(buf_size + at25->addrlen + 1, GFP_KERNEL); + if (!bounce) + return -ENOMEM; + + /* For write, rollover is within the page ... so we write at + * most one page, then manually roll over to the next page. 
+ */ + mutex_lock(&at25->lock); + do { + unsigned long timeout, retries; + unsigned segment; + unsigned offset = (unsigned) off; + u8 *cp = bounce; + int sr; + u8 instr; + + *cp = AT25_WREN; + status = spi_write(at25->spi, cp, 1); + if (status < 0) { + dev_dbg(&at25->spi->dev, "WREN --> %d\n", + (int) status); + break; + } + + instr = AT25_WRITE; + if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR) + if (offset >= (1U << (at25->addrlen * 8))) + instr |= AT25_INSTR_BIT3; + *cp++ = instr; + + /* 8/16/24-bit address is written MSB first */ + switch (at25->addrlen) { + default: /* case 3 */ + *cp++ = offset >> 16; + case 2: + *cp++ = offset >> 8; + case 1: + case 0: /* can't happen: for better codegen */ + *cp++ = offset >> 0; + } + + /* Write as much of a page as we can */ + segment = buf_size - (offset % buf_size); + if (segment > count) + segment = count; + memcpy(cp, buf, segment); + status = spi_write(at25->spi, bounce, + segment + at25->addrlen + 1); + dev_dbg(&at25->spi->dev, + "write %u bytes at %u --> %d\n", + segment, offset, (int) status); + if (status < 0) + break; + + /* REVISIT this should detect (or prevent) failed writes + * to readonly sections of the EEPROM... + */ + + /* Wait for non-busy status */ + timeout = jiffies + msecs_to_jiffies(EE_TIMEOUT); + retries = 0; + do { + + sr = spi_w8r8(at25->spi, AT25_RDSR); + if (sr < 0 || (sr & AT25_SR_nRDY)) { + dev_dbg(&at25->spi->dev, + "rdsr --> %d (%02x)\n", sr, sr); + /* at HZ=100, this is sloooow */ + msleep(1); + continue; + } + if (!(sr & AT25_SR_nRDY)) + break; + } while (retries++ < 3 || time_before_eq(jiffies, timeout)); + + if ((sr < 0) || (sr & AT25_SR_nRDY)) { + dev_err(&at25->spi->dev, + "write %d bytes offset %d, " + "timeout after %u msecs\n", + segment, offset, + jiffies_to_msecs(jiffies - + (timeout - EE_TIMEOUT))); + status = -ETIMEDOUT; + break; + } + + off += segment; + buf += segment; + count -= segment; + written += segment; + + } while (count > 0); + + mutex_unlock(&at25->lock); + + kfree(bounce); + return written ? written : status; +} + +static ssize_t +at25_bin_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + struct device *dev; + struct at25_data *at25; + + dev = container_of(kobj, struct device, kobj); + at25 = dev_get_drvdata(dev); + + return at25_ee_write(at25, buf, off, count); +} + +/*-------------------------------------------------------------------------*/ + +/* Let in-kernel code access the eeprom data. 
*/ + +static ssize_t at25_mem_read(struct memory_accessor *mem, char *buf, + off_t offset, size_t count) +{ + struct at25_data *at25 = container_of(mem, struct at25_data, mem); + + return at25_ee_read(at25, buf, offset, count); +} + +static ssize_t at25_mem_write(struct memory_accessor *mem, const char *buf, + off_t offset, size_t count) +{ + struct at25_data *at25 = container_of(mem, struct at25_data, mem); + + return at25_ee_write(at25, buf, offset, count); +} + +/*-------------------------------------------------------------------------*/ + +static int at25_fw_to_chip(struct device *dev, struct spi_eeprom *chip) +{ + u32 val; + + memset(chip, 0, sizeof(*chip)); + strncpy(chip->name, "at25", sizeof(chip->name)); + + if (device_property_read_u32(dev, "size", &val) == 0 || + device_property_read_u32(dev, "at25,byte-len", &val) == 0) { + chip->byte_len = val; + } else { + dev_err(dev, "Error: missing \"size\" property\n"); + return -ENODEV; + } + + if (device_property_read_u32(dev, "pagesize", &val) == 0 || + device_property_read_u32(dev, "at25,page-size", &val) == 0) { + chip->page_size = (u16)val; + } else { + dev_err(dev, "Error: missing \"pagesize\" property\n"); + return -ENODEV; + } + + if (device_property_read_u32(dev, "at25,addr-mode", &val) == 0) { + chip->flags = (u16)val; + } else { + if (device_property_read_u32(dev, "address-width", &val)) { + dev_err(dev, + "Error: missing \"address-width\" property\n"); + return -ENODEV; + } + switch (val) { + case 8: + chip->flags |= EE_ADDR1; + break; + case 16: + chip->flags |= EE_ADDR2; + break; + case 24: + chip->flags |= EE_ADDR3; + break; + default: + dev_err(dev, + "Error: bad \"address-width\" property: %u\n", + val); + return -ENODEV; + } + if (device_property_present(dev, "read-only")) + chip->flags |= EE_READONLY; + } + return 0; +} + +static int at25_probe(struct spi_device *spi) +{ + struct at25_data *at25 = NULL; + struct spi_eeprom chip; + int err; + int sr; + int addrlen; + + /* Chip description */ + if (!spi->dev.platform_data) { + err = at25_fw_to_chip(&spi->dev, &chip); + if (err) + return err; + } else + chip = *(struct spi_eeprom *)spi->dev.platform_data; + + /* For now we only support 8/16/24 bit addressing */ + if (chip.flags & EE_ADDR1) + addrlen = 1; + else if (chip.flags & EE_ADDR2) + addrlen = 2; + else if (chip.flags & EE_ADDR3) + addrlen = 3; + else { + dev_dbg(&spi->dev, "unsupported address type\n"); + return -EINVAL; + } + + /* Ping the chip ... the status register is pretty portable, + * unlike probing manufacturer IDs. We do expect that system + * firmware didn't write it in the past few milliseconds! + */ + sr = spi_w8r8(spi, AT25_RDSR); + if (sr < 0 || sr & AT25_SR_nRDY) { + dev_dbg(&spi->dev, "rdsr --> %d (%02x)\n", sr, sr); + return -ENXIO; + } + + at25 = devm_kzalloc(&spi->dev, sizeof(struct at25_data), GFP_KERNEL); + if (!at25) + return -ENOMEM; + + mutex_init(&at25->lock); + at25->chip = chip; + at25->spi = spi_dev_get(spi); + spi_set_drvdata(spi, at25); + at25->addrlen = addrlen; + + /* Export the EEPROM bytes through sysfs, since that's convenient. + * And maybe to other kernel code; it might hold a board's Ethernet + * address, or board-specific calibration data generated on the + * manufacturing floor. + * + * Default to root-only access to the data; EEPROMs often hold data + * that's sensitive for read and/or write, like ethernet addresses, + * security codes, board-specific manufacturing calibrations, etc. 
+ */ + sysfs_bin_attr_init(&at25->bin); + at25->bin.attr.name = "eeprom"; + at25->bin.attr.mode = S_IRUSR; + at25->bin.read = at25_bin_read; + at25->mem.read = at25_mem_read; + + at25->bin.size = at25->chip.byte_len; + if (!(chip.flags & EE_READONLY)) { + at25->bin.write = at25_bin_write; + at25->bin.attr.mode |= S_IWUSR; + at25->mem.write = at25_mem_write; + } + + err = sysfs_create_bin_file(&spi->dev.kobj, &at25->bin); + if (err) + return err; + + if (chip.setup) + chip.setup(&at25->mem, chip.context); + + dev_info(&spi->dev, "%Zd %s %s eeprom%s, pagesize %u\n", + (at25->bin.size < 1024) + ? at25->bin.size + : (at25->bin.size / 1024), + (at25->bin.size < 1024) ? "Byte" : "KByte", + at25->chip.name, + (chip.flags & EE_READONLY) ? " (readonly)" : "", + at25->chip.page_size); + return 0; +} + +static int at25_remove(struct spi_device *spi) +{ + struct at25_data *at25; + + at25 = spi_get_drvdata(spi); + sysfs_remove_bin_file(&spi->dev.kobj, &at25->bin); + return 0; +} + +/*-------------------------------------------------------------------------*/ + +static const struct of_device_id at25_of_match[] = { + { .compatible = "atmel,at25", }, + { } +}; +MODULE_DEVICE_TABLE(of, at25_of_match); + +static struct spi_driver at25_driver = { + .driver = { + .name = "at25", + .owner = THIS_MODULE, + .of_match_table = at25_of_match, + }, + .probe = at25_probe, + .remove = at25_remove, +}; + +module_spi_driver(at25_driver); + +MODULE_DESCRIPTION("Driver for most SPI EEPROMs"); +MODULE_AUTHOR("David Brownell"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("spi:at25"); diff --git a/kernel/drivers/misc/eeprom/digsy_mtc_eeprom.c b/kernel/drivers/misc/eeprom/digsy_mtc_eeprom.c new file mode 100644 index 000000000..66d9e1bae --- /dev/null +++ b/kernel/drivers/misc/eeprom/digsy_mtc_eeprom.c @@ -0,0 +1,85 @@ +/* + * EEPROMs access control driver for display configuration EEPROMs + * on DigsyMTC board. + * + * (C) 2011 DENX Software Engineering, Anatolij Gustschin <agust@denx.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/gpio.h> +#include <linux/init.h> +#include <linux/platform_device.h> +#include <linux/spi/spi.h> +#include <linux/spi/spi_gpio.h> +#include <linux/eeprom_93xx46.h> + +#define GPIO_EEPROM_CLK 216 +#define GPIO_EEPROM_CS 210 +#define GPIO_EEPROM_DI 217 +#define GPIO_EEPROM_DO 249 +#define GPIO_EEPROM_OE 255 +#define EE_SPI_BUS_NUM 1 + +static void digsy_mtc_op_prepare(void *p) +{ + /* enable */ + gpio_set_value(GPIO_EEPROM_OE, 0); +} + +static void digsy_mtc_op_finish(void *p) +{ + /* disable */ + gpio_set_value(GPIO_EEPROM_OE, 1); +} + +struct eeprom_93xx46_platform_data digsy_mtc_eeprom_data = { + .flags = EE_ADDR8, + .prepare = digsy_mtc_op_prepare, + .finish = digsy_mtc_op_finish, +}; + +static struct spi_gpio_platform_data eeprom_spi_gpio_data = { + .sck = GPIO_EEPROM_CLK, + .mosi = GPIO_EEPROM_DI, + .miso = GPIO_EEPROM_DO, + .num_chipselect = 1, +}; + +static struct platform_device digsy_mtc_eeprom = { + .name = "spi_gpio", + .id = EE_SPI_BUS_NUM, + .dev = { + .platform_data = &eeprom_spi_gpio_data, + }, +}; + +static struct spi_board_info digsy_mtc_eeprom_info[] __initdata = { + { + .modalias = "93xx46", + .max_speed_hz = 1000000, + .bus_num = EE_SPI_BUS_NUM, + .chip_select = 0, + .mode = SPI_MODE_0, + .controller_data = (void *)GPIO_EEPROM_CS, + .platform_data = &digsy_mtc_eeprom_data, + }, +}; + +static int __init digsy_mtc_eeprom_devices_init(void) +{ + int ret; + + ret = gpio_request_one(GPIO_EEPROM_OE, GPIOF_OUT_INIT_HIGH, + "93xx46 EEPROMs OE"); + if (ret) { + pr_err("can't request gpio %d\n", GPIO_EEPROM_OE); + return ret; + } + spi_register_board_info(digsy_mtc_eeprom_info, + ARRAY_SIZE(digsy_mtc_eeprom_info)); + return platform_device_register(&digsy_mtc_eeprom); +} +device_initcall(digsy_mtc_eeprom_devices_init); diff --git a/kernel/drivers/misc/eeprom/eeprom.c b/kernel/drivers/misc/eeprom/eeprom.c new file mode 100644 index 000000000..b432873de --- /dev/null +++ b/kernel/drivers/misc/eeprom/eeprom.c @@ -0,0 +1,226 @@ +/* + * Copyright (C) 1998, 1999 Frodo Looijaard <frodol@dds.nl> and + * Philip Edelbrock <phil@netroedge.com> + * Copyright (C) 2003 Greg Kroah-Hartman <greg@kroah.com> + * Copyright (C) 2003 IBM Corp. + * Copyright (C) 2004 Jean Delvare <jdelvare@suse.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/jiffies.h> +#include <linux/i2c.h> +#include <linux/mutex.h> + +/* Addresses to scan */ +static const unsigned short normal_i2c[] = { 0x50, 0x51, 0x52, 0x53, 0x54, + 0x55, 0x56, 0x57, I2C_CLIENT_END }; + + +/* Size of EEPROM in bytes */ +#define EEPROM_SIZE 256 + +/* possible types of eeprom devices */ +enum eeprom_nature { + UNKNOWN, + VAIO, +}; + +/* Each client has this additional data */ +struct eeprom_data { + struct mutex update_lock; + u8 valid; /* bitfield, bit!=0 if slice is valid */ + unsigned long last_updated[8]; /* In jiffies, 8 slices */ + u8 data[EEPROM_SIZE]; /* Register values */ + enum eeprom_nature nature; +}; + + +static void eeprom_update_client(struct i2c_client *client, u8 slice) +{ + struct eeprom_data *data = i2c_get_clientdata(client); + int i; + + mutex_lock(&data->update_lock); + + if (!(data->valid & (1 << slice)) || + time_after(jiffies, data->last_updated[slice] + 300 * HZ)) { + dev_dbg(&client->dev, "Starting eeprom update, slice %u\n", slice); + + if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK)) { + for (i = slice << 5; i < (slice + 1) << 5; i += 32) + if (i2c_smbus_read_i2c_block_data(client, i, + 32, data->data + i) + != 32) + goto exit; + } else { + for (i = slice << 5; i < (slice + 1) << 5; i += 2) { + int word = i2c_smbus_read_word_data(client, i); + if (word < 0) + goto exit; + data->data[i] = word & 0xff; + data->data[i + 1] = word >> 8; + } + } + data->last_updated[slice] = jiffies; + data->valid |= (1 << slice); + } +exit: + mutex_unlock(&data->update_lock); +} + +static ssize_t eeprom_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + struct i2c_client *client = to_i2c_client(container_of(kobj, struct device, kobj)); + struct eeprom_data *data = i2c_get_clientdata(client); + u8 slice; + + if (off > EEPROM_SIZE) + return 0; + if (off + count > EEPROM_SIZE) + count = EEPROM_SIZE - off; + + /* Only refresh slices which contain requested bytes */ + for (slice = off >> 5; slice <= (off + count - 1) >> 5; slice++) + eeprom_update_client(client, slice); + + /* Hide Vaio private settings to regular users: + - BIOS passwords: bytes 0x00 to 0x0f + - UUID: bytes 0x10 to 0x1f + - Serial number: 0xc0 to 0xdf */ + if (data->nature == VAIO && !capable(CAP_SYS_ADMIN)) { + int i; + + for (i = 0; i < count; i++) { + if ((off + i <= 0x1f) || + (off + i >= 0xc0 && off + i <= 0xdf)) + buf[i] = 0; + else + buf[i] = data->data[off + i]; + } + } else { + memcpy(buf, &data->data[off], count); + } + + return count; +} + +static struct bin_attribute eeprom_attr = { + .attr = { + .name = "eeprom", + .mode = S_IRUGO, + }, + .size = EEPROM_SIZE, + .read = eeprom_read, +}; + +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int eeprom_detect(struct i2c_client *client, struct i2c_board_info *info) +{ + struct i2c_adapter *adapter = client->adapter; + + /* EDID EEPROMs are often 24C00 EEPROMs, which answer to all + addresses 0x50-0x57, but we only care about 0x50. 
So decline + attaching to addresses >= 0x51 on DDC buses */ + if (!(adapter->class & I2C_CLASS_SPD) && client->addr >= 0x51) + return -ENODEV; + + /* There are four ways we can read the EEPROM data: + (1) I2C block reads (faster, but unsupported by most adapters) + (2) Word reads (128% overhead) + (3) Consecutive byte reads (88% overhead, unsafe) + (4) Regular byte data reads (265% overhead) + The third and fourth methods are not implemented by this driver + because all known adapters support one of the first two. */ + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_WORD_DATA) + && !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK)) + return -ENODEV; + + strlcpy(info->type, "eeprom", I2C_NAME_SIZE); + + return 0; +} + +static int eeprom_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct i2c_adapter *adapter = client->adapter; + struct eeprom_data *data; + + data = devm_kzalloc(&client->dev, sizeof(struct eeprom_data), + GFP_KERNEL); + if (!data) + return -ENOMEM; + + memset(data->data, 0xff, EEPROM_SIZE); + i2c_set_clientdata(client, data); + mutex_init(&data->update_lock); + data->nature = UNKNOWN; + + /* Detect the Vaio nature of EEPROMs. + We use the "PCG-" or "VGN-" prefix as the signature. */ + if (client->addr == 0x57 + && i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_BYTE_DATA)) { + char name[4]; + + name[0] = i2c_smbus_read_byte_data(client, 0x80); + name[1] = i2c_smbus_read_byte_data(client, 0x81); + name[2] = i2c_smbus_read_byte_data(client, 0x82); + name[3] = i2c_smbus_read_byte_data(client, 0x83); + + if (!memcmp(name, "PCG-", 4) || !memcmp(name, "VGN-", 4)) { + dev_info(&client->dev, "Vaio EEPROM detected, " + "enabling privacy protection\n"); + data->nature = VAIO; + } + } + + /* create the sysfs eeprom file */ + return sysfs_create_bin_file(&client->dev.kobj, &eeprom_attr); +} + +static int eeprom_remove(struct i2c_client *client) +{ + sysfs_remove_bin_file(&client->dev.kobj, &eeprom_attr); + + return 0; +} + +static const struct i2c_device_id eeprom_id[] = { + { "eeprom", 0 }, + { } +}; + +static struct i2c_driver eeprom_driver = { + .driver = { + .name = "eeprom", + }, + .probe = eeprom_probe, + .remove = eeprom_remove, + .id_table = eeprom_id, + + .class = I2C_CLASS_DDC | I2C_CLASS_SPD, + .detect = eeprom_detect, + .address_list = normal_i2c, +}; + +module_i2c_driver(eeprom_driver); + +MODULE_AUTHOR("Frodo Looijaard <frodol@dds.nl> and " + "Philip Edelbrock <phil@netroedge.com> and " + "Greg Kroah-Hartman <greg@kroah.com>"); +MODULE_DESCRIPTION("I2C EEPROM driver"); +MODULE_LICENSE("GPL"); diff --git a/kernel/drivers/misc/eeprom/eeprom_93cx6.c b/kernel/drivers/misc/eeprom/eeprom_93cx6.c new file mode 100644 index 000000000..0cf2c9d67 --- /dev/null +++ b/kernel/drivers/misc/eeprom/eeprom_93cx6.c @@ -0,0 +1,381 @@ +/* + * Copyright (C) 2004 - 2006 rt2x00 SourceForge Project + * <http://rt2x00.serialmonkey.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Module: eeprom_93cx6 + * Abstract: EEPROM reader routines for 93cx6 chipsets. 
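+ *	   (These routines never touch hardware directly: the owning
+ *	   driver fills in the register_read()/register_write() callbacks
+ *	   of struct eeprom_93cx6, and the code below bit-bangs the
+ *	   three-wire protocol through them.)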
+ * Supported chipsets: 93c46 & 93c66. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/eeprom_93cx6.h> + +MODULE_AUTHOR("http://rt2x00.serialmonkey.com"); +MODULE_VERSION("1.0"); +MODULE_DESCRIPTION("EEPROM 93cx6 chip driver"); +MODULE_LICENSE("GPL"); + +static inline void eeprom_93cx6_pulse_high(struct eeprom_93cx6 *eeprom) +{ + eeprom->reg_data_clock = 1; + eeprom->register_write(eeprom); + + /* + * Add a short delay for the pulse to work. + * According to the specifications the "maximum minimum" + * time should be 450ns. + */ + ndelay(450); +} + +static inline void eeprom_93cx6_pulse_low(struct eeprom_93cx6 *eeprom) +{ + eeprom->reg_data_clock = 0; + eeprom->register_write(eeprom); + + /* + * Add a short delay for the pulse to work. + * According to the specifications the "maximum minimum" + * time should be 450ns. + */ + ndelay(450); +} + +static void eeprom_93cx6_startup(struct eeprom_93cx6 *eeprom) +{ + /* + * Clear all flags, and enable chip select. + */ + eeprom->register_read(eeprom); + eeprom->reg_data_in = 0; + eeprom->reg_data_out = 0; + eeprom->reg_data_clock = 0; + eeprom->reg_chip_select = 1; + eeprom->drive_data = 1; + eeprom->register_write(eeprom); + + /* + * kick a pulse. + */ + eeprom_93cx6_pulse_high(eeprom); + eeprom_93cx6_pulse_low(eeprom); +} + +static void eeprom_93cx6_cleanup(struct eeprom_93cx6 *eeprom) +{ + /* + * Clear chip_select and data_in flags. + */ + eeprom->register_read(eeprom); + eeprom->reg_data_in = 0; + eeprom->reg_chip_select = 0; + eeprom->register_write(eeprom); + + /* + * kick a pulse. + */ + eeprom_93cx6_pulse_high(eeprom); + eeprom_93cx6_pulse_low(eeprom); +} + +static void eeprom_93cx6_write_bits(struct eeprom_93cx6 *eeprom, + const u16 data, const u16 count) +{ + unsigned int i; + + eeprom->register_read(eeprom); + + /* + * Clear data flags. + */ + eeprom->reg_data_in = 0; + eeprom->reg_data_out = 0; + eeprom->drive_data = 1; + + /* + * Start writing all bits. + */ + for (i = count; i > 0; i--) { + /* + * Check if this bit needs to be set. + */ + eeprom->reg_data_in = !!(data & (1 << (i - 1))); + + /* + * Write the bit to the eeprom register. + */ + eeprom->register_write(eeprom); + + /* + * Kick a pulse. + */ + eeprom_93cx6_pulse_high(eeprom); + eeprom_93cx6_pulse_low(eeprom); + } + + eeprom->reg_data_in = 0; + eeprom->register_write(eeprom); +} + +static void eeprom_93cx6_read_bits(struct eeprom_93cx6 *eeprom, + u16 *data, const u16 count) +{ + unsigned int i; + u16 buf = 0; + + eeprom->register_read(eeprom); + + /* + * Clear data flags. + */ + eeprom->reg_data_in = 0; + eeprom->reg_data_out = 0; + eeprom->drive_data = 0; + + /* + * Start reading all bits. + */ + for (i = count; i > 0; i--) { + eeprom_93cx6_pulse_high(eeprom); + + eeprom->register_read(eeprom); + + /* + * Clear data_in flag. + */ + eeprom->reg_data_in = 0; + + /* + * Read if the bit has been set. + */ + if (eeprom->reg_data_out) + buf |= (1 << (i - 1)); + + eeprom_93cx6_pulse_low(eeprom); + } + + *data = buf; +} + +/** + * eeprom_93cx6_read - Read a word from eeprom + * @eeprom: Pointer to eeprom structure + * @word: Word index from where we should start reading + * @data: target pointer where the information will have to be stored + * + * This function will read the eeprom data as host-endian word + * into the given data pointer. 
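+ *
+ * A minimal sketch of a caller, with made-up accessor names (the real
+ * callbacks must read and write the owning device's EEPROM control
+ * register):
+ *
+ *	struct eeprom_93cx6 eeprom = {
+ *		.data		= priv,
+ *		.register_read	= mydrv_eeprom_reg_read,
+ *		.register_write	= mydrv_eeprom_reg_write,
+ *		.width		= PCI_EEPROM_WIDTH_93C46,
+ *	};
+ *	u16 word;
+ *
+ *	eeprom_93cx6_read(&eeprom, 0, &word);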
+ */ +void eeprom_93cx6_read(struct eeprom_93cx6 *eeprom, const u8 word, + u16 *data) +{ + u16 command; + + /* + * Initialize the eeprom register + */ + eeprom_93cx6_startup(eeprom); + + /* + * Select the read opcode and the word to be read. + */ + command = (PCI_EEPROM_READ_OPCODE << eeprom->width) | word; + eeprom_93cx6_write_bits(eeprom, command, + PCI_EEPROM_WIDTH_OPCODE + eeprom->width); + + /* + * Read the requested 16 bits. + */ + eeprom_93cx6_read_bits(eeprom, data, 16); + + /* + * Cleanup eeprom register. + */ + eeprom_93cx6_cleanup(eeprom); +} +EXPORT_SYMBOL_GPL(eeprom_93cx6_read); + +/** + * eeprom_93cx6_multiread - Read multiple words from eeprom + * @eeprom: Pointer to eeprom structure + * @word: Word index from where we should start reading + * @data: target pointer where the information will have to be stored + * @words: Number of words that should be read. + * + * This function will read all requested words from the eeprom, + * this is done by calling eeprom_93cx6_read() multiple times. + * But with the additional change that while the eeprom_93cx6_read + * will return host ordered bytes, this method will return little + * endian words. + */ +void eeprom_93cx6_multiread(struct eeprom_93cx6 *eeprom, const u8 word, + __le16 *data, const u16 words) +{ + unsigned int i; + u16 tmp; + + for (i = 0; i < words; i++) { + tmp = 0; + eeprom_93cx6_read(eeprom, word + i, &tmp); + data[i] = cpu_to_le16(tmp); + } +} +EXPORT_SYMBOL_GPL(eeprom_93cx6_multiread); + +/** + * eeprom_93cx6_readb - Read a byte from eeprom + * @eeprom: Pointer to eeprom structure + * @word: Byte index from where we should start reading + * @data: target pointer where the information will have to be stored + * + * This function will read a byte of the eeprom data + * into the given data pointer. + */ +void eeprom_93cx6_readb(struct eeprom_93cx6 *eeprom, const u8 byte, + u8 *data) +{ + u16 command; + u16 tmp; + + /* + * Initialize the eeprom register + */ + eeprom_93cx6_startup(eeprom); + + /* + * Select the read opcode and the byte to be read. + */ + command = (PCI_EEPROM_READ_OPCODE << (eeprom->width + 1)) | byte; + eeprom_93cx6_write_bits(eeprom, command, + PCI_EEPROM_WIDTH_OPCODE + eeprom->width + 1); + + /* + * Read the requested 8 bits. + */ + eeprom_93cx6_read_bits(eeprom, &tmp, 8); + *data = tmp & 0xff; + + /* + * Cleanup eeprom register. + */ + eeprom_93cx6_cleanup(eeprom); +} +EXPORT_SYMBOL_GPL(eeprom_93cx6_readb); + +/** + * eeprom_93cx6_multireadb - Read multiple bytes from eeprom + * @eeprom: Pointer to eeprom structure + * @byte: Index from where we should start reading + * @data: target pointer where the information will have to be stored + * @words: Number of bytes that should be read. + * + * This function will read all requested bytes from the eeprom, + * this is done by calling eeprom_93cx6_readb() multiple times. + */ +void eeprom_93cx6_multireadb(struct eeprom_93cx6 *eeprom, const u8 byte, + u8 *data, const u16 bytes) +{ + unsigned int i; + + for (i = 0; i < bytes; i++) + eeprom_93cx6_readb(eeprom, byte + i, &data[i]); +} +EXPORT_SYMBOL_GPL(eeprom_93cx6_multireadb); + +/** + * eeprom_93cx6_wren - set the write enable state + * @eeprom: Pointer to eeprom structure + * @enable: true to enable writes, otherwise disable writes + * + * Set the EEPROM write enable state to either allow or deny + * writes depending on the @enable value. 
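+ *
+ * A write is expected to be bracketed by enable/disable, e.g.:
+ *
+ *	eeprom_93cx6_wren(&eeprom, true);
+ *	eeprom_93cx6_write(&eeprom, addr, value);
+ *	eeprom_93cx6_wren(&eeprom, false);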
+ */ +void eeprom_93cx6_wren(struct eeprom_93cx6 *eeprom, bool enable) +{ + u16 command; + + /* start the command */ + eeprom_93cx6_startup(eeprom); + + /* create command to enable/disable */ + + command = enable ? PCI_EEPROM_EWEN_OPCODE : PCI_EEPROM_EWDS_OPCODE; + command <<= (eeprom->width - 2); + + eeprom_93cx6_write_bits(eeprom, command, + PCI_EEPROM_WIDTH_OPCODE + eeprom->width); + + eeprom_93cx6_cleanup(eeprom); +} +EXPORT_SYMBOL_GPL(eeprom_93cx6_wren); + +/** + * eeprom_93cx6_write - write data to the EEPROM + * @eeprom: Pointer to eeprom structure + * @addr: Address to write data to. + * @data: The data to write to address @addr. + * + * Write the @data to the specified @addr in the EEPROM and + * waiting for the device to finish writing. + * + * Note, since we do not expect large number of write operations + * we delay in between parts of the operation to avoid using excessive + * amounts of CPU time busy waiting. + */ +void eeprom_93cx6_write(struct eeprom_93cx6 *eeprom, u8 addr, u16 data) +{ + int timeout = 100; + u16 command; + + /* start the command */ + eeprom_93cx6_startup(eeprom); + + command = PCI_EEPROM_WRITE_OPCODE << eeprom->width; + command |= addr; + + /* send write command */ + eeprom_93cx6_write_bits(eeprom, command, + PCI_EEPROM_WIDTH_OPCODE + eeprom->width); + + /* send data */ + eeprom_93cx6_write_bits(eeprom, data, 16); + + /* get ready to check for busy */ + eeprom->drive_data = 0; + eeprom->reg_chip_select = 1; + eeprom->register_write(eeprom); + + /* wait at-least 250ns to get DO to be the busy signal */ + usleep_range(1000, 2000); + + /* wait for DO to go high to signify finish */ + + while (true) { + eeprom->register_read(eeprom); + + if (eeprom->reg_data_out) + break; + + usleep_range(1000, 2000); + + if (--timeout <= 0) { + printk(KERN_ERR "%s: timeout\n", __func__); + break; + } + } + + eeprom_93cx6_cleanup(eeprom); +} +EXPORT_SYMBOL_GPL(eeprom_93cx6_write); diff --git a/kernel/drivers/misc/eeprom/eeprom_93xx46.c b/kernel/drivers/misc/eeprom/eeprom_93xx46.c new file mode 100644 index 000000000..9ebeacdb8 --- /dev/null +++ b/kernel/drivers/misc/eeprom/eeprom_93xx46.c @@ -0,0 +1,398 @@ +/* + * Driver for 93xx46 EEPROMs + * + * (C) 2011 DENX Software Engineering, Anatolij Gustschin <agust@denx.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include <linux/spi/spi.h> +#include <linux/sysfs.h> +#include <linux/eeprom_93xx46.h> + +#define OP_START 0x4 +#define OP_WRITE (OP_START | 0x1) +#define OP_READ (OP_START | 0x2) +#define ADDR_EWDS 0x00 +#define ADDR_ERAL 0x20 +#define ADDR_EWEN 0x30 + +struct eeprom_93xx46_dev { + struct spi_device *spi; + struct eeprom_93xx46_platform_data *pdata; + struct bin_attribute bin; + struct mutex lock; + int addrlen; +}; + +static ssize_t +eeprom_93xx46_bin_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + struct eeprom_93xx46_dev *edev; + struct device *dev; + struct spi_message m; + struct spi_transfer t[2]; + int bits, ret; + u16 cmd_addr; + + dev = container_of(kobj, struct device, kobj); + edev = dev_get_drvdata(dev); + + if (unlikely(off >= edev->bin.size)) + return 0; + if ((off + count) > edev->bin.size) + count = edev->bin.size - off; + if (unlikely(!count)) + return count; + + cmd_addr = OP_READ << edev->addrlen; + + if (edev->addrlen == 7) { + cmd_addr |= off & 0x7f; + bits = 10; + } else { + cmd_addr |= off & 0x3f; + bits = 9; + } + + dev_dbg(&edev->spi->dev, "read cmd 0x%x, %d Hz\n", + cmd_addr, edev->spi->max_speed_hz); + + spi_message_init(&m); + memset(t, 0, sizeof(t)); + + t[0].tx_buf = (char *)&cmd_addr; + t[0].len = 2; + t[0].bits_per_word = bits; + spi_message_add_tail(&t[0], &m); + + t[1].rx_buf = buf; + t[1].len = count; + t[1].bits_per_word = 8; + spi_message_add_tail(&t[1], &m); + + mutex_lock(&edev->lock); + + if (edev->pdata->prepare) + edev->pdata->prepare(edev); + + ret = spi_sync(edev->spi, &m); + /* have to wait at least Tcsl ns */ + ndelay(250); + if (ret) { + dev_err(&edev->spi->dev, "read %zu bytes at %d: err. %d\n", + count, (int)off, ret); + } + + if (edev->pdata->finish) + edev->pdata->finish(edev); + + mutex_unlock(&edev->lock); + return ret ? : count; +} + +static int eeprom_93xx46_ew(struct eeprom_93xx46_dev *edev, int is_on) +{ + struct spi_message m; + struct spi_transfer t; + int bits, ret; + u16 cmd_addr; + + cmd_addr = OP_START << edev->addrlen; + if (edev->addrlen == 7) { + cmd_addr |= (is_on ? ADDR_EWEN : ADDR_EWDS) << 1; + bits = 10; + } else { + cmd_addr |= (is_on ? ADDR_EWEN : ADDR_EWDS); + bits = 9; + } + + dev_dbg(&edev->spi->dev, "ew cmd 0x%04x\n", cmd_addr); + + spi_message_init(&m); + memset(&t, 0, sizeof(t)); + + t.tx_buf = &cmd_addr; + t.len = 2; + t.bits_per_word = bits; + spi_message_add_tail(&t, &m); + + mutex_lock(&edev->lock); + + if (edev->pdata->prepare) + edev->pdata->prepare(edev); + + ret = spi_sync(edev->spi, &m); + /* have to wait at least Tcsl ns */ + ndelay(250); + if (ret) + dev_err(&edev->spi->dev, "erase/write %sable error %d\n", + is_on ? 
"en" : "dis", ret); + + if (edev->pdata->finish) + edev->pdata->finish(edev); + + mutex_unlock(&edev->lock); + return ret; +} + +static ssize_t +eeprom_93xx46_write_word(struct eeprom_93xx46_dev *edev, + const char *buf, unsigned off) +{ + struct spi_message m; + struct spi_transfer t[2]; + int bits, data_len, ret; + u16 cmd_addr; + + cmd_addr = OP_WRITE << edev->addrlen; + + if (edev->addrlen == 7) { + cmd_addr |= off & 0x7f; + bits = 10; + data_len = 1; + } else { + cmd_addr |= off & 0x3f; + bits = 9; + data_len = 2; + } + + dev_dbg(&edev->spi->dev, "write cmd 0x%x\n", cmd_addr); + + spi_message_init(&m); + memset(t, 0, sizeof(t)); + + t[0].tx_buf = (char *)&cmd_addr; + t[0].len = 2; + t[0].bits_per_word = bits; + spi_message_add_tail(&t[0], &m); + + t[1].tx_buf = buf; + t[1].len = data_len; + t[1].bits_per_word = 8; + spi_message_add_tail(&t[1], &m); + + ret = spi_sync(edev->spi, &m); + /* have to wait program cycle time Twc ms */ + mdelay(6); + return ret; +} + +static ssize_t +eeprom_93xx46_bin_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + struct eeprom_93xx46_dev *edev; + struct device *dev; + int i, ret, step = 1; + + dev = container_of(kobj, struct device, kobj); + edev = dev_get_drvdata(dev); + + if (unlikely(off >= edev->bin.size)) + return -EFBIG; + if ((off + count) > edev->bin.size) + count = edev->bin.size - off; + if (unlikely(!count)) + return count; + + /* only write even number of bytes on 16-bit devices */ + if (edev->addrlen == 6) { + step = 2; + count &= ~1; + } + + /* erase/write enable */ + ret = eeprom_93xx46_ew(edev, 1); + if (ret) + return ret; + + mutex_lock(&edev->lock); + + if (edev->pdata->prepare) + edev->pdata->prepare(edev); + + for (i = 0; i < count; i += step) { + ret = eeprom_93xx46_write_word(edev, &buf[i], off + i); + if (ret) { + dev_err(&edev->spi->dev, "write failed at %d: %d\n", + (int)off + i, ret); + break; + } + } + + if (edev->pdata->finish) + edev->pdata->finish(edev); + + mutex_unlock(&edev->lock); + + /* erase/write disable */ + eeprom_93xx46_ew(edev, 0); + return ret ? 
: count; +} + +static int eeprom_93xx46_eral(struct eeprom_93xx46_dev *edev) +{ + struct eeprom_93xx46_platform_data *pd = edev->pdata; + struct spi_message m; + struct spi_transfer t; + int bits, ret; + u16 cmd_addr; + + cmd_addr = OP_START << edev->addrlen; + if (edev->addrlen == 7) { + cmd_addr |= ADDR_ERAL << 1; + bits = 10; + } else { + cmd_addr |= ADDR_ERAL; + bits = 9; + } + + spi_message_init(&m); + memset(&t, 0, sizeof(t)); + + t.tx_buf = &cmd_addr; + t.len = 2; + t.bits_per_word = bits; + spi_message_add_tail(&t, &m); + + mutex_lock(&edev->lock); + + if (edev->pdata->prepare) + edev->pdata->prepare(edev); + + ret = spi_sync(edev->spi, &m); + if (ret) + dev_err(&edev->spi->dev, "erase error %d\n", ret); + /* have to wait erase cycle time Tec ms */ + mdelay(6); + + if (pd->finish) + pd->finish(edev); + + mutex_unlock(&edev->lock); + return ret; +} + +static ssize_t eeprom_93xx46_store_erase(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct eeprom_93xx46_dev *edev = dev_get_drvdata(dev); + int erase = 0, ret; + + sscanf(buf, "%d", &erase); + if (erase) { + ret = eeprom_93xx46_ew(edev, 1); + if (ret) + return ret; + ret = eeprom_93xx46_eral(edev); + if (ret) + return ret; + ret = eeprom_93xx46_ew(edev, 0); + if (ret) + return ret; + } + return count; +} +static DEVICE_ATTR(erase, S_IWUSR, NULL, eeprom_93xx46_store_erase); + +static int eeprom_93xx46_probe(struct spi_device *spi) +{ + struct eeprom_93xx46_platform_data *pd; + struct eeprom_93xx46_dev *edev; + int err; + + pd = spi->dev.platform_data; + if (!pd) { + dev_err(&spi->dev, "missing platform data\n"); + return -ENODEV; + } + + edev = kzalloc(sizeof(*edev), GFP_KERNEL); + if (!edev) + return -ENOMEM; + + if (pd->flags & EE_ADDR8) + edev->addrlen = 7; + else if (pd->flags & EE_ADDR16) + edev->addrlen = 6; + else { + dev_err(&spi->dev, "unspecified address type\n"); + err = -EINVAL; + goto fail; + } + + mutex_init(&edev->lock); + + edev->spi = spi_dev_get(spi); + edev->pdata = pd; + + sysfs_bin_attr_init(&edev->bin); + edev->bin.attr.name = "eeprom"; + edev->bin.attr.mode = S_IRUSR; + edev->bin.read = eeprom_93xx46_bin_read; + edev->bin.size = 128; + if (!(pd->flags & EE_READONLY)) { + edev->bin.write = eeprom_93xx46_bin_write; + edev->bin.attr.mode |= S_IWUSR; + } + + err = sysfs_create_bin_file(&spi->dev.kobj, &edev->bin); + if (err) + goto fail; + + dev_info(&spi->dev, "%d-bit eeprom %s\n", + (pd->flags & EE_ADDR8) ? 8 : 16, + (pd->flags & EE_READONLY) ? 
"(readonly)" : ""); + + if (!(pd->flags & EE_READONLY)) { + if (device_create_file(&spi->dev, &dev_attr_erase)) + dev_err(&spi->dev, "can't create erase interface\n"); + } + + spi_set_drvdata(spi, edev); + return 0; +fail: + kfree(edev); + return err; +} + +static int eeprom_93xx46_remove(struct spi_device *spi) +{ + struct eeprom_93xx46_dev *edev = spi_get_drvdata(spi); + + if (!(edev->pdata->flags & EE_READONLY)) + device_remove_file(&spi->dev, &dev_attr_erase); + + sysfs_remove_bin_file(&spi->dev.kobj, &edev->bin); + kfree(edev); + return 0; +} + +static struct spi_driver eeprom_93xx46_driver = { + .driver = { + .name = "93xx46", + .owner = THIS_MODULE, + }, + .probe = eeprom_93xx46_probe, + .remove = eeprom_93xx46_remove, +}; + +module_spi_driver(eeprom_93xx46_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Driver for 93xx46 EEPROMs"); +MODULE_AUTHOR("Anatolij Gustschin <agust@denx.de>"); +MODULE_ALIAS("spi:93xx46"); diff --git a/kernel/drivers/misc/eeprom/max6875.c b/kernel/drivers/misc/eeprom/max6875.c new file mode 100644 index 000000000..580ff9df5 --- /dev/null +++ b/kernel/drivers/misc/eeprom/max6875.c @@ -0,0 +1,214 @@ +/* + * max6875.c - driver for MAX6874/MAX6875 + * + * Copyright (C) 2005 Ben Gardner <bgardner@wabtec.com> + * + * Based on eeprom.c + * + * The MAX6875 has a bank of registers and two banks of EEPROM. + * Address ranges are defined as follows: + * * 0x0000 - 0x0046 = configuration registers + * * 0x8000 - 0x8046 = configuration EEPROM + * * 0x8100 - 0x82FF = user EEPROM + * + * This driver makes the user EEPROM available for read. + * + * The registers & config EEPROM should be accessed via i2c-dev. + * + * The MAX6875 ignores the lowest address bit, so each chip responds to + * two addresses - 0x50/0x51 and 0x52/0x53. + * + * Note that the MAX6875 uses i2c_smbus_write_byte_data() to set the read + * address, so this driver is destructive if loaded for the wrong EEPROM chip. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. 
+ */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/i2c.h> +#include <linux/mutex.h> + +/* The MAX6875 can only read/write 16 bytes at a time */ +#define SLICE_SIZE 16 +#define SLICE_BITS 4 + +/* USER EEPROM is at addresses 0x8100 - 0x82FF */ +#define USER_EEPROM_BASE 0x8100 +#define USER_EEPROM_SIZE 0x0200 +#define USER_EEPROM_SLICES 32 + +/* MAX6875 commands */ +#define MAX6875_CMD_BLK_READ 0x84 + +/* Each client has this additional data */ +struct max6875_data { + struct i2c_client *fake_client; + struct mutex update_lock; + + u32 valid; + u8 data[USER_EEPROM_SIZE]; + unsigned long last_updated[USER_EEPROM_SLICES]; +}; + +static void max6875_update_slice(struct i2c_client *client, int slice) +{ + struct max6875_data *data = i2c_get_clientdata(client); + int i, j, addr; + u8 *buf; + + if (slice >= USER_EEPROM_SLICES) + return; + + mutex_lock(&data->update_lock); + + buf = &data->data[slice << SLICE_BITS]; + + if (!(data->valid & (1 << slice)) || + time_after(jiffies, data->last_updated[slice])) { + + dev_dbg(&client->dev, "Starting update of slice %u\n", slice); + + data->valid &= ~(1 << slice); + + addr = USER_EEPROM_BASE + (slice << SLICE_BITS); + + /* select the eeprom address */ + if (i2c_smbus_write_byte_data(client, addr >> 8, addr & 0xFF)) { + dev_err(&client->dev, "address set failed\n"); + goto exit_up; + } + + if (i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_READ_I2C_BLOCK)) { + if (i2c_smbus_read_i2c_block_data(client, + MAX6875_CMD_BLK_READ, + SLICE_SIZE, + buf) != SLICE_SIZE) { + goto exit_up; + } + } else { + for (i = 0; i < SLICE_SIZE; i++) { + j = i2c_smbus_read_byte(client); + if (j < 0) { + goto exit_up; + } + buf[i] = j; + } + } + data->last_updated[slice] = jiffies; + data->valid |= (1 << slice); + } +exit_up: + mutex_unlock(&data->update_lock); +} + +static ssize_t max6875_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + struct i2c_client *client = kobj_to_i2c_client(kobj); + struct max6875_data *data = i2c_get_clientdata(client); + int slice, max_slice; + + if (off > USER_EEPROM_SIZE) + return 0; + + if (off + count > USER_EEPROM_SIZE) + count = USER_EEPROM_SIZE - off; + + /* refresh slices which contain requested bytes */ + max_slice = (off + count - 1) >> SLICE_BITS; + for (slice = (off >> SLICE_BITS); slice <= max_slice; slice++) + max6875_update_slice(client, slice); + + memcpy(buf, &data->data[off], count); + + return count; +} + +static struct bin_attribute user_eeprom_attr = { + .attr = { + .name = "eeprom", + .mode = S_IRUGO, + }, + .size = USER_EEPROM_SIZE, + .read = max6875_read, +}; + +static int max6875_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct i2c_adapter *adapter = client->adapter; + struct max6875_data *data; + int err; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WRITE_BYTE_DATA + | I2C_FUNC_SMBUS_READ_BYTE)) + return -ENODEV; + + /* Only bind to even addresses */ + if (client->addr & 1) + return -ENODEV; + + if (!(data = kzalloc(sizeof(struct max6875_data), GFP_KERNEL))) + return -ENOMEM; + + /* A fake client is created on the odd address */ + data->fake_client = i2c_new_dummy(client->adapter, client->addr + 1); + if (!data->fake_client) { + err = -ENOMEM; + goto exit_kfree; + } + + /* Init real i2c_client */ + i2c_set_clientdata(client, data); + mutex_init(&data->update_lock); + + err = sysfs_create_bin_file(&client->dev.kobj, &user_eeprom_attr); + if (err) + goto 
exit_remove_fake; + + return 0; + +exit_remove_fake: + i2c_unregister_device(data->fake_client); +exit_kfree: + kfree(data); + return err; +} + +static int max6875_remove(struct i2c_client *client) +{ + struct max6875_data *data = i2c_get_clientdata(client); + + i2c_unregister_device(data->fake_client); + + sysfs_remove_bin_file(&client->dev.kobj, &user_eeprom_attr); + kfree(data); + + return 0; +} + +static const struct i2c_device_id max6875_id[] = { + { "max6875", 0 }, + { } +}; + +static struct i2c_driver max6875_driver = { + .driver = { + .name = "max6875", + }, + .probe = max6875_probe, + .remove = max6875_remove, + .id_table = max6875_id, +}; + +module_i2c_driver(max6875_driver); + +MODULE_AUTHOR("Ben Gardner <bgardner@wabtec.com>"); +MODULE_DESCRIPTION("MAX6875 driver"); +MODULE_LICENSE("GPL"); diff --git a/kernel/drivers/misc/eeprom/sunxi_sid.c b/kernel/drivers/misc/eeprom/sunxi_sid.c new file mode 100644 index 000000000..8385177ff --- /dev/null +++ b/kernel/drivers/misc/eeprom/sunxi_sid.c @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2013 Oliver Schinagl <oliver@schinagl.nl> + * http://www.linux-sunxi.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * This driver exposes the Allwinner security ID, efuses exported in byte- + * sized chunks. + */ + +#include <linux/compiler.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/export.h> +#include <linux/fs.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/kobject.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/random.h> +#include <linux/slab.h> +#include <linux/stat.h> +#include <linux/sysfs.h> +#include <linux/types.h> + +#define DRV_NAME "sunxi-sid" + +struct sunxi_sid_data { + void __iomem *reg_base; + unsigned int keysize; +}; + +/* We read the entire key, due to a 32 bit read alignment requirement. Since we + * want to return the requested byte, this results in somewhat slower code and + * uses 4 times more reads as needed but keeps code simpler. Since the SID is + * only very rarely probed, this is not really an issue. 
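+ *
+ * Worked example: reading the byte at offset 6 fetches the big-endian
+ * word at round_down(6, 4) == 4 and shifts it right by (6 % 4) * 8 == 16
+ * bits; the implicit truncation to u8 then yields the wanted byte.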
+ */ +static u8 sunxi_sid_read_byte(const struct sunxi_sid_data *sid_data, + const unsigned int offset) +{ + u32 sid_key; + + if (offset >= sid_data->keysize) + return 0; + + sid_key = ioread32be(sid_data->reg_base + round_down(offset, 4)); + sid_key >>= (offset % 4) * 8; + + return sid_key; /* Only return the last byte */ +} + +static ssize_t sid_read(struct file *fd, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t pos, size_t size) +{ + struct platform_device *pdev; + struct sunxi_sid_data *sid_data; + int i; + + pdev = to_platform_device(kobj_to_dev(kobj)); + sid_data = platform_get_drvdata(pdev); + + if (pos < 0 || pos >= sid_data->keysize) + return 0; + if (size > sid_data->keysize - pos) + size = sid_data->keysize - pos; + + for (i = 0; i < size; i++) + buf[i] = sunxi_sid_read_byte(sid_data, pos + i); + + return i; +} + +static struct bin_attribute sid_bin_attr = { + .attr = { .name = "eeprom", .mode = S_IRUGO, }, + .read = sid_read, +}; + +static int sunxi_sid_remove(struct platform_device *pdev) +{ + device_remove_bin_file(&pdev->dev, &sid_bin_attr); + dev_dbg(&pdev->dev, "driver unloaded\n"); + + return 0; +} + +static const struct of_device_id sunxi_sid_of_match[] = { + { .compatible = "allwinner,sun4i-a10-sid", .data = (void *)16}, + { .compatible = "allwinner,sun7i-a20-sid", .data = (void *)512}, + {/* sentinel */}, +}; +MODULE_DEVICE_TABLE(of, sunxi_sid_of_match); + +static int sunxi_sid_probe(struct platform_device *pdev) +{ + struct sunxi_sid_data *sid_data; + struct resource *res; + const struct of_device_id *of_dev_id; + u8 *entropy; + unsigned int i; + + sid_data = devm_kzalloc(&pdev->dev, sizeof(struct sunxi_sid_data), + GFP_KERNEL); + if (!sid_data) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + sid_data->reg_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(sid_data->reg_base)) + return PTR_ERR(sid_data->reg_base); + + of_dev_id = of_match_device(sunxi_sid_of_match, &pdev->dev); + if (!of_dev_id) + return -ENODEV; + sid_data->keysize = (int)of_dev_id->data; + + platform_set_drvdata(pdev, sid_data); + + sid_bin_attr.size = sid_data->keysize; + if (device_create_bin_file(&pdev->dev, &sid_bin_attr)) + return -ENODEV; + + entropy = kzalloc(sizeof(u8) * sid_data->keysize, GFP_KERNEL); + for (i = 0; i < sid_data->keysize; i++) + entropy[i] = sunxi_sid_read_byte(sid_data, i); + add_device_randomness(entropy, sid_data->keysize); + kfree(entropy); + + dev_dbg(&pdev->dev, "loaded\n"); + + return 0; +} + +static struct platform_driver sunxi_sid_driver = { + .probe = sunxi_sid_probe, + .remove = sunxi_sid_remove, + .driver = { + .name = DRV_NAME, + .of_match_table = sunxi_sid_of_match, + }, +}; +module_platform_driver(sunxi_sid_driver); + +MODULE_AUTHOR("Oliver Schinagl <oliver@schinagl.nl>"); +MODULE_DESCRIPTION("Allwinner sunxi security id driver"); +MODULE_LICENSE("GPL"); diff --git a/kernel/drivers/misc/enclosure.c b/kernel/drivers/misc/enclosure.c new file mode 100644 index 000000000..65fed7146 --- /dev/null +++ b/kernel/drivers/misc/enclosure.c @@ -0,0 +1,691 @@ +/* + * Enclosure Services + * + * Copyright (C) 2008 James Bottomley <James.Bottomley@HansenPartnership.com> + * +**----------------------------------------------------------------------------- +** +** This program is free software; you can redistribute it and/or +** modify it under the terms of the GNU General Public License +** version 2 as published by the Free Software Foundation. 
+** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software +** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +** +**----------------------------------------------------------------------------- +*/ +#include <linux/device.h> +#include <linux/enclosure.h> +#include <linux/err.h> +#include <linux/list.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/slab.h> + +static LIST_HEAD(container_list); +static DEFINE_MUTEX(container_list_lock); +static struct class enclosure_class; + +/** + * enclosure_find - find an enclosure given a parent device + * @dev: the parent to match against + * @start: Optional enclosure device to start from (NULL if none) + * + * Looks through the list of registered enclosures to find all those + * with @dev as a parent. Returns NULL if no enclosure is + * found. @start can be used as a starting point to obtain multiple + * enclosures per parent (should begin with NULL and then be set to + * each returned enclosure device). Obtains a reference to the + * enclosure class device which must be released with device_put(). + * If @start is not NULL, a reference must be taken on it which is + * released before returning (this allows a loop through all + * enclosures to exit with only the reference on the enclosure of + * interest held). Note that the @dev may correspond to the actual + * device housing the enclosure, in which case no iteration via @start + * is required. + */ +struct enclosure_device *enclosure_find(struct device *dev, + struct enclosure_device *start) +{ + struct enclosure_device *edev; + + mutex_lock(&container_list_lock); + edev = list_prepare_entry(start, &container_list, node); + if (start) + put_device(&start->edev); + + list_for_each_entry_continue(edev, &container_list, node) { + struct device *parent = edev->edev.parent; + /* parent might not be immediate, so iterate up to + * the root of the tree if necessary */ + while (parent) { + if (parent == dev) { + get_device(&edev->edev); + mutex_unlock(&container_list_lock); + return edev; + } + parent = parent->parent; + } + } + mutex_unlock(&container_list_lock); + + return NULL; +} +EXPORT_SYMBOL_GPL(enclosure_find); + +/** + * enclosure_for_each_device - calls a function for each enclosure + * @fn: the function to call + * @data: the data to pass to each call + * + * Loops over all the enclosures calling the function. 
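+ *
+ * For illustration (the callback below is ours, not part of this file),
+ * a user counting registered enclosures could pass:
+ *
+ *	static int count_one(struct enclosure_device *edev, void *data)
+ *	{
+ *		(*(int *)data)++;
+ *		return 0;
+ *	}
+ *
+ *	int n = 0;
+ *	enclosure_for_each_device(count_one, &n);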
+ * + * Note, this function uses a mutex which will be held across calls to + * @fn, so it must have non atomic context, and @fn may (although it + * should not) sleep or otherwise cause the mutex to be held for + * indefinite periods + */ +int enclosure_for_each_device(int (*fn)(struct enclosure_device *, void *), + void *data) +{ + int error = 0; + struct enclosure_device *edev; + + mutex_lock(&container_list_lock); + list_for_each_entry(edev, &container_list, node) { + error = fn(edev, data); + if (error) + break; + } + mutex_unlock(&container_list_lock); + + return error; +} +EXPORT_SYMBOL_GPL(enclosure_for_each_device); + +/** + * enclosure_register - register device as an enclosure + * + * @dev: device containing the enclosure + * @components: number of components in the enclosure + * + * This sets up the device for being an enclosure. Note that @dev does + * not have to be a dedicated enclosure device. It may be some other type + * of device that additionally responds to enclosure services + */ +struct enclosure_device * +enclosure_register(struct device *dev, const char *name, int components, + struct enclosure_component_callbacks *cb) +{ + struct enclosure_device *edev = + kzalloc(sizeof(struct enclosure_device) + + sizeof(struct enclosure_component)*components, + GFP_KERNEL); + int err, i; + + BUG_ON(!cb); + + if (!edev) + return ERR_PTR(-ENOMEM); + + edev->components = components; + + edev->edev.class = &enclosure_class; + edev->edev.parent = get_device(dev); + edev->cb = cb; + dev_set_name(&edev->edev, "%s", name); + err = device_register(&edev->edev); + if (err) + goto err; + + for (i = 0; i < components; i++) { + edev->component[i].number = -1; + edev->component[i].slot = -1; + edev->component[i].power_status = 1; + } + + mutex_lock(&container_list_lock); + list_add_tail(&edev->node, &container_list); + mutex_unlock(&container_list_lock); + + return edev; + + err: + put_device(edev->edev.parent); + kfree(edev); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(enclosure_register); + +static struct enclosure_component_callbacks enclosure_null_callbacks; + +/** + * enclosure_unregister - remove an enclosure + * + * @edev: the registered enclosure to remove; + */ +void enclosure_unregister(struct enclosure_device *edev) +{ + int i; + + mutex_lock(&container_list_lock); + list_del(&edev->node); + mutex_unlock(&container_list_lock); + + for (i = 0; i < edev->components; i++) + if (edev->component[i].number != -1) + device_unregister(&edev->component[i].cdev); + + /* prevent any callbacks into service user */ + edev->cb = &enclosure_null_callbacks; + device_unregister(&edev->edev); +} +EXPORT_SYMBOL_GPL(enclosure_unregister); + +#define ENCLOSURE_NAME_SIZE 64 +#define COMPONENT_NAME_SIZE 64 + +static void enclosure_link_name(struct enclosure_component *cdev, char *name) +{ + strcpy(name, "enclosure_device:"); + strcat(name, dev_name(&cdev->cdev)); +} + +static void enclosure_remove_links(struct enclosure_component *cdev) +{ + char name[ENCLOSURE_NAME_SIZE]; + + enclosure_link_name(cdev, name); + + /* + * In odd circumstances, like multipath devices, something else may + * already have removed the links, so check for this condition first. 
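+	 * (A kobject's ->sd pointer is cleared once its sysfs node is
+	 * gone, so testing it here is a safe way to detect that case.)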
+ */ + if (cdev->dev->kobj.sd) + sysfs_remove_link(&cdev->dev->kobj, name); + + if (cdev->cdev.kobj.sd) + sysfs_remove_link(&cdev->cdev.kobj, "device"); +} + +static int enclosure_add_links(struct enclosure_component *cdev) +{ + int error; + char name[ENCLOSURE_NAME_SIZE]; + + error = sysfs_create_link(&cdev->cdev.kobj, &cdev->dev->kobj, "device"); + if (error) + return error; + + enclosure_link_name(cdev, name); + error = sysfs_create_link(&cdev->dev->kobj, &cdev->cdev.kobj, name); + if (error) + sysfs_remove_link(&cdev->cdev.kobj, "device"); + + return error; +} + +static void enclosure_release(struct device *cdev) +{ + struct enclosure_device *edev = to_enclosure_device(cdev); + + put_device(cdev->parent); + kfree(edev); +} + +static void enclosure_component_release(struct device *dev) +{ + struct enclosure_component *cdev = to_enclosure_component(dev); + + if (cdev->dev) { + enclosure_remove_links(cdev); + put_device(cdev->dev); + } + put_device(dev->parent); +} + +static struct enclosure_component * +enclosure_component_find_by_name(struct enclosure_device *edev, + const char *name) +{ + int i; + const char *cname; + struct enclosure_component *ecomp; + + if (!edev || !name || !name[0]) + return NULL; + + for (i = 0; i < edev->components; i++) { + ecomp = &edev->component[i]; + cname = dev_name(&ecomp->cdev); + if (ecomp->number != -1 && + cname && cname[0] && + !strcmp(cname, name)) + return ecomp; + } + + return NULL; +} + +static const struct attribute_group *enclosure_component_groups[]; + +/** + * enclosure_component_alloc - prepare a new enclosure component + * @edev: the enclosure to add the component + * @num: the device number + * @type: the type of component being added + * @name: an optional name to appear in sysfs (leave NULL if none) + * + * The name is optional for enclosures that give their components a unique + * name. If not, leave the field NULL and a name will be assigned. + * + * Returns a pointer to the enclosure component or an error. + */ +struct enclosure_component * +enclosure_component_alloc(struct enclosure_device *edev, + unsigned int number, + enum enclosure_component_type type, + const char *name) +{ + struct enclosure_component *ecomp; + struct device *cdev; + int i; + char newname[COMPONENT_NAME_SIZE]; + + if (number >= edev->components) + return ERR_PTR(-EINVAL); + + ecomp = &edev->component[number]; + + if (ecomp->number != -1) + return ERR_PTR(-EINVAL); + + ecomp->type = type; + ecomp->number = number; + cdev = &ecomp->cdev; + cdev->parent = get_device(&edev->edev); + + if (name && name[0]) { + /* Some hardware (e.g. enclosure in RX300 S6) has components + * with non unique names. Registering duplicates in sysfs + * will lead to warnings during bootup. So make the names + * unique by appending consecutive numbers -1, -2, ... 
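+		 * (e.g. a second component named "SLOT 1" is registered as
+		 * "SLOT 1-1", a third as "SLOT 1-2").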
*/ + i = 1; + snprintf(newname, COMPONENT_NAME_SIZE, + "%s", name); + while (enclosure_component_find_by_name(edev, newname)) + snprintf(newname, COMPONENT_NAME_SIZE, + "%s-%i", name, i++); + dev_set_name(cdev, "%s", newname); + } else + dev_set_name(cdev, "%u", number); + + cdev->release = enclosure_component_release; + cdev->groups = enclosure_component_groups; + + return ecomp; +} +EXPORT_SYMBOL_GPL(enclosure_component_alloc); + +/** + * enclosure_component_register - publishes an initialized enclosure component + * @ecomp: component to add + * + * Returns 0 on successful registration, releases the component otherwise + */ +int enclosure_component_register(struct enclosure_component *ecomp) +{ + struct device *cdev; + int err; + + cdev = &ecomp->cdev; + err = device_register(cdev); + if (err) { + ecomp->number = -1; + put_device(cdev); + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(enclosure_component_register); + +/** + * enclosure_add_device - add a device as being part of an enclosure + * @edev: the enclosure device being added to. + * @num: the number of the component + * @dev: the device being added + * + * Declares a real device to reside in slot (or identifier) @num of an + * enclosure. This will cause the relevant sysfs links to appear. + * This function may also be used to change a device associated with + * an enclosure without having to call enclosure_remove_device() in + * between. + * + * Returns zero on success or an error. + */ +int enclosure_add_device(struct enclosure_device *edev, int component, + struct device *dev) +{ + struct enclosure_component *cdev; + + if (!edev || component >= edev->components) + return -EINVAL; + + cdev = &edev->component[component]; + + if (cdev->dev == dev) + return -EEXIST; + + if (cdev->dev) + enclosure_remove_links(cdev); + + put_device(cdev->dev); + cdev->dev = get_device(dev); + return enclosure_add_links(cdev); +} +EXPORT_SYMBOL_GPL(enclosure_add_device); + +/** + * enclosure_remove_device - remove a device from an enclosure + * @edev: the enclosure device + * @num: the number of the component to remove + * + * Returns zero on success or an error. 
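+ *
+ * Note that on success the component's struct device is torn down with
+ * device_del() and immediately re-added, so its sysfs directory is
+ * recreated without the old "device" link.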
+ * + */ +int enclosure_remove_device(struct enclosure_device *edev, struct device *dev) +{ + struct enclosure_component *cdev; + int i; + + if (!edev || !dev) + return -EINVAL; + + for (i = 0; i < edev->components; i++) { + cdev = &edev->component[i]; + if (cdev->dev == dev) { + enclosure_remove_links(cdev); + device_del(&cdev->cdev); + put_device(dev); + cdev->dev = NULL; + return device_add(&cdev->cdev); + } + } + return -ENODEV; +} +EXPORT_SYMBOL_GPL(enclosure_remove_device); + +/* + * sysfs pieces below + */ + +static ssize_t components_show(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct enclosure_device *edev = to_enclosure_device(cdev); + + return snprintf(buf, 40, "%d\n", edev->components); +} +static DEVICE_ATTR_RO(components); + +static ssize_t id_show(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + struct enclosure_device *edev = to_enclosure_device(cdev); + + if (edev->cb->show_id) + return edev->cb->show_id(edev, buf); + return -EINVAL; +} +static DEVICE_ATTR_RO(id); + +static struct attribute *enclosure_class_attrs[] = { + &dev_attr_components.attr, + &dev_attr_id.attr, + NULL, +}; +ATTRIBUTE_GROUPS(enclosure_class); + +static struct class enclosure_class = { + .name = "enclosure", + .owner = THIS_MODULE, + .dev_release = enclosure_release, + .dev_groups = enclosure_class_groups, +}; + +static const char *const enclosure_status [] = { + [ENCLOSURE_STATUS_UNSUPPORTED] = "unsupported", + [ENCLOSURE_STATUS_OK] = "OK", + [ENCLOSURE_STATUS_CRITICAL] = "critical", + [ENCLOSURE_STATUS_NON_CRITICAL] = "non-critical", + [ENCLOSURE_STATUS_UNRECOVERABLE] = "unrecoverable", + [ENCLOSURE_STATUS_NOT_INSTALLED] = "not installed", + [ENCLOSURE_STATUS_UNKNOWN] = "unknown", + [ENCLOSURE_STATUS_UNAVAILABLE] = "unavailable", + [ENCLOSURE_STATUS_MAX] = NULL, +}; + +static const char *const enclosure_type [] = { + [ENCLOSURE_COMPONENT_DEVICE] = "device", + [ENCLOSURE_COMPONENT_ARRAY_DEVICE] = "array device", +}; + +static ssize_t get_component_fault(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + + if (edev->cb->get_fault) + edev->cb->get_fault(edev, ecomp); + return snprintf(buf, 40, "%d\n", ecomp->fault); +} + +static ssize_t set_component_fault(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + int val = simple_strtoul(buf, NULL, 0); + + if (edev->cb->set_fault) + edev->cb->set_fault(edev, ecomp, val); + return count; +} + +static ssize_t get_component_status(struct device *cdev, + struct device_attribute *attr,char *buf) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + + if (edev->cb->get_status) + edev->cb->get_status(edev, ecomp); + return snprintf(buf, 40, "%s\n", enclosure_status[ecomp->status]); +} + +static ssize_t set_component_status(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + int i; + + for (i = 0; enclosure_status[i]; i++) { + if (strncmp(buf, enclosure_status[i], + strlen(enclosure_status[i])) == 0 && + 
(buf[strlen(enclosure_status[i])] == '\n' || + buf[strlen(enclosure_status[i])] == '\0')) + break; + } + + if (enclosure_status[i] && edev->cb->set_status) { + edev->cb->set_status(edev, ecomp, i); + return count; + } else + return -EINVAL; +} + +static ssize_t get_component_active(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + + if (edev->cb->get_active) + edev->cb->get_active(edev, ecomp); + return snprintf(buf, 40, "%d\n", ecomp->active); +} + +static ssize_t set_component_active(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + int val = simple_strtoul(buf, NULL, 0); + + if (edev->cb->set_active) + edev->cb->set_active(edev, ecomp, val); + return count; +} + +static ssize_t get_component_locate(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + + if (edev->cb->get_locate) + edev->cb->get_locate(edev, ecomp); + return snprintf(buf, 40, "%d\n", ecomp->locate); +} + +static ssize_t set_component_locate(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + int val = simple_strtoul(buf, NULL, 0); + + if (edev->cb->set_locate) + edev->cb->set_locate(edev, ecomp, val); + return count; +} + +static ssize_t get_component_power_status(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + + if (edev->cb->get_power_status) + edev->cb->get_power_status(edev, ecomp); + return snprintf(buf, 40, "%s\n", ecomp->power_status ? 
"on" : "off"); +} + +static ssize_t set_component_power_status(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + int val; + + if (strncmp(buf, "on", 2) == 0 && + (buf[2] == '\n' || buf[2] == '\0')) + val = 1; + else if (strncmp(buf, "off", 3) == 0 && + (buf[3] == '\n' || buf[3] == '\0')) + val = 0; + else + return -EINVAL; + + if (edev->cb->set_power_status) + edev->cb->set_power_status(edev, ecomp, val); + return count; +} + +static ssize_t get_component_type(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct enclosure_component *ecomp = to_enclosure_component(cdev); + + return snprintf(buf, 40, "%s\n", enclosure_type[ecomp->type]); +} + +static ssize_t get_component_slot(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct enclosure_component *ecomp = to_enclosure_component(cdev); + int slot; + + /* if the enclosure does not override then use 'number' as a stand-in */ + if (ecomp->slot >= 0) + slot = ecomp->slot; + else + slot = ecomp->number; + + return snprintf(buf, 40, "%d\n", slot); +} + +static DEVICE_ATTR(fault, S_IRUGO | S_IWUSR, get_component_fault, + set_component_fault); +static DEVICE_ATTR(status, S_IRUGO | S_IWUSR, get_component_status, + set_component_status); +static DEVICE_ATTR(active, S_IRUGO | S_IWUSR, get_component_active, + set_component_active); +static DEVICE_ATTR(locate, S_IRUGO | S_IWUSR, get_component_locate, + set_component_locate); +static DEVICE_ATTR(power_status, S_IRUGO | S_IWUSR, get_component_power_status, + set_component_power_status); +static DEVICE_ATTR(type, S_IRUGO, get_component_type, NULL); +static DEVICE_ATTR(slot, S_IRUGO, get_component_slot, NULL); + +static struct attribute *enclosure_component_attrs[] = { + &dev_attr_fault.attr, + &dev_attr_status.attr, + &dev_attr_active.attr, + &dev_attr_locate.attr, + &dev_attr_power_status.attr, + &dev_attr_type.attr, + &dev_attr_slot.attr, + NULL +}; +ATTRIBUTE_GROUPS(enclosure_component); + +static int __init enclosure_init(void) +{ + int err; + + err = class_register(&enclosure_class); + if (err) + return err; + + return 0; +} + +static void __exit enclosure_exit(void) +{ + class_unregister(&enclosure_class); +} + +module_init(enclosure_init); +module_exit(enclosure_exit); + +MODULE_AUTHOR("James Bottomley"); +MODULE_DESCRIPTION("Enclosure Services"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/misc/fsa9480.c b/kernel/drivers/misc/fsa9480.c new file mode 100644 index 000000000..71d2793b3 --- /dev/null +++ b/kernel/drivers/misc/fsa9480.c @@ -0,0 +1,549 @@ +/* + * fsa9480.c - FSA9480 micro USB switch device driver + * + * Copyright (C) 2010 Samsung Electronics + * Minkyu Kang <mk7.kang@samsung.com> + * Wonguk Jeong <wonguk.jeong@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/err.h> +#include <linux/i2c.h> +#include <linux/platform_data/fsa9480.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/workqueue.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/pm_runtime.h> + +/* FSA9480 I2C registers */ +#define FSA9480_REG_DEVID 0x01 +#define FSA9480_REG_CTRL 0x02 +#define FSA9480_REG_INT1 0x03 +#define FSA9480_REG_INT2 0x04 +#define FSA9480_REG_INT1_MASK 0x05 +#define FSA9480_REG_INT2_MASK 0x06 +#define FSA9480_REG_ADC 0x07 +#define FSA9480_REG_TIMING1 0x08 +#define FSA9480_REG_TIMING2 0x09 +#define FSA9480_REG_DEV_T1 0x0a +#define FSA9480_REG_DEV_T2 0x0b +#define FSA9480_REG_BTN1 0x0c +#define FSA9480_REG_BTN2 0x0d +#define FSA9480_REG_CK 0x0e +#define FSA9480_REG_CK_INT1 0x0f +#define FSA9480_REG_CK_INT2 0x10 +#define FSA9480_REG_CK_INTMASK1 0x11 +#define FSA9480_REG_CK_INTMASK2 0x12 +#define FSA9480_REG_MANSW1 0x13 +#define FSA9480_REG_MANSW2 0x14 + +/* Control */ +#define CON_SWITCH_OPEN (1 << 4) +#define CON_RAW_DATA (1 << 3) +#define CON_MANUAL_SW (1 << 2) +#define CON_WAIT (1 << 1) +#define CON_INT_MASK (1 << 0) +#define CON_MASK (CON_SWITCH_OPEN | CON_RAW_DATA | \ + CON_MANUAL_SW | CON_WAIT) + +/* Device Type 1 */ +#define DEV_USB_OTG (1 << 7) +#define DEV_DEDICATED_CHG (1 << 6) +#define DEV_USB_CHG (1 << 5) +#define DEV_CAR_KIT (1 << 4) +#define DEV_UART (1 << 3) +#define DEV_USB (1 << 2) +#define DEV_AUDIO_2 (1 << 1) +#define DEV_AUDIO_1 (1 << 0) + +#define DEV_T1_USB_MASK (DEV_USB_OTG | DEV_USB) +#define DEV_T1_UART_MASK (DEV_UART) +#define DEV_T1_CHARGER_MASK (DEV_DEDICATED_CHG | DEV_USB_CHG) + +/* Device Type 2 */ +#define DEV_AV (1 << 6) +#define DEV_TTY (1 << 5) +#define DEV_PPD (1 << 4) +#define DEV_JIG_UART_OFF (1 << 3) +#define DEV_JIG_UART_ON (1 << 2) +#define DEV_JIG_USB_OFF (1 << 1) +#define DEV_JIG_USB_ON (1 << 0) + +#define DEV_T2_USB_MASK (DEV_JIG_USB_OFF | DEV_JIG_USB_ON) +#define DEV_T2_UART_MASK (DEV_JIG_UART_OFF | DEV_JIG_UART_ON) +#define DEV_T2_JIG_MASK (DEV_JIG_USB_OFF | DEV_JIG_USB_ON | \ + DEV_JIG_UART_OFF | DEV_JIG_UART_ON) + +/* + * Manual Switch + * D- [7:5] / D+ [4:2] + * 000: Open all / 001: USB / 010: AUDIO / 011: UART / 100: V_AUDIO + */ +#define SW_VAUDIO ((4 << 5) | (4 << 2)) +#define SW_UART ((3 << 5) | (3 << 2)) +#define SW_AUDIO ((2 << 5) | (2 << 2)) +#define SW_DHOST ((1 << 5) | (1 << 2)) +#define SW_AUTO ((0 << 5) | (0 << 2)) + +/* Interrupt 1 */ +#define INT_DETACH (1 << 1) +#define INT_ATTACH (1 << 0) + +struct fsa9480_usbsw { + struct i2c_client *client; + struct fsa9480_platform_data *pdata; + int dev1; + int dev2; + int mansw; +}; + +static struct fsa9480_usbsw *chip; + +static int fsa9480_write_reg(struct i2c_client *client, + int reg, int value) +{ + int ret; + + ret = i2c_smbus_write_byte_data(client, reg, value); + + if (ret < 0) + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + + return ret; +} + +static int fsa9480_read_reg(struct i2c_client *client, int reg) +{ + int ret; + + ret = i2c_smbus_read_byte_data(client, reg); + + if (ret < 0) + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + + return ret; +} + +static int fsa9480_read_irq(struct i2c_client *client, int *value) +{ + int ret; + + ret = i2c_smbus_read_i2c_block_data(client, + FSA9480_REG_INT1, 2, (u8 *)value); + *value &= 0xffff; + + if (ret < 0) + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + + return ret; +} + +static void fsa9480_set_switch(const char *buf) +{ + struct fsa9480_usbsw *usbsw = 
chip; + struct i2c_client *client = usbsw->client; + unsigned int value; + unsigned int path = 0; + + value = fsa9480_read_reg(client, FSA9480_REG_CTRL); + + if (!strncmp(buf, "VAUDIO", 6)) { + path = SW_VAUDIO; + value &= ~CON_MANUAL_SW; + } else if (!strncmp(buf, "UART", 4)) { + path = SW_UART; + value &= ~CON_MANUAL_SW; + } else if (!strncmp(buf, "AUDIO", 5)) { + path = SW_AUDIO; + value &= ~CON_MANUAL_SW; + } else if (!strncmp(buf, "DHOST", 5)) { + path = SW_DHOST; + value &= ~CON_MANUAL_SW; + } else if (!strncmp(buf, "AUTO", 4)) { + path = SW_AUTO; + value |= CON_MANUAL_SW; + } else { + printk(KERN_ERR "Wrong command\n"); + return; + } + + usbsw->mansw = path; + fsa9480_write_reg(client, FSA9480_REG_MANSW1, path); + fsa9480_write_reg(client, FSA9480_REG_CTRL, value); +} + +static ssize_t fsa9480_get_switch(char *buf) +{ + struct fsa9480_usbsw *usbsw = chip; + struct i2c_client *client = usbsw->client; + unsigned int value; + + value = fsa9480_read_reg(client, FSA9480_REG_MANSW1); + + if (value == SW_VAUDIO) + return sprintf(buf, "VAUDIO\n"); + else if (value == SW_UART) + return sprintf(buf, "UART\n"); + else if (value == SW_AUDIO) + return sprintf(buf, "AUDIO\n"); + else if (value == SW_DHOST) + return sprintf(buf, "DHOST\n"); + else if (value == SW_AUTO) + return sprintf(buf, "AUTO\n"); + else + return sprintf(buf, "%x", value); +} + +static ssize_t fsa9480_show_device(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct fsa9480_usbsw *usbsw = dev_get_drvdata(dev); + struct i2c_client *client = usbsw->client; + int dev1, dev2; + + dev1 = fsa9480_read_reg(client, FSA9480_REG_DEV_T1); + dev2 = fsa9480_read_reg(client, FSA9480_REG_DEV_T2); + + if (!dev1 && !dev2) + return sprintf(buf, "NONE\n"); + + /* USB */ + if (dev1 & DEV_T1_USB_MASK || dev2 & DEV_T2_USB_MASK) + return sprintf(buf, "USB\n"); + + /* UART */ + if (dev1 & DEV_T1_UART_MASK || dev2 & DEV_T2_UART_MASK) + return sprintf(buf, "UART\n"); + + /* CHARGER */ + if (dev1 & DEV_T1_CHARGER_MASK) + return sprintf(buf, "CHARGER\n"); + + /* JIG */ + if (dev2 & DEV_T2_JIG_MASK) + return sprintf(buf, "JIG\n"); + + return sprintf(buf, "UNKNOWN\n"); +} + +static ssize_t fsa9480_show_manualsw(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return fsa9480_get_switch(buf); + +} + +static ssize_t fsa9480_set_manualsw(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + fsa9480_set_switch(buf); + + return count; +} + +static DEVICE_ATTR(device, S_IRUGO, fsa9480_show_device, NULL); +static DEVICE_ATTR(switch, S_IRUGO | S_IWUSR, + fsa9480_show_manualsw, fsa9480_set_manualsw); + +static struct attribute *fsa9480_attributes[] = { + &dev_attr_device.attr, + &dev_attr_switch.attr, + NULL +}; + +static const struct attribute_group fsa9480_group = { + .attrs = fsa9480_attributes, +}; + +static void fsa9480_detect_dev(struct fsa9480_usbsw *usbsw, int intr) +{ + int val1, val2, ctrl; + struct fsa9480_platform_data *pdata = usbsw->pdata; + struct i2c_client *client = usbsw->client; + + val1 = fsa9480_read_reg(client, FSA9480_REG_DEV_T1); + val2 = fsa9480_read_reg(client, FSA9480_REG_DEV_T2); + ctrl = fsa9480_read_reg(client, FSA9480_REG_CTRL); + + dev_info(&client->dev, "intr: 0x%x, dev1: 0x%x, dev2: 0x%x\n", + intr, val1, val2); + + if (!intr) + goto out; + + if (intr & INT_ATTACH) { /* Attached */ + /* USB */ + if (val1 & DEV_T1_USB_MASK || val2 & DEV_T2_USB_MASK) { + if (pdata->usb_cb) + pdata->usb_cb(FSA9480_ATTACHED); + + if (usbsw->mansw) { + 
fsa9480_write_reg(client, + FSA9480_REG_MANSW1, usbsw->mansw); + } + } + + /* UART */ + if (val1 & DEV_T1_UART_MASK || val2 & DEV_T2_UART_MASK) { + if (pdata->uart_cb) + pdata->uart_cb(FSA9480_ATTACHED); + + if (!(ctrl & CON_MANUAL_SW)) { + fsa9480_write_reg(client, + FSA9480_REG_MANSW1, SW_UART); + } + } + + /* CHARGER */ + if (val1 & DEV_T1_CHARGER_MASK) { + if (pdata->charger_cb) + pdata->charger_cb(FSA9480_ATTACHED); + } + + /* JIG */ + if (val2 & DEV_T2_JIG_MASK) { + if (pdata->jig_cb) + pdata->jig_cb(FSA9480_ATTACHED); + } + } else if (intr & INT_DETACH) { /* Detached */ + /* USB */ + if (usbsw->dev1 & DEV_T1_USB_MASK || + usbsw->dev2 & DEV_T2_USB_MASK) { + if (pdata->usb_cb) + pdata->usb_cb(FSA9480_DETACHED); + } + + /* UART */ + if (usbsw->dev1 & DEV_T1_UART_MASK || + usbsw->dev2 & DEV_T2_UART_MASK) { + if (pdata->uart_cb) + pdata->uart_cb(FSA9480_DETACHED); + } + + /* CHARGER */ + if (usbsw->dev1 & DEV_T1_CHARGER_MASK) { + if (pdata->charger_cb) + pdata->charger_cb(FSA9480_DETACHED); + } + + /* JIG */ + if (usbsw->dev2 & DEV_T2_JIG_MASK) { + if (pdata->jig_cb) + pdata->jig_cb(FSA9480_DETACHED); + } + } + + usbsw->dev1 = val1; + usbsw->dev2 = val2; + +out: + ctrl &= ~CON_INT_MASK; + fsa9480_write_reg(client, FSA9480_REG_CTRL, ctrl); +} + +static irqreturn_t fsa9480_irq_handler(int irq, void *data) +{ + struct fsa9480_usbsw *usbsw = data; + struct i2c_client *client = usbsw->client; + int intr; + + /* clear interrupt */ + fsa9480_read_irq(client, &intr); + + /* device detection */ + fsa9480_detect_dev(usbsw, intr); + + return IRQ_HANDLED; +} + +static int fsa9480_irq_init(struct fsa9480_usbsw *usbsw) +{ + struct fsa9480_platform_data *pdata = usbsw->pdata; + struct i2c_client *client = usbsw->client; + int ret; + int intr; + unsigned int ctrl = CON_MASK; + + /* clear interrupt */ + fsa9480_read_irq(client, &intr); + + /* unmask interrupt (attach/detach only) */ + fsa9480_write_reg(client, FSA9480_REG_INT1_MASK, 0xfc); + fsa9480_write_reg(client, FSA9480_REG_INT2_MASK, 0x1f); + + usbsw->mansw = fsa9480_read_reg(client, FSA9480_REG_MANSW1); + + if (usbsw->mansw) + ctrl &= ~CON_MANUAL_SW; /* Manual Switching Mode */ + + fsa9480_write_reg(client, FSA9480_REG_CTRL, ctrl); + + if (pdata && pdata->cfg_gpio) + pdata->cfg_gpio(); + + if (client->irq) { + ret = request_threaded_irq(client->irq, NULL, + fsa9480_irq_handler, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT, + "fsa9480 micro USB", usbsw); + if (ret) { + dev_err(&client->dev, "failed to request IRQ\n"); + return ret; + } + + if (pdata) + device_init_wakeup(&client->dev, pdata->wakeup); + } + + return 0; +} + +static int fsa9480_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); + struct fsa9480_usbsw *usbsw; + int ret = 0; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) + return -EIO; + + usbsw = kzalloc(sizeof(struct fsa9480_usbsw), GFP_KERNEL); + if (!usbsw) { + dev_err(&client->dev, "failed to allocate driver data\n"); + return -ENOMEM; + } + + usbsw->client = client; + usbsw->pdata = client->dev.platform_data; + + chip = usbsw; + + i2c_set_clientdata(client, usbsw); + + ret = fsa9480_irq_init(usbsw); + if (ret) + goto fail1; + + ret = sysfs_create_group(&client->dev.kobj, &fsa9480_group); + if (ret) { + dev_err(&client->dev, + "failed to create fsa9480 attribute group\n"); + goto fail2; + } + + /* ADC Detect Time: 500ms */ + fsa9480_write_reg(client, FSA9480_REG_TIMING1, 0x6); + + if (chip->pdata->reset_cb) + 
chip->pdata->reset_cb(); + + /* device detection */ + fsa9480_detect_dev(usbsw, INT_ATTACH); + + pm_runtime_set_active(&client->dev); + + return 0; + +fail2: + if (client->irq) + free_irq(client->irq, usbsw); +fail1: + kfree(usbsw); + return ret; +} + +static int fsa9480_remove(struct i2c_client *client) +{ + struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client); + if (client->irq) + free_irq(client->irq, usbsw); + + sysfs_remove_group(&client->dev.kobj, &fsa9480_group); + device_init_wakeup(&client->dev, 0); + kfree(usbsw); + return 0; +} + +#ifdef CONFIG_PM_SLEEP + +static int fsa9480_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client); + struct fsa9480_platform_data *pdata = usbsw->pdata; + + if (device_may_wakeup(&client->dev) && client->irq) + enable_irq_wake(client->irq); + + if (pdata->usb_power) + pdata->usb_power(0); + + return 0; +} + +static int fsa9480_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client); + int dev1, dev2; + + if (device_may_wakeup(&client->dev) && client->irq) + disable_irq_wake(client->irq); + + /* + * Clear Pending interrupt. Note that detect_dev does what + * the interrupt handler does. So, we don't miss pending and + * we reenable interrupt if there is one. + */ + fsa9480_read_reg(client, FSA9480_REG_INT1); + fsa9480_read_reg(client, FSA9480_REG_INT2); + + dev1 = fsa9480_read_reg(client, FSA9480_REG_DEV_T1); + dev2 = fsa9480_read_reg(client, FSA9480_REG_DEV_T2); + + /* device detection */ + fsa9480_detect_dev(usbsw, (dev1 || dev2) ? INT_ATTACH : INT_DETACH); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(fsa9480_pm_ops, fsa9480_suspend, fsa9480_resume); +#define FSA9480_PM_OPS (&fsa9480_pm_ops) + +#else + +#define FSA9480_PM_OPS NULL + +#endif /* CONFIG_PM_SLEEP */ + +static const struct i2c_device_id fsa9480_id[] = { + {"fsa9480", 0}, + {} +}; +MODULE_DEVICE_TABLE(i2c, fsa9480_id); + +static struct i2c_driver fsa9480_i2c_driver = { + .driver = { + .name = "fsa9480", + .pm = FSA9480_PM_OPS, + }, + .probe = fsa9480_probe, + .remove = fsa9480_remove, + .id_table = fsa9480_id, +}; + +module_i2c_driver(fsa9480_i2c_driver); + +MODULE_AUTHOR("Minkyu Kang <mk7.kang@samsung.com>"); +MODULE_DESCRIPTION("FSA9480 USB Switch driver"); +MODULE_LICENSE("GPL"); diff --git a/kernel/drivers/misc/genwqe/Kconfig b/kernel/drivers/misc/genwqe/Kconfig new file mode 100644 index 000000000..4c0a033cb --- /dev/null +++ b/kernel/drivers/misc/genwqe/Kconfig @@ -0,0 +1,19 @@ +# +# IBM Accelerator Family 'GenWQE' +# + +menuconfig GENWQE + tristate "GenWQE PCIe Accelerator" + depends on PCI && 64BIT + select CRC_ITU_T + default n + help + Enables PCIe card driver for IBM GenWQE accelerators. + The user-space interface is described in + include/linux/genwqe/genwqe_card.h. 
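+
+# Note: the symbol below is an int rather than a bool. Its value is
+# compiled into the driver as the initial setting and can still be
+# changed at runtime; card_base.c copies it into the card descriptor,
+# roughly
+#
+#	cd->use_platform_recovery = CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY;
+#
+# and the field can later be overwritten via debugfs.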
+ +config GENWQE_PLATFORM_ERROR_RECOVERY + int "Use platform recovery procedures (0=off, 1=on)" + depends on GENWQE + default 1 if PPC64 + default 0 diff --git a/kernel/drivers/misc/genwqe/Makefile b/kernel/drivers/misc/genwqe/Makefile new file mode 100644 index 000000000..98a2b4f0b --- /dev/null +++ b/kernel/drivers/misc/genwqe/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for GenWQE driver +# + +obj-$(CONFIG_GENWQE) := genwqe_card.o +genwqe_card-objs := card_base.o card_dev.o card_ddcb.o card_sysfs.o \ + card_debugfs.o card_utils.o diff --git a/kernel/drivers/misc/genwqe/card_base.c b/kernel/drivers/misc/genwqe/card_base.c new file mode 100644 index 000000000..4cf8f82cf --- /dev/null +++ b/kernel/drivers/misc/genwqe/card_base.c @@ -0,0 +1,1402 @@ +/** + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp <haver@linux.vnet.ibm.com> + * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com> + * Author: Michael Jung <mijung@gmx.net> + * Author: Michael Ruettger <michael@ibmra.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* + * Module initialization and PCIe setup. Card health monitoring and + * recovery functionality. Character device creation and deletion are + * controlled from here. + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/err.h> +#include <linux/aer.h> +#include <linux/string.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/module.h> +#include <linux/notifier.h> +#include <linux/device.h> +#include <linux/log2.h> + +#include "card_base.h" +#include "card_ddcb.h" + +MODULE_AUTHOR("Frank Haverkamp <haver@linux.vnet.ibm.com>"); +MODULE_AUTHOR("Michael Ruettger <michael@ibmra.de>"); +MODULE_AUTHOR("Joerg-Stephan Vogt <jsvogt@de.ibm.com>"); +MODULE_AUTHOR("Michael Jung <mijung@gmx.net>"); + +MODULE_DESCRIPTION("GenWQE Card"); +MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE("GPL"); + +static char genwqe_driver_name[] = GENWQE_DEVNAME; +static struct class *class_genwqe; +static struct dentry *debugfs_genwqe; +static struct genwqe_dev *genwqe_devices[GENWQE_CARD_NO_MAX]; + +/* PCI structure for identifying device by PCI vendor and device ID */ +static const struct pci_device_id genwqe_device_table[] = { + { .vendor = PCI_VENDOR_ID_IBM, + .device = PCI_DEVICE_GENWQE, + .subvendor = PCI_SUBVENDOR_ID_IBM, + .subdevice = PCI_SUBSYSTEM_ID_GENWQE5, + .class = (PCI_CLASSCODE_GENWQE5 << 8), + .class_mask = ~0, + .driver_data = 0 }, + + /* Initial SR-IOV bring-up image */ + { .vendor = PCI_VENDOR_ID_IBM, + .device = PCI_DEVICE_GENWQE, + .subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV, + .subdevice = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV, + .class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8), + .class_mask = ~0, + .driver_data = 0 }, + + { .vendor = PCI_VENDOR_ID_IBM, /* VF Vendor ID */ + .device = 0x0000, /* VF Device ID */ + .subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV, + .subdevice = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV, + .class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8), + .class_mask = ~0, + .driver_data = 0 }, + + /* Fixed up image */ + { .vendor = 
PCI_VENDOR_ID_IBM,
+	  .device      = PCI_DEVICE_GENWQE,
+	  .subvendor   = PCI_SUBVENDOR_ID_IBM_SRIOV,
+	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5,
+	  .class       = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
+	  .class_mask  = ~0,
+	  .driver_data = 0 },
+
+	{ .vendor      = PCI_VENDOR_ID_IBM,  /* VF Vendor ID */
+	  .device      = 0x0000,  /* VF Device ID */
+	  .subvendor   = PCI_SUBVENDOR_ID_IBM_SRIOV,
+	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5,
+	  .class       = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
+	  .class_mask  = ~0,
+	  .driver_data = 0 },
+
+	/* Even one more ... */
+	{ .vendor      = PCI_VENDOR_ID_IBM,
+	  .device      = PCI_DEVICE_GENWQE,
+	  .subvendor   = PCI_SUBVENDOR_ID_IBM,
+	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5_NEW,
+	  .class       = (PCI_CLASSCODE_GENWQE5 << 8),
+	  .class_mask  = ~0,
+	  .driver_data = 0 },
+
+	{ 0, }			/* 0 terminated list. */
+};
+
+MODULE_DEVICE_TABLE(pci, genwqe_device_table);
+
+/**
+ * genwqe_dev_alloc() - Create and prepare a new card descriptor
+ *
+ * Return: Pointer to card descriptor, or ERR_PTR(err) on error
+ */
+static struct genwqe_dev *genwqe_dev_alloc(void)
+{
+	unsigned int i = 0, j;
+	struct genwqe_dev *cd;
+
+	for (i = 0; i < GENWQE_CARD_NO_MAX; i++) {
+		if (genwqe_devices[i] == NULL)
+			break;
+	}
+	if (i >= GENWQE_CARD_NO_MAX)
+		return ERR_PTR(-ENODEV);
+
+	cd = kzalloc(sizeof(struct genwqe_dev), GFP_KERNEL);
+	if (!cd)
+		return ERR_PTR(-ENOMEM);
+
+	cd->card_idx = i;
+	cd->class_genwqe = class_genwqe;
+	cd->debugfs_genwqe = debugfs_genwqe;
+
+	/*
+	 * This comes from the kernel config option and can be
+	 * overwritten via debugfs.
+	 */
+	cd->use_platform_recovery = CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY;
+
+	init_waitqueue_head(&cd->queue_waitq);
+
+	spin_lock_init(&cd->file_lock);
+	INIT_LIST_HEAD(&cd->file_list);
+
+	cd->card_state = GENWQE_CARD_UNUSED;
+	spin_lock_init(&cd->print_lock);
+
+	cd->ddcb_software_timeout = genwqe_ddcb_software_timeout;
+	cd->kill_timeout = genwqe_kill_timeout;
+
+	for (j = 0; j < GENWQE_MAX_VFS; j++)
+		cd->vf_jobtimeout_msec[j] = genwqe_vf_jobtimeout_msec;
+
+	genwqe_devices[i] = cd;
+	return cd;
+}
+
+static void genwqe_dev_free(struct genwqe_dev *cd)
+{
+	if (!cd)
+		return;
+
+	genwqe_devices[cd->card_idx] = NULL;
+	kfree(cd);
+}
+
+/**
+ * genwqe_bus_reset() - Card recovery
+ *
+ * pci_reset_function() will recover the device and ensure that the
+ * registers are accessible again when it completes with success. If
+ * not, the card will stay dead and the registers will remain
+ * inaccessible.
+ */
+static int genwqe_bus_reset(struct genwqe_dev *cd)
+{
+	int bars, rc = 0;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	void __iomem *mmio;
+
+	if (cd->err_inject & GENWQE_INJECT_BUS_RESET_FAILURE)
+		return -EIO;
+
+	mmio = cd->mmio;
+	cd->mmio = NULL;
+	pci_iounmap(pci_dev, mmio);
+
+	bars = pci_select_bars(pci_dev, IORESOURCE_MEM);
+	pci_release_selected_regions(pci_dev, bars);
+
+	/*
+	 * Firmware/BIOS might change the memory mapping during bus reset.
+	 * Settings like the bus-mastering enable are backed up and
+	 * restored by pci_reset_function().
+	 */
+	dev_dbg(&pci_dev->dev, "[%s] pci_reset function ...\n", __func__);
+	rc = pci_reset_function(pci_dev);
+	if (rc) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: failed reset func (rc %d)\n", __func__, rc);
+		return rc;
+	}
+	dev_dbg(&pci_dev->dev, "[%s] done with rc=%d\n", __func__, rc);
+
+	/*
+	 * Here is the right spot to clear the register read
+	 * failure. pci_bus_reset() does this job in real systems.
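+	 * (When testing with error injection, this is also where the
+	 * simulated failure is disarmed: the GENWQE_INJECT_* bits are
+	 * cleared right below.)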
+	 */
+	cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE |
+			    GENWQE_INJECT_GFIR_FATAL |
+			    GENWQE_INJECT_GFIR_INFO);
+
+	rc = pci_request_selected_regions(pci_dev, bars, genwqe_driver_name);
+	if (rc) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: request bars failed (%d)\n", __func__, rc);
+		return -EIO;
+	}
+
+	cd->mmio = pci_iomap(pci_dev, 0, 0);
+	if (cd->mmio == NULL) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: mapping BAR0 failed\n", __func__);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+/*
+ * Hardware circumvention section. Certain bitstreams in our test-lab
+ * had different kinds of problems. Here is where we adjust those
+ * bitstreams to function well with this version of our device driver.
+ *
+ * These circumventions are applied to the physical function only.
+ * The magic numbers below identify development/manufacturing
+ * versions of the bitstream used on the card.
+ *
+ * Turn off error reporting for old/manufacturing images.
+ */
+
+bool genwqe_need_err_masking(struct genwqe_dev *cd)
+{
+	return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull;
+}
+
+static void genwqe_tweak_hardware(struct genwqe_dev *cd)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	/* Mask FIRs for development images */
+	if (((cd->slu_unitcfg & 0xFFFF0ull) >= 0x32000ull) &&
+	    ((cd->slu_unitcfg & 0xFFFF0ull) <= 0x33250ull)) {
+		dev_warn(&pci_dev->dev,
+			 "FIRs masked due to bitstream %016llx.%016llx\n",
+			 cd->slu_unitcfg, cd->app_unitcfg);
+
+		__genwqe_writeq(cd, IO_APP_SEC_LEM_DEBUG_OVR,
+				0xFFFFFFFFFFFFFFFFull);
+
+		__genwqe_writeq(cd, IO_APP_ERR_ACT_MASK,
+				0x0000000000000000ull);
+	}
+}
+
+/**
+ * genwqe_recovery_on_fatal_gfir_required() - Version-dependent actions
+ *
+ * Bitstreams older than 2013-02-17 have a bug where fatal GFIRs must
+ * be ignored. This is e.g. true for the bitstream we gave to the card
+ * manufacturer, but also for some old bitstreams we released to our
+ * test-lab.
+ */
+int genwqe_recovery_on_fatal_gfir_required(struct genwqe_dev *cd)
+{
+	return (cd->slu_unitcfg & 0xFFFF0ull) >= 0x32170ull;
+}
+
+int genwqe_flash_readback_fails(struct genwqe_dev *cd)
+{
+	return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull;
+}
+
+/**
+ * genwqe_T_psec() - Calculate PF/VF timeout register content
+ *
+ * Note: From a design perspective it turned out to be a bad idea to
+ * use codes here to specify the frequency/speed values. An old
+ * driver cannot understand new codes and is therefore always a
+ * problem. It is better to measure the value out, or to put the
+ * speed/frequency directly into a register, which is always a valid
+ * value for old as well as for new software.
+ */
+/* T = 1/f */
+static int genwqe_T_psec(struct genwqe_dev *cd)
+{
+	u16 speed;	/* 1/f -> 250, 200, 166, 175 */
+	static const int T[] = { 4000, 5000, 6000, 5714 };
+
+	speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full);
+	if (speed >= ARRAY_SIZE(T))
+		return -1;	/* illegal value */
+
+	return T[speed];
+}
+
+/**
+ * genwqe_setup_pf_jtimer() - Setup PF hardware timeouts for DDCB execution
+ *
+ * Do this _after_ card_reset() is called. Otherwise the values will
+ * vanish. The settings need to be done when the queues are inactive.
+ *
+ * The max. timeout value is 2^(10+x) * T (6ns for 166MHz) * 15/16.
+ * The min. timeout value is 2^(10+x) * T (6ns for 166MHz) * 14/16.
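+ *
+ * Solving the max. timeout formula for the register value x, with T in
+ * picoseconds and the job timeout given in milliseconds:
+ *
+ *   timeout_msec * 10^9 = 2^(10+x) * T * 15/16
+ *   x = log2(timeout_msec * 16 * 10^9 / (15 * T)) - 10
+ *
+ * which is what the ilog2() expression below computes. For example,
+ * with the default genwqe_pf_jobtimeout_msec = 8000 and T = 6000 ps
+ * (166 MHz): x = ilog2(8000 * 16000000000 / (6000 * 15)) - 10
+ * = 30 - 10 = 20 (illustrative arithmetic only).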
+ */ +static bool genwqe_setup_pf_jtimer(struct genwqe_dev *cd) +{ + u32 T = genwqe_T_psec(cd); + u64 x; + + if (genwqe_pf_jobtimeout_msec == 0) + return false; + + /* PF: large value needed, flash update 2sec per block */ + x = ilog2(genwqe_pf_jobtimeout_msec * + 16000000000uL/(T * 15)) - 10; + + genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT, + 0xff00 | (x & 0xff), 0); + return true; +} + +/** + * genwqe_setup_vf_jtimer() - Setup VF hardware timeouts for DDCB execution + */ +static bool genwqe_setup_vf_jtimer(struct genwqe_dev *cd) +{ + struct pci_dev *pci_dev = cd->pci_dev; + unsigned int vf; + u32 T = genwqe_T_psec(cd); + u64 x; + int totalvfs; + + totalvfs = pci_sriov_get_totalvfs(pci_dev); + if (totalvfs <= 0) + return false; + + for (vf = 0; vf < totalvfs; vf++) { + + if (cd->vf_jobtimeout_msec[vf] == 0) + continue; + + x = ilog2(cd->vf_jobtimeout_msec[vf] * + 16000000000uL/(T * 15)) - 10; + + genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT, + 0xff00 | (x & 0xff), vf + 1); + } + return true; +} + +static int genwqe_ffdc_buffs_alloc(struct genwqe_dev *cd) +{ + unsigned int type, e = 0; + + for (type = 0; type < GENWQE_DBG_UNITS; type++) { + switch (type) { + case GENWQE_DBG_UNIT0: + e = genwqe_ffdc_buff_size(cd, 0); + break; + case GENWQE_DBG_UNIT1: + e = genwqe_ffdc_buff_size(cd, 1); + break; + case GENWQE_DBG_UNIT2: + e = genwqe_ffdc_buff_size(cd, 2); + break; + case GENWQE_DBG_REGS: + e = GENWQE_FFDC_REGS; + break; + } + + /* currently support only the debug units mentioned here */ + cd->ffdc[type].entries = e; + cd->ffdc[type].regs = + kmalloc_array(e, sizeof(struct genwqe_reg), + GFP_KERNEL); + /* + * regs == NULL is ok, the using code treats this as no regs, + * Printing warning is ok in this case. + */ + } + return 0; +} + +static void genwqe_ffdc_buffs_free(struct genwqe_dev *cd) +{ + unsigned int type; + + for (type = 0; type < GENWQE_DBG_UNITS; type++) { + kfree(cd->ffdc[type].regs); + cd->ffdc[type].regs = NULL; + } +} + +static int genwqe_read_ids(struct genwqe_dev *cd) +{ + int err = 0; + int slu_id; + struct pci_dev *pci_dev = cd->pci_dev; + + cd->slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG); + if (cd->slu_unitcfg == IO_ILLEGAL_VALUE) { + dev_err(&pci_dev->dev, + "err: SLUID=%016llx\n", cd->slu_unitcfg); + err = -EIO; + goto out_err; + } + + slu_id = genwqe_get_slu_id(cd); + if (slu_id < GENWQE_SLU_ARCH_REQ || slu_id == 0xff) { + dev_err(&pci_dev->dev, + "err: incompatible SLU Architecture %u\n", slu_id); + err = -ENOENT; + goto out_err; + } + + cd->app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG); + if (cd->app_unitcfg == IO_ILLEGAL_VALUE) { + dev_err(&pci_dev->dev, + "err: APPID=%016llx\n", cd->app_unitcfg); + err = -EIO; + goto out_err; + } + genwqe_read_app_id(cd, cd->app_name, sizeof(cd->app_name)); + + /* + * Is access to all registers possible? If we are a VF the + * answer is obvious. If we run fully virtualized, we need to + * check if we can access all registers. If we do not have + * full access we will cause an UR and some informational FIRs + * in the PF, but that should not harm. + */ + if (pci_dev->is_virtfn) + cd->is_privileged = 0; + else + cd->is_privileged = (__genwqe_readq(cd, IO_SLU_BITSTREAM) + != IO_ILLEGAL_VALUE); + + out_err: + return err; +} + +static int genwqe_start(struct genwqe_dev *cd) +{ + int err; + struct pci_dev *pci_dev = cd->pci_dev; + + err = genwqe_read_ids(cd); + if (err) + return err; + + if (genwqe_is_privileged(cd)) { + /* do this after the tweaks. 
alloc fail is acceptable */
+		genwqe_ffdc_buffs_alloc(cd);
+		genwqe_stop_traps(cd);
+
+		/* Collect registers e.g. FIRs, UNITIDs, traces ... */
+		genwqe_read_ffdc_regs(cd, cd->ffdc[GENWQE_DBG_REGS].regs,
+				      cd->ffdc[GENWQE_DBG_REGS].entries, 0);
+
+		genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT0,
+				      cd->ffdc[GENWQE_DBG_UNIT0].regs,
+				      cd->ffdc[GENWQE_DBG_UNIT0].entries);
+
+		genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT1,
+				      cd->ffdc[GENWQE_DBG_UNIT1].regs,
+				      cd->ffdc[GENWQE_DBG_UNIT1].entries);
+
+		genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT2,
+				      cd->ffdc[GENWQE_DBG_UNIT2].regs,
+				      cd->ffdc[GENWQE_DBG_UNIT2].entries);
+
+		genwqe_start_traps(cd);
+
+		if (cd->card_state == GENWQE_CARD_FATAL_ERROR) {
+			dev_warn(&pci_dev->dev,
+				 "[%s] chip reload/recovery!\n", __func__);
+
+			/*
+			 * Stealth Mode: Reload chip on either hot
+			 * reset or PERST.
+			 */
+			cd->softreset = 0x7Cull;
+			__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET,
+					cd->softreset);
+
+			err = genwqe_bus_reset(cd);
+			if (err != 0) {
+				dev_err(&pci_dev->dev,
+					"[%s] err: bus reset failed!\n",
+					__func__);
+				goto out;
+			}
+
+			/*
+			 * Re-read the IDs because it could happen
+			 * that the bitstream load failed!
+			 */
+			err = genwqe_read_ids(cd);
+			if (err)
+				goto out;
+		}
+	}
+
+	err = genwqe_setup_service_layer(cd);  /* does a reset of the card */
+	if (err != 0) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: could not setup servicelayer!\n", __func__);
+		err = -ENODEV;
+		goto out;
+	}
+
+	if (genwqe_is_privileged(cd)) {	/* code is running _after_ reset */
+		genwqe_tweak_hardware(cd);
+
+		genwqe_setup_pf_jtimer(cd);
+		genwqe_setup_vf_jtimer(cd);
+	}
+
+	err = genwqe_device_create(cd);
+	if (err < 0) {
+		dev_err(&pci_dev->dev,
+			"err: chdev init failed! (err=%d)\n", err);
+		goto out_release_service_layer;
+	}
+	return 0;
+
+ out_release_service_layer:
+	genwqe_release_service_layer(cd);
+ out:
+	if (genwqe_is_privileged(cd))
+		genwqe_ffdc_buffs_free(cd);
+	return -EIO;
+}
+
+/**
+ * genwqe_stop() - Stop card operation
+ *
+ * Recovery notes:
+ *   As long as genwqe_thread runs we might access registers during
+ *   error data capture. The same holds for the genwqe_health_thread.
+ *   When genwqe_bus_reset() fails, this function might be called two
+ *   times: first by the genwqe_health_thread() and later by
+ *   genwqe_remove() to unbind the device. We must be able to survive
+ *   that.
+ *
+ * This function must be robust enough to be called twice.
+ */
+static int genwqe_stop(struct genwqe_dev *cd)
+{
+	genwqe_finish_queue(cd);   /* no register access */
+	genwqe_device_remove(cd);  /* device removed, procs killed */
+	genwqe_release_service_layer(cd);  /* here genwqe_thread is stopped */
+
+	if (genwqe_is_privileged(cd)) {
+		pci_disable_sriov(cd->pci_dev);	 /* access pci config space */
+		genwqe_ffdc_buffs_free(cd);
+	}
+
+	return 0;
+}
+
+/**
+ * genwqe_recover_card() - Try to recover the card if it is possible
+ *
+ * If fatal_err is set, no register access is possible anymore. It is
+ * likely that genwqe_start fails in that situation. Proper error
+ * handling is required in this case.
+ *
+ * genwqe_bus_reset() will cause the pci code to call genwqe_remove()
+ * and later genwqe_probe() for all virtual functions.
+ */
+static int genwqe_recover_card(struct genwqe_dev *cd, int fatal_err)
+{
+	int rc;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	genwqe_stop(cd);
+
+	/*
+	 * Make sure the chip is not reloaded, to maintain the FFDC.
+	 * Write SLU Reset Register, CPLDReset field to 0.
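+	 * (genwqe_start() uses 0x7C here instead to force a chip
+	 * reload; 0x70 keeps the current bitstream, so the FFDC data
+	 * collected above survives the reset.)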
+	 */
+	if (!fatal_err) {
+		cd->softreset = 0x70ull;
+		__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset);
+	}
+
+	rc = genwqe_bus_reset(cd);
+	if (rc != 0) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: card recovery impossible!\n", __func__);
+		return rc;
+	}
+
+	rc = genwqe_start(cd);
+	if (rc < 0) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: failed to launch device!\n", __func__);
+		return rc;
+	}
+	return 0;
+}
+
+static int genwqe_health_check_cond(struct genwqe_dev *cd, u64 *gfir)
+{
+	*gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+	return (*gfir & GFIR_ERR_TRIGGER) &&
+		genwqe_recovery_on_fatal_gfir_required(cd);
+}
+
+/**
+ * genwqe_fir_checking() - Check the fault isolation registers of the card
+ *
+ * If this code works ok, it can be tried out with the help of the
+ * genwqe_poke tool:
+ *   sudo ./tools/genwqe_poke 0x8 0xfefefefefef
+ *
+ * Now the relevant FIRs/sFIRs should be printed out and the driver should
+ * invoke recovery (devices are removed and re-added).
+ */
+static u64 genwqe_fir_checking(struct genwqe_dev *cd)
+{
+	int j, iterations = 0;
+	u64 mask, fir, fec, uid, gfir, gfir_masked, sfir, sfec;
+	u32 fir_addr, fir_clr_addr, fec_addr, sfir_addr, sfec_addr;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+ healthMonitor:
+	iterations++;
+	if (iterations > 16) {
+		dev_err(&pci_dev->dev, "* exit looping after %d times\n",
+			iterations);
+		goto fatal_error;
+	}
+
+	gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+	if (gfir != 0x0)
+		dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n",
+			IO_SLC_CFGREG_GFIR, gfir);
+	if (gfir == IO_ILLEGAL_VALUE)
+		goto fatal_error;
+
+	/*
+	 * Not printing when no GFIR bit is set prevents a continuous
+	 * printout, e.g. for the following bug:
+	 *   FIR set without a 2ndary FIR / FIR cannot be cleared.
+	 * Comment out the following 'if' to get the prints:
+	 */
+	if (gfir == 0)
+		return 0;
+
+	gfir_masked = gfir & GFIR_ERR_TRIGGER;	/* fatal errors */
+
+	for (uid = 0; uid < GENWQE_MAX_UNITS; uid++) { /* 0..2 in zEDC */
+
+		/* read the primary FIR (pfir) */
+		fir_addr = (uid << 24) + 0x08;
+		fir = __genwqe_readq(cd, fir_addr);
+		if (fir == 0x0)
+			continue;  /* no error in this unit */
+
+		dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fir_addr, fir);
+		if (fir == IO_ILLEGAL_VALUE)
+			goto fatal_error;
+
+		/* read primary FEC */
+		fec_addr = (uid << 24) + 0x18;
+		fec = __genwqe_readq(cd, fec_addr);
+
+		dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fec_addr, fec);
+		if (fec == IO_ILLEGAL_VALUE)
+			goto fatal_error;
+
+		for (j = 0, mask = 1ULL; j < 64; j++, mask <<= 1) {
+
+			/* secondary fir empty, skip it */
+			if ((fir & mask) == 0x0)
+				continue;
+
+			sfir_addr = (uid << 24) + 0x100 + 0x08 * j;
+			sfir = __genwqe_readq(cd, sfir_addr);
+
+			if (sfir == IO_ILLEGAL_VALUE)
+				goto fatal_error;
+			dev_err(&pci_dev->dev,
+				"* 0x%08x 0x%016llx\n", sfir_addr, sfir);
+
+			sfec_addr = (uid << 24) + 0x300 + 0x08 * j;
+			sfec = __genwqe_readq(cd, sfec_addr);
+
+			if (sfec == IO_ILLEGAL_VALUE)
+				goto fatal_error;
+			dev_err(&pci_dev->dev,
+				"* 0x%08x 0x%016llx\n", sfec_addr, sfec);
+
+			gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+			if (gfir == IO_ILLEGAL_VALUE)
+				goto fatal_error;
+
+			/* gfir turned on during the routine! get out
+			 * and start over. */
+			if ((gfir_masked == 0x0) &&
+			    (gfir & GFIR_ERR_TRIGGER)) {
+				goto healthMonitor;
+			}
+
+			/* do not clear if we entered with a fatal gfir */
+			if (gfir_masked == 0x0) {
+
+				/* NEW clear by mask the logged bits */
+				sfir_addr = (uid << 24) + 0x100 + 0x08 * j;
+				__genwqe_writeq(cd, sfir_addr, sfir);
+
+				dev_dbg(&pci_dev->dev,
+					"[HM] Clearing 2ndary FIR 0x%08x with 0x%016llx\n",
+					sfir_addr, sfir);
+
+				/*
+				 * note, these cannot be error-Firs
+				 * since gfir_masked is 0 after sfir
+				 * was read. Also, it is safe to do
+				 * this write if sfir=0. Still need to
+				 * clear the primary. This just means
+				 * there is no secondary FIR.
+				 */
+
+				/* clear by mask the logged bit. */
+				fir_clr_addr = (uid << 24) + 0x10;
+				__genwqe_writeq(cd, fir_clr_addr, mask);
+
+				dev_dbg(&pci_dev->dev,
+					"[HM] Clearing primary FIR 0x%08x with 0x%016llx\n",
+					fir_clr_addr, mask);
+			}
+		}
+	}
+	gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+	if (gfir == IO_ILLEGAL_VALUE)
+		goto fatal_error;
+
+	if ((gfir_masked == 0x0) && (gfir & GFIR_ERR_TRIGGER)) {
+		/*
+		 * Check once more that it didn't go on after all the
+		 * FIRS were cleared.
+		 */
+		dev_dbg(&pci_dev->dev, "ACK! Another FIR! Recursing %d!\n",
+			iterations);
+		goto healthMonitor;
+	}
+	return gfir_masked;
+
+ fatal_error:
+	return IO_ILLEGAL_VALUE;
+}
+
+/**
+ * genwqe_pci_fundamental_reset() - trigger a PCIe fundamental reset on the slot
+ *
+ * Note: pci_set_pcie_reset_state() is not implemented on all archs, so this
+ * reset method will not work in all cases.
+ *
+ * Return: 0 on success or error code from pci_set_pcie_reset_state()
+ */
+static int genwqe_pci_fundamental_reset(struct pci_dev *pci_dev)
+{
+	int rc;
+
+	/*
+	 * lock pci config space access from userspace,
+	 * save state and issue PCIe fundamental reset
+	 */
+	pci_cfg_access_lock(pci_dev);
+	pci_save_state(pci_dev);
+	rc = pci_set_pcie_reset_state(pci_dev, pcie_warm_reset);
+	if (!rc) {
+		/* keep PCIe reset asserted for 250ms */
+		msleep(250);
+		pci_set_pcie_reset_state(pci_dev, pcie_deassert_reset);
+		/* Wait for 2s to reload flash and train the link */
+		msleep(2000);
+	}
+	pci_restore_state(pci_dev);
+	pci_cfg_access_unlock(pci_dev);
+	return rc;
+}
+
+
+static int genwqe_platform_recovery(struct genwqe_dev *cd)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+	int rc;
+
+	dev_info(&pci_dev->dev,
+		 "[%s] resetting card for error recovery\n", __func__);
+
+	/* Clear out error injection flags */
+	cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE |
+			    GENWQE_INJECT_GFIR_FATAL |
+			    GENWQE_INJECT_GFIR_INFO);
+
+	genwqe_stop(cd);
+
+	/* Try recovering the card with a fundamental reset */
+	rc = genwqe_pci_fundamental_reset(pci_dev);
+	if (!rc) {
+		rc = genwqe_start(cd);
+		if (!rc)
+			dev_info(&pci_dev->dev,
+				 "[%s] card recovered\n", __func__);
+		else
+			dev_err(&pci_dev->dev,
+				"[%s] err: cannot start card services! (err=%d)\n",
+				__func__, rc);
+	} else {
+		dev_err(&pci_dev->dev,
+			"[%s] card reset failed\n", __func__);
+	}
+
+	return rc;
+}
+
+/*
+ * genwqe_reload_bistream() - reload card bitstream
+ *
+ * Set the appropriate register and call a fundamental reset to reload
+ * the card bitstream.
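+ * The 'next_bitstream' partition selection is encoded in the softreset
+ * register write below ((cd->softreset & 0xc) | 0x70) before the
+ * fundamental reset is triggered.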
+ *
+ * Return: 0 on success, error code otherwise
+ */
+static int genwqe_reload_bistream(struct genwqe_dev *cd)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+	int rc;
+
+	dev_info(&pci_dev->dev,
+		 "[%s] resetting card for bitstream reload\n",
+		 __func__);
+
+	genwqe_stop(cd);
+
+	/*
+	 * Cause a CPLD reprogram with the 'next_bitstream'
+	 * partition on PCIe hot or fundamental reset
+	 */
+	__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET,
+			(cd->softreset & 0xcull) | 0x70ull);
+
+	rc = genwqe_pci_fundamental_reset(pci_dev);
+	if (rc) {
+		/*
+		 * A fundamental reset failure can be caused
+		 * by lack of support on the arch, so we just
+		 * log the error and try to start the card
+		 * again.
+		 */
+		dev_err(&pci_dev->dev,
+			"[%s] err: failed to reset card for bitstream reload\n",
+			__func__);
+	}
+
+	rc = genwqe_start(cd);
+	if (rc) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: cannot start card services! (err=%d)\n",
+			__func__, rc);
+		return rc;
+	}
+	dev_info(&pci_dev->dev,
+		 "[%s] card reloaded\n", __func__);
+	return 0;
+}
+
+
+/**
+ * genwqe_health_thread() - Health checking thread
+ *
+ * This thread is only started for the PF of the card.
+ *
+ * This thread monitors the health of the card. A critical situation
+ * is when we read registers which contain -1 (IO_ILLEGAL_VALUE). In
+ * this case the card needs to be recovered from outside. Writing to
+ * registers will very likely not work either.
+ *
+ * This thread must only exit if kthread_should_stop() becomes true.
+ *
+ * Conditions for the health-thread to trigger:
+ *   a) a kthread_stop() request comes in, or
+ *   b) a critical GFIR occurred.
+ *
+ * Informational GFIRs are checked and potentially printed every
+ * health_check_interval seconds.
+ */
+static int genwqe_health_thread(void *data)
+{
+	int rc, should_stop = 0;
+	struct genwqe_dev *cd = data;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	u64 gfir, gfir_masked, slu_unitcfg, app_unitcfg;
+
+ health_thread_begin:
+	while (!kthread_should_stop()) {
+		rc = wait_event_interruptible_timeout(cd->health_waitq,
+			 (genwqe_health_check_cond(cd, &gfir) ||
+			  (should_stop = kthread_should_stop())),
+			 genwqe_health_check_interval * HZ);
+
+		if (should_stop)
+			break;
+
+		if (gfir == IO_ILLEGAL_VALUE) {
+			dev_err(&pci_dev->dev,
+				"[%s] GFIR=%016llx\n", __func__, gfir);
+			goto fatal_error;
+		}
+
+		slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG);
+		if (slu_unitcfg == IO_ILLEGAL_VALUE) {
+			dev_err(&pci_dev->dev,
+				"[%s] SLU_UNITCFG=%016llx\n",
+				__func__, slu_unitcfg);
+			goto fatal_error;
+		}
+
+		app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG);
+		if (app_unitcfg == IO_ILLEGAL_VALUE) {
+			dev_err(&pci_dev->dev,
+				"[%s] APP_UNITCFG=%016llx\n",
+				__func__, app_unitcfg);
+			goto fatal_error;
+		}
+
+		gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+		if (gfir == IO_ILLEGAL_VALUE) {
+			dev_err(&pci_dev->dev,
+				"[%s] %s: GFIR=%016llx\n", __func__,
+				(gfir & GFIR_ERR_TRIGGER) ? "err" : "info",
+				gfir);
+			goto fatal_error;
+		}
+
+		gfir_masked = genwqe_fir_checking(cd);
+		if (gfir_masked == IO_ILLEGAL_VALUE)
+			goto fatal_error;
+
+		/*
+		 * GFIR ErrorTrigger bits set => reset the card!
+		 * Never do this for old/manufacturing images!
+		 */
+		if ((gfir_masked) && !cd->skip_recovery &&
+		    genwqe_recovery_on_fatal_gfir_required(cd)) {
+
+			cd->card_state = GENWQE_CARD_FATAL_ERROR;
+
+			rc = genwqe_recover_card(cd, 0);
+			if (rc < 0) {
+				/* FIXME Card is unusable and needs unbind!
*/ + goto fatal_error; + } + } + + if (cd->card_state == GENWQE_CARD_RELOAD_BITSTREAM) { + /* Userspace requested card bitstream reload */ + rc = genwqe_reload_bistream(cd); + if (rc) + goto fatal_error; + } + + cd->last_gfir = gfir; + cond_resched(); + } + + return 0; + + fatal_error: + if (cd->use_platform_recovery) { + /* + * Since we use raw accessors, EEH errors won't be detected + * by the platform until we do a non-raw MMIO or config space + * read + */ + readq(cd->mmio + IO_SLC_CFGREG_GFIR); + + /* We do nothing if the card is going over PCI recovery */ + if (pci_channel_offline(pci_dev)) + return -EIO; + + /* + * If it's supported by the platform, we try a fundamental reset + * to recover from a fatal error. Otherwise, we continue to wait + * for an external recovery procedure to take care of it. + */ + rc = genwqe_platform_recovery(cd); + if (!rc) + goto health_thread_begin; + } + + dev_err(&pci_dev->dev, + "[%s] card unusable. Please trigger unbind!\n", __func__); + + /* Bring down logical devices to inform user space via udev remove. */ + cd->card_state = GENWQE_CARD_FATAL_ERROR; + genwqe_stop(cd); + + /* genwqe_bus_reset failed(). Now wait for genwqe_remove(). */ + while (!kthread_should_stop()) + cond_resched(); + + return -EIO; +} + +static int genwqe_health_check_start(struct genwqe_dev *cd) +{ + int rc; + + if (genwqe_health_check_interval <= 0) + return 0; /* valid for disabling the service */ + + /* moved before request_irq() */ + /* init_waitqueue_head(&cd->health_waitq); */ + + cd->health_thread = kthread_run(genwqe_health_thread, cd, + GENWQE_DEVNAME "%d_health", + cd->card_idx); + if (IS_ERR(cd->health_thread)) { + rc = PTR_ERR(cd->health_thread); + cd->health_thread = NULL; + return rc; + } + return 0; +} + +static int genwqe_health_thread_running(struct genwqe_dev *cd) +{ + return cd->health_thread != NULL; +} + +static int genwqe_health_check_stop(struct genwqe_dev *cd) +{ + int rc; + + if (!genwqe_health_thread_running(cd)) + return -EIO; + + rc = kthread_stop(cd->health_thread); + cd->health_thread = NULL; + return 0; +} + +/** + * genwqe_pci_setup() - Allocate PCIe related resources for our card + */ +static int genwqe_pci_setup(struct genwqe_dev *cd) +{ + int err, bars; + struct pci_dev *pci_dev = cd->pci_dev; + + bars = pci_select_bars(pci_dev, IORESOURCE_MEM); + err = pci_enable_device_mem(pci_dev); + if (err) { + dev_err(&pci_dev->dev, + "err: failed to enable pci memory (err=%d)\n", err); + goto err_out; + } + + /* Reserve PCI I/O and memory resources */ + err = pci_request_selected_regions(pci_dev, bars, genwqe_driver_name); + if (err) { + dev_err(&pci_dev->dev, + "[%s] err: request bars failed (%d)\n", __func__, err); + err = -EIO; + goto err_disable_device; + } + + /* check for 64-bit DMA address supported (DAC) */ + if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(64))) { + err = pci_set_consistent_dma_mask(pci_dev, DMA_BIT_MASK(64)); + if (err) { + dev_err(&pci_dev->dev, + "err: DMA64 consistent mask error\n"); + err = -EIO; + goto out_release_resources; + } + /* check for 32-bit DMA address supported (SAC) */ + } else if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32))) { + err = pci_set_consistent_dma_mask(pci_dev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pci_dev->dev, + "err: DMA32 consistent mask error\n"); + err = -EIO; + goto out_release_resources; + } + } else { + dev_err(&pci_dev->dev, + "err: neither DMA32 nor DMA64 supported\n"); + err = -EIO; + goto out_release_resources; + } + + pci_set_master(pci_dev); + 
pci_enable_pcie_error_reporting(pci_dev);
+
+	/* EEH recovery requires PCIe fundamental reset */
+	pci_dev->needs_freset = 1;
+
+	/* request complete BAR-0 space (length = 0) */
+	cd->mmio_len = pci_resource_len(pci_dev, 0);
+	cd->mmio = pci_iomap(pci_dev, 0, 0);
+	if (cd->mmio == NULL) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: mapping BAR0 failed\n", __func__);
+		err = -ENOMEM;
+		goto out_release_resources;
+	}
+
+	cd->num_vfs = pci_sriov_get_totalvfs(pci_dev);
+	if (cd->num_vfs < 0)
+		cd->num_vfs = 0;
+
+	err = genwqe_read_ids(cd);
+	if (err)
+		goto out_iounmap;
+
+	return 0;
+
+ out_iounmap:
+	pci_iounmap(pci_dev, cd->mmio);
+ out_release_resources:
+	pci_release_selected_regions(pci_dev, bars);
+ err_disable_device:
+	pci_disable_device(pci_dev);
+ err_out:
+	return err;
+}
+
+/**
+ * genwqe_pci_remove() - Free PCIe related resources for our card
+ */
+static void genwqe_pci_remove(struct genwqe_dev *cd)
+{
+	int bars;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (cd->mmio)
+		pci_iounmap(pci_dev, cd->mmio);
+
+	bars = pci_select_bars(pci_dev, IORESOURCE_MEM);
+	pci_release_selected_regions(pci_dev, bars);
+	pci_disable_device(pci_dev);
+}
+
+/**
+ * genwqe_probe() - Device initialization
+ * @pci_dev: PCI device information struct
+ *
+ * Callable for multiple cards. This function is called on bind.
+ *
+ * Return: 0 if succeeded, < 0 when failed
+ */
+static int genwqe_probe(struct pci_dev *pci_dev,
+			const struct pci_device_id *id)
+{
+	int err;
+	struct genwqe_dev *cd;
+
+	genwqe_init_crc32();
+
+	cd = genwqe_dev_alloc();
+	if (IS_ERR(cd)) {
+		dev_err(&pci_dev->dev, "err: could not alloc mem (err=%d)!\n",
+			(int)PTR_ERR(cd));
+		return PTR_ERR(cd);
+	}
+
+	dev_set_drvdata(&pci_dev->dev, cd);
+	cd->pci_dev = pci_dev;
+
+	err = genwqe_pci_setup(cd);
+	if (err < 0) {
+		dev_err(&pci_dev->dev,
+			"err: problems with PCI setup (err=%d)\n", err);
+		goto out_free_dev;
+	}
+
+	err = genwqe_start(cd);
+	if (err < 0) {
+		dev_err(&pci_dev->dev,
+			"err: cannot start card services! (err=%d)\n", err);
+		goto out_pci_remove;
+	}
+
+	if (genwqe_is_privileged(cd)) {
+		err = genwqe_health_check_start(cd);
+		if (err < 0) {
+			dev_err(&pci_dev->dev,
+				"err: cannot start health checking! (err=%d)\n",
+				err);
+			goto out_stop_services;
+		}
+	}
+	return 0;
+
+ out_stop_services:
+	genwqe_stop(cd);
+ out_pci_remove:
+	genwqe_pci_remove(cd);
+ out_free_dev:
+	genwqe_dev_free(cd);
+	return err;
+}
+
+/**
+ * genwqe_remove() - Called when device is removed (hot-pluggable)
+ *
+ * Also called when the driver is unloaded, respectively when an
+ * unbind is done.
+ */
+static void genwqe_remove(struct pci_dev *pci_dev)
+{
+	struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev);
+
+	genwqe_health_check_stop(cd);
+
+	/*
+	 * genwqe_stop() must survive if it is called twice
+	 * sequentially. This happens when the health thread calls it
+	 * and fails on genwqe_bus_reset().
+	 */
+	genwqe_stop(cd);
+	genwqe_pci_remove(cd);
+	genwqe_dev_free(cd);
+}
+
+/*
+ * genwqe_err_error_detected() - Error detection callback
+ *
+ * This callback is called by the PCI subsystem whenever a PCI bus
+ * error is detected.
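+ *
+ * Returning PCI_ERS_RESULT_NEED_RESET asks the PCI core to reset the
+ * slot and then invoke genwqe_err_slot_reset(); returning
+ * PCI_ERS_RESULT_DISCONNECT tells it the device is lost for good.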
+ */ +static pci_ers_result_t genwqe_err_error_detected(struct pci_dev *pci_dev, + enum pci_channel_state state) +{ + struct genwqe_dev *cd; + + dev_err(&pci_dev->dev, "[%s] state=%d\n", __func__, state); + + cd = dev_get_drvdata(&pci_dev->dev); + if (cd == NULL) + return PCI_ERS_RESULT_DISCONNECT; + + /* Stop the card */ + genwqe_health_check_stop(cd); + genwqe_stop(cd); + + /* + * On permanent failure, the PCI code will call device remove + * after the return of this function. + * genwqe_stop() can be called twice. + */ + if (state == pci_channel_io_perm_failure) { + return PCI_ERS_RESULT_DISCONNECT; + } else { + genwqe_pci_remove(cd); + return PCI_ERS_RESULT_NEED_RESET; + } +} + +static pci_ers_result_t genwqe_err_slot_reset(struct pci_dev *pci_dev) +{ + int rc; + struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev); + + rc = genwqe_pci_setup(cd); + if (!rc) { + return PCI_ERS_RESULT_RECOVERED; + } else { + dev_err(&pci_dev->dev, + "err: problems with PCI setup (err=%d)\n", rc); + return PCI_ERS_RESULT_DISCONNECT; + } +} + +static pci_ers_result_t genwqe_err_result_none(struct pci_dev *dev) +{ + return PCI_ERS_RESULT_NONE; +} + +static void genwqe_err_resume(struct pci_dev *pci_dev) +{ + int rc; + struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev); + + rc = genwqe_start(cd); + if (!rc) { + rc = genwqe_health_check_start(cd); + if (rc) + dev_err(&pci_dev->dev, + "err: cannot start health checking! (err=%d)\n", + rc); + } else { + dev_err(&pci_dev->dev, + "err: cannot start card services! (err=%d)\n", rc); + } +} + +static int genwqe_sriov_configure(struct pci_dev *dev, int numvfs) +{ + int rc; + struct genwqe_dev *cd = dev_get_drvdata(&dev->dev); + + if (numvfs > 0) { + genwqe_setup_vf_jtimer(cd); + rc = pci_enable_sriov(dev, numvfs); + if (rc < 0) + return rc; + return numvfs; + } + if (numvfs == 0) { + pci_disable_sriov(dev); + return 0; + } + return 0; +} + +static struct pci_error_handlers genwqe_err_handler = { + .error_detected = genwqe_err_error_detected, + .mmio_enabled = genwqe_err_result_none, + .link_reset = genwqe_err_result_none, + .slot_reset = genwqe_err_slot_reset, + .resume = genwqe_err_resume, +}; + +static struct pci_driver genwqe_driver = { + .name = genwqe_driver_name, + .id_table = genwqe_device_table, + .probe = genwqe_probe, + .remove = genwqe_remove, + .sriov_configure = genwqe_sriov_configure, + .err_handler = &genwqe_err_handler, +}; + +/** + * genwqe_init_module() - Driver registration and initialization + */ +static int __init genwqe_init_module(void) +{ + int rc; + + class_genwqe = class_create(THIS_MODULE, GENWQE_DEVNAME); + if (IS_ERR(class_genwqe)) { + pr_err("[%s] create class failed\n", __func__); + return -ENOMEM; + } + + debugfs_genwqe = debugfs_create_dir(GENWQE_DEVNAME, NULL); + if (!debugfs_genwqe) { + rc = -ENOMEM; + goto err_out; + } + + rc = pci_register_driver(&genwqe_driver); + if (rc != 0) { + pr_err("[%s] pci_reg_driver (rc=%d)\n", __func__, rc); + goto err_out0; + } + + return rc; + + err_out0: + debugfs_remove(debugfs_genwqe); + err_out: + class_destroy(class_genwqe); + return rc; +} + +/** + * genwqe_exit_module() - Driver exit + */ +static void __exit genwqe_exit_module(void) +{ + pci_unregister_driver(&genwqe_driver); + debugfs_remove(debugfs_genwqe); + class_destroy(class_genwqe); +} + +module_init(genwqe_init_module); +module_exit(genwqe_exit_module); diff --git a/kernel/drivers/misc/genwqe/card_base.h b/kernel/drivers/misc/genwqe/card_base.h new file mode 100644 index 000000000..e73534498 --- /dev/null +++ 
b/kernel/drivers/misc/genwqe/card_base.h @@ -0,0 +1,583 @@ +#ifndef __CARD_BASE_H__ +#define __CARD_BASE_H__ + +/** + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp <haver@linux.vnet.ibm.com> + * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com> + * Author: Michael Jung <mijung@gmx.net> + * Author: Michael Ruettger <michael@ibmra.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* + * Interfaces within the GenWQE module. Defines genwqe_card and + * ddcb_queue as well as ddcb_requ. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/cdev.h> +#include <linux/stringify.h> +#include <linux/pci.h> +#include <linux/semaphore.h> +#include <linux/uaccess.h> +#include <linux/io.h> +#include <linux/debugfs.h> +#include <linux/slab.h> + +#include <linux/genwqe/genwqe_card.h> +#include "genwqe_driver.h" + +#define GENWQE_MSI_IRQS 4 /* Just one supported, no MSIx */ +#define GENWQE_FLAG_MSI_ENABLED (1 << 0) + +#define GENWQE_MAX_VFS 15 /* maximum 15 VFs are possible */ +#define GENWQE_MAX_FUNCS 16 /* 1 PF and 15 VFs */ +#define GENWQE_CARD_NO_MAX (16 * GENWQE_MAX_FUNCS) + +/* Compile parameters, some of them appear in debugfs for later adjustment */ +#define genwqe_ddcb_max 32 /* DDCBs on the work-queue */ +#define genwqe_polling_enabled 0 /* in case of irqs not working */ +#define genwqe_ddcb_software_timeout 10 /* timeout per DDCB in seconds */ +#define genwqe_kill_timeout 8 /* time until process gets killed */ +#define genwqe_vf_jobtimeout_msec 250 /* 250 msec */ +#define genwqe_pf_jobtimeout_msec 8000 /* 8 sec should be ok */ +#define genwqe_health_check_interval 4 /* <= 0: disabled */ + +/* Sysfs attribute groups used when we create the genwqe device */ +extern const struct attribute_group *genwqe_attribute_groups[]; + +/* + * Config space for Genwqe5 A7: + * 00:[14 10 4b 04]40 00 10 00[00 00 00 12]00 00 00 00 + * 10: 0c 00 00 f0 07 3c 00 00 00 00 00 00 00 00 00 00 + * 20: 00 00 00 00 00 00 00 00 00 00 00 00[14 10 4b 04] + * 30: 00 00 00 00 50 00 00 00 00 00 00 00 00 00 00 00 + */ +#define PCI_DEVICE_GENWQE 0x044b /* Genwqe DeviceID */ + +#define PCI_SUBSYSTEM_ID_GENWQE5 0x035f /* Genwqe A5 Subsystem-ID */ +#define PCI_SUBSYSTEM_ID_GENWQE5_NEW 0x044b /* Genwqe A5 Subsystem-ID */ +#define PCI_CLASSCODE_GENWQE5 0x1200 /* UNKNOWN */ + +#define PCI_SUBVENDOR_ID_IBM_SRIOV 0x0000 +#define PCI_SUBSYSTEM_ID_GENWQE5_SRIOV 0x0000 /* Genwqe A5 Subsystem-ID */ +#define PCI_CLASSCODE_GENWQE5_SRIOV 0x1200 /* UNKNOWN */ + +#define GENWQE_SLU_ARCH_REQ 2 /* Required SLU architecture level */ + +/** + * struct genwqe_reg - Genwqe data dump functionality + */ +struct genwqe_reg { + u32 addr; + u32 idx; + u64 val; +}; + +/* + * enum genwqe_dbg_type - Specify chip unit to dump/debug + */ +enum genwqe_dbg_type { + GENWQE_DBG_UNIT0 = 0, /* captured before prev errs cleared */ + GENWQE_DBG_UNIT1 = 1, + GENWQE_DBG_UNIT2 = 2, + GENWQE_DBG_UNIT3 = 3, + GENWQE_DBG_UNIT4 = 4, + GENWQE_DBG_UNIT5 = 5, + GENWQE_DBG_UNIT6 = 6, + GENWQE_DBG_UNIT7 = 7, + GENWQE_DBG_REGS = 8, + GENWQE_DBG_DMA = 9, + GENWQE_DBG_UNITS = 10, /* max number of possible 
debug units */
+};
+
+/* Software error injection to simulate card failures */
+#define GENWQE_INJECT_HARDWARE_FAILURE	0x00000001 /* injects -1 reg reads */
+#define GENWQE_INJECT_BUS_RESET_FAILURE 0x00000002 /* pci_bus_reset fail */
+#define GENWQE_INJECT_GFIR_FATAL	0x00000004 /* GFIR = 0x0000ffff */
+#define GENWQE_INJECT_GFIR_INFO		0x00000008 /* GFIR = 0xffff0000 */
+
+/*
+ * Genwqe card description and management data.
+ *
+ * Error-handling in case of card malfunction
+ * ------------------------------------------
+ *
+ * If the card is detected to be defective, the outside environment
+ * will cause the PCI layer to call deinit (the cleanup function for
+ * probe). This has the same effect as doing an unbind/bind operation
+ * on the card.
+ *
+ * The genwqe card driver implements a health checking thread which
+ * verifies the card function. If this detects a problem, the card's
+ * device is shut down and restarted again, along with a reset of
+ * the card and queue.
+ *
+ * All functions accessing the card device return either -EIO or -ENODEV
+ * code to indicate the malfunction to the user. The user has to close
+ * the file descriptor and open a new one, once the card becomes
+ * available again.
+ *
+ * If the open file descriptor is set up to receive SIGIO, the signal is
+ * generated for the application, which has to provide a handler to
+ * react on it. If the application does not close the open
+ * file descriptor, a SIGKILL is sent to enforce freeing the card's
+ * resources.
+ *
+ * I did not find a different way to prevent kernel problems due to
+ * reference counters for the card's character devices getting out of
+ * sync. The character device deallocation does not block, even if
+ * there is still an open file descriptor pending. If this pending
+ * descriptor is closed, the data structures used by the character
+ * device are reinstantiated, which will lead to the reference counter
+ * dropping below the allowed values.
+ *
+ * Card recovery
+ * -------------
+ *
+ * To test the internal driver recovery the following command can be used:
+ *   sudo sh -c 'echo 0xfffff > /sys/class/genwqe/genwqe0_card/err_inject'
+ */
+
+
+/**
+ * enum dma_mapping_type - Mapping type definition
+ *
+ * To avoid memcpying data around, we use the user memory directly. To do
+ * this we need to pin/swap-in the memory and request a DMA address
+ * for it.
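+ *
+ * A mapping is typically initialized like this before the user pages
+ * are pinned (sketch only; genwqe_mapping_init() is defined below):
+ *
+ *	struct dma_mapping m;
+ *
+ *	genwqe_mapping_init(&m, GENWQE_MAPPING_RAW);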
+ */
+enum dma_mapping_type {
+	GENWQE_MAPPING_RAW = 0,		/* contiguous memory buffer */
+	GENWQE_MAPPING_SGL_TEMP,	/* sglist dynamically used */
+	GENWQE_MAPPING_SGL_PINNED,	/* sglist used with pinning */
+};
+
+/**
+ * struct dma_mapping - Information about memory mappings done by the driver
+ */
+struct dma_mapping {
+	enum dma_mapping_type type;
+
+	void *u_vaddr;			/* user-space vaddr/non-aligned */
+	void *k_vaddr;			/* kernel-space vaddr/non-aligned */
+	dma_addr_t dma_addr;		/* physical DMA address */
+
+	struct page **page_list;	/* list of pages used by user buff */
+	dma_addr_t *dma_list;		/* list of dma addresses per page */
+	unsigned int nr_pages;		/* number of pages */
+	unsigned int size;		/* size in bytes */
+
+	struct list_head card_list;	/* list of usr_maps for card */
+	struct list_head pin_list;	/* list of pinned memory for dev */
+};
+
+static inline void genwqe_mapping_init(struct dma_mapping *m,
+				       enum dma_mapping_type type)
+{
+	memset(m, 0, sizeof(*m));
+	m->type = type;
+}
+
+/**
+ * struct ddcb_queue - DDCB queue data
+ * @ddcb_max:          Number of DDCBs on the queue
+ * @ddcb_next:         Next free DDCB
+ * @ddcb_act:          Next DDCB supposed to finish
+ * @ddcb_seq:          Sequence number of last DDCB
+ * @ddcbs_in_flight:   Currently enqueued DDCBs
+ * @ddcbs_completed:   Number of already completed DDCBs
+ * @return_on_busy:    Number of -EBUSY returns on full queue
+ * @wait_on_busy:      Number of waits on full queue
+ * @ddcb_daddr:        DMA address of first DDCB in the queue
+ * @ddcb_vaddr:        Kernel virtual address of first DDCB in the queue
+ * @ddcb_req:          Associated requests (one per DDCB)
+ * @ddcb_waitqs:       Associated wait queues (one per DDCB)
+ * @ddcb_lock:         Lock to protect queuing operations
+ * @ddcb_waitq:        Wait on next DDCB finishing
+ */
+
+struct ddcb_queue {
+	int ddcb_max;			/* number of DDCBs */
+	int ddcb_next;			/* next available DDCB num */
+	int ddcb_act;			/* DDCB to be processed */
+	u16 ddcb_seq;			/* slc seq num */
+	unsigned int ddcbs_in_flight;	/* number of ddcbs in processing */
+	unsigned int ddcbs_completed;
+	unsigned int ddcbs_max_in_flight;
+	unsigned int return_on_busy;	/* how many times -EBUSY? */
+	unsigned int wait_on_busy;
+
+	dma_addr_t ddcb_daddr;		/* DMA address */
+	struct ddcb *ddcb_vaddr;	/* kernel virtual addr for DDCBs */
+	struct ddcb_requ **ddcb_req;	/* ddcb processing parameter */
+	wait_queue_head_t *ddcb_waitqs; /* waitqueue per ddcb */
+
+	spinlock_t ddcb_lock;		/* exclusive access to queue */
+	wait_queue_head_t busy_waitq;	/* wait for ddcb processing */
+
+	/* registers of the respective queue to be used */
+	u32 IO_QUEUE_CONFIG;
+	u32 IO_QUEUE_STATUS;
+	u32 IO_QUEUE_SEGMENT;
+	u32 IO_QUEUE_INITSQN;
+	u32 IO_QUEUE_WRAP;
+	u32 IO_QUEUE_OFFSET;
+	u32 IO_QUEUE_WTIME;
+	u32 IO_QUEUE_ERRCNTS;
+	u32 IO_QUEUE_LRW;
+};
+
+/*
+ * GFIR, SLU_UNITCFG, APP_UNITCFG
+ * 8 Units with FIR/FEC + 64 * 2ndary FIRS/FEC.
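+ * That is 3 + 8 * (2 + 2 * 64) = 1043 register slots, which is what
+ * the define below evaluates to.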
+ */ +#define GENWQE_FFDC_REGS (3 + (8 * (2 + 2 * 64))) + +struct genwqe_ffdc { + unsigned int entries; + struct genwqe_reg *regs; +}; + +/** + * struct genwqe_dev - GenWQE device information + * @card_state: Card operation state, see above + * @ffdc: First Failure Data Capture buffers for each unit + * @card_thread: Working thread to operate the DDCB queue + * @card_waitq: Wait queue used in card_thread + * @queue: DDCB queue + * @health_thread: Card monitoring thread (only for PFs) + * @health_waitq: Wait queue used in health_thread + * @pci_dev: Associated PCI device (function) + * @mmio: Base address of 64-bit register space + * @mmio_len: Length of register area + * @file_lock: Lock to protect access to file_list + * @file_list: List of all processes with open GenWQE file descriptors + * + * This struct contains all information needed to communicate with a + * GenWQE card. It is initialized when a GenWQE device is found and + * destroyed when it goes away. It holds data to maintain the queue as + * well as data needed to feed the user interfaces. + */ +struct genwqe_dev { + enum genwqe_card_state card_state; + spinlock_t print_lock; + + int card_idx; /* card index 0..CARD_NO_MAX-1 */ + u64 flags; /* general flags */ + + /* FFDC data gathering */ + struct genwqe_ffdc ffdc[GENWQE_DBG_UNITS]; + + /* DDCB workqueue */ + struct task_struct *card_thread; + wait_queue_head_t queue_waitq; + struct ddcb_queue queue; /* genwqe DDCB queue */ + unsigned int irqs_processed; + + /* Card health checking thread */ + struct task_struct *health_thread; + wait_queue_head_t health_waitq; + + int use_platform_recovery; /* use platform recovery mechanisms */ + + /* char device */ + dev_t devnum_genwqe; /* major/minor num card */ + struct class *class_genwqe; /* reference to class object */ + struct device *dev; /* for device creation */ + struct cdev cdev_genwqe; /* char device for card */ + + struct dentry *debugfs_root; /* debugfs card root directory */ + struct dentry *debugfs_genwqe; /* debugfs driver root directory */ + + /* pci resources */ + struct pci_dev *pci_dev; /* PCI device */ + void __iomem *mmio; /* BAR-0 MMIO start */ + unsigned long mmio_len; + int num_vfs; + u32 vf_jobtimeout_msec[GENWQE_MAX_VFS]; + int is_privileged; /* access to all regs possible */ + + /* config regs which we need often */ + u64 slu_unitcfg; + u64 app_unitcfg; + u64 softreset; + u64 err_inject; + u64 last_gfir; + char app_name[5]; + + spinlock_t file_lock; /* lock for open files */ + struct list_head file_list; /* list of open files */ + + /* debugfs parameters */ + int ddcb_software_timeout; /* wait until DDCB times out */ + int skip_recovery; /* circumvention if recovery fails */ + int kill_timeout; /* wait after sending SIGKILL */ +}; + +/** + * enum genwqe_requ_state - State of a DDCB execution request + */ +enum genwqe_requ_state { + GENWQE_REQU_NEW = 0, + GENWQE_REQU_ENQUEUED = 1, + GENWQE_REQU_TAPPED = 2, + GENWQE_REQU_FINISHED = 3, + GENWQE_REQU_STATE_MAX, +}; + +/** + * struct genwqe_sgl - Scatter gather list describing user-space memory + * @sgl: scatter gather list needs to be 128 byte aligned + * @sgl_dma_addr: dma address of sgl + * @sgl_size: size of area used for sgl + * @user_addr: user-space address of memory area + * @user_size: size of user-space memory area + * @page: buffer for partial pages if needed + * @page_dma_addr: dma address partial pages + */ +struct genwqe_sgl { + dma_addr_t sgl_dma_addr; + struct sg_entry *sgl; + size_t sgl_size; /* size of sgl */ + + void __user *user_addr; /* 
user-space base-address */ + size_t user_size; /* size of memory area */ + + unsigned long nr_pages; + unsigned long fpage_offs; + size_t fpage_size; + size_t lpage_size; + + void *fpage; + dma_addr_t fpage_dma_addr; + + void *lpage; + dma_addr_t lpage_dma_addr; +}; + +int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, + void __user *user_addr, size_t user_size); + +int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, + dma_addr_t *dma_list); + +int genwqe_free_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl); + +/** + * struct ddcb_requ - Kernel internal representation of the DDCB request + * @cmd: User space representation of the DDCB execution request + */ +struct ddcb_requ { + /* kernel specific content */ + enum genwqe_requ_state req_state; /* request status */ + int num; /* ddcb_no for this request */ + struct ddcb_queue *queue; /* associated queue */ + + struct dma_mapping dma_mappings[DDCB_FIXUPS]; + struct genwqe_sgl sgls[DDCB_FIXUPS]; + + /* kernel/user shared content */ + struct genwqe_ddcb_cmd cmd; /* ddcb_no for this request */ + struct genwqe_debug_data debug_data; +}; + +/** + * struct genwqe_file - Information for open GenWQE devices + */ +struct genwqe_file { + struct genwqe_dev *cd; + struct genwqe_driver *client; + struct file *filp; + + struct fasync_struct *async_queue; + struct task_struct *owner; + struct list_head list; /* entry in list of open files */ + + spinlock_t map_lock; /* lock for dma_mappings */ + struct list_head map_list; /* list of dma_mappings */ + + spinlock_t pin_lock; /* lock for pinned memory */ + struct list_head pin_list; /* list of pinned memory */ +}; + +int genwqe_setup_service_layer(struct genwqe_dev *cd); /* for PF only */ +int genwqe_finish_queue(struct genwqe_dev *cd); +int genwqe_release_service_layer(struct genwqe_dev *cd); + +/** + * genwqe_get_slu_id() - Read Service Layer Unit Id + * Return: 0x00: Development code + * 0x01: SLC1 (old) + * 0x02: SLC2 (sept2012) + * 0x03: SLC2 (feb2013, generic driver) + */ +static inline int genwqe_get_slu_id(struct genwqe_dev *cd) +{ + return (int)((cd->slu_unitcfg >> 32) & 0xff); +} + +int genwqe_ddcbs_in_flight(struct genwqe_dev *cd); + +u8 genwqe_card_type(struct genwqe_dev *cd); +int genwqe_card_reset(struct genwqe_dev *cd); +int genwqe_set_interrupt_capability(struct genwqe_dev *cd, int count); +void genwqe_reset_interrupt_capability(struct genwqe_dev *cd); + +int genwqe_device_create(struct genwqe_dev *cd); +int genwqe_device_remove(struct genwqe_dev *cd); + +/* debugfs */ +int genwqe_init_debugfs(struct genwqe_dev *cd); +void genqwe_exit_debugfs(struct genwqe_dev *cd); + +int genwqe_read_softreset(struct genwqe_dev *cd); + +/* Hardware Circumventions */ +int genwqe_recovery_on_fatal_gfir_required(struct genwqe_dev *cd); +int genwqe_flash_readback_fails(struct genwqe_dev *cd); + +/** + * genwqe_write_vreg() - Write register in VF window + * @cd: genwqe device + * @reg: register address + * @val: value to write + * @func: 0: PF, 1: VF0, ..., 15: VF14 + */ +int genwqe_write_vreg(struct genwqe_dev *cd, u32 reg, u64 val, int func); + +/** + * genwqe_read_vreg() - Read register in VF window + * @cd: genwqe device + * @reg: register address + * @func: 0: PF, 1: VF0, ..., 15: VF14 + * + * Return: content of the register + */ +u64 genwqe_read_vreg(struct genwqe_dev *cd, u32 reg, int func); + +/* FFDC Buffer Management */ +int genwqe_ffdc_buff_size(struct genwqe_dev *cd, int unit_id); +int genwqe_ffdc_buff_read(struct genwqe_dev *cd, int unit_id, + struct 
genwqe_reg *regs, unsigned int max_regs);
+int genwqe_read_ffdc_regs(struct genwqe_dev *cd, struct genwqe_reg *regs,
+			  unsigned int max_regs, int all);
+int genwqe_ffdc_dump_dma(struct genwqe_dev *cd,
+			 struct genwqe_reg *regs, unsigned int max_regs);
+
+int genwqe_init_debug_data(struct genwqe_dev *cd,
+			   struct genwqe_debug_data *d);
+
+void genwqe_init_crc32(void);
+int genwqe_read_app_id(struct genwqe_dev *cd, char *app_name, int len);
+
+/* Memory allocation/deallocation; dma address handling */
+int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m,
+		     void *uaddr, unsigned long size,
+		     struct ddcb_requ *req);
+
+int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m,
+		       struct ddcb_requ *req);
+
+static inline bool dma_mapping_used(struct dma_mapping *m)
+{
+	if (!m)
+		return false;
+	return m->size != 0;
+}
+
+/**
+ * __genwqe_execute_ddcb() - Execute DDCB request with addr translation
+ *
+ * This function will do the address translation changes to the DDCBs
+ * according to the definitions required by the ATS field. It looks up
+ * the memory allocation buffer or does vmap/vunmap for the respective
+ * user-space buffers, including page pinning and scatter gather list
+ * buildup and teardown.
+ */
+int __genwqe_execute_ddcb(struct genwqe_dev *cd,
+			  struct genwqe_ddcb_cmd *cmd, unsigned int f_flags);
+
+/**
+ * __genwqe_execute_raw_ddcb() - Execute DDCB request without addr translation
+ *
+ * This version will not do address translation or any modification of
+ * the DDCB data. It is used e.g. for the MoveFlash DDCB which is
+ * entirely prepared by the driver itself. That means the appropriate
+ * DMA addresses are already in the DDCB and do not need any
+ * modification.
+ */
+int __genwqe_execute_raw_ddcb(struct genwqe_dev *cd,
+			      struct genwqe_ddcb_cmd *cmd,
+			      unsigned int f_flags);
+int __genwqe_enqueue_ddcb(struct genwqe_dev *cd,
+			  struct ddcb_requ *req,
+			  unsigned int f_flags);
+
+int __genwqe_wait_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req);
+int __genwqe_purge_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req);
+
+/* register access */
+int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val);
+u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs);
+int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val);
+u32 __genwqe_readl(struct genwqe_dev *cd, u64 byte_offs);
+
+void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size,
+				dma_addr_t *dma_handle);
+void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size,
+			      void *vaddr, dma_addr_t dma_handle);
+
+/* Base clock frequency in MHz */
+int genwqe_base_clock_frequency(struct genwqe_dev *cd);
+
+/* Before FFDC is captured the traps should be stopped. */
+void genwqe_stop_traps(struct genwqe_dev *cd);
+void genwqe_start_traps(struct genwqe_dev *cd);
+
+/* Hardware circumvention */
+bool genwqe_need_err_masking(struct genwqe_dev *cd);
+
+/**
+ * genwqe_is_privileged() - Determine operation mode for PCI function
+ *
+ * On Intel with SRIOV support we see:
+ *   PF: is_physfn = 1 is_virtfn = 0
+ *   VF: is_physfn = 0 is_virtfn = 1
+ *
+ * On Systems with no SRIOV support _and_ virtualized systems we get:
+ *   is_physfn = 0 is_virtfn = 0
+ *
+ * Other vendors have individual pci device ids to distinguish between
+ * virtual function drivers and physical function drivers. GenWQE
+ * unfortunately has just one PCI device id for both VFs and PF.
+ * + * The following code is used to distinguish if the card is running in + * privileged mode, either as true PF or in a virtualized system with + * full register access e.g. currently on PowerPC. + * + * if (pci_dev->is_virtfn) + * cd->is_privileged = 0; + * else + * cd->is_privileged = (__genwqe_readq(cd, IO_SLU_BITSTREAM) + * != IO_ILLEGAL_VALUE); + */ +static inline int genwqe_is_privileged(struct genwqe_dev *cd) +{ + return cd->is_privileged; +} + +#endif /* __CARD_BASE_H__ */ diff --git a/kernel/drivers/misc/genwqe/card_ddcb.c b/kernel/drivers/misc/genwqe/card_ddcb.c new file mode 100644 index 000000000..6d51e5f08 --- /dev/null +++ b/kernel/drivers/misc/genwqe/card_ddcb.c @@ -0,0 +1,1411 @@ +/** + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp <haver@linux.vnet.ibm.com> + * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com> + * Author: Michael Jung <mijung@gmx.net> + * Author: Michael Ruettger <michael@ibmra.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* + * Device Driver Control Block (DDCB) queue support. Definition of + * interrupt handlers for queue support as well as triggering the + * health monitor code in case of problems. The current hardware uses + * an MSI interrupt which is shared between error handling and + * functional code. + */ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/pci.h> +#include <linux/string.h> +#include <linux/dma-mapping.h> +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/crc-itu-t.h> + +#include "card_base.h" +#include "card_ddcb.h" + +/* + * N: next DDCB, this is where the next DDCB will be put. + * A: active DDCB, this is where the code will look for the next completion. + * x: DDCB is enqueued, we are waiting for its completion. 
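+ *
+ * In all situations below the number of enqueued DDCBs can be
+ * written as (N - A + ddcb_max) % ddcb_max, which is what
+ * queue_enqueued_ddcbs() computes with an explicit wrap-around
+ * check instead of the modulo.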
+
+ * Situation (1): Empty queue
+ *  +---+---+---+---+---+---+---+---+
+ *  | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ *  |   |   |   |   |   |   |   |   |
+ *  +---+---+---+---+---+---+---+---+
+ *           A/N
+ *  enqueued_ddcbs = A - N = 2 - 2 = 0
+ *
+ * Situation (2): Wrapped, N > A
+ *  +---+---+---+---+---+---+---+---+
+ *  | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ *  |   |   | x | x |   |   |   |   |
+ *  +---+---+---+---+---+---+---+---+
+ *           A       N
+ *  enqueued_ddcbs = N - A = 4 - 2 = 2
+ *
+ * Situation (3): Queue wrapped, A > N
+ *  +---+---+---+---+---+---+---+---+
+ *  | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ *  | x | x |   |   | x | x | x | x |
+ *  +---+---+---+---+---+---+---+---+
+ *           N       A
+ *  enqueued_ddcbs = queue_max - (A - N) = 8 - (4 - 2) = 6
+ *
+ * Situation (4a): Queue full N > A
+ *  +---+---+---+---+---+---+---+---+
+ *  | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ *  | x | x | x | x | x | x | x |   |
+ *  +---+---+---+---+---+---+---+---+
+ *    A                           N
+ *
+ *  enqueued_ddcbs = N - A = 7 - 0 = 7
+ *
+ * Situation (4b): Queue full A > N
+ *  +---+---+---+---+---+---+---+---+
+ *  | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ *  | x | x | x |   | x | x | x | x |
+ *  +---+---+---+---+---+---+---+---+
+ *                N   A
+ *  enqueued_ddcbs = queue_max - (A - N) = 8 - (4 - 3) = 7
+ */
+
+static int queue_empty(struct ddcb_queue *queue)
+{
+	return queue->ddcb_next == queue->ddcb_act;
+}
+
+static int queue_enqueued_ddcbs(struct ddcb_queue *queue)
+{
+	if (queue->ddcb_next >= queue->ddcb_act)
+		return queue->ddcb_next - queue->ddcb_act;
+
+	return queue->ddcb_max - (queue->ddcb_act - queue->ddcb_next);
+}
+
+static int queue_free_ddcbs(struct ddcb_queue *queue)
+{
+	int free_ddcbs = queue->ddcb_max - queue_enqueued_ddcbs(queue) - 1;
+
+	if (WARN_ON_ONCE(free_ddcbs < 0)) { /* must never ever happen! */
+		return 0;
+	}
+	return free_ddcbs;
+}
+
+/*
+ * Use of the PRIV field in the DDCB for queue debugging:
+ *
+ * (1) Trying to get rid of a DDCB which saw a timeout:
+ *     pddcb->priv[6] = 0xcc;   # cleared
+ *
+ * (2) Append a DDCB via NEXT bit:
+ *     pddcb->priv[7] = 0xaa;   # appended
+ *
+ * (3) DDCB needed tapping:
+ *     pddcb->priv[7] = 0xbb;   # tapped
+ *
+ * (4) DDCB marked as correctly finished:
+ *     pddcb->priv[6] = 0xff;   # finished
+ */
+
+static inline void ddcb_mark_tapped(struct ddcb *pddcb)
+{
+	pddcb->priv[7] = 0xbb;  /* tapped */
+}
+
+static inline void ddcb_mark_appended(struct ddcb *pddcb)
+{
+	pddcb->priv[7] = 0xaa;	/* appended */
+}
+
+static inline void ddcb_mark_cleared(struct ddcb *pddcb)
+{
+	pddcb->priv[6] = 0xcc; /* cleared */
+}
+
+static inline void ddcb_mark_finished(struct ddcb *pddcb)
+{
+	pddcb->priv[6] = 0xff;	/* finished */
+}
+
+static inline void ddcb_mark_unused(struct ddcb *pddcb)
+{
+	pddcb->priv_64 = cpu_to_be64(0); /* not tapped */
+}
+
+/**
+ * genwqe_crc16() - Generate 16-bit crc as required for DDCBs
+ * @buff:       pointer to data buffer
+ * @len:        length of data for calculation
+ * @init:       initial crc (0xffff at start)
+ *
+ * Polynomial = x^16 + x^12 + x^5 + 1   (0x1021)
+ * Example: 4 bytes 0x01 0x02 0x03 0x04 with init = 0xffff
+ * should result in a crc16 of 0x89c3
+ *
+ * Return: crc16 checksum in big endian format!
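+ *
+ * The example can be reproduced outside the kernel with a plain
+ * bit-by-bit CRC-CCITT loop (user-space sketch, not driver code;
+ * equivalent to what crc_itu_t() computes):
+ *
+ *   uint16_t crc16(const uint8_t *b, size_t n, uint16_t crc)
+ *   {
+ *           int i;
+ *
+ *           while (n--) {
+ *                   crc ^= (uint16_t)*b++ << 8;
+ *                   for (i = 0; i < 8; i++)
+ *                           crc = (crc & 0x8000) ?
+ *                                   (crc << 1) ^ 0x1021 : crc << 1;
+ *           }
+ *           return crc;
+ *   }
+ *
+ *   crc16((const uint8_t *)"\x01\x02\x03\x04", 4, 0xffff) yields the
+ *   0x89c3 from the example above.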
+ */ +static inline u16 genwqe_crc16(const u8 *buff, size_t len, u16 init) +{ + return crc_itu_t(init, buff, len); +} + +static void print_ddcb_info(struct genwqe_dev *cd, struct ddcb_queue *queue) +{ + int i; + struct ddcb *pddcb; + unsigned long flags; + struct pci_dev *pci_dev = cd->pci_dev; + + spin_lock_irqsave(&cd->print_lock, flags); + + dev_info(&pci_dev->dev, + "DDCB list for card #%d (ddcb_act=%d / ddcb_next=%d):\n", + cd->card_idx, queue->ddcb_act, queue->ddcb_next); + + pddcb = queue->ddcb_vaddr; + for (i = 0; i < queue->ddcb_max; i++) { + dev_err(&pci_dev->dev, + " %c %-3d: RETC=%03x SEQ=%04x HSI=%02X SHI=%02x PRIV=%06llx CMD=%03x\n", + i == queue->ddcb_act ? '>' : ' ', + i, + be16_to_cpu(pddcb->retc_16), + be16_to_cpu(pddcb->seqnum_16), + pddcb->hsi, + pddcb->shi, + be64_to_cpu(pddcb->priv_64), + pddcb->cmd); + pddcb++; + } + spin_unlock_irqrestore(&cd->print_lock, flags); +} + +struct genwqe_ddcb_cmd *ddcb_requ_alloc(void) +{ + struct ddcb_requ *req; + + req = kzalloc(sizeof(*req), GFP_ATOMIC); + if (!req) + return NULL; + + return &req->cmd; +} + +void ddcb_requ_free(struct genwqe_ddcb_cmd *cmd) +{ + struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd); + + kfree(req); +} + +static inline enum genwqe_requ_state ddcb_requ_get_state(struct ddcb_requ *req) +{ + return req->req_state; +} + +static inline void ddcb_requ_set_state(struct ddcb_requ *req, + enum genwqe_requ_state new_state) +{ + req->req_state = new_state; +} + +static inline int ddcb_requ_collect_debug_data(struct ddcb_requ *req) +{ + return req->cmd.ddata_addr != 0x0; +} + +/** + * ddcb_requ_finished() - Returns the hardware state of the associated DDCB + * @cd: pointer to genwqe device descriptor + * @req: DDCB work request + * + * Status of ddcb_requ mirrors this hardware state, but is copied in + * the ddcb_requ on interrupt/polling function. The lowlevel code + * should check the hardware state directly, the higher level code + * should check the copy. + * + * This function will also return true if the state of the queue is + * not GENWQE_CARD_USED. This enables us to purge all DDCBs in the + * shutdown case. + */ +static int ddcb_requ_finished(struct genwqe_dev *cd, struct ddcb_requ *req) +{ + return (ddcb_requ_get_state(req) == GENWQE_REQU_FINISHED) || + (cd->card_state != GENWQE_CARD_USED); +} + +/** + * enqueue_ddcb() - Enqueue a DDCB + * @cd: pointer to genwqe device descriptor + * @queue: queue this operation should be done on + * @ddcb_no: pointer to ddcb number being tapped + * + * Start execution of DDCB by tapping or append to queue via NEXT + * bit. This is done by an atomic 'compare and swap' instruction and + * checking SHI and HSI of the previous DDCB. + * + * This function must only be called with ddcb_lock held. + * + * Return: 1 if new DDCB is appended to previous + * 2 if DDCB queue is tapped via register/simulation + */ +#define RET_DDCB_APPENDED 1 +#define RET_DDCB_TAPPED 2 + +static int enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_queue *queue, + struct ddcb *pddcb, int ddcb_no) +{ + unsigned int try; + int prev_no; + struct ddcb *prev_ddcb; + __be32 old, new, icrc_hsi_shi; + u64 num; + + /* + * For performance checks a Dispatch Timestamp can be put into + * DDCB It is supposed to use the SLU's free running counter, + * but this requires PCIe cycles. + */ + ddcb_mark_unused(pddcb); + + /* check previous DDCB if already fetched */ + prev_no = (ddcb_no == 0) ? 
queue->ddcb_max - 1 : ddcb_no - 1;
+	prev_ddcb = &queue->ddcb_vaddr[prev_no];
+
+	/*
+	 * It might have happened that the HSI.FETCHED bit is
+	 * set. Retry in this case. At most two attempts should
+	 * therefore be needed.
+	 */
+	ddcb_mark_appended(pddcb);
+	for (try = 0; try < 2; try++) {
+		old = prev_ddcb->icrc_hsi_shi_32; /* read SHI/HSI in BE32 */
+
+		/* try to append via NEXT bit if prev DDCB is not completed */
+		if ((old & DDCB_COMPLETED_BE32) != 0x00000000)
+			break;
+
+		new = (old | DDCB_NEXT_BE32);
+
+		wmb();		/* need to ensure write ordering */
+		icrc_hsi_shi = cmpxchg(&prev_ddcb->icrc_hsi_shi_32, old, new);
+
+		if (icrc_hsi_shi == old)
+			return RET_DDCB_APPENDED; /* appended to queue */
+	}
+
+	/* Queue must be re-started by updating QUEUE_OFFSET */
+	ddcb_mark_tapped(pddcb);
+	num = (u64)ddcb_no << 8;
+
+	wmb();			/* need to ensure write ordering */
+	__genwqe_writeq(cd, queue->IO_QUEUE_OFFSET, num); /* start queue */
+
+	return RET_DDCB_TAPPED;
+}
+
+/**
+ * copy_ddcb_results() - Copy output state from real DDCB to request
+ *
+ * Copy DDCB ASV to request struct. There is no endian
+ * conversion made, since data structure in ASV is still
+ * unknown here.
+ *
+ * This is needed by:
+ *   - genwqe_purge_ddcb()
+ *   - genwqe_check_ddcb_queue()
+ */
+static void copy_ddcb_results(struct ddcb_requ *req, int ddcb_no)
+{
+	struct ddcb_queue *queue = req->queue;
+	struct ddcb *pddcb = &queue->ddcb_vaddr[req->num];
+
+	memcpy(&req->cmd.asv[0], &pddcb->asv[0], DDCB_ASV_LENGTH);
+
+	/* copy status flags of the variant part */
+	req->cmd.vcrc = be16_to_cpu(pddcb->vcrc_16);
+	req->cmd.deque_ts = be64_to_cpu(pddcb->deque_ts_64);
+	req->cmd.cmplt_ts = be64_to_cpu(pddcb->cmplt_ts_64);
+
+	req->cmd.attn = be16_to_cpu(pddcb->attn_16);
+	req->cmd.progress = be32_to_cpu(pddcb->progress_32);
+	req->cmd.retc = be16_to_cpu(pddcb->retc_16);
+
+	if (ddcb_requ_collect_debug_data(req)) {
+		int prev_no = (ddcb_no == 0) ?
+			queue->ddcb_max - 1 : ddcb_no - 1;
+		struct ddcb *prev_pddcb = &queue->ddcb_vaddr[prev_no];
+
+		memcpy(&req->debug_data.ddcb_finished, pddcb,
+		       sizeof(req->debug_data.ddcb_finished));
+		memcpy(&req->debug_data.ddcb_prev, prev_pddcb,
+		       sizeof(req->debug_data.ddcb_prev));
+	}
+}
+
+/**
+ * genwqe_check_ddcb_queue() - Checks DDCB queue for completed work requests.
+ * @cd:         pointer to genwqe device descriptor
+ *
+ * Return: Number of DDCBs which were finished
+ */
+static int genwqe_check_ddcb_queue(struct genwqe_dev *cd,
+				   struct ddcb_queue *queue)
+{
+	unsigned long flags;
+	int ddcbs_finished = 0;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	spin_lock_irqsave(&queue->ddcb_lock, flags);
+
+	/* FIXME avoid soft locking CPU */
+	while (!queue_empty(queue) && (ddcbs_finished < queue->ddcb_max)) {
+
+		struct ddcb *pddcb;
+		struct ddcb_requ *req;
+		u16 vcrc, vcrc_16, retc_16;
+
+		pddcb = &queue->ddcb_vaddr[queue->ddcb_act];
+
+		if ((pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) ==
+		    0x00000000)
+			goto go_home; /* not completed, continue waiting */
+
+		wmb();  /* Add sync to decouple prev. read operations */
+
+		/* Note: DDCB could be purged */
+		req = queue->ddcb_req[queue->ddcb_act];
+		if (req == NULL) {
+			/* this occurs if DDCB is purged, not an error */
+			/* Move active DDCB further; Nothing to do anymore. */
+			goto pick_next_one;
+		}
+
+		/*
+		 * HSI=0x44 (fetched and completed), but RETC is
+		 * 0x101, or even worse 0x000.
+		 *
+		 * In case of seeing the queue in inconsistent state
+		 * we read the errcnts and the queue status to provide
+		 * a trigger for our PCIe analyzer to stop capturing.
+		 */
+		retc_16 = be16_to_cpu(pddcb->retc_16);
+		if ((pddcb->hsi == 0x44) && (retc_16 <= 0x101)) {
+			u64 errcnts, status;
+			u64 ddcb_offs = (u64)pddcb - (u64)queue->ddcb_vaddr;
+
+			errcnts = __genwqe_readq(cd, queue->IO_QUEUE_ERRCNTS);
+			status = __genwqe_readq(cd, queue->IO_QUEUE_STATUS);
+
+			dev_err(&pci_dev->dev,
+				"[%s] SEQN=%04x HSI=%02x RETC=%03x Q_ERRCNTS=%016llx Q_STATUS=%016llx DDCB_DMA_ADDR=%016llx\n",
+				__func__, be16_to_cpu(pddcb->seqnum_16),
+				pddcb->hsi, retc_16, errcnts, status,
+				queue->ddcb_daddr + ddcb_offs);
+		}
+
+		copy_ddcb_results(req, queue->ddcb_act);
+		queue->ddcb_req[queue->ddcb_act] = NULL; /* take from queue */
+
+		dev_dbg(&pci_dev->dev, "FINISHED DDCB#%d\n", req->num);
+		genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
+
+		ddcb_mark_finished(pddcb);
+
+		/* calculate CRC_16 to see if VCRC is correct */
+		vcrc = genwqe_crc16(pddcb->asv,
+				    VCRC_LENGTH(req->cmd.asv_length),
+				    0xffff);
+		vcrc_16 = be16_to_cpu(pddcb->vcrc_16);
+		if (vcrc != vcrc_16) {
+			printk_ratelimited(KERN_ERR
+				"%s %s: err: wrong VCRC pre=%02x vcrc_len=%d bytes vcrc_data=%04x is not vcrc_card=%04x\n",
+				GENWQE_DEVNAME, dev_name(&pci_dev->dev),
+				pddcb->pre, VCRC_LENGTH(req->cmd.asv_length),
+				vcrc, vcrc_16);
+		}
+
+		ddcb_requ_set_state(req, GENWQE_REQU_FINISHED);
+		queue->ddcbs_completed++;
+		queue->ddcbs_in_flight--;
+
+		/* wake up process waiting for this DDCB, and
+		   processes on the busy queue */
+		wake_up_interruptible(&queue->ddcb_waitqs[queue->ddcb_act]);
+		wake_up_interruptible(&queue->busy_waitq);
+
+pick_next_one:
+		queue->ddcb_act = (queue->ddcb_act + 1) % queue->ddcb_max;
+		ddcbs_finished++;
+	}
+
+ go_home:
+	spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+	return ddcbs_finished;
+}
+
+/**
+ * __genwqe_wait_ddcb(): Waits until DDCB is completed
+ * @cd:         pointer to genwqe device descriptor
+ * @req:        pointer to requested DDCB parameters
+ *
+ * The Service Layer will update the RETC in DDCB when processing is
+ * pending or done.
+ *
+ * Return: > 0 remaining jiffies, DDCB completed
+ *           -ETIMEDOUT   when timeout
+ *           -ERESTARTSYS when ^C
+ *           -EINVAL      when unknown error condition
+ *
+ * When an error is returned the caller needs to ensure that
+ * purge_ddcb() is being called to get the &req removed from the
+ * queue.
+ */
+int __genwqe_wait_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req)
+{
+	int rc;
+	unsigned int ddcb_no;
+	struct ddcb_queue *queue;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (req == NULL)
+		return -EINVAL;
+
+	queue = req->queue;
+	if (queue == NULL)
+		return -EINVAL;
+
+	ddcb_no = req->num;
+	if (ddcb_no >= queue->ddcb_max)
+		return -EINVAL;
+
+	rc = wait_event_interruptible_timeout(queue->ddcb_waitqs[ddcb_no],
+				ddcb_requ_finished(cd, req),
+				genwqe_ddcb_software_timeout * HZ);
+
+	/*
+	 * We need to distinguish 3 cases here:
+	 *   1. rc == 0              timeout occurred
+	 *   2. rc == -ERESTARTSYS   signal received
+	 *   3. rc > 0               remaining jiffies condition is true
+	 */
+	if (rc == 0) {
+		struct ddcb_queue *queue = req->queue;
+		struct ddcb *pddcb;
+
+		/*
+		 * Timeout may be caused by long task switching time.
+		 * When timeout happens, check if the request has
+		 * meanwhile completed.
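+		 * That is why genwqe_check_ddcb_queue() is re-run
+		 * below before the timeout is reported to the caller.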
+		 */
+		genwqe_check_ddcb_queue(cd, req->queue);
+		if (ddcb_requ_finished(cd, req))
+			return rc;
+
+		dev_err(&pci_dev->dev,
+			"[%s] err: DDCB#%d timeout rc=%d state=%d req @ %p\n",
+			__func__, req->num, rc, ddcb_requ_get_state(req),
+			req);
+		dev_err(&pci_dev->dev,
+			"[%s] IO_QUEUE_STATUS=0x%016llx\n", __func__,
+			__genwqe_readq(cd, queue->IO_QUEUE_STATUS));
+
+		pddcb = &queue->ddcb_vaddr[req->num];
+		genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
+
+		print_ddcb_info(cd, req->queue);
+		return -ETIMEDOUT;
+
+	} else if (rc == -ERESTARTSYS) {
+		return rc;
+		/*
+		 * EINTR:       Stops the application
+		 * ERESTARTSYS: Restartable system call; called again
+		 */
+
+	} else if (rc < 0) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: DDCB#%d unknown result (rc=%d) %d!\n",
+			__func__, req->num, rc, ddcb_requ_get_state(req));
+		return -EINVAL;
+	}
+
+	/* Severe error occurred. Driver is forced to stop operation */
+	if (cd->card_state != GENWQE_CARD_USED) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: DDCB#%d forced to stop (rc=%d)\n",
+			__func__, req->num, rc);
+		return -EIO;
+	}
+	return rc;
+}
+
+/**
+ * get_next_ddcb() - Get next available DDCB
+ * @cd:         pointer to genwqe device descriptor
+ *
+ * The DDCB's content is cleared completely, except that PRE and
+ * SEQNUM are preset. This function must only be called when the
+ * ddcb_lock is held.
+ *
+ * Return: NULL if no empty DDCB available otherwise ptr to next DDCB.
+ */
+static struct ddcb *get_next_ddcb(struct genwqe_dev *cd,
+				  struct ddcb_queue *queue,
+				  int *num)
+{
+	u64 *pu64;
+	struct ddcb *pddcb;
+
+	if (queue_free_ddcbs(queue) == 0) /* queue is full */
+		return NULL;
+
+	/* find new ddcb */
+	pddcb = &queue->ddcb_vaddr[queue->ddcb_next];
+
+	/* if it is not completed, we are not allowed to use it */
+	/* barrier(); */
+	if ((pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) == 0x00000000)
+		return NULL;
+
+	*num = queue->ddcb_next;	/* internal DDCB number */
+	queue->ddcb_next = (queue->ddcb_next + 1) % queue->ddcb_max;
+
+	/* clear important DDCB fields */
+	pu64 = (u64 *)pddcb;
+	pu64[0] = 0ULL;		/* offs 0x00 (ICRC,HSI,SHI,...) */
+	pu64[1] = 0ULL;		/* offs 0x01 (ACFUNC,CMD...) */
+
+	/* destroy previous results in ASV */
+	pu64[0x80/8] = 0ULL;	/* offs 0x80 (ASV + 0) */
+	pu64[0x88/8] = 0ULL;	/* offs 0x88 (ASV + 0x08) */
+	pu64[0x90/8] = 0ULL;	/* offs 0x90 (ASV + 0x10) */
+	pu64[0x98/8] = 0ULL;	/* offs 0x98 (ASV + 0x18) */
+	pu64[0xd0/8] = 0ULL;	/* offs 0xd0 (RETC,ATTN...) */
+
+	pddcb->pre = DDCB_PRESET_PRE;	/* 128 */
+	pddcb->seqnum_16 = cpu_to_be16(queue->ddcb_seq++);
+	return pddcb;
+}
+
+/**
+ * __genwqe_purge_ddcb() - Remove a DDCB from the workqueue
+ * @cd:         genwqe device descriptor
+ * @req:        DDCB request
+ *
+ * This will fail when the request was already FETCHED. In this case
+ * we need to wait until it is finished. Else the DDCB can be
+ * reused. This function also ensures that the request data structure
+ * is removed from ddcb_req[].
+ *
+ * Do not forget to call this function when genwqe_wait_ddcb() fails,
+ * such that the request gets really removed from ddcb_req[].
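+ *
+ * Typical caller pattern (this is how __genwqe_execute_raw_ddcb()
+ * further down in this file uses it):
+ *
+ *   rc = __genwqe_wait_ddcb(cd, req);
+ *   if (rc < 0)
+ *           __genwqe_purge_ddcb(cd, req);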
+ * + * Return: 0 success + */ +int __genwqe_purge_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req) +{ + struct ddcb *pddcb = NULL; + unsigned int t; + unsigned long flags; + struct ddcb_queue *queue = req->queue; + struct pci_dev *pci_dev = cd->pci_dev; + u64 queue_status; + __be32 icrc_hsi_shi = 0x0000; + __be32 old, new; + + /* unsigned long flags; */ + if (genwqe_ddcb_software_timeout <= 0) { + dev_err(&pci_dev->dev, + "[%s] err: software timeout is not set!\n", __func__); + return -EFAULT; + } + + pddcb = &queue->ddcb_vaddr[req->num]; + + for (t = 0; t < genwqe_ddcb_software_timeout * 10; t++) { + + spin_lock_irqsave(&queue->ddcb_lock, flags); + + /* Check if req was meanwhile finished */ + if (ddcb_requ_get_state(req) == GENWQE_REQU_FINISHED) + goto go_home; + + /* try to set PURGE bit if FETCHED/COMPLETED are not set */ + old = pddcb->icrc_hsi_shi_32; /* read SHI/HSI in BE32 */ + if ((old & DDCB_FETCHED_BE32) == 0x00000000) { + + new = (old | DDCB_PURGE_BE32); + icrc_hsi_shi = cmpxchg(&pddcb->icrc_hsi_shi_32, + old, new); + if (icrc_hsi_shi == old) + goto finish_ddcb; + } + + /* normal finish with HSI bit */ + barrier(); + icrc_hsi_shi = pddcb->icrc_hsi_shi_32; + if (icrc_hsi_shi & DDCB_COMPLETED_BE32) + goto finish_ddcb; + + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + + /* + * Here the check_ddcb() function will most likely + * discover this DDCB to be finished some point in + * time. It will mark the req finished and free it up + * in the list. + */ + + copy_ddcb_results(req, req->num); /* for the failing case */ + msleep(100); /* sleep for 1/10 second and try again */ + continue; + +finish_ddcb: + copy_ddcb_results(req, req->num); + ddcb_requ_set_state(req, GENWQE_REQU_FINISHED); + queue->ddcbs_in_flight--; + queue->ddcb_req[req->num] = NULL; /* delete from array */ + ddcb_mark_cleared(pddcb); + + /* Move active DDCB further; Nothing to do here anymore. */ + + /* + * We need to ensure that there is at least one free + * DDCB in the queue. To do that, we must update + * ddcb_act only if the COMPLETED bit is set for the + * DDCB we are working on else we treat that DDCB even + * if we PURGED it as occupied (hardware is supposed + * to set the COMPLETED bit yet!). + */ + icrc_hsi_shi = pddcb->icrc_hsi_shi_32; + if ((icrc_hsi_shi & DDCB_COMPLETED_BE32) && + (queue->ddcb_act == req->num)) { + queue->ddcb_act = ((queue->ddcb_act + 1) % + queue->ddcb_max); + } +go_home: + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + return 0; + } + + /* + * If the card is dead and the queue is forced to stop, we + * might see this in the queue status register. 
+ */ + queue_status = __genwqe_readq(cd, queue->IO_QUEUE_STATUS); + + dev_dbg(&pci_dev->dev, "UN/FINISHED DDCB#%d\n", req->num); + genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb)); + + dev_err(&pci_dev->dev, + "[%s] err: DDCB#%d not purged and not completed after %d seconds QSTAT=%016llx!!\n", + __func__, req->num, genwqe_ddcb_software_timeout, + queue_status); + + print_ddcb_info(cd, req->queue); + + return -EFAULT; +} + +int genwqe_init_debug_data(struct genwqe_dev *cd, struct genwqe_debug_data *d) +{ + int len; + struct pci_dev *pci_dev = cd->pci_dev; + + if (d == NULL) { + dev_err(&pci_dev->dev, + "[%s] err: invalid memory for debug data!\n", + __func__); + return -EFAULT; + } + + len = sizeof(d->driver_version); + snprintf(d->driver_version, len, "%s", DRV_VERSION); + d->slu_unitcfg = cd->slu_unitcfg; + d->app_unitcfg = cd->app_unitcfg; + return 0; +} + +/** + * __genwqe_enqueue_ddcb() - Enqueue a DDCB + * @cd: pointer to genwqe device descriptor + * @req: pointer to DDCB execution request + * @f_flags: file mode: blocking, non-blocking + * + * Return: 0 if enqueuing succeeded + * -EIO if card is unusable/PCIe problems + * -EBUSY if enqueuing failed + */ +int __genwqe_enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req, + unsigned int f_flags) +{ + struct ddcb *pddcb; + unsigned long flags; + struct ddcb_queue *queue; + struct pci_dev *pci_dev = cd->pci_dev; + u16 icrc; + + retry: + if (cd->card_state != GENWQE_CARD_USED) { + printk_ratelimited(KERN_ERR + "%s %s: [%s] Card is unusable/PCIe problem Req#%d\n", + GENWQE_DEVNAME, dev_name(&pci_dev->dev), + __func__, req->num); + return -EIO; + } + + queue = req->queue = &cd->queue; + + /* FIXME circumvention to improve performance when no irq is + * there. + */ + if (genwqe_polling_enabled) + genwqe_check_ddcb_queue(cd, queue); + + /* + * It must be ensured to process all DDCBs in successive + * order. Use a lock here in order to prevent nested DDCB + * enqueuing. + */ + spin_lock_irqsave(&queue->ddcb_lock, flags); + + pddcb = get_next_ddcb(cd, queue, &req->num); /* get ptr and num */ + if (pddcb == NULL) { + int rc; + + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + + if (f_flags & O_NONBLOCK) { + queue->return_on_busy++; + return -EBUSY; + } + + queue->wait_on_busy++; + rc = wait_event_interruptible(queue->busy_waitq, + queue_free_ddcbs(queue) != 0); + dev_dbg(&pci_dev->dev, "[%s] waiting for free DDCB: rc=%d\n", + __func__, rc); + if (rc == -ERESTARTSYS) + return rc; /* interrupted by a signal */ + + goto retry; + } + + if (queue->ddcb_req[req->num] != NULL) { + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + + dev_err(&pci_dev->dev, + "[%s] picked DDCB %d with req=%p still in use!!\n", + __func__, req->num, req); + return -EFAULT; + } + ddcb_requ_set_state(req, GENWQE_REQU_ENQUEUED); + queue->ddcb_req[req->num] = req; + + pddcb->cmdopts_16 = cpu_to_be16(req->cmd.cmdopts); + pddcb->cmd = req->cmd.cmd; + pddcb->acfunc = req->cmd.acfunc; /* functional unit */ + + /* + * We know that we can get retc 0x104 with CRC error, do not + * stop the queue in those cases for this command. XDIR = 1 + * does not work for old SLU versions. + * + * Last bitstream with the old XDIR behavior had SLU_ID + * 0x34199. 
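	 *
	 * The comparison below masks SLU_UNITCFG with 0xFFFF0, keeping
	 * bits 4..19 (which, judging by the value above, hold the
	 * SLU_ID), before comparing against that last old-behavior
	 * SLU_ID.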
+ */ + if ((cd->slu_unitcfg & 0xFFFF0ull) > 0x34199ull) + pddcb->xdir = 0x1; + else + pddcb->xdir = 0x0; + + + pddcb->psp = (((req->cmd.asiv_length / 8) << 4) | + ((req->cmd.asv_length / 8))); + pddcb->disp_ts_64 = cpu_to_be64(req->cmd.disp_ts); + + /* + * If copying the whole DDCB_ASIV_LENGTH is impacting + * performance we need to change it to + * req->cmd.asiv_length. But simulation benefits from some + * non-architectured bits behind the architectured content. + * + * How much data is copied depends on the availability of the + * ATS field, which was introduced late. If the ATS field is + * supported ASIV is 8 bytes shorter than it used to be. Since + * the ATS field is copied too, the code should do exactly + * what it did before, but I wanted to make copying of the ATS + * field very explicit. + */ + if (genwqe_get_slu_id(cd) <= 0x2) { + memcpy(&pddcb->__asiv[0], /* destination */ + &req->cmd.__asiv[0], /* source */ + DDCB_ASIV_LENGTH); /* req->cmd.asiv_length */ + } else { + pddcb->n.ats_64 = cpu_to_be64(req->cmd.ats); + memcpy(&pddcb->n.asiv[0], /* destination */ + &req->cmd.asiv[0], /* source */ + DDCB_ASIV_LENGTH_ATS); /* req->cmd.asiv_length */ + } + + pddcb->icrc_hsi_shi_32 = cpu_to_be32(0x00000000); /* for crc */ + + /* + * Calculate CRC_16 for corresponding range PSP(7:4). Include + * empty 4 bytes prior to the data. + */ + icrc = genwqe_crc16((const u8 *)pddcb, + ICRC_LENGTH(req->cmd.asiv_length), 0xffff); + pddcb->icrc_hsi_shi_32 = cpu_to_be32((u32)icrc << 16); + + /* enable DDCB completion irq */ + if (!genwqe_polling_enabled) + pddcb->icrc_hsi_shi_32 |= DDCB_INTR_BE32; + + dev_dbg(&pci_dev->dev, "INPUT DDCB#%d\n", req->num); + genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb)); + + if (ddcb_requ_collect_debug_data(req)) { + /* use the kernel copy of debug data. 
copying back to user buffer happens later */
+
+		genwqe_init_debug_data(cd, &req->debug_data);
+		memcpy(&req->debug_data.ddcb_before, pddcb,
+		       sizeof(req->debug_data.ddcb_before));
+	}
+
+	enqueue_ddcb(cd, queue, pddcb, req->num);
+	queue->ddcbs_in_flight++;
+
+	if (queue->ddcbs_in_flight > queue->ddcbs_max_in_flight)
+		queue->ddcbs_max_in_flight = queue->ddcbs_in_flight;
+
+	ddcb_requ_set_state(req, GENWQE_REQU_TAPPED);
+	spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+	wake_up_interruptible(&cd->queue_waitq);
+
+	return 0;
+}
+
+/**
+ * __genwqe_execute_raw_ddcb() - Setup and execute DDCB
+ * @cd:         pointer to genwqe device descriptor
+ * @cmd:        user provided DDCB command
+ * @f_flags:    file mode: blocking, non-blocking
+ */
+int __genwqe_execute_raw_ddcb(struct genwqe_dev *cd,
+			      struct genwqe_ddcb_cmd *cmd,
+			      unsigned int f_flags)
+{
+	int rc = 0;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd);
+
+	if (cmd->asiv_length > DDCB_ASIV_LENGTH) {
+		dev_err(&pci_dev->dev, "[%s] err: wrong asiv_length of %d\n",
+			__func__, cmd->asiv_length);
+		return -EINVAL;
+	}
+	if (cmd->asv_length > DDCB_ASV_LENGTH) {
+		dev_err(&pci_dev->dev, "[%s] err: wrong asv_length of %d\n",
+			__func__, cmd->asv_length);
+		return -EINVAL;
+	}
+	rc = __genwqe_enqueue_ddcb(cd, req, f_flags);
+	if (rc != 0)
+		return rc;
+
+	rc = __genwqe_wait_ddcb(cd, req);
+	if (rc < 0)		/* error or signal interrupt */
+		goto err_exit;
+
+	if (ddcb_requ_collect_debug_data(req)) {
+		if (copy_to_user((struct genwqe_debug_data __user *)
+				 (unsigned long)cmd->ddata_addr,
+				 &req->debug_data,
+				 sizeof(struct genwqe_debug_data)))
+			return -EFAULT;
+	}
+
+	/*
+	 * Higher values than 0x102 indicate completion with faults,
+	 * lower values than 0x102 indicate processing faults. Note
+	 * that DDCB might have been purged. E.g. Cntl+C.
+	 */
+	if (cmd->retc != DDCB_RETC_COMPLETE) {
+		/* This might happen e.g. flash read, and needs to be
+		   handled by the upper layer code. */
+		rc = -EBADMSG;	/* not processed/error retc */
+	}
+
+	return rc;
+
+ err_exit:
+	__genwqe_purge_ddcb(cd, req);
+
+	if (ddcb_requ_collect_debug_data(req)) {
+		if (copy_to_user((struct genwqe_debug_data __user *)
+				 (unsigned long)cmd->ddata_addr,
+				 &req->debug_data,
+				 sizeof(struct genwqe_debug_data)))
+			return -EFAULT;
+	}
+	return rc;
+}
+
+/**
+ * genwqe_next_ddcb_ready() - Figure out if the next DDCB is already finished
+ *
+ * We use this as condition for our wait-queue code.
+ */
+static int genwqe_next_ddcb_ready(struct genwqe_dev *cd)
+{
+	unsigned long flags;
+	struct ddcb *pddcb;
+	struct ddcb_queue *queue = &cd->queue;
+
+	spin_lock_irqsave(&queue->ddcb_lock, flags);
+
+	if (queue_empty(queue)) { /* empty queue */
+		spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+		return 0;
+	}
+
+	pddcb = &queue->ddcb_vaddr[queue->ddcb_act];
+	if (pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) { /* ddcb ready */
+		spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+		return 1;
+	}
+
+	spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+	return 0;
+}
+
+/**
+ * genwqe_ddcbs_in_flight() - Check how many DDCBs are in flight
+ *
+ * Keep track of the number of DDCBs which are currently in the
+ * queue. This is needed for statistics as well as for the condition
+ * deciding whether we want to wait or better do polling in case no
+ * interrupts are available.
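+ *
+ * genwqe_card_thread() below uses it exactly like that when polling
+ * is enabled:
+ *
+ *   rc = wait_event_interruptible_timeout(cd->queue_waitq,
+ *           genwqe_ddcbs_in_flight(cd) ||
+ *           (should_stop = kthread_should_stop()), 1);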
+ */ +int genwqe_ddcbs_in_flight(struct genwqe_dev *cd) +{ + unsigned long flags; + int ddcbs_in_flight = 0; + struct ddcb_queue *queue = &cd->queue; + + spin_lock_irqsave(&queue->ddcb_lock, flags); + ddcbs_in_flight += queue->ddcbs_in_flight; + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + + return ddcbs_in_flight; +} + +static int setup_ddcb_queue(struct genwqe_dev *cd, struct ddcb_queue *queue) +{ + int rc, i; + struct ddcb *pddcb; + u64 val64; + unsigned int queue_size; + struct pci_dev *pci_dev = cd->pci_dev; + + if (genwqe_ddcb_max < 2) + return -EINVAL; + + queue_size = roundup(genwqe_ddcb_max * sizeof(struct ddcb), PAGE_SIZE); + + queue->ddcbs_in_flight = 0; /* statistics */ + queue->ddcbs_max_in_flight = 0; + queue->ddcbs_completed = 0; + queue->return_on_busy = 0; + queue->wait_on_busy = 0; + + queue->ddcb_seq = 0x100; /* start sequence number */ + queue->ddcb_max = genwqe_ddcb_max; /* module parameter */ + queue->ddcb_vaddr = __genwqe_alloc_consistent(cd, queue_size, + &queue->ddcb_daddr); + if (queue->ddcb_vaddr == NULL) { + dev_err(&pci_dev->dev, + "[%s] **err: could not allocate DDCB **\n", __func__); + return -ENOMEM; + } + memset(queue->ddcb_vaddr, 0, queue_size); + + queue->ddcb_req = kzalloc(sizeof(struct ddcb_requ *) * + queue->ddcb_max, GFP_KERNEL); + if (!queue->ddcb_req) { + rc = -ENOMEM; + goto free_ddcbs; + } + + queue->ddcb_waitqs = kzalloc(sizeof(wait_queue_head_t) * + queue->ddcb_max, GFP_KERNEL); + if (!queue->ddcb_waitqs) { + rc = -ENOMEM; + goto free_requs; + } + + for (i = 0; i < queue->ddcb_max; i++) { + pddcb = &queue->ddcb_vaddr[i]; /* DDCBs */ + pddcb->icrc_hsi_shi_32 = DDCB_COMPLETED_BE32; + pddcb->retc_16 = cpu_to_be16(0xfff); + + queue->ddcb_req[i] = NULL; /* requests */ + init_waitqueue_head(&queue->ddcb_waitqs[i]); /* waitqueues */ + } + + queue->ddcb_act = 0; + queue->ddcb_next = 0; /* queue is empty */ + + spin_lock_init(&queue->ddcb_lock); + init_waitqueue_head(&queue->busy_waitq); + + val64 = ((u64)(queue->ddcb_max - 1) << 8); /* lastptr */ + __genwqe_writeq(cd, queue->IO_QUEUE_CONFIG, 0x07); /* iCRC/vCRC */ + __genwqe_writeq(cd, queue->IO_QUEUE_SEGMENT, queue->ddcb_daddr); + __genwqe_writeq(cd, queue->IO_QUEUE_INITSQN, queue->ddcb_seq); + __genwqe_writeq(cd, queue->IO_QUEUE_WRAP, val64); + return 0; + + free_requs: + kfree(queue->ddcb_req); + queue->ddcb_req = NULL; + free_ddcbs: + __genwqe_free_consistent(cd, queue_size, queue->ddcb_vaddr, + queue->ddcb_daddr); + queue->ddcb_vaddr = NULL; + queue->ddcb_daddr = 0ull; + return -ENODEV; + +} + +static int ddcb_queue_initialized(struct ddcb_queue *queue) +{ + return queue->ddcb_vaddr != NULL; +} + +static void free_ddcb_queue(struct genwqe_dev *cd, struct ddcb_queue *queue) +{ + unsigned int queue_size; + + queue_size = roundup(queue->ddcb_max * sizeof(struct ddcb), PAGE_SIZE); + + kfree(queue->ddcb_req); + queue->ddcb_req = NULL; + + if (queue->ddcb_vaddr) { + __genwqe_free_consistent(cd, queue_size, queue->ddcb_vaddr, + queue->ddcb_daddr); + queue->ddcb_vaddr = NULL; + queue->ddcb_daddr = 0ull; + } +} + +static irqreturn_t genwqe_pf_isr(int irq, void *dev_id) +{ + u64 gfir; + struct genwqe_dev *cd = (struct genwqe_dev *)dev_id; + struct pci_dev *pci_dev = cd->pci_dev; + + /* + * In case of fatal FIR error the queue is stopped, such that + * we can safely check it without risking anything. + */ + cd->irqs_processed++; + wake_up_interruptible(&cd->queue_waitq); + + /* + * Checking for errors before kicking the queue might be + * safer, but slower for the good-case ... See above. 
+ */ + gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); + if (((gfir & GFIR_ERR_TRIGGER) != 0x0) && + !pci_channel_offline(pci_dev)) { + + if (cd->use_platform_recovery) { + /* + * Since we use raw accessors, EEH errors won't be + * detected by the platform until we do a non-raw + * MMIO or config space read + */ + readq(cd->mmio + IO_SLC_CFGREG_GFIR); + + /* Don't do anything if the PCI channel is frozen */ + if (pci_channel_offline(pci_dev)) + goto exit; + } + + wake_up_interruptible(&cd->health_waitq); + + /* + * By default GFIRs causes recovery actions. This + * count is just for debug when recovery is masked. + */ + dev_err_ratelimited(&pci_dev->dev, + "[%s] GFIR=%016llx\n", + __func__, gfir); + } + + exit: + return IRQ_HANDLED; +} + +static irqreturn_t genwqe_vf_isr(int irq, void *dev_id) +{ + struct genwqe_dev *cd = (struct genwqe_dev *)dev_id; + + cd->irqs_processed++; + wake_up_interruptible(&cd->queue_waitq); + + return IRQ_HANDLED; +} + +/** + * genwqe_card_thread() - Work thread for the DDCB queue + * + * The idea is to check if there are DDCBs in processing. If there are + * some finished DDCBs, we process them and wakeup the + * requestors. Otherwise we give other processes time using + * cond_resched(). + */ +static int genwqe_card_thread(void *data) +{ + int should_stop = 0, rc = 0; + struct genwqe_dev *cd = (struct genwqe_dev *)data; + + while (!kthread_should_stop()) { + + genwqe_check_ddcb_queue(cd, &cd->queue); + + if (genwqe_polling_enabled) { + rc = wait_event_interruptible_timeout( + cd->queue_waitq, + genwqe_ddcbs_in_flight(cd) || + (should_stop = kthread_should_stop()), 1); + } else { + rc = wait_event_interruptible_timeout( + cd->queue_waitq, + genwqe_next_ddcb_ready(cd) || + (should_stop = kthread_should_stop()), HZ); + } + if (should_stop) + break; + + /* + * Avoid soft lockups on heavy loads; we do not want + * to disable our interrupts. + */ + cond_resched(); + } + return 0; +} + +/** + * genwqe_setup_service_layer() - Setup DDCB queue + * @cd: pointer to genwqe device descriptor + * + * Allocate DDCBs. Configure Service Layer Controller (SLC). + * + * Return: 0 success + */ +int genwqe_setup_service_layer(struct genwqe_dev *cd) +{ + int rc; + struct ddcb_queue *queue; + struct pci_dev *pci_dev = cd->pci_dev; + + if (genwqe_is_privileged(cd)) { + rc = genwqe_card_reset(cd); + if (rc < 0) { + dev_err(&pci_dev->dev, + "[%s] err: reset failed.\n", __func__); + return rc; + } + genwqe_read_softreset(cd); + } + + queue = &cd->queue; + queue->IO_QUEUE_CONFIG = IO_SLC_QUEUE_CONFIG; + queue->IO_QUEUE_STATUS = IO_SLC_QUEUE_STATUS; + queue->IO_QUEUE_SEGMENT = IO_SLC_QUEUE_SEGMENT; + queue->IO_QUEUE_INITSQN = IO_SLC_QUEUE_INITSQN; + queue->IO_QUEUE_OFFSET = IO_SLC_QUEUE_OFFSET; + queue->IO_QUEUE_WRAP = IO_SLC_QUEUE_WRAP; + queue->IO_QUEUE_WTIME = IO_SLC_QUEUE_WTIME; + queue->IO_QUEUE_ERRCNTS = IO_SLC_QUEUE_ERRCNTS; + queue->IO_QUEUE_LRW = IO_SLC_QUEUE_LRW; + + rc = setup_ddcb_queue(cd, queue); + if (rc != 0) { + rc = -ENODEV; + goto err_out; + } + + init_waitqueue_head(&cd->queue_waitq); + cd->card_thread = kthread_run(genwqe_card_thread, cd, + GENWQE_DEVNAME "%d_thread", + cd->card_idx); + if (IS_ERR(cd->card_thread)) { + rc = PTR_ERR(cd->card_thread); + cd->card_thread = NULL; + goto stop_free_queue; + } + + rc = genwqe_set_interrupt_capability(cd, GENWQE_MSI_IRQS); + if (rc) + goto stop_kthread; + + /* + * We must have all wait-queues initialized when we enable the + * interrupts. Otherwise we might crash if we get an early + * irq. 
+	 */
+	init_waitqueue_head(&cd->health_waitq);
+
+	if (genwqe_is_privileged(cd)) {
+		rc = request_irq(pci_dev->irq, genwqe_pf_isr, IRQF_SHARED,
+				 GENWQE_DEVNAME, cd);
+	} else {
+		rc = request_irq(pci_dev->irq, genwqe_vf_isr, IRQF_SHARED,
+				 GENWQE_DEVNAME, cd);
+	}
+	if (rc < 0) {
+		dev_err(&pci_dev->dev, "irq %d not free.\n", pci_dev->irq);
+		goto stop_irq_cap;
+	}
+
+	cd->card_state = GENWQE_CARD_USED;
+	return 0;
+
+ stop_irq_cap:
+	genwqe_reset_interrupt_capability(cd);
+ stop_kthread:
+	kthread_stop(cd->card_thread);
+	cd->card_thread = NULL;
+ stop_free_queue:
+	free_ddcb_queue(cd, queue);
+ err_out:
+	return rc;
+}
+
+/**
+ * queue_wake_up_all() - Handles fatal error case
+ *
+ * The PCI device got unusable and we have to stop all pending
+ * requests as fast as we can. The code after this must purge the
+ * DDCBs in question and ensure that all mappings are freed.
+ */
+static int queue_wake_up_all(struct genwqe_dev *cd)
+{
+	unsigned int i;
+	unsigned long flags;
+	struct ddcb_queue *queue = &cd->queue;
+
+	spin_lock_irqsave(&queue->ddcb_lock, flags);
+
+	for (i = 0; i < queue->ddcb_max; i++)
+		wake_up_interruptible(&queue->ddcb_waitqs[i]);
+
+	wake_up_interruptible(&queue->busy_waitq);
+	spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+
+	return 0;
+}
+
+/**
+ * genwqe_finish_queue() - Remove any genwqe devices and user-interfaces
+ *
+ * Relies on the pre-condition that there are no users of the card
+ * device anymore e.g. with open file-descriptors.
+ *
+ * This function must be robust enough to be called twice.
+ */
+int genwqe_finish_queue(struct genwqe_dev *cd)
+{
+	int i, rc = 0, in_flight;
+	int waitmax = genwqe_ddcb_software_timeout;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	struct ddcb_queue *queue = &cd->queue;
+
+	if (!ddcb_queue_initialized(queue))
+		return 0;
+
+	/* Do not wipe out the error state. */
+	if (cd->card_state == GENWQE_CARD_USED)
+		cd->card_state = GENWQE_CARD_UNUSED;
+
+	/* Wake up all requests in the DDCB queue such that they
+	   should be removed nicely. */
+	queue_wake_up_all(cd);
+
+	/* We must wait to get rid of the DDCBs in flight */
+	for (i = 0; i < waitmax; i++) {
+		in_flight = genwqe_ddcbs_in_flight(cd);
+
+		if (in_flight == 0)
+			break;
+
+		dev_dbg(&pci_dev->dev,
+			"  DEBUG [%d/%d] waiting for queue to get empty: %d requests!\n",
+			i, waitmax, in_flight);
+
+		/*
+		 * Severe error situation: The card itself has
+		 * 16 DDCB queues, each queue has e.g. 32 entries,
+		 * each DDCB has a hardware timeout of currently 250
+		 * msec but the PFs have a hardware timeout of 8 sec
+		 * ... so I take something large.
+		 */
+		msleep(1000);
+	}
+	if (i == waitmax) {
+		dev_err(&pci_dev->dev, "  [%s] err: queue is not empty!!\n",
+			__func__);
+		rc = -EIO;
+	}
+	return rc;
+}
+
+/**
+ * genwqe_release_service_layer() - Shutdown DDCB queue
+ * @cd:       genwqe device descriptor
+ *
+ * This function must be robust enough to be called twice.
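+ *
+ * Being callable twice works because ddcb_queue_initialized() is
+ * checked first and card_thread is set to NULL after kthread_stop(),
+ * so a second invocation finds nothing left to tear down.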
+ */
+int genwqe_release_service_layer(struct genwqe_dev *cd)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (!ddcb_queue_initialized(&cd->queue))
+		return 1;
+
+	free_irq(pci_dev->irq, cd);
+	genwqe_reset_interrupt_capability(cd);
+
+	if (cd->card_thread != NULL) {
+		kthread_stop(cd->card_thread);
+		cd->card_thread = NULL;
+	}
+
+	free_ddcb_queue(cd, &cd->queue);
+	return 0;
+}
diff --git a/kernel/drivers/misc/genwqe/card_ddcb.h b/kernel/drivers/misc/genwqe/card_ddcb.h
new file mode 100644
index 000000000..0361a68d7
--- /dev/null
+++ b/kernel/drivers/misc/genwqe/card_ddcb.h
@@ -0,0 +1,188 @@
+#ifndef __CARD_DDCB_H__
+#define __CARD_DDCB_H__
+
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+#include "genwqe_driver.h"
+#include "card_base.h"
+
+/**
+ * struct ddcb - Device Driver Control Block DDCB
+ * @hsi:        Hardware software interlock
+ * @shi:        Software hardware interlock. Hsi and shi are used to interlock
+ *              software and hardware activities. We are using a compare and
+ *              swap operation to ensure that there are no races when
+ *              activating new DDCBs on the queue, or when we need to
+ *              purge a DDCB from a running queue.
+ * @acfunc:     Accelerator function addresses a unit within the chip
+ * @cmd:        Command to work on
+ * @cmdopts_16: Options for the command
+ * @asiv:       Input data
+ * @asv:        Output data
+ *
+ * The DDCB data format is big endian. Multiple consecutive DDCBs form
+ * a DDCB queue.
+ */
+#define ASIV_LENGTH		104 /* Old specification without ATS field */
+#define ASIV_LENGTH_ATS		96  /* New specification with ATS field */
+#define ASV_LENGTH		64
+
+struct ddcb {
+	union {
+		__be32 icrc_hsi_shi_32;	/* iCRC, Hardware/SW interlock */
+		struct {
+			__be16	icrc_16;
+			u8	hsi;
+			u8	shi;
+		};
+	};
+	u8  pre;		/* Preamble */
+	u8  xdir;		/* Execution Directives */
+	__be16 seqnum_16;	/* Sequence Number */
+
+	u8  acfunc;		/* Accelerator Function.. */
+	u8  cmd;		/* Command. */
+	__be16 cmdopts_16;	/* Command Options */
+	u8  sur;		/* Status Update Rate */
+	u8  psp;		/* Protection Section Pointer */
+	__be16 rsvd_0e_16;	/* Reserved invariant */
+
+	__be64 fwiv_64;		/* Firmware Invariant. */
+
+	union {
+		struct {
+			__be64 ats_64;  /* Address Translation Spec */
+			u8     asiv[ASIV_LENGTH_ATS]; /* New ASIV */
+		} n;
+		u8  __asiv[ASIV_LENGTH];	/* obsolete */
+	};
+	u8     asv[ASV_LENGTH];	/* Appl Spec Variant */
+
+	__be16 rsvd_c0_16;	/* Reserved Variant */
+	__be16 vcrc_16;		/* Variant CRC */
+	__be32 rsvd_32;		/* Reserved unprotected */
+
+	__be64 deque_ts_64;	/* Deque Time Stamp. */
+
+	__be16 retc_16;		/* Return Code */
+	__be16 attn_16;		/* Attention/Extended Error Codes */
+	__be32 progress_32;	/* Progress indicator. */
+
+	__be64 cmplt_ts_64;	/* Completion Time Stamp. */
+
+	/* The following layout matches the new service layer format */
+	__be32 ibdc_32;		/* Inbound Data Count  (* 256) */
+	__be32 obdc_32;		/* Outbound Data Count (* 256) */
+
+	__be64 rsvd_SLH_64;	/* Reserved for hardware */
+	union {			/* private data for driver */
+		u8	priv[8];
+		__be64	priv_64;
+	};
+	__be64 disp_ts_64;	/* Dispatch TimeStamp */
+} __attribute__((__packed__));
+
+/* CRC polynomials for DDCB */
+#define CRC16_POLYNOMIAL	0x1021
+
+/*
+ * SHI: Software to Hardware Interlock
+ *   This 1 byte field is written by software to interlock the
+ *   movement of one queue entry to another with the hardware in the
+ *   chip.
+ */
+#define DDCB_SHI_INTR		0x04 /* Bit 2 */
+#define DDCB_SHI_PURGE		0x02 /* Bit 1 */
+#define DDCB_SHI_NEXT		0x01 /* Bit 0 */
+
+/*
+ * HSI: Hardware to Software interlock
+ *   This 1 byte field is written by hardware to interlock the movement
+ *   of one queue entry to another with the software in the chip.
+ */
+#define DDCB_HSI_COMPLETED	0x40 /* Bit 6 */
+#define DDCB_HSI_FETCHED	0x04 /* Bit 2 */
+
+/*
+ * Accessing HSI/SHI is done 32-bit wide
+ *   Normally 16-bit access would work too, but on some platforms the
+ *   16-bit compare and swap operation is not supported. Therefore
+ *   switching to 32-bit such that those platforms will work too.
+ *
+ *                                         iCRC HSI/SHI
+ */
+#define DDCB_INTR_BE32		cpu_to_be32(0x00000004)
+#define DDCB_PURGE_BE32		cpu_to_be32(0x00000002)
+#define DDCB_NEXT_BE32		cpu_to_be32(0x00000001)
+#define DDCB_COMPLETED_BE32	cpu_to_be32(0x00004000)
+#define DDCB_FETCHED_BE32	cpu_to_be32(0x00000400)
+
+/* Definitions of DDCB presets */
+#define DDCB_PRESET_PRE		0x80
+#define ICRC_LENGTH(n)		((n) + 8 + 8 + 8) /* used ASIV + hdr fields */
+#define VCRC_LENGTH(n)		((n))		  /* used ASV */
+
+/*
+ * Genwqe Scatter Gather list
+ *   Each element has up to 8 entries.
+ *   The chaining element is element 0 because of prefetching needs.
+ */
+
+/*
+ * 0b0110 Chained descriptor. The descriptor is describing the next
+ * descriptor list.
+ */
+#define SG_CHAINED		(0x6)
+
+/*
+ * 0b0010 First entry of a descriptor list. Start from a Buffer-Empty
+ * condition.
+ */
+#define SG_DATA			(0x2)
+
+/*
+ * 0b0000 Early terminator. This is the last entry on the list
+ * regardless of the length indicated.
+ */
+#define SG_END_LIST		(0x0)
+
+/**
+ * struct sg_entry - Scatter gather list entry
+ * @target_addr: Either a dma addr of memory to work on or a
+ *               dma addr of a subsequent sglist block.
+ * @len:         Length of the data block.
+ * @flags:       See above.
+ *
+ * Depending on the command the GenWQE card can use a scatter gather
+ * list to describe the memory it works on. Always 8 sg_entry's form
+ * a block.
+ */
+struct sg_entry {
+	__be64 target_addr;
+	__be32 len;
+	__be32 flags;
+};
+
+#endif /* __CARD_DDCB_H__ */
diff --git a/kernel/drivers/misc/genwqe/card_debugfs.c b/kernel/drivers/misc/genwqe/card_debugfs.c
new file mode 100644
index 000000000..c715534e7
--- /dev/null
+++ b/kernel/drivers/misc/genwqe/card_debugfs.c
@@ -0,0 +1,508 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* + * Debugfs interfaces for the GenWQE card. Help to debug potential + * problems. Dump internal chip state for debugging and failure + * determination. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/uaccess.h> + +#include "card_base.h" +#include "card_ddcb.h" + +#define GENWQE_DEBUGFS_RO(_name, _showfn) \ + static int genwqe_debugfs_##_name##_open(struct inode *inode, \ + struct file *file) \ + { \ + return single_open(file, _showfn, inode->i_private); \ + } \ + static const struct file_operations genwqe_##_name##_fops = { \ + .open = genwqe_debugfs_##_name##_open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ + } + +static void dbg_uidn_show(struct seq_file *s, struct genwqe_reg *regs, + int entries) +{ + unsigned int i; + u32 v_hi, v_lo; + + for (i = 0; i < entries; i++) { + v_hi = (regs[i].val >> 32) & 0xffffffff; + v_lo = (regs[i].val) & 0xffffffff; + + seq_printf(s, " 0x%08x 0x%08x 0x%08x 0x%08x EXT_ERR_REC\n", + regs[i].addr, regs[i].idx, v_hi, v_lo); + } +} + +static int curr_dbg_uidn_show(struct seq_file *s, void *unused, int uid) +{ + struct genwqe_dev *cd = s->private; + int entries; + struct genwqe_reg *regs; + + entries = genwqe_ffdc_buff_size(cd, uid); + if (entries < 0) + return -EINVAL; + + if (entries == 0) + return 0; + + regs = kcalloc(entries, sizeof(*regs), GFP_KERNEL); + if (regs == NULL) + return -ENOMEM; + + genwqe_stop_traps(cd); /* halt the traps while dumping data */ + genwqe_ffdc_buff_read(cd, uid, regs, entries); + genwqe_start_traps(cd); + + dbg_uidn_show(s, regs, entries); + kfree(regs); + return 0; +} + +static int genwqe_curr_dbg_uid0_show(struct seq_file *s, void *unused) +{ + return curr_dbg_uidn_show(s, unused, 0); +} + +GENWQE_DEBUGFS_RO(curr_dbg_uid0, genwqe_curr_dbg_uid0_show); + +static int genwqe_curr_dbg_uid1_show(struct seq_file *s, void *unused) +{ + return curr_dbg_uidn_show(s, unused, 1); +} + +GENWQE_DEBUGFS_RO(curr_dbg_uid1, genwqe_curr_dbg_uid1_show); + +static int genwqe_curr_dbg_uid2_show(struct seq_file *s, void *unused) +{ + return curr_dbg_uidn_show(s, unused, 2); +} + +GENWQE_DEBUGFS_RO(curr_dbg_uid2, genwqe_curr_dbg_uid2_show); + +static int prev_dbg_uidn_show(struct seq_file *s, void *unused, int uid) +{ + struct genwqe_dev *cd = s->private; + + dbg_uidn_show(s, cd->ffdc[uid].regs, cd->ffdc[uid].entries); + return 0; +} + +static int genwqe_prev_dbg_uid0_show(struct seq_file *s, void *unused) +{ + return prev_dbg_uidn_show(s, unused, 0); +} + +GENWQE_DEBUGFS_RO(prev_dbg_uid0, genwqe_prev_dbg_uid0_show); + +static int genwqe_prev_dbg_uid1_show(struct seq_file *s, void *unused) +{ + return prev_dbg_uidn_show(s, unused, 1); +} + +GENWQE_DEBUGFS_RO(prev_dbg_uid1, genwqe_prev_dbg_uid1_show); + +static int genwqe_prev_dbg_uid2_show(struct seq_file *s, void *unused) +{ + return prev_dbg_uidn_show(s, unused, 2); +} + +GENWQE_DEBUGFS_RO(prev_dbg_uid2, genwqe_prev_dbg_uid2_show); + +static int genwqe_curr_regs_show(struct seq_file *s, void *unused) +{ + struct genwqe_dev *cd = s->private; + unsigned int i; + struct genwqe_reg *regs; + + regs = kcalloc(GENWQE_FFDC_REGS, sizeof(*regs), GFP_KERNEL); + if (regs == NULL) + return -ENOMEM; + + genwqe_stop_traps(cd); + 
genwqe_read_ffdc_regs(cd, regs, GENWQE_FFDC_REGS, 1); + genwqe_start_traps(cd); + + for (i = 0; i < GENWQE_FFDC_REGS; i++) { + if (regs[i].addr == 0xffffffff) + break; /* invalid entries */ + + if (regs[i].val == 0x0ull) + continue; /* do not print 0x0 FIRs */ + + seq_printf(s, " 0x%08x 0x%016llx\n", + regs[i].addr, regs[i].val); + } + return 0; +} + +GENWQE_DEBUGFS_RO(curr_regs, genwqe_curr_regs_show); + +static int genwqe_prev_regs_show(struct seq_file *s, void *unused) +{ + struct genwqe_dev *cd = s->private; + unsigned int i; + struct genwqe_reg *regs = cd->ffdc[GENWQE_DBG_REGS].regs; + + if (regs == NULL) + return -EINVAL; + + for (i = 0; i < GENWQE_FFDC_REGS; i++) { + if (regs[i].addr == 0xffffffff) + break; /* invalid entries */ + + if (regs[i].val == 0x0ull) + continue; /* do not print 0x0 FIRs */ + + seq_printf(s, " 0x%08x 0x%016llx\n", + regs[i].addr, regs[i].val); + } + return 0; +} + +GENWQE_DEBUGFS_RO(prev_regs, genwqe_prev_regs_show); + +static int genwqe_jtimer_show(struct seq_file *s, void *unused) +{ + struct genwqe_dev *cd = s->private; + unsigned int vf_num; + u64 jtimer; + + jtimer = genwqe_read_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT, 0); + seq_printf(s, " PF 0x%016llx %d msec\n", jtimer, + genwqe_pf_jobtimeout_msec); + + for (vf_num = 0; vf_num < cd->num_vfs; vf_num++) { + jtimer = genwqe_read_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT, + vf_num + 1); + seq_printf(s, " VF%-2d 0x%016llx %d msec\n", vf_num, jtimer, + cd->vf_jobtimeout_msec[vf_num]); + } + return 0; +} + +GENWQE_DEBUGFS_RO(jtimer, genwqe_jtimer_show); + +static int genwqe_queue_working_time_show(struct seq_file *s, void *unused) +{ + struct genwqe_dev *cd = s->private; + unsigned int vf_num; + u64 t; + + t = genwqe_read_vreg(cd, IO_SLC_VF_QUEUE_WTIME, 0); + seq_printf(s, " PF 0x%016llx\n", t); + + for (vf_num = 0; vf_num < cd->num_vfs; vf_num++) { + t = genwqe_read_vreg(cd, IO_SLC_VF_QUEUE_WTIME, vf_num + 1); + seq_printf(s, " VF%-2d 0x%016llx\n", vf_num, t); + } + return 0; +} + +GENWQE_DEBUGFS_RO(queue_working_time, genwqe_queue_working_time_show); + +static int genwqe_ddcb_info_show(struct seq_file *s, void *unused) +{ + struct genwqe_dev *cd = s->private; + unsigned int i; + struct ddcb_queue *queue; + struct ddcb *pddcb; + + queue = &cd->queue; + seq_puts(s, "DDCB QUEUE:\n"); + seq_printf(s, " ddcb_max: %d\n" + " ddcb_daddr: %016llx - %016llx\n" + " ddcb_vaddr: %016llx\n" + " ddcbs_in_flight: %u\n" + " ddcbs_max_in_flight: %u\n" + " ddcbs_completed: %u\n" + " return_on_busy: %u\n" + " wait_on_busy: %u\n" + " irqs_processed: %u\n", + queue->ddcb_max, (long long)queue->ddcb_daddr, + (long long)queue->ddcb_daddr + + (queue->ddcb_max * DDCB_LENGTH), + (long long)queue->ddcb_vaddr, queue->ddcbs_in_flight, + queue->ddcbs_max_in_flight, queue->ddcbs_completed, + queue->return_on_busy, queue->wait_on_busy, + cd->irqs_processed); + + /* Hardware State */ + seq_printf(s, " 0x%08x 0x%016llx IO_QUEUE_CONFIG\n" + " 0x%08x 0x%016llx IO_QUEUE_STATUS\n" + " 0x%08x 0x%016llx IO_QUEUE_SEGMENT\n" + " 0x%08x 0x%016llx IO_QUEUE_INITSQN\n" + " 0x%08x 0x%016llx IO_QUEUE_WRAP\n" + " 0x%08x 0x%016llx IO_QUEUE_OFFSET\n" + " 0x%08x 0x%016llx IO_QUEUE_WTIME\n" + " 0x%08x 0x%016llx IO_QUEUE_ERRCNTS\n" + " 0x%08x 0x%016llx IO_QUEUE_LRW\n", + queue->IO_QUEUE_CONFIG, + __genwqe_readq(cd, queue->IO_QUEUE_CONFIG), + queue->IO_QUEUE_STATUS, + __genwqe_readq(cd, queue->IO_QUEUE_STATUS), + queue->IO_QUEUE_SEGMENT, + __genwqe_readq(cd, queue->IO_QUEUE_SEGMENT), + queue->IO_QUEUE_INITSQN, + __genwqe_readq(cd, queue->IO_QUEUE_INITSQN), + 
queue->IO_QUEUE_WRAP, + __genwqe_readq(cd, queue->IO_QUEUE_WRAP), + queue->IO_QUEUE_OFFSET, + __genwqe_readq(cd, queue->IO_QUEUE_OFFSET), + queue->IO_QUEUE_WTIME, + __genwqe_readq(cd, queue->IO_QUEUE_WTIME), + queue->IO_QUEUE_ERRCNTS, + __genwqe_readq(cd, queue->IO_QUEUE_ERRCNTS), + queue->IO_QUEUE_LRW, + __genwqe_readq(cd, queue->IO_QUEUE_LRW)); + + seq_printf(s, "DDCB list (ddcb_act=%d/ddcb_next=%d):\n", + queue->ddcb_act, queue->ddcb_next); + + pddcb = queue->ddcb_vaddr; + for (i = 0; i < queue->ddcb_max; i++) { + seq_printf(s, " %-3d: RETC=%03x SEQ=%04x HSI/SHI=%02x/%02x ", + i, be16_to_cpu(pddcb->retc_16), + be16_to_cpu(pddcb->seqnum_16), + pddcb->hsi, pddcb->shi); + seq_printf(s, "PRIV=%06llx CMD=%02x\n", + be64_to_cpu(pddcb->priv_64), pddcb->cmd); + pddcb++; + } + return 0; +} + +GENWQE_DEBUGFS_RO(ddcb_info, genwqe_ddcb_info_show); + +static int genwqe_info_show(struct seq_file *s, void *unused) +{ + struct genwqe_dev *cd = s->private; + u16 val16, type; + u64 app_id, slu_id, bitstream = -1; + struct pci_dev *pci_dev = cd->pci_dev; + + slu_id = __genwqe_readq(cd, IO_SLU_UNITCFG); + app_id = __genwqe_readq(cd, IO_APP_UNITCFG); + + if (genwqe_is_privileged(cd)) + bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM); + + val16 = (u16)(slu_id & 0x0fLLU); + type = (u16)((slu_id >> 20) & 0xffLLU); + + seq_printf(s, "%s driver version: %s\n" + " Device Name/Type: %s %s CardIdx: %d\n" + " SLU/APP Config : 0x%016llx/0x%016llx\n" + " Build Date : %u/%x/%u\n" + " Base Clock : %u MHz\n" + " Arch/SVN Release: %u/%llx\n" + " Bitstream : %llx\n", + GENWQE_DEVNAME, DRV_VERSION, dev_name(&pci_dev->dev), + genwqe_is_privileged(cd) ? + "Physical" : "Virtual or no SR-IOV", + cd->card_idx, slu_id, app_id, + (u16)((slu_id >> 12) & 0x0fLLU), /* month */ + (u16)((slu_id >> 4) & 0xffLLU), /* day */ + (u16)((slu_id >> 16) & 0x0fLLU) + 2010, /* year */ + genwqe_base_clock_frequency(cd), + (u16)((slu_id >> 32) & 0xffLLU), slu_id >> 40, + bitstream); + + return 0; +} + +GENWQE_DEBUGFS_RO(info, genwqe_info_show); + +int genwqe_init_debugfs(struct genwqe_dev *cd) +{ + struct dentry *root; + struct dentry *file; + int ret; + char card_name[64]; + char name[64]; + unsigned int i; + + sprintf(card_name, "%s%d_card", GENWQE_DEVNAME, cd->card_idx); + + root = debugfs_create_dir(card_name, cd->debugfs_genwqe); + if (!root) { + ret = -ENOMEM; + goto err0; + } + + /* non privileged interfaces are done here */ + file = debugfs_create_file("ddcb_info", S_IRUGO, root, cd, + &genwqe_ddcb_info_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_file("info", S_IRUGO, root, cd, + &genwqe_info_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_x64("err_inject", 0666, root, &cd->err_inject); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_u32("ddcb_software_timeout", 0666, root, + &cd->ddcb_software_timeout); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_u32("kill_timeout", 0666, root, + &cd->kill_timeout); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + /* privileged interfaces follow here */ + if (!genwqe_is_privileged(cd)) { + cd->debugfs_root = root; + return 0; + } + + file = debugfs_create_file("curr_regs", S_IRUGO, root, cd, + &genwqe_curr_regs_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_file("curr_dbg_uid0", S_IRUGO, root, cd, + &genwqe_curr_dbg_uid0_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_file("curr_dbg_uid1", S_IRUGO, root, 
cd, + &genwqe_curr_dbg_uid1_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_file("curr_dbg_uid2", S_IRUGO, root, cd, + &genwqe_curr_dbg_uid2_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_file("prev_regs", S_IRUGO, root, cd, + &genwqe_prev_regs_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_file("prev_dbg_uid0", S_IRUGO, root, cd, + &genwqe_prev_dbg_uid0_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_file("prev_dbg_uid1", S_IRUGO, root, cd, + &genwqe_prev_dbg_uid1_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_file("prev_dbg_uid2", S_IRUGO, root, cd, + &genwqe_prev_dbg_uid2_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + for (i = 0; i < GENWQE_MAX_VFS; i++) { + sprintf(name, "vf%u_jobtimeout_msec", i); + + file = debugfs_create_u32(name, 0666, root, + &cd->vf_jobtimeout_msec[i]); + if (!file) { + ret = -ENOMEM; + goto err1; + } + } + + file = debugfs_create_file("jobtimer", S_IRUGO, root, cd, + &genwqe_jtimer_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_file("queue_working_time", S_IRUGO, root, cd, + &genwqe_queue_working_time_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_u32("skip_recovery", 0666, root, + &cd->skip_recovery); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_u32("use_platform_recovery", 0666, root, + &cd->use_platform_recovery); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + cd->debugfs_root = root; + return 0; +err1: + debugfs_remove_recursive(root); +err0: + return ret; +} + +void genqwe_exit_debugfs(struct genwqe_dev *cd) +{ + debugfs_remove_recursive(cd->debugfs_root); +} diff --git a/kernel/drivers/misc/genwqe/card_dev.c b/kernel/drivers/misc/genwqe/card_dev.c new file mode 100644 index 000000000..c49d24426 --- /dev/null +++ b/kernel/drivers/misc/genwqe/card_dev.c @@ -0,0 +1,1413 @@ +/** + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp <haver@linux.vnet.ibm.com> + * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com> + * Author: Michael Jung <mijung@gmx.net> + * Author: Michael Ruettger <michael@ibmra.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* + * Character device representation of the GenWQE device. This allows + * user-space applications to communicate with the card. 
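+ *
+ * The interface offers mmap() for contiguous DMA buffers, a set of
+ * ioctls for DDCB execution, register access, flash update and memory
+ * pinning, and fasync() for SIGIO notification; all are implemented
+ * below.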
+ */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/string.h> +#include <linux/fs.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/delay.h> +#include <linux/atomic.h> + +#include "card_base.h" +#include "card_ddcb.h" + +static int genwqe_open_files(struct genwqe_dev *cd) +{ + int rc; + unsigned long flags; + + spin_lock_irqsave(&cd->file_lock, flags); + rc = list_empty(&cd->file_list); + spin_unlock_irqrestore(&cd->file_lock, flags); + return !rc; +} + +static void genwqe_add_file(struct genwqe_dev *cd, struct genwqe_file *cfile) +{ + unsigned long flags; + + cfile->owner = current; + spin_lock_irqsave(&cd->file_lock, flags); + list_add(&cfile->list, &cd->file_list); + spin_unlock_irqrestore(&cd->file_lock, flags); +} + +static int genwqe_del_file(struct genwqe_dev *cd, struct genwqe_file *cfile) +{ + unsigned long flags; + + spin_lock_irqsave(&cd->file_lock, flags); + list_del(&cfile->list); + spin_unlock_irqrestore(&cd->file_lock, flags); + + return 0; +} + +static void genwqe_add_pin(struct genwqe_file *cfile, struct dma_mapping *m) +{ + unsigned long flags; + + spin_lock_irqsave(&cfile->pin_lock, flags); + list_add(&m->pin_list, &cfile->pin_list); + spin_unlock_irqrestore(&cfile->pin_lock, flags); +} + +static int genwqe_del_pin(struct genwqe_file *cfile, struct dma_mapping *m) +{ + unsigned long flags; + + spin_lock_irqsave(&cfile->pin_lock, flags); + list_del(&m->pin_list); + spin_unlock_irqrestore(&cfile->pin_lock, flags); + + return 0; +} + +/** + * genwqe_search_pin() - Search for the mapping for a userspace address + * @cfile: Descriptor of opened file + * @u_addr: User virtual address + * @size: Size of buffer + * @dma_addr: DMA address to be updated + * + * Return: Pointer to the corresponding mapping NULL if not found + */ +static struct dma_mapping *genwqe_search_pin(struct genwqe_file *cfile, + unsigned long u_addr, + unsigned int size, + void **virt_addr) +{ + unsigned long flags; + struct dma_mapping *m; + + spin_lock_irqsave(&cfile->pin_lock, flags); + + list_for_each_entry(m, &cfile->pin_list, pin_list) { + if ((((u64)m->u_vaddr) <= (u_addr)) && + (((u64)m->u_vaddr + m->size) >= (u_addr + size))) { + + if (virt_addr) + *virt_addr = m->k_vaddr + + (u_addr - (u64)m->u_vaddr); + + spin_unlock_irqrestore(&cfile->pin_lock, flags); + return m; + } + } + spin_unlock_irqrestore(&cfile->pin_lock, flags); + return NULL; +} + +static void __genwqe_add_mapping(struct genwqe_file *cfile, + struct dma_mapping *dma_map) +{ + unsigned long flags; + + spin_lock_irqsave(&cfile->map_lock, flags); + list_add(&dma_map->card_list, &cfile->map_list); + spin_unlock_irqrestore(&cfile->map_lock, flags); +} + +static void __genwqe_del_mapping(struct genwqe_file *cfile, + struct dma_mapping *dma_map) +{ + unsigned long flags; + + spin_lock_irqsave(&cfile->map_lock, flags); + list_del(&dma_map->card_list); + spin_unlock_irqrestore(&cfile->map_lock, flags); +} + + +/** + * __genwqe_search_mapping() - Search for the mapping for a userspace address + * @cfile: descriptor of opened file + * @u_addr: user virtual address + * @size: size of buffer + * @dma_addr: DMA address to be updated + * Return: Pointer to the corresponding mapping NULL if not found + */ +static struct dma_mapping *__genwqe_search_mapping(struct genwqe_file *cfile, + unsigned long u_addr, + unsigned int size, + dma_addr_t *dma_addr, + void **virt_addr) +{ + unsigned long flags; + struct dma_mapping *m; + struct pci_dev *pci_dev = 
cfile->cd->pci_dev;
+
+	spin_lock_irqsave(&cfile->map_lock, flags);
+	list_for_each_entry(m, &cfile->map_list, card_list) {
+
+		if ((((u64)m->u_vaddr) <= (u_addr)) &&
+		    (((u64)m->u_vaddr + m->size) >= (u_addr + size))) {
+
+			/* match found: current is as expected and
+			   addr is in range */
+			if (dma_addr)
+				*dma_addr = m->dma_addr +
+					(u_addr - (u64)m->u_vaddr);
+
+			if (virt_addr)
+				*virt_addr = m->k_vaddr +
+					(u_addr - (u64)m->u_vaddr);
+
+			spin_unlock_irqrestore(&cfile->map_lock, flags);
+			return m;
+		}
+	}
+	spin_unlock_irqrestore(&cfile->map_lock, flags);
+
+	dev_err(&pci_dev->dev,
+		"[%s] Entry not found: u_addr=%lx, size=%x\n",
+		__func__, u_addr, size);
+
+	return NULL;
+}
+
+static void genwqe_remove_mappings(struct genwqe_file *cfile)
+{
+	int i = 0;
+	struct list_head *node, *next;
+	struct dma_mapping *dma_map;
+	struct genwqe_dev *cd = cfile->cd;
+	struct pci_dev *pci_dev = cfile->cd->pci_dev;
+
+	list_for_each_safe(node, next, &cfile->map_list) {
+		dma_map = list_entry(node, struct dma_mapping, card_list);
+
+		list_del_init(&dma_map->card_list);
+
+		/*
+		 * This is really a bug, because those things should
+		 * have been already tidied up.
+		 *
+		 * GENWQE_MAPPING_RAW should have been removed via munmap().
+		 * GENWQE_MAPPING_SGL_TEMP should be removed by tidy up code.
+		 */
+		dev_err(&pci_dev->dev,
+			"[%s] %d. cleanup mapping: u_vaddr=%p u_kaddr=%016lx dma_addr=%lx\n",
+			__func__, i++, dma_map->u_vaddr,
+			(unsigned long)dma_map->k_vaddr,
+			(unsigned long)dma_map->dma_addr);
+
+		if (dma_map->type == GENWQE_MAPPING_RAW) {
+			/* we allocated this dynamically */
+			__genwqe_free_consistent(cd, dma_map->size,
+						 dma_map->k_vaddr,
+						 dma_map->dma_addr);
+			kfree(dma_map);
+		} else if (dma_map->type == GENWQE_MAPPING_SGL_TEMP) {
+			/* we use dma_map statically from the request */
+			genwqe_user_vunmap(cd, dma_map, NULL);
+		}
+	}
+}
+
+static void genwqe_remove_pinnings(struct genwqe_file *cfile)
+{
+	struct list_head *node, *next;
+	struct dma_mapping *dma_map;
+	struct genwqe_dev *cd = cfile->cd;
+
+	list_for_each_safe(node, next, &cfile->pin_list) {
+		dma_map = list_entry(node, struct dma_mapping, pin_list);
+
+		/*
+		 * This is not a bug, because a killed process might
+		 * not call the unpin ioctl, which is supposed to free
+		 * the resources.
+		 *
+		 * Pinnings are dynamically allocated and need to be
+		 * deleted.
+		 */
+		list_del_init(&dma_map->pin_list);
+		genwqe_user_vunmap(cd, dma_map, NULL);
+		kfree(dma_map);
+	}
+}
+
+/**
+ * genwqe_kill_fasync() - Send signal to all processes with open GenWQE files
+ *
+ * E.g. genwqe_kill_fasync(cd, SIGIO);
+ */
+static int genwqe_kill_fasync(struct genwqe_dev *cd, int sig)
+{
+	unsigned int files = 0;
+	unsigned long flags;
+	struct genwqe_file *cfile;
+
+	spin_lock_irqsave(&cd->file_lock, flags);
+	list_for_each_entry(cfile, &cd->file_list, list) {
+		if (cfile->async_queue)
+			kill_fasync(&cfile->async_queue, sig, POLL_HUP);
+		files++;
+	}
+	spin_unlock_irqrestore(&cd->file_lock, flags);
+	return files;
+}
+
+static int genwqe_force_sig(struct genwqe_dev *cd, int sig)
+{
+	unsigned int files = 0;
+	unsigned long flags;
+	struct genwqe_file *cfile;
+
+	spin_lock_irqsave(&cd->file_lock, flags);
+	list_for_each_entry(cfile, &cd->file_list, list) {
+		force_sig(sig, cfile->owner);
+		files++;
+	}
+	spin_unlock_irqrestore(&cd->file_lock, flags);
+	return files;
+}
+
+/**
+ * genwqe_open() - file open
+ * @inode:      file system information
+ * @filp:       file handle
+ *
+ * This function is executed whenever an application calls
+ * open("/dev/genwqe",..).
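+ *
+ * A minimal user-space sketch (card index 0 and the omitted error
+ * handling are assumptions; GENWQE_EXECUTE_DDCB and struct
+ * genwqe_ddcb_cmd come from the uapi header):
+ *
+ *   int fd = open("/dev/genwqe0_card", O_RDWR);
+ *   struct genwqe_ddcb_cmd cmd = { 0, };
+ *
+ *   ... fill in cmd.acfunc, cmd.cmd and the cmd.asiv input data ...
+ *   if (ioctl(fd, GENWQE_EXECUTE_DDCB, &cmd) == 0)
+ *           ... results are in cmd.retc and cmd.asv ...
+ *   close(fd);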
+ * + * Return: 0 if successful or <0 if errors + */ +static int genwqe_open(struct inode *inode, struct file *filp) +{ + struct genwqe_dev *cd; + struct genwqe_file *cfile; + struct pci_dev *pci_dev; + + cfile = kzalloc(sizeof(*cfile), GFP_KERNEL); + if (cfile == NULL) + return -ENOMEM; + + cd = container_of(inode->i_cdev, struct genwqe_dev, cdev_genwqe); + pci_dev = cd->pci_dev; + cfile->cd = cd; + cfile->filp = filp; + cfile->client = NULL; + + spin_lock_init(&cfile->map_lock); /* list of raw memory allocations */ + INIT_LIST_HEAD(&cfile->map_list); + + spin_lock_init(&cfile->pin_lock); /* list of user pinned memory */ + INIT_LIST_HEAD(&cfile->pin_list); + + filp->private_data = cfile; + + genwqe_add_file(cd, cfile); + return 0; +} + +/** + * genwqe_fasync() - Setup process to receive SIGIO. + * @fd: file descriptor + * @filp: file handle + * @mode: file mode + * + * Sending a signal is working as following: + * + * if (cdev->async_queue) + * kill_fasync(&cdev->async_queue, SIGIO, POLL_IN); + * + * Some devices also implement asynchronous notification to indicate + * when the device can be written; in this case, of course, + * kill_fasync must be called with a mode of POLL_OUT. + */ +static int genwqe_fasync(int fd, struct file *filp, int mode) +{ + struct genwqe_file *cdev = (struct genwqe_file *)filp->private_data; + + return fasync_helper(fd, filp, mode, &cdev->async_queue); +} + + +/** + * genwqe_release() - file close + * @inode: file system information + * @filp: file handle + * + * This function is executed whenever an application calls 'close(fd_genwqe)' + * + * Return: always 0 + */ +static int genwqe_release(struct inode *inode, struct file *filp) +{ + struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data; + struct genwqe_dev *cd = cfile->cd; + + /* there must be no entries in these lists! */ + genwqe_remove_mappings(cfile); + genwqe_remove_pinnings(cfile); + + /* remove this filp from the asynchronously notified filp's */ + genwqe_fasync(-1, filp, 0); + + /* + * For this to work we must not release cd when this cfile is + * not yet released, otherwise the list entry is invalid, + * because the list itself gets reinstantiated! + */ + genwqe_del_file(cd, cfile); + kfree(cfile); + return 0; +} + +static void genwqe_vma_open(struct vm_area_struct *vma) +{ + /* nothing ... */ +} + +/** + * genwqe_vma_close() - Called each time when vma is unmapped + * + * Free memory which got allocated by GenWQE mmap(). 
+ */ +static void genwqe_vma_close(struct vm_area_struct *vma) +{ + unsigned long vsize = vma->vm_end - vma->vm_start; + struct inode *inode = file_inode(vma->vm_file); + struct dma_mapping *dma_map; + struct genwqe_dev *cd = container_of(inode->i_cdev, struct genwqe_dev, + cdev_genwqe); + struct pci_dev *pci_dev = cd->pci_dev; + dma_addr_t d_addr = 0; + struct genwqe_file *cfile = vma->vm_private_data; + + dma_map = __genwqe_search_mapping(cfile, vma->vm_start, vsize, + &d_addr, NULL); + if (dma_map == NULL) { + dev_err(&pci_dev->dev, + " [%s] err: mapping not found: v=%lx, p=%lx s=%lx\n", + __func__, vma->vm_start, vma->vm_pgoff << PAGE_SHIFT, + vsize); + return; + } + __genwqe_del_mapping(cfile, dma_map); + __genwqe_free_consistent(cd, dma_map->size, dma_map->k_vaddr, + dma_map->dma_addr); + kfree(dma_map); +} + +static struct vm_operations_struct genwqe_vma_ops = { + .open = genwqe_vma_open, + .close = genwqe_vma_close, +}; + +/** + * genwqe_mmap() - Provide contignous buffers to userspace + * + * We use mmap() to allocate contignous buffers used for DMA + * transfers. After the buffer is allocated we remap it to user-space + * and remember a reference to our dma_mapping data structure, where + * we store the associated DMA address and allocated size. + * + * When we receive a DDCB execution request with the ATS bits set to + * plain buffer, we lookup our dma_mapping list to find the + * corresponding DMA address for the associated user-space address. + */ +static int genwqe_mmap(struct file *filp, struct vm_area_struct *vma) +{ + int rc; + unsigned long pfn, vsize = vma->vm_end - vma->vm_start; + struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data; + struct genwqe_dev *cd = cfile->cd; + struct dma_mapping *dma_map; + + if (vsize == 0) + return -EINVAL; + + if (get_order(vsize) > MAX_ORDER) + return -ENOMEM; + + dma_map = kzalloc(sizeof(struct dma_mapping), GFP_ATOMIC); + if (dma_map == NULL) + return -ENOMEM; + + genwqe_mapping_init(dma_map, GENWQE_MAPPING_RAW); + dma_map->u_vaddr = (void *)vma->vm_start; + dma_map->size = vsize; + dma_map->nr_pages = DIV_ROUND_UP(vsize, PAGE_SIZE); + dma_map->k_vaddr = __genwqe_alloc_consistent(cd, vsize, + &dma_map->dma_addr); + if (dma_map->k_vaddr == NULL) { + rc = -ENOMEM; + goto free_dma_map; + } + + if (capable(CAP_SYS_ADMIN) && (vsize > sizeof(dma_addr_t))) + *(dma_addr_t *)dma_map->k_vaddr = dma_map->dma_addr; + + pfn = virt_to_phys(dma_map->k_vaddr) >> PAGE_SHIFT; + rc = remap_pfn_range(vma, + vma->vm_start, + pfn, + vsize, + vma->vm_page_prot); + if (rc != 0) { + rc = -EFAULT; + goto free_dma_mem; + } + + vma->vm_private_data = cfile; + vma->vm_ops = &genwqe_vma_ops; + __genwqe_add_mapping(cfile, dma_map); + + return 0; + + free_dma_mem: + __genwqe_free_consistent(cd, dma_map->size, + dma_map->k_vaddr, + dma_map->dma_addr); + free_dma_map: + kfree(dma_map); + return rc; +} + +/** + * do_flash_update() - Excute flash update (write image or CVPD) + * @cd: genwqe device + * @load: details about image load + * + * Return: 0 if successful + */ + +#define FLASH_BLOCK 0x40000 /* we use 256k blocks */ + +static int do_flash_update(struct genwqe_file *cfile, + struct genwqe_bitstream *load) +{ + int rc = 0; + int blocks_to_flash; + dma_addr_t dma_addr; + u64 flash = 0; + size_t tocopy = 0; + u8 __user *buf; + u8 *xbuf; + u32 crc; + u8 cmdopts; + struct genwqe_dev *cd = cfile->cd; + struct file *filp = cfile->filp; + struct pci_dev *pci_dev = cd->pci_dev; + + if ((load->size & 0x3) != 0) + return -EINVAL; + + if (((unsigned 
long)(load->data_addr) & ~PAGE_MASK) != 0) + return -EINVAL; + + /* FIXME Bits have changed for new service layer! */ + switch ((char)load->partition) { + case '0': + cmdopts = 0x14; + break; /* download/erase_first/part_0 */ + case '1': + cmdopts = 0x1C; + break; /* download/erase_first/part_1 */ + case 'v': + cmdopts = 0x0C; + break; /* download/erase_first/vpd */ + default: + return -EINVAL; + } + + buf = (u8 __user *)load->data_addr; + xbuf = __genwqe_alloc_consistent(cd, FLASH_BLOCK, &dma_addr); + if (xbuf == NULL) + return -ENOMEM; + + blocks_to_flash = load->size / FLASH_BLOCK; + while (load->size) { + struct genwqe_ddcb_cmd *req; + + /* + * We must be 4 byte aligned. Buffer must be 0 appened + * to have defined values when calculating CRC. + */ + tocopy = min_t(size_t, load->size, FLASH_BLOCK); + + rc = copy_from_user(xbuf, buf, tocopy); + if (rc) { + rc = -EFAULT; + goto free_buffer; + } + crc = genwqe_crc32(xbuf, tocopy, 0xffffffff); + + dev_dbg(&pci_dev->dev, + "[%s] DMA: %lx CRC: %08x SZ: %ld %d\n", + __func__, (unsigned long)dma_addr, crc, tocopy, + blocks_to_flash); + + /* prepare DDCB for SLU process */ + req = ddcb_requ_alloc(); + if (req == NULL) { + rc = -ENOMEM; + goto free_buffer; + } + + req->cmd = SLCMD_MOVE_FLASH; + req->cmdopts = cmdopts; + + /* prepare invariant values */ + if (genwqe_get_slu_id(cd) <= 0x2) { + *(__be64 *)&req->__asiv[0] = cpu_to_be64(dma_addr); + *(__be64 *)&req->__asiv[8] = cpu_to_be64(tocopy); + *(__be64 *)&req->__asiv[16] = cpu_to_be64(flash); + *(__be32 *)&req->__asiv[24] = cpu_to_be32(0); + req->__asiv[24] = load->uid; + *(__be32 *)&req->__asiv[28] = cpu_to_be32(crc); + + /* for simulation only */ + *(__be64 *)&req->__asiv[88] = cpu_to_be64(load->slu_id); + *(__be64 *)&req->__asiv[96] = cpu_to_be64(load->app_id); + req->asiv_length = 32; /* bytes included in crc calc */ + } else { /* setup DDCB for ATS architecture */ + *(__be64 *)&req->asiv[0] = cpu_to_be64(dma_addr); + *(__be32 *)&req->asiv[8] = cpu_to_be32(tocopy); + *(__be32 *)&req->asiv[12] = cpu_to_be32(0); /* resvd */ + *(__be64 *)&req->asiv[16] = cpu_to_be64(flash); + *(__be32 *)&req->asiv[24] = cpu_to_be32(load->uid<<24); + *(__be32 *)&req->asiv[28] = cpu_to_be32(crc); + + /* for simulation only */ + *(__be64 *)&req->asiv[80] = cpu_to_be64(load->slu_id); + *(__be64 *)&req->asiv[88] = cpu_to_be64(load->app_id); + + /* Rd only */ + req->ats = 0x4ULL << 44; + req->asiv_length = 40; /* bytes included in crc calc */ + } + req->asv_length = 8; + + /* For Genwqe5 we get back the calculated CRC */ + *(u64 *)&req->asv[0] = 0ULL; /* 0x80 */ + + rc = __genwqe_execute_raw_ddcb(cd, req, filp->f_flags); + + load->retc = req->retc; + load->attn = req->attn; + load->progress = req->progress; + + if (rc < 0) { + ddcb_requ_free(req); + goto free_buffer; + } + + if (req->retc != DDCB_RETC_COMPLETE) { + rc = -EIO; + ddcb_requ_free(req); + goto free_buffer; + } + + load->size -= tocopy; + flash += tocopy; + buf += tocopy; + blocks_to_flash--; + ddcb_requ_free(req); + } + + free_buffer: + __genwqe_free_consistent(cd, FLASH_BLOCK, xbuf, dma_addr); + return rc; +} + +static int do_flash_read(struct genwqe_file *cfile, + struct genwqe_bitstream *load) +{ + int rc, blocks_to_flash; + dma_addr_t dma_addr; + u64 flash = 0; + size_t tocopy = 0; + u8 __user *buf; + u8 *xbuf; + u8 cmdopts; + struct genwqe_dev *cd = cfile->cd; + struct file *filp = cfile->filp; + struct pci_dev *pci_dev = cd->pci_dev; + struct genwqe_ddcb_cmd *cmd; + + if ((load->size & 0x3) != 0) + return -EINVAL; + + if (((unsigned 
long)(load->data_addr) & ~PAGE_MASK) != 0) + return -EINVAL; + + /* FIXME Bits have changed for new service layer! */ + switch ((char)load->partition) { + case '0': + cmdopts = 0x12; + break; /* upload/part_0 */ + case '1': + cmdopts = 0x1A; + break; /* upload/part_1 */ + case 'v': + cmdopts = 0x0A; + break; /* upload/vpd */ + default: + return -EINVAL; + } + + buf = (u8 __user *)load->data_addr; + xbuf = __genwqe_alloc_consistent(cd, FLASH_BLOCK, &dma_addr); + if (xbuf == NULL) + return -ENOMEM; + + blocks_to_flash = load->size / FLASH_BLOCK; + while (load->size) { + /* + * We must be 4 byte aligned. Buffer must be 0 appened + * to have defined values when calculating CRC. + */ + tocopy = min_t(size_t, load->size, FLASH_BLOCK); + + dev_dbg(&pci_dev->dev, + "[%s] DMA: %lx SZ: %ld %d\n", + __func__, (unsigned long)dma_addr, tocopy, + blocks_to_flash); + + /* prepare DDCB for SLU process */ + cmd = ddcb_requ_alloc(); + if (cmd == NULL) { + rc = -ENOMEM; + goto free_buffer; + } + cmd->cmd = SLCMD_MOVE_FLASH; + cmd->cmdopts = cmdopts; + + /* prepare invariant values */ + if (genwqe_get_slu_id(cd) <= 0x2) { + *(__be64 *)&cmd->__asiv[0] = cpu_to_be64(dma_addr); + *(__be64 *)&cmd->__asiv[8] = cpu_to_be64(tocopy); + *(__be64 *)&cmd->__asiv[16] = cpu_to_be64(flash); + *(__be32 *)&cmd->__asiv[24] = cpu_to_be32(0); + cmd->__asiv[24] = load->uid; + *(__be32 *)&cmd->__asiv[28] = cpu_to_be32(0) /* CRC */; + cmd->asiv_length = 32; /* bytes included in crc calc */ + } else { /* setup DDCB for ATS architecture */ + *(__be64 *)&cmd->asiv[0] = cpu_to_be64(dma_addr); + *(__be32 *)&cmd->asiv[8] = cpu_to_be32(tocopy); + *(__be32 *)&cmd->asiv[12] = cpu_to_be32(0); /* resvd */ + *(__be64 *)&cmd->asiv[16] = cpu_to_be64(flash); + *(__be32 *)&cmd->asiv[24] = cpu_to_be32(load->uid<<24); + *(__be32 *)&cmd->asiv[28] = cpu_to_be32(0); /* CRC */ + + /* rd/wr */ + cmd->ats = 0x5ULL << 44; + cmd->asiv_length = 40; /* bytes included in crc calc */ + } + cmd->asv_length = 8; + + /* we only get back the calculated CRC */ + *(u64 *)&cmd->asv[0] = 0ULL; /* 0x80 */ + + rc = __genwqe_execute_raw_ddcb(cd, cmd, filp->f_flags); + + load->retc = cmd->retc; + load->attn = cmd->attn; + load->progress = cmd->progress; + + if ((rc < 0) && (rc != -EBADMSG)) { + ddcb_requ_free(cmd); + goto free_buffer; + } + + rc = copy_to_user(buf, xbuf, tocopy); + if (rc) { + rc = -EFAULT; + ddcb_requ_free(cmd); + goto free_buffer; + } + + /* We know that we can get retc 0x104 with CRC err */ + if (((cmd->retc == DDCB_RETC_FAULT) && + (cmd->attn != 0x02)) || /* Normally ignore CRC error */ + ((cmd->retc == DDCB_RETC_COMPLETE) && + (cmd->attn != 0x00))) { /* Everything was fine */ + rc = -EIO; + ddcb_requ_free(cmd); + goto free_buffer; + } + + load->size -= tocopy; + flash += tocopy; + buf += tocopy; + blocks_to_flash--; + ddcb_requ_free(cmd); + } + rc = 0; + + free_buffer: + __genwqe_free_consistent(cd, FLASH_BLOCK, xbuf, dma_addr); + return rc; +} + +static int genwqe_pin_mem(struct genwqe_file *cfile, struct genwqe_mem *m) +{ + int rc; + struct genwqe_dev *cd = cfile->cd; + struct pci_dev *pci_dev = cfile->cd->pci_dev; + struct dma_mapping *dma_map; + unsigned long map_addr; + unsigned long map_size; + + if ((m->addr == 0x0) || (m->size == 0)) + return -EINVAL; + + map_addr = (m->addr & PAGE_MASK); + map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE); + + dma_map = kzalloc(sizeof(struct dma_mapping), GFP_ATOMIC); + if (dma_map == NULL) + return -ENOMEM; + + genwqe_mapping_init(dma_map, GENWQE_MAPPING_SGL_PINNED); + rc = 
genwqe_user_vmap(cd, dma_map, (void *)map_addr, map_size, NULL); + if (rc != 0) { + dev_err(&pci_dev->dev, + "[%s] genwqe_user_vmap rc=%d\n", __func__, rc); + kfree(dma_map); + return rc; + } + + genwqe_add_pin(cfile, dma_map); + return 0; +} + +static int genwqe_unpin_mem(struct genwqe_file *cfile, struct genwqe_mem *m) +{ + struct genwqe_dev *cd = cfile->cd; + struct dma_mapping *dma_map; + unsigned long map_addr; + unsigned long map_size; + + if (m->addr == 0x0) + return -EINVAL; + + map_addr = (m->addr & PAGE_MASK); + map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE); + + dma_map = genwqe_search_pin(cfile, map_addr, map_size, NULL); + if (dma_map == NULL) + return -ENOENT; + + genwqe_del_pin(cfile, dma_map); + genwqe_user_vunmap(cd, dma_map, NULL); + kfree(dma_map); + return 0; +} + +/** + * ddcb_cmd_cleanup() - Remove dynamically created fixup entries + * + * Only if there are any. Pinnings are not removed. + */ +static int ddcb_cmd_cleanup(struct genwqe_file *cfile, struct ddcb_requ *req) +{ + unsigned int i; + struct dma_mapping *dma_map; + struct genwqe_dev *cd = cfile->cd; + + for (i = 0; i < DDCB_FIXUPS; i++) { + dma_map = &req->dma_mappings[i]; + + if (dma_mapping_used(dma_map)) { + __genwqe_del_mapping(cfile, dma_map); + genwqe_user_vunmap(cd, dma_map, req); + } + if (req->sgls[i].sgl != NULL) + genwqe_free_sync_sgl(cd, &req->sgls[i]); + } + return 0; +} + +/** + * ddcb_cmd_fixups() - Establish DMA fixups/sglists for user memory references + * + * Before the DDCB gets executed we need to handle the fixups. We + * replace the user-space addresses with DMA addresses or do + * additional setup work e.g. generating a scatter-gather list which + * is used to describe the memory referred to in the fixup. + */ +static int ddcb_cmd_fixups(struct genwqe_file *cfile, struct ddcb_requ *req) +{ + int rc; + unsigned int asiv_offs, i; + struct genwqe_dev *cd = cfile->cd; + struct genwqe_ddcb_cmd *cmd = &req->cmd; + struct dma_mapping *m; + const char *type = "UNKNOWN"; + + for (i = 0, asiv_offs = 0x00; asiv_offs <= 0x58; + i++, asiv_offs += 0x08) { + + u64 u_addr; + dma_addr_t d_addr; + u32 u_size = 0; + u64 ats_flags; + + ats_flags = ATS_GET_FLAGS(cmd->ats, asiv_offs); + + switch (ats_flags) { + + case ATS_TYPE_DATA: + break; /* nothing to do here */ + + case ATS_TYPE_FLAT_RDWR: + case ATS_TYPE_FLAT_RD: { + u_addr = be64_to_cpu(*((__be64 *)&cmd-> + asiv[asiv_offs])); + u_size = be32_to_cpu(*((__be32 *)&cmd-> + asiv[asiv_offs + 0x08])); + + /* + * No data available. Ignore u_addr in this + * case and set addr to 0. Hardware must not + * fetch the buffer. + */ + if (u_size == 0x0) { + *((__be64 *)&cmd->asiv[asiv_offs]) = + cpu_to_be64(0x0); + break; + } + + m = __genwqe_search_mapping(cfile, u_addr, u_size, + &d_addr, NULL); + if (m == NULL) { + rc = -EFAULT; + goto err_out; + } + + *((__be64 *)&cmd->asiv[asiv_offs]) = + cpu_to_be64(d_addr); + break; + } + + case ATS_TYPE_SGL_RDWR: + case ATS_TYPE_SGL_RD: { + int page_offs; + + u_addr = be64_to_cpu(*((__be64 *) + &cmd->asiv[asiv_offs])); + u_size = be32_to_cpu(*((__be32 *) + &cmd->asiv[asiv_offs + 0x08])); + + /* + * No data available. Ignore u_addr in this + * case and set addr to 0. Hardware must not + * fetch the empty sgl. 
+ */ + if (u_size == 0x0) { + *((__be64 *)&cmd->asiv[asiv_offs]) = + cpu_to_be64(0x0); + break; + } + + m = genwqe_search_pin(cfile, u_addr, u_size, NULL); + if (m != NULL) { + type = "PINNING"; + page_offs = (u_addr - + (u64)m->u_vaddr)/PAGE_SIZE; + } else { + type = "MAPPING"; + m = &req->dma_mappings[i]; + + genwqe_mapping_init(m, + GENWQE_MAPPING_SGL_TEMP); + rc = genwqe_user_vmap(cd, m, (void *)u_addr, + u_size, req); + if (rc != 0) + goto err_out; + + __genwqe_add_mapping(cfile, m); + page_offs = 0; + } + + /* create genwqe style scatter gather list */ + rc = genwqe_alloc_sync_sgl(cd, &req->sgls[i], + (void __user *)u_addr, + u_size); + if (rc != 0) + goto err_out; + + genwqe_setup_sgl(cd, &req->sgls[i], + &m->dma_list[page_offs]); + + *((__be64 *)&cmd->asiv[asiv_offs]) = + cpu_to_be64(req->sgls[i].sgl_dma_addr); + + break; + } + default: + rc = -EINVAL; + goto err_out; + } + } + return 0; + + err_out: + ddcb_cmd_cleanup(cfile, req); + return rc; +} + +/** + * genwqe_execute_ddcb() - Execute DDCB using userspace address fixups + * + * The code will build up the translation tables or lookup the + * contignous memory allocation table to find the right translations + * and DMA addresses. + */ +static int genwqe_execute_ddcb(struct genwqe_file *cfile, + struct genwqe_ddcb_cmd *cmd) +{ + int rc; + struct genwqe_dev *cd = cfile->cd; + struct file *filp = cfile->filp; + struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd); + + rc = ddcb_cmd_fixups(cfile, req); + if (rc != 0) + return rc; + + rc = __genwqe_execute_raw_ddcb(cd, cmd, filp->f_flags); + ddcb_cmd_cleanup(cfile, req); + return rc; +} + +static int do_execute_ddcb(struct genwqe_file *cfile, + unsigned long arg, int raw) +{ + int rc; + struct genwqe_ddcb_cmd *cmd; + struct ddcb_requ *req; + struct genwqe_dev *cd = cfile->cd; + struct file *filp = cfile->filp; + + cmd = ddcb_requ_alloc(); + if (cmd == NULL) + return -ENOMEM; + + req = container_of(cmd, struct ddcb_requ, cmd); + + if (copy_from_user(cmd, (void __user *)arg, sizeof(*cmd))) { + ddcb_requ_free(cmd); + return -EFAULT; + } + + if (!raw) + rc = genwqe_execute_ddcb(cfile, cmd); + else + rc = __genwqe_execute_raw_ddcb(cd, cmd, filp->f_flags); + + /* Copy back only the modifed fields. Do not copy ASIV + back since the copy got modified by the driver. 
*/ + if (copy_to_user((void __user *)arg, cmd, + sizeof(*cmd) - DDCB_ASIV_LENGTH)) { + ddcb_requ_free(cmd); + return -EFAULT; + } + + ddcb_requ_free(cmd); + return rc; +} + +/** + * genwqe_ioctl() - IO control + * @filp: file handle + * @cmd: command identifier (passed from user) + * @arg: argument (passed from user) + * + * Return: 0 success + */ +static long genwqe_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + int rc = 0; + struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data; + struct genwqe_dev *cd = cfile->cd; + struct pci_dev *pci_dev = cd->pci_dev; + struct genwqe_reg_io __user *io; + u64 val; + u32 reg_offs; + + /* Return -EIO if card hit EEH */ + if (pci_channel_offline(pci_dev)) + return -EIO; + + if (_IOC_TYPE(cmd) != GENWQE_IOC_CODE) + return -EINVAL; + + switch (cmd) { + + case GENWQE_GET_CARD_STATE: + put_user(cd->card_state, (enum genwqe_card_state __user *)arg); + return 0; + + /* Register access */ + case GENWQE_READ_REG64: { + io = (struct genwqe_reg_io __user *)arg; + + if (get_user(reg_offs, &io->num)) + return -EFAULT; + + if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x7)) + return -EINVAL; + + val = __genwqe_readq(cd, reg_offs); + put_user(val, &io->val64); + return 0; + } + + case GENWQE_WRITE_REG64: { + io = (struct genwqe_reg_io __user *)arg; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if ((filp->f_flags & O_ACCMODE) == O_RDONLY) + return -EPERM; + + if (get_user(reg_offs, &io->num)) + return -EFAULT; + + if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x7)) + return -EINVAL; + + if (get_user(val, &io->val64)) + return -EFAULT; + + __genwqe_writeq(cd, reg_offs, val); + return 0; + } + + case GENWQE_READ_REG32: { + io = (struct genwqe_reg_io __user *)arg; + + if (get_user(reg_offs, &io->num)) + return -EFAULT; + + if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x3)) + return -EINVAL; + + val = __genwqe_readl(cd, reg_offs); + put_user(val, &io->val64); + return 0; + } + + case GENWQE_WRITE_REG32: { + io = (struct genwqe_reg_io __user *)arg; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if ((filp->f_flags & O_ACCMODE) == O_RDONLY) + return -EPERM; + + if (get_user(reg_offs, &io->num)) + return -EFAULT; + + if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x3)) + return -EINVAL; + + if (get_user(val, &io->val64)) + return -EFAULT; + + __genwqe_writel(cd, reg_offs, val); + return 0; + } + + /* Flash update/reading */ + case GENWQE_SLU_UPDATE: { + struct genwqe_bitstream load; + + if (!genwqe_is_privileged(cd)) + return -EPERM; + + if ((filp->f_flags & O_ACCMODE) == O_RDONLY) + return -EPERM; + + if (copy_from_user(&load, (void __user *)arg, + sizeof(load))) + return -EFAULT; + + rc = do_flash_update(cfile, &load); + + if (copy_to_user((void __user *)arg, &load, sizeof(load))) + return -EFAULT; + + return rc; + } + + case GENWQE_SLU_READ: { + struct genwqe_bitstream load; + + if (!genwqe_is_privileged(cd)) + return -EPERM; + + if (genwqe_flash_readback_fails(cd)) + return -ENOSPC; /* known to fail for old versions */ + + if (copy_from_user(&load, (void __user *)arg, sizeof(load))) + return -EFAULT; + + rc = do_flash_read(cfile, &load); + + if (copy_to_user((void __user *)arg, &load, sizeof(load))) + return -EFAULT; + + return rc; + } + + /* memory pinning and unpinning */ + case GENWQE_PIN_MEM: { + struct genwqe_mem m; + + if (copy_from_user(&m, (void __user *)arg, sizeof(m))) + return -EFAULT; + + return genwqe_pin_mem(cfile, &m); + } + + case GENWQE_UNPIN_MEM: { + struct genwqe_mem m; + + if (copy_from_user(&m, (void 
__user *)arg, sizeof(m))) + return -EFAULT; + + return genwqe_unpin_mem(cfile, &m); + } + + /* launch an DDCB and wait for completion */ + case GENWQE_EXECUTE_DDCB: + return do_execute_ddcb(cfile, arg, 0); + + case GENWQE_EXECUTE_RAW_DDCB: { + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return do_execute_ddcb(cfile, arg, 1); + } + + default: + return -EINVAL; + } + + return rc; +} + +#if defined(CONFIG_COMPAT) +/** + * genwqe_compat_ioctl() - Compatibility ioctl + * + * Called whenever a 32-bit process running under a 64-bit kernel + * performs an ioctl on /dev/genwqe<n>_card. + * + * @filp: file pointer. + * @cmd: command. + * @arg: user argument. + * Return: zero on success or negative number on failure. + */ +static long genwqe_compat_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + return genwqe_ioctl(filp, cmd, arg); +} +#endif /* defined(CONFIG_COMPAT) */ + +static const struct file_operations genwqe_fops = { + .owner = THIS_MODULE, + .open = genwqe_open, + .fasync = genwqe_fasync, + .mmap = genwqe_mmap, + .unlocked_ioctl = genwqe_ioctl, +#if defined(CONFIG_COMPAT) + .compat_ioctl = genwqe_compat_ioctl, +#endif + .release = genwqe_release, +}; + +static int genwqe_device_initialized(struct genwqe_dev *cd) +{ + return cd->dev != NULL; +} + +/** + * genwqe_device_create() - Create and configure genwqe char device + * @cd: genwqe device descriptor + * + * This function must be called before we create any more genwqe + * character devices, because it is allocating the major and minor + * number which are supposed to be used by the client drivers. + */ +int genwqe_device_create(struct genwqe_dev *cd) +{ + int rc; + struct pci_dev *pci_dev = cd->pci_dev; + + /* + * Here starts the individual setup per client. It must + * initialize its own cdev data structure with its own fops. + * The appropriate devnum needs to be created. The ranges must + * not overlap. + */ + rc = alloc_chrdev_region(&cd->devnum_genwqe, 0, + GENWQE_MAX_MINOR, GENWQE_DEVNAME); + if (rc < 0) { + dev_err(&pci_dev->dev, "err: alloc_chrdev_region failed\n"); + goto err_dev; + } + + cdev_init(&cd->cdev_genwqe, &genwqe_fops); + cd->cdev_genwqe.owner = THIS_MODULE; + + rc = cdev_add(&cd->cdev_genwqe, cd->devnum_genwqe, 1); + if (rc < 0) { + dev_err(&pci_dev->dev, "err: cdev_add failed\n"); + goto err_add; + } + + /* + * Finally the device in /dev/... must be created. The rule is + * to use card%d_clientname for each created device. + */ + cd->dev = device_create_with_groups(cd->class_genwqe, + &cd->pci_dev->dev, + cd->devnum_genwqe, cd, + genwqe_attribute_groups, + GENWQE_DEVNAME "%u_card", + cd->card_idx); + if (IS_ERR(cd->dev)) { + rc = PTR_ERR(cd->dev); + goto err_cdev; + } + + rc = genwqe_init_debugfs(cd); + if (rc != 0) + goto err_debugfs; + + return 0; + + err_debugfs: + device_destroy(cd->class_genwqe, cd->devnum_genwqe); + err_cdev: + cdev_del(&cd->cdev_genwqe); + err_add: + unregister_chrdev_region(cd->devnum_genwqe, GENWQE_MAX_MINOR); + err_dev: + cd->dev = NULL; + return rc; +} + +static int genwqe_inform_and_stop_processes(struct genwqe_dev *cd) +{ + int rc; + unsigned int i; + struct pci_dev *pci_dev = cd->pci_dev; + + if (!genwqe_open_files(cd)) + return 0; + + dev_warn(&pci_dev->dev, "[%s] send SIGIO and wait ...\n", __func__); + + rc = genwqe_kill_fasync(cd, SIGIO); + if (rc > 0) { + /* give kill_timeout seconds to close file descriptors ... 
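+		   (the loop below re-checks genwqe_open_files() once
+		   per second)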
*/ + for (i = 0; (i < genwqe_kill_timeout) && + genwqe_open_files(cd); i++) { + dev_info(&pci_dev->dev, " %d sec ...", i); + + cond_resched(); + msleep(1000); + } + + /* if no open files we can safely continue, else ... */ + if (!genwqe_open_files(cd)) + return 0; + + dev_warn(&pci_dev->dev, + "[%s] send SIGKILL and wait ...\n", __func__); + + rc = genwqe_force_sig(cd, SIGKILL); /* force terminate */ + if (rc) { + /* Give kill_timout more seconds to end processes */ + for (i = 0; (i < genwqe_kill_timeout) && + genwqe_open_files(cd); i++) { + dev_warn(&pci_dev->dev, " %d sec ...", i); + + cond_resched(); + msleep(1000); + } + } + } + return 0; +} + +/** + * genwqe_device_remove() - Remove genwqe's char device + * + * This function must be called after the client devices are removed + * because it will free the major/minor number range for the genwqe + * drivers. + * + * This function must be robust enough to be called twice. + */ +int genwqe_device_remove(struct genwqe_dev *cd) +{ + int rc; + struct pci_dev *pci_dev = cd->pci_dev; + + if (!genwqe_device_initialized(cd)) + return 1; + + genwqe_inform_and_stop_processes(cd); + + /* + * We currently do wait until all filedescriptors are + * closed. This leads to a problem when we abort the + * application which will decrease this reference from + * 1/unused to 0/illegal and not from 2/used 1/empty. + */ + rc = atomic_read(&cd->cdev_genwqe.kobj.kref.refcount); + if (rc != 1) { + dev_err(&pci_dev->dev, + "[%s] err: cdev_genwqe...refcount=%d\n", __func__, rc); + panic("Fatal err: cannot free resources with pending references!"); + } + + genqwe_exit_debugfs(cd); + device_destroy(cd->class_genwqe, cd->devnum_genwqe); + cdev_del(&cd->cdev_genwqe); + unregister_chrdev_region(cd->devnum_genwqe, GENWQE_MAX_MINOR); + cd->dev = NULL; + + return 0; +} diff --git a/kernel/drivers/misc/genwqe/card_sysfs.c b/kernel/drivers/misc/genwqe/card_sysfs.c new file mode 100644 index 000000000..6ab31eff0 --- /dev/null +++ b/kernel/drivers/misc/genwqe/card_sysfs.c @@ -0,0 +1,303 @@ +/** + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp <haver@linux.vnet.ibm.com> + * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com> + * Author: Michael Jung <mijung@gmx.net> + * Author: Michael Ruettger <michael@ibmra.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* + * Sysfs interfaces for the GenWQE card. There are attributes to query + * the version of the bitstream as well as some for the driver. For + * debugging, please also see the debugfs interfaces of this driver. 
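+ *
+ * The attributes appear per card, e.g. the version attribute of the
+ * first card shows up as /sys/class/genwqe/genwqe0_card/version (the
+ * card index 0 is an assumption; the class name follows
+ * GENWQE_DEVNAME).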
+ */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/string.h> +#include <linux/fs.h> +#include <linux/sysfs.h> +#include <linux/ctype.h> +#include <linux/device.h> + +#include "card_base.h" +#include "card_ddcb.h" + +static const char * const genwqe_types[] = { + [GENWQE_TYPE_ALTERA_230] = "GenWQE4-230", + [GENWQE_TYPE_ALTERA_530] = "GenWQE4-530", + [GENWQE_TYPE_ALTERA_A4] = "GenWQE5-A4", + [GENWQE_TYPE_ALTERA_A7] = "GenWQE5-A7", +}; + +static ssize_t status_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct genwqe_dev *cd = dev_get_drvdata(dev); + const char *cs[GENWQE_CARD_STATE_MAX] = { "unused", "used", "error" }; + + return sprintf(buf, "%s\n", cs[cd->card_state]); +} +static DEVICE_ATTR_RO(status); + +static ssize_t appid_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + char app_name[5]; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + genwqe_read_app_id(cd, app_name, sizeof(app_name)); + return sprintf(buf, "%s\n", app_name); +} +static DEVICE_ATTR_RO(appid); + +static ssize_t version_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + u64 slu_id, app_id; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + slu_id = __genwqe_readq(cd, IO_SLU_UNITCFG); + app_id = __genwqe_readq(cd, IO_APP_UNITCFG); + + return sprintf(buf, "%016llx.%016llx\n", slu_id, app_id); +} +static DEVICE_ATTR_RO(version); + +static ssize_t type_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + u8 card_type; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + card_type = genwqe_card_type(cd); + return sprintf(buf, "%s\n", (card_type >= ARRAY_SIZE(genwqe_types)) ? + "invalid" : genwqe_types[card_type]); +} +static DEVICE_ATTR_RO(type); + +static ssize_t tempsens_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + u64 tempsens; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + tempsens = __genwqe_readq(cd, IO_SLU_TEMPERATURE_SENSOR); + return sprintf(buf, "%016llx\n", tempsens); +} +static DEVICE_ATTR_RO(tempsens); + +static ssize_t freerunning_timer_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u64 t; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + t = __genwqe_readq(cd, IO_SLC_FREE_RUNNING_TIMER); + return sprintf(buf, "%016llx\n", t); +} +static DEVICE_ATTR_RO(freerunning_timer); + +static ssize_t queue_working_time_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u64 t; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + t = __genwqe_readq(cd, IO_SLC_QUEUE_WTIME); + return sprintf(buf, "%016llx\n", t); +} +static DEVICE_ATTR_RO(queue_working_time); + +static ssize_t base_clock_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u64 base_clock; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + base_clock = genwqe_base_clock_frequency(cd); + return sprintf(buf, "%lld\n", base_clock); +} +static DEVICE_ATTR_RO(base_clock); + +/** + * curr_bitstream_show() - Show the current bitstream id + * + * There is a bug in some old versions of the CPLD which selects the + * bitstream, which causes the IO_SLU_BITSTREAM register to report + * unreliable data in very rare cases. This makes this sysfs + * unreliable up to the point were a new CPLD version is being used. 
+ * + * Unfortunately there is no automatic way yet to query the CPLD + * version, such that you need to manually ensure via programming + * tools that you have a recent version of the CPLD software. + * + * The proposed circumvention is to use a special recovery bitstream + * on the backup partition (0) to identify problems while loading the + * image. + */ +static ssize_t curr_bitstream_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int curr_bitstream; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + curr_bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM) & 0x1; + return sprintf(buf, "%d\n", curr_bitstream); +} +static DEVICE_ATTR_RO(curr_bitstream); + +/** + * next_bitstream_show() - Show the next activated bitstream + * + * IO_SLC_CFGREG_SOFTRESET: This register can only be accessed by the PF. + */ +static ssize_t next_bitstream_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int next_bitstream; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + switch ((cd->softreset & 0xc) >> 2) { + case 0x2: + next_bitstream = 0; + break; + case 0x3: + next_bitstream = 1; + break; + default: + next_bitstream = -1; + break; /* error */ + } + return sprintf(buf, "%d\n", next_bitstream); +} + +static ssize_t next_bitstream_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int partition; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + if (kstrtoint(buf, 0, &partition) < 0) + return -EINVAL; + + switch (partition) { + case 0x0: + cd->softreset = 0x78; + break; + case 0x1: + cd->softreset = 0x7c; + break; + default: + return -EINVAL; + } + + __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset); + return count; +} +static DEVICE_ATTR_RW(next_bitstream); + +static ssize_t reload_bitstream_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int reload; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + if (kstrtoint(buf, 0, &reload) < 0) + return -EINVAL; + + if (reload == 0x1) { + if (cd->card_state == GENWQE_CARD_UNUSED || + cd->card_state == GENWQE_CARD_USED) + cd->card_state = GENWQE_CARD_RELOAD_BITSTREAM; + else + return -EIO; + } else { + return -EINVAL; + } + + return count; +} +static DEVICE_ATTR_WO(reload_bitstream); + +/* + * Create device_attribute structures / params: name, mode, show, store + * additional flag if valid in VF + */ +static struct attribute *genwqe_attributes[] = { + &dev_attr_tempsens.attr, + &dev_attr_next_bitstream.attr, + &dev_attr_curr_bitstream.attr, + &dev_attr_base_clock.attr, + &dev_attr_type.attr, + &dev_attr_version.attr, + &dev_attr_appid.attr, + &dev_attr_status.attr, + &dev_attr_freerunning_timer.attr, + &dev_attr_queue_working_time.attr, + &dev_attr_reload_bitstream.attr, + NULL, +}; + +static struct attribute *genwqe_normal_attributes[] = { + &dev_attr_type.attr, + &dev_attr_version.attr, + &dev_attr_appid.attr, + &dev_attr_status.attr, + &dev_attr_freerunning_timer.attr, + &dev_attr_queue_working_time.attr, + NULL, +}; + +/** + * genwqe_is_visible() - Determine if sysfs attribute should be visible or not + * + * VFs have restricted mmio capabilities, so not all sysfs entries + * are allowed in VFs. 
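+ *
+ * Only the attributes listed in genwqe_normal_attributes[] above stay
+ * visible when running on a VF.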
+ */ +static umode_t genwqe_is_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + unsigned int j; + struct device *dev = container_of(kobj, struct device, kobj); + struct genwqe_dev *cd = dev_get_drvdata(dev); + umode_t mode = attr->mode; + + if (genwqe_is_privileged(cd)) + return mode; + + for (j = 0; genwqe_normal_attributes[j] != NULL; j++) + if (genwqe_normal_attributes[j] == attr) + return mode; + + return 0; +} + +static struct attribute_group genwqe_attribute_group = { + .is_visible = genwqe_is_visible, + .attrs = genwqe_attributes, +}; + +const struct attribute_group *genwqe_attribute_groups[] = { + &genwqe_attribute_group, + NULL, +}; diff --git a/kernel/drivers/misc/genwqe/card_utils.c b/kernel/drivers/misc/genwqe/card_utils.c new file mode 100644 index 000000000..1ca94e6fa --- /dev/null +++ b/kernel/drivers/misc/genwqe/card_utils.c @@ -0,0 +1,1049 @@ +/** + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp <haver@linux.vnet.ibm.com> + * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com> + * Author: Michael Jung <mijung@gmx.net> + * Author: Michael Ruettger <michael@ibmra.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* + * Miscelanous functionality used in the other GenWQE driver parts. + */ + +#include <linux/kernel.h> +#include <linux/dma-mapping.h> +#include <linux/sched.h> +#include <linux/vmalloc.h> +#include <linux/page-flags.h> +#include <linux/scatterlist.h> +#include <linux/hugetlb.h> +#include <linux/iommu.h> +#include <linux/delay.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> +#include <linux/ctype.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/delay.h> +#include <asm/pgtable.h> + +#include "genwqe_driver.h" +#include "card_base.h" +#include "card_ddcb.h" + +/** + * __genwqe_writeq() - Write 64-bit register + * @cd: genwqe device descriptor + * @byte_offs: byte offset within BAR + * @val: 64-bit value + * + * Return: 0 if success; < 0 if error + */ +int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val) +{ + struct pci_dev *pci_dev = cd->pci_dev; + + if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) + return -EIO; + + if (cd->mmio == NULL) + return -EIO; + + if (pci_channel_offline(pci_dev)) + return -EIO; + + __raw_writeq((__force u64)cpu_to_be64(val), cd->mmio + byte_offs); + return 0; +} + +/** + * __genwqe_readq() - Read 64-bit register + * @cd: genwqe device descriptor + * @byte_offs: offset within BAR + * + * Return: value from register + */ +u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs) +{ + if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) + return 0xffffffffffffffffull; + + if ((cd->err_inject & GENWQE_INJECT_GFIR_FATAL) && + (byte_offs == IO_SLC_CFGREG_GFIR)) + return 0x000000000000ffffull; + + if ((cd->err_inject & GENWQE_INJECT_GFIR_INFO) && + (byte_offs == IO_SLC_CFGREG_GFIR)) + return 0x00000000ffff0000ull; + + if (cd->mmio == NULL) + return 0xffffffffffffffffull; + + return be64_to_cpu((__force __be64)__raw_readq(cd->mmio + byte_offs)); +} + +/** + * __genwqe_writel() - Write 32-bit register + * @cd: 
genwqe device descriptor + * @byte_offs: byte offset within BAR + * @val: 32-bit value + * + * Return: 0 if success; < 0 if error + */ +int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val) +{ + struct pci_dev *pci_dev = cd->pci_dev; + + if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) + return -EIO; + + if (cd->mmio == NULL) + return -EIO; + + if (pci_channel_offline(pci_dev)) + return -EIO; + + __raw_writel((__force u32)cpu_to_be32(val), cd->mmio + byte_offs); + return 0; +} + +/** + * __genwqe_readl() - Read 32-bit register + * @cd: genwqe device descriptor + * @byte_offs: offset within BAR + * + * Return: Value from register + */ +u32 __genwqe_readl(struct genwqe_dev *cd, u64 byte_offs) +{ + if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) + return 0xffffffff; + + if (cd->mmio == NULL) + return 0xffffffff; + + return be32_to_cpu((__force __be32)__raw_readl(cd->mmio + byte_offs)); +} + +/** + * genwqe_read_app_id() - Extract app_id + * + * app_unitcfg needs to be filled with valid data first + */ +int genwqe_read_app_id(struct genwqe_dev *cd, char *app_name, int len) +{ + int i, j; + u32 app_id = (u32)cd->app_unitcfg; + + memset(app_name, 0, len); + for (i = 0, j = 0; j < min(len, 4); j++) { + char ch = (char)((app_id >> (24 - j*8)) & 0xff); + + if (ch == ' ') + continue; + app_name[i++] = isprint(ch) ? ch : 'X'; + } + return i; +} + +/** + * genwqe_init_crc32() - Prepare a lookup table for fast crc32 calculations + * + * Existing kernel functions seem to use a different polynomial, + * therefore we could not use them here. + * + * Genwqe's Polynomial = 0x20044009 + */ +#define CRC32_POLYNOMIAL 0x20044009 +static u32 crc32_tab[256]; /* crc32 lookup table */ + +void genwqe_init_crc32(void) +{ + int i, j; + u32 crc; + + for (i = 0; i < 256; i++) { + crc = i << 24; + for (j = 0; j < 8; j++) { + if (crc & 0x80000000) + crc = (crc << 1) ^ CRC32_POLYNOMIAL; + else + crc = (crc << 1); + } + crc32_tab[i] = crc; + } +} + +/** + * genwqe_crc32() - Generate 32-bit crc as required for DDCBs + * @buff: pointer to data buffer + * @len: length of data for calculation + * @init: initial crc (0xffffffff at start) + * + * polynomial = x^32 + x^29 + x^18 + x^14 + x^3 + 1 (0x20044009) + * + * Example: 4 bytes 0x01 0x02 0x03 0x04 with init=0xffffffff should + * result in a crc32 of 0xf33cb7d3. + * + * The existing kernel crc functions did not cover this polynomial yet. + * + * Return: crc32 checksum. 
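+ *
+ * An equivalent bit-at-a-time form (an illustrative sketch only, not
+ * the driver's code path) shows what the table lookup below computes
+ * per input byte:
+ *
+ *	crc ^= (u32)*buff++ << 24;
+ *	for (k = 0; k < 8; k++)
+ *		crc = (crc & 0x80000000) ?
+ *			(crc << 1) ^ CRC32_POLYNOMIAL : crc << 1;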
+ */ +u32 genwqe_crc32(u8 *buff, size_t len, u32 init) +{ + int i; + u32 crc; + + crc = init; + while (len--) { + i = ((crc >> 24) ^ *buff++) & 0xFF; + crc = (crc << 8) ^ crc32_tab[i]; + } + return crc; +} + +void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size, + dma_addr_t *dma_handle) +{ + if (get_order(size) > MAX_ORDER) + return NULL; + + return pci_alloc_consistent(cd->pci_dev, size, dma_handle); +} + +void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + if (vaddr == NULL) + return; + + pci_free_consistent(cd->pci_dev, size, vaddr, dma_handle); +} + +static void genwqe_unmap_pages(struct genwqe_dev *cd, dma_addr_t *dma_list, + int num_pages) +{ + int i; + struct pci_dev *pci_dev = cd->pci_dev; + + for (i = 0; (i < num_pages) && (dma_list[i] != 0x0); i++) { + pci_unmap_page(pci_dev, dma_list[i], + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + dma_list[i] = 0x0; + } +} + +static int genwqe_map_pages(struct genwqe_dev *cd, + struct page **page_list, int num_pages, + dma_addr_t *dma_list) +{ + int i; + struct pci_dev *pci_dev = cd->pci_dev; + + /* establish DMA mapping for requested pages */ + for (i = 0; i < num_pages; i++) { + dma_addr_t daddr; + + dma_list[i] = 0x0; + daddr = pci_map_page(pci_dev, page_list[i], + 0, /* map_offs */ + PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); /* FIXME rd/rw */ + + if (pci_dma_mapping_error(pci_dev, daddr)) { + dev_err(&pci_dev->dev, + "[%s] err: no dma addr daddr=%016llx!\n", + __func__, (long long)daddr); + goto err; + } + + dma_list[i] = daddr; + } + return 0; + + err: + genwqe_unmap_pages(cd, dma_list, num_pages); + return -EIO; +} + +static int genwqe_sgl_size(int num_pages) +{ + int len, num_tlb = num_pages / 7; + + len = sizeof(struct sg_entry) * (num_pages+num_tlb + 1); + return roundup(len, PAGE_SIZE); +} + +/** + * genwqe_alloc_sync_sgl() - Allocate memory for sgl and overlapping pages + * + * Allocates memory for sgl and overlapping pages. Pages which might + * overlap other user-space memory blocks are being cached for DMAs, + * such that we do not run into synchronization issues. Data is copied + * from user-space into the cached pages. 
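+ *
+ * A worked example with illustrative numbers, assuming 4 KiB pages:
+ * a 9 KiB transfer starting at page offset 0x800 gives fpage_offs =
+ * 0x800, fpage_size = 2 KiB (the rest of the first page), nr_pages =
+ * DIV_ROUND_UP(0x800 + 9 KiB, 4 KiB) = 3 and lpage_size = (9 KiB -
+ * 2 KiB) % 4 KiB = 3 KiB. Only the partial first and last pages get
+ * bounce buffers; the full middle page is DMA-mapped directly.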
+ */ +int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, + void __user *user_addr, size_t user_size) +{ + int rc; + struct pci_dev *pci_dev = cd->pci_dev; + + sgl->fpage_offs = offset_in_page((unsigned long)user_addr); + sgl->fpage_size = min_t(size_t, PAGE_SIZE-sgl->fpage_offs, user_size); + sgl->nr_pages = DIV_ROUND_UP(sgl->fpage_offs + user_size, PAGE_SIZE); + sgl->lpage_size = (user_size - sgl->fpage_size) % PAGE_SIZE; + + dev_dbg(&pci_dev->dev, "[%s] uaddr=%p usize=%8ld nr_pages=%ld fpage_offs=%lx fpage_size=%ld lpage_size=%ld\n", + __func__, user_addr, user_size, sgl->nr_pages, + sgl->fpage_offs, sgl->fpage_size, sgl->lpage_size); + + sgl->user_addr = user_addr; + sgl->user_size = user_size; + sgl->sgl_size = genwqe_sgl_size(sgl->nr_pages); + + if (get_order(sgl->sgl_size) > MAX_ORDER) { + dev_err(&pci_dev->dev, + "[%s] err: too much memory requested!\n", __func__); + return -ENOMEM; + } + + sgl->sgl = __genwqe_alloc_consistent(cd, sgl->sgl_size, + &sgl->sgl_dma_addr); + if (sgl->sgl == NULL) { + dev_err(&pci_dev->dev, + "[%s] err: no memory available!\n", __func__); + return -ENOMEM; + } + + /* Only use buffering on incomplete pages */ + if ((sgl->fpage_size != 0) && (sgl->fpage_size != PAGE_SIZE)) { + sgl->fpage = __genwqe_alloc_consistent(cd, PAGE_SIZE, + &sgl->fpage_dma_addr); + if (sgl->fpage == NULL) + goto err_out; + + /* Sync with user memory */ + if (copy_from_user(sgl->fpage + sgl->fpage_offs, + user_addr, sgl->fpage_size)) { + rc = -EFAULT; + goto err_out; + } + } + if (sgl->lpage_size != 0) { + sgl->lpage = __genwqe_alloc_consistent(cd, PAGE_SIZE, + &sgl->lpage_dma_addr); + if (sgl->lpage == NULL) + goto err_out1; + + /* Sync with user memory */ + if (copy_from_user(sgl->lpage, user_addr + user_size - + sgl->lpage_size, sgl->lpage_size)) { + rc = -EFAULT; + goto err_out1; + } + } + return 0; + + err_out1: + __genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage, + sgl->fpage_dma_addr); + err_out: + __genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl, + sgl->sgl_dma_addr); + return -ENOMEM; +} + +int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, + dma_addr_t *dma_list) +{ + int i = 0, j = 0, p; + unsigned long dma_offs, map_offs; + dma_addr_t prev_daddr = 0; + struct sg_entry *s, *last_s = NULL; + size_t size = sgl->user_size; + + dma_offs = 128; /* next block if needed/dma_offset */ + map_offs = sgl->fpage_offs; /* offset in first page */ + + s = &sgl->sgl[0]; /* first set of 8 entries */ + p = 0; /* page */ + while (p < sgl->nr_pages) { + dma_addr_t daddr; + unsigned int size_to_map; + + /* always write the chaining entry, cleanup is done later */ + j = 0; + s[j].target_addr = cpu_to_be64(sgl->sgl_dma_addr + dma_offs); + s[j].len = cpu_to_be32(128); + s[j].flags = cpu_to_be32(SG_CHAINED); + j++; + + while (j < 8) { + /* DMA mapping for requested page, offs, size */ + size_to_map = min(size, PAGE_SIZE - map_offs); + + if ((p == 0) && (sgl->fpage != NULL)) { + daddr = sgl->fpage_dma_addr + map_offs; + + } else if ((p == sgl->nr_pages - 1) && + (sgl->lpage != NULL)) { + daddr = sgl->lpage_dma_addr; + } else { + daddr = dma_list[p] + map_offs; + } + + size -= size_to_map; + map_offs = 0; + + if (prev_daddr == daddr) { + u32 prev_len = be32_to_cpu(last_s->len); + + /* pr_info("daddr combining: " + "%016llx/%08x -> %016llx\n", + prev_daddr, prev_len, daddr); */ + + last_s->len = cpu_to_be32(prev_len + + size_to_map); + + p++; /* process next page */ + if (p == sgl->nr_pages) + goto fixup; /* nothing to do */ + + prev_daddr = daddr + 
size_to_map; + continue; + } + + /* start new entry */ + s[j].target_addr = cpu_to_be64(daddr); + s[j].len = cpu_to_be32(size_to_map); + s[j].flags = cpu_to_be32(SG_DATA); + prev_daddr = daddr + size_to_map; + last_s = &s[j]; + j++; + + p++; /* process next page */ + if (p == sgl->nr_pages) + goto fixup; /* nothing to do */ + } + dma_offs += 128; + s += 8; /* continue 8 elements further */ + } + fixup: + if (j == 1) { /* combining happened on last entry! */ + s -= 8; /* full shift needed on previous sgl block */ + j = 7; /* shift all elements */ + } + + for (i = 0; i < j; i++) /* move elements 1 up */ + s[i] = s[i + 1]; + + s[i].target_addr = cpu_to_be64(0); + s[i].len = cpu_to_be32(0); + s[i].flags = cpu_to_be32(SG_END_LIST); + return 0; +} + +/** + * genwqe_free_sync_sgl() - Free memory for sgl and overlapping pages + * + * After the DMA transfer has been completed we free the memory for + * the sgl and the cached pages. Data is being transferred from cached + * pages into user-space buffers. + */ +int genwqe_free_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl) +{ + int rc = 0; + struct pci_dev *pci_dev = cd->pci_dev; + + if (sgl->fpage) { + if (copy_to_user(sgl->user_addr, sgl->fpage + sgl->fpage_offs, + sgl->fpage_size)) { + dev_err(&pci_dev->dev, "[%s] err: copying fpage!\n", + __func__); + rc = -EFAULT; + } + __genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage, + sgl->fpage_dma_addr); + sgl->fpage = NULL; + sgl->fpage_dma_addr = 0; + } + if (sgl->lpage) { + if (copy_to_user(sgl->user_addr + sgl->user_size - + sgl->lpage_size, sgl->lpage, + sgl->lpage_size)) { + dev_err(&pci_dev->dev, "[%s] err: copying lpage!\n", + __func__); + rc = -EFAULT; + } + __genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage, + sgl->lpage_dma_addr); + sgl->lpage = NULL; + sgl->lpage_dma_addr = 0; + } + __genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl, + sgl->sgl_dma_addr); + + sgl->sgl = NULL; + sgl->sgl_dma_addr = 0x0; + sgl->sgl_size = 0; + return rc; +} + +/** + * free_user_pages() - Give pinned pages back + * + * Documentation of get_user_pages is in mm/memory.c: + * + * If the page is written to, set_page_dirty (or set_page_dirty_lock, + * as appropriate) must be called after the page is finished with, and + * before put_page is called. + * + * FIXME Could be of use to others and might belong in the generic + * code, if others agree. E.g. + * ll_free_user_pages in drivers/staging/lustre/lustre/llite/rw26.c + * ceph_put_page_vector in net/ceph/pagevec.c + * maybe more? + */ +static int free_user_pages(struct page **page_list, unsigned int nr_pages, + int dirty) +{ + unsigned int i; + + for (i = 0; i < nr_pages; i++) { + if (page_list[i] != NULL) { + if (dirty) + set_page_dirty_lock(page_list[i]); + put_page(page_list[i]); + } + } + return 0; +} + +/** + * genwqe_user_vmap() - Map user-space memory to virtual kernel memory + * @cd: pointer to genwqe device + * @m: mapping params + * @uaddr: user virtual address + * @size: size of memory to be mapped + * + * We need to think about how we could speed this up. Of course it is + * not a good idea to do this over and over again, like we are + * currently doing it. Nevertheless, I am curious where on the path + * the time is spent. Most probably within the memory + * allocation functions, but maybe also in the DMA mapping code. + * + * Restrictions: The maximum size of the possible mapping currently depends + * on the amount of memory we can get using kzalloc() for the + * page_list and pci_alloc_consistent for the sg_list. 
+ * The sg_list is currently itself not scattered, which could + * be fixed with some effort. The page_list must be split into + * PAGE_SIZE chunks too. All that will make the complicated + * code more complicated. + * + * Return: 0 if success + */ +int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, void *uaddr, + unsigned long size, struct ddcb_requ *req) +{ + int rc = -EINVAL; + unsigned long data, offs; + struct pci_dev *pci_dev = cd->pci_dev; + + if ((uaddr == NULL) || (size == 0)) { + m->size = 0; /* mark unused and not added */ + return -EINVAL; + } + m->u_vaddr = uaddr; + m->size = size; + + /* determine space needed for page_list. */ + data = (unsigned long)uaddr; + offs = offset_in_page(data); + m->nr_pages = DIV_ROUND_UP(offs + size, PAGE_SIZE); + + m->page_list = kcalloc(m->nr_pages, + sizeof(struct page *) + sizeof(dma_addr_t), + GFP_KERNEL); + if (!m->page_list) { + dev_err(&pci_dev->dev, "err: alloc page_list failed\n"); + m->nr_pages = 0; + m->u_vaddr = NULL; + m->size = 0; /* mark unused and not added */ + return -ENOMEM; + } + m->dma_list = (dma_addr_t *)(m->page_list + m->nr_pages); + + /* pin user pages in memory */ + rc = get_user_pages_fast(data & PAGE_MASK, /* page aligned addr */ + m->nr_pages, + 1, /* write by caller */ + m->page_list); /* ptrs to pages */ + if (rc < 0) + goto fail_get_user_pages; + + /* assumption: get_user_pages can be killed by signals. */ + if (rc < m->nr_pages) { + free_user_pages(m->page_list, rc, 0); + rc = -EFAULT; + goto fail_get_user_pages; + } + + rc = genwqe_map_pages(cd, m->page_list, m->nr_pages, m->dma_list); + if (rc != 0) + goto fail_free_user_pages; + + return 0; + + fail_free_user_pages: + free_user_pages(m->page_list, m->nr_pages, 0); + + fail_get_user_pages: + kfree(m->page_list); + m->page_list = NULL; + m->dma_list = NULL; + m->nr_pages = 0; + m->u_vaddr = NULL; + m->size = 0; /* mark unused and not added */ + return rc; +} + +/** + * genwqe_user_vunmap() - Undo mapping of user-space mem to virtual kernel + * memory + * @cd: pointer to genwqe device + * @m: mapping params + */ +int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m, + struct ddcb_requ *req) +{ + struct pci_dev *pci_dev = cd->pci_dev; + + if (!dma_mapping_used(m)) { + dev_err(&pci_dev->dev, "[%s] err: mapping %p not used!\n", + __func__, m); + return -EINVAL; + } + + if (m->dma_list) + genwqe_unmap_pages(cd, m->dma_list, m->nr_pages); + + if (m->page_list) { + free_user_pages(m->page_list, m->nr_pages, 1); + + kfree(m->page_list); + m->page_list = NULL; + m->dma_list = NULL; + m->nr_pages = 0; + } + + m->u_vaddr = NULL; + m->size = 0; /* mark as unused and not added */ + return 0; +} + +/** + * genwqe_card_type() - Get chip type SLU Configuration Register + * @cd: pointer to the genwqe device descriptor + * Return: 0: Altera Stratix-IV 230 + * 1: Altera Stratix-IV 530 + * 2: Altera Stratix-V A4 + * 3: Altera Stratix-V A7 + */ +u8 genwqe_card_type(struct genwqe_dev *cd) +{ + u64 card_type = cd->slu_unitcfg; + + return (u8)((card_type & IO_SLU_UNITCFG_TYPE_MASK) >> 20); +} + +/** + * genwqe_card_reset() - Reset the card + * @cd: pointer to the genwqe device descriptor + */ +int genwqe_card_reset(struct genwqe_dev *cd) +{ + u64 softrst; + struct pci_dev *pci_dev = cd->pci_dev; + + if (!genwqe_is_privileged(cd)) + return -ENODEV; + + /* new SL */ + __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, 0x1ull); + msleep(1000); + __genwqe_readq(cd, IO_HSU_FIR_CLR); + __genwqe_readq(cd, IO_APP_FIR_CLR); + __genwqe_readq(cd, IO_SLU_FIR_CLR); + + /* 
+ * Read-modify-write to preserve the stealth bits + * + * For SL >= 039, Stealth WE bit allows removing + * the read-modify-write. + * r-m-w may require a mask 0x3C to avoid hitting hard + * reset again for error reset (should be 0, chicken). + */ + softrst = __genwqe_readq(cd, IO_SLC_CFGREG_SOFTRESET) & 0x3cull; + __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, softrst | 0x2ull); + + /* give ERRORRESET some time to finish */ + msleep(50); + + if (genwqe_need_err_masking(cd)) { + dev_info(&pci_dev->dev, + "[%s] masking errors for old bitstreams\n", __func__); + __genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull); + } + return 0; +} + +int genwqe_read_softreset(struct genwqe_dev *cd) +{ + u64 bitstream; + + if (!genwqe_is_privileged(cd)) + return -ENODEV; + + bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM) & 0x1; + cd->softreset = (bitstream == 0) ? 0x8ull : 0xcull; + return 0; +} + +/** + * genwqe_set_interrupt_capability() - Configure MSI capability structure + * @cd: pointer to the device + * Return: 0 if no error + */ +int genwqe_set_interrupt_capability(struct genwqe_dev *cd, int count) +{ + int rc; + struct pci_dev *pci_dev = cd->pci_dev; + + rc = pci_enable_msi_range(pci_dev, 1, count); + if (rc < 0) + return rc; + + cd->flags |= GENWQE_FLAG_MSI_ENABLED; + return 0; +} + +/** + * genwqe_reset_interrupt_capability() - Undo genwqe_set_interrupt_capability() + * @cd: pointer to the device + */ +void genwqe_reset_interrupt_capability(struct genwqe_dev *cd) +{ + struct pci_dev *pci_dev = cd->pci_dev; + + if (cd->flags & GENWQE_FLAG_MSI_ENABLED) { + pci_disable_msi(pci_dev); + cd->flags &= ~GENWQE_FLAG_MSI_ENABLED; + } +} + +/** + * set_reg_idx() - Fill array with data. Ignore illegal offsets. + * @cd: card device + * @r: debug register array + * @i: index to desired entry + * @m: maximum possible entries + * @addr: addr which is read + * @idx: index in debug array + * @val: read value + */ +static int set_reg_idx(struct genwqe_dev *cd, struct genwqe_reg *r, + unsigned int *i, unsigned int m, u32 addr, u32 idx, + u64 val) +{ + if (WARN_ON_ONCE(*i >= m)) + return -EFAULT; + + r[*i].addr = addr; + r[*i].idx = idx; + r[*i].val = val; + ++*i; + return 0; +} + +static int set_reg(struct genwqe_dev *cd, struct genwqe_reg *r, + unsigned int *i, unsigned int m, u32 addr, u64 val) +{ + return set_reg_idx(cd, r, i, m, addr, 0, val); +} + +int genwqe_read_ffdc_regs(struct genwqe_dev *cd, struct genwqe_reg *regs, + unsigned int max_regs, int all) +{ + unsigned int i, j, idx = 0; + u32 ufir_addr, ufec_addr, sfir_addr, sfec_addr; + u64 gfir, sluid, appid, ufir, ufec, sfir, sfec; + + /* Global FIR */ + gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); + set_reg(cd, regs, &idx, max_regs, IO_SLC_CFGREG_GFIR, gfir); + + /* UnitCfg for SLU */ + sluid = __genwqe_readq(cd, IO_SLU_UNITCFG); /* 0x00000000 */ + set_reg(cd, regs, &idx, max_regs, IO_SLU_UNITCFG, sluid); + + /* UnitCfg for APP */ + appid = __genwqe_readq(cd, IO_APP_UNITCFG); /* 0x02000000 */ + set_reg(cd, regs, &idx, max_regs, IO_APP_UNITCFG, appid); + + /* Check all chip Units */ + for (i = 0; i < GENWQE_MAX_UNITS; i++) { + + /* Unit FIR */ + ufir_addr = (i << 24) | 0x008; + ufir = __genwqe_readq(cd, ufir_addr); + set_reg(cd, regs, &idx, max_regs, ufir_addr, ufir); + + /* Unit FEC */ + ufec_addr = (i << 24) | 0x018; + ufec = __genwqe_readq(cd, ufec_addr); + set_reg(cd, regs, &idx, max_regs, ufec_addr, ufec); + + for (j = 0; j < 64; j++) { + /* wherever there is a primary 1, read the secondary */ + if (!all && (!(ufir & (1ull << j)))) + continue; + + 
sfir_addr = (i << 24) | (0x100 + 8 * j); + sfir = __genwqe_readq(cd, sfir_addr); + set_reg(cd, regs, &idx, max_regs, sfir_addr, sfir); + + sfec_addr = (i << 24) | (0x300 + 8 * j); + sfec = __genwqe_readq(cd, sfec_addr); + set_reg(cd, regs, &idx, max_regs, sfec_addr, sfec); + } + } + + /* fill with invalid data until end */ + for (i = idx; i < max_regs; i++) { + regs[i].addr = 0xffffffff; + regs[i].val = 0xffffffffffffffffull; + } + return idx; +} + +/** + * genwqe_ffdc_buff_size() - Calculates the number of dump registers + */ +int genwqe_ffdc_buff_size(struct genwqe_dev *cd, int uid) +{ + int entries = 0, ring, traps, traces, trace_entries; + u32 eevptr_addr, l_addr, d_len, d_type; + u64 eevptr, val, addr; + + eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER; + eevptr = __genwqe_readq(cd, eevptr_addr); + + if ((eevptr != 0x0) && (eevptr != -1ull)) { + l_addr = GENWQE_UID_OFFS(uid) | eevptr; + + while (1) { + val = __genwqe_readq(cd, l_addr); + + if ((val == 0x0) || (val == -1ull)) + break; + + /* 38:24 */ + d_len = (val & 0x0000007fff000000ull) >> 24; + + /* 39 */ + d_type = (val & 0x0000008000000000ull) >> 36; + + if (d_type) { /* repeat */ + entries += d_len; + } else { /* size in bytes! */ + entries += d_len >> 3; + } + + l_addr += 8; + } + } + + for (ring = 0; ring < 8; ring++) { + addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring); + val = __genwqe_readq(cd, addr); + + if ((val == 0x0ull) || (val == -1ull)) + continue; + + traps = (val >> 24) & 0xff; + traces = (val >> 16) & 0xff; + trace_entries = val & 0xffff; + + entries += traps + (traces * trace_entries); + } + return entries; +} + +/** + * genwqe_ffdc_buff_read() - Implements LogoutExtendedErrorRegisters procedure + */ +int genwqe_ffdc_buff_read(struct genwqe_dev *cd, int uid, + struct genwqe_reg *regs, unsigned int max_regs) +{ + int i, traps, traces, trace, trace_entries, trace_entry, ring; + unsigned int idx = 0; + u32 eevptr_addr, l_addr, d_addr, d_len, d_type; + u64 eevptr, e, val, addr; + + eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER; + eevptr = __genwqe_readq(cd, eevptr_addr); + + if ((eevptr != 0x0) && (eevptr != 0xffffffffffffffffull)) { + l_addr = GENWQE_UID_OFFS(uid) | eevptr; + while (1) { + e = __genwqe_readq(cd, l_addr); + if ((e == 0x0) || (e == 0xffffffffffffffffull)) + break; + + d_addr = (e & 0x0000000000ffffffull); /* 23:0 */ + d_len = (e & 0x0000007fff000000ull) >> 24; /* 38:24 */ + d_type = (e & 0x0000008000000000ull) >> 36; /* 39 */ + d_addr |= GENWQE_UID_OFFS(uid); + + if (d_type) { + for (i = 0; i < (int)d_len; i++) { + val = __genwqe_readq(cd, d_addr); + set_reg_idx(cd, regs, &idx, max_regs, + d_addr, i, val); + } + } else { + d_len >>= 3; /* Size in bytes! */ + for (i = 0; i < (int)d_len; i++, d_addr += 8) { + val = __genwqe_readq(cd, d_addr); + set_reg_idx(cd, regs, &idx, max_regs, + d_addr, 0, val); + } + } + l_addr += 8; + } + } + + /* + * To save time, there are only 6 traces populated on Uid=2, + * Ring=1, each with iters=512. 
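+ *
+ * As a worked example with made-up numbers: a diag map value with
+ * traps = 4, traces = 6 and trace_entries = 512 contributes
+ * 4 + 6 * 512 = 3076 entries to the total computed in
+ * genwqe_ffdc_buff_size() above.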
+ */ + for (ring = 0; ring < 8; ring++) { /* 0 is fls, 1 is fds, + 2...7 are ASI rings */ + addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring); + val = __genwqe_readq(cd, addr); + + if ((val == 0x0ull) || (val == -1ull)) + continue; + + traps = (val >> 24) & 0xff; /* Number of Traps */ + traces = (val >> 16) & 0xff; /* Number of Traces */ + trace_entries = val & 0xffff; /* Entries per trace */ + + /* Note: This is a combined loop that dumps both the traps */ + /* (for the trace == 0 case) as well as the traces 1 to */ + /* 'traces'. */ + for (trace = 0; trace <= traces; trace++) { + u32 diag_sel = + GENWQE_EXTENDED_DIAG_SELECTOR(ring, trace); + + addr = (GENWQE_UID_OFFS(uid) | + IO_EXTENDED_DIAG_SELECTOR); + __genwqe_writeq(cd, addr, diag_sel); + + for (trace_entry = 0; + trace_entry < (trace ? trace_entries : traps); + trace_entry++) { + addr = (GENWQE_UID_OFFS(uid) | + IO_EXTENDED_DIAG_READ_MBX); + val = __genwqe_readq(cd, addr); + set_reg_idx(cd, regs, &idx, max_regs, addr, + (diag_sel<<16) | trace_entry, val); + } + } + } + return 0; +} + +/** + * genwqe_write_vreg() - Write register in virtual window + * + * Note, these registers are only accessible to the PF through the + * VF-window. It is not intended for the VF to access. + */ +int genwqe_write_vreg(struct genwqe_dev *cd, u32 reg, u64 val, int func) +{ + __genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf); + __genwqe_writeq(cd, reg, val); + return 0; +} + +/** + * genwqe_read_vreg() - Read register in virtual window + * + * Note, these registers are only accessible to the PF through the + * VF-window. It is not intended for the VF to access. + */ +u64 genwqe_read_vreg(struct genwqe_dev *cd, u32 reg, int func) +{ + __genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf); + return __genwqe_readq(cd, reg); +} + +/** + * genwqe_base_clock_frequency() - Determine base clock frequency of the card + * + * Note: From a design perspective it turned out to be a bad idea to + * use codes here to specify the frequency/speed values. An old + * driver cannot understand new codes and is therefore always a + * problem. Better is to measure out the value or put the + * speed/frequency directly into a register which is always a valid + * value for old as well as for new software. + * + * Return: Card clock in MHz + */ +int genwqe_base_clock_frequency(struct genwqe_dev *cd) +{ + u16 speed; /* MHz MHz MHz MHz */ + static const int speed_grade[] = { 250, 200, 166, 175 }; + + speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full); + if (speed >= ARRAY_SIZE(speed_grade)) + return 0; /* illegal value */ + + return speed_grade[speed]; +} + +/** + * genwqe_stop_traps() - Stop traps + * + * Before reading out the analysis data, we need to stop the traps. + */ +void genwqe_stop_traps(struct genwqe_dev *cd) +{ + __genwqe_writeq(cd, IO_SLC_MISC_DEBUG_SET, 0xcull); +} + +/** + * genwqe_start_traps() - Start traps + * + * After having read the data, we can/must enable the traps again. + */ +void genwqe_start_traps(struct genwqe_dev *cd) +{ + __genwqe_writeq(cd, IO_SLC_MISC_DEBUG_CLR, 0xcull); + + if (genwqe_need_err_masking(cd)) + __genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull); +} diff --git a/kernel/drivers/misc/genwqe/genwqe_driver.h b/kernel/drivers/misc/genwqe/genwqe_driver.h new file mode 100644 index 000000000..15355350e --- /dev/null +++ b/kernel/drivers/misc/genwqe/genwqe_driver.h @@ -0,0 +1,77 @@ +#ifndef __GENWQE_DRIVER_H__ +#define __GENWQE_DRIVER_H__ + +/** + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 
2013 + * + * Author: Frank Haverkamp <haver@linux.vnet.ibm.com> + * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com> + * Author: Michael Jung <mijung@gmx.net> + * Author: Michael Ruettger <michael@ibmra.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/types.h> +#include <linux/stddef.h> +#include <linux/cdev.h> +#include <linux/list.h> +#include <linux/kthread.h> +#include <linux/scatterlist.h> +#include <linux/iommu.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> +#include <linux/platform_device.h> +#include <linux/printk.h> + +#include <asm/byteorder.h> +#include <linux/genwqe/genwqe_card.h> + +#define DRV_VERSION "2.0.25" + +/* + * Static minor number assignment, until we decide/implement + * something dynamic. + */ +#define GENWQE_MAX_MINOR 128 /* up to 128 possible genwqe devices */ + +/** + * ddcb_requ_alloc() - Allocate a new DDCB execution request + * + * This data structure contains the user visible fields of the DDCB + * to be executed. + * + * Return: ptr to genwqe_ddcb_cmd data structure + */ +struct genwqe_ddcb_cmd *ddcb_requ_alloc(void); + +/** + * ddcb_requ_free() - Free DDCB execution request. + * @req: ptr to genwqe_ddcb_cmd data structure. + */ +void ddcb_requ_free(struct genwqe_ddcb_cmd *req); + +u32 genwqe_crc32(u8 *buff, size_t len, u32 init); + +static inline void genwqe_hexdump(struct pci_dev *pci_dev, + const void *buff, unsigned int size) +{ + char prefix[32]; + + scnprintf(prefix, sizeof(prefix), "%s %s: ", + GENWQE_DEVNAME, pci_name(pci_dev)); + + print_hex_dump_debug(prefix, DUMP_PREFIX_OFFSET, 16, 1, buff, + size, true); +} + +#endif /* __GENWQE_DRIVER_H__ */ diff --git a/kernel/drivers/misc/hmc6352.c b/kernel/drivers/misc/hmc6352.c new file mode 100644 index 000000000..90520d766 --- /dev/null +++ b/kernel/drivers/misc/hmc6352.c @@ -0,0 +1,155 @@ +/* + * hmc6352.c - Honeywell Compass Driver + * + * Copyright (C) 2009 Intel Corp + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/i2c.h> +#include <linux/err.h> +#include <linux/delay.h> +#include <linux/sysfs.h> + +static DEFINE_MUTEX(compass_mutex); + +static int compass_command(struct i2c_client *c, u8 cmd) +{ + int ret = i2c_master_send(c, &cmd, 1); + if (ret < 0) + dev_warn(&c->dev, "command '%c' failed.\n", cmd); + return ret; +} + +static int compass_store(struct device *dev, const char *buf, size_t count, + const char *map) +{ + struct i2c_client *c = to_i2c_client(dev); + int ret; + unsigned long val; + + ret = kstrtoul(buf, 10, &val); + if (ret) + return ret; + if (val >= strlen(map)) + return -EINVAL; + mutex_lock(&compass_mutex); + ret = compass_command(c, map[val]); + mutex_unlock(&compass_mutex); + if (ret < 0) + return ret; + return count; +} + +static ssize_t compass_calibration_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + return compass_store(dev, buf, count, "EC"); +} + +static ssize_t compass_power_mode_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + return compass_store(dev, buf, count, "SW"); +} + +static ssize_t compass_heading_data_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + unsigned char i2c_data[2]; + int ret; + + mutex_lock(&compass_mutex); + ret = compass_command(client, 'A'); + if (ret != 1) { + mutex_unlock(&compass_mutex); + return ret; + } + msleep(10); /* after sending the 'A' cmd we need to wait 7-10 ms */ + ret = i2c_master_recv(client, i2c_data, 2); + mutex_unlock(&compass_mutex); + if (ret < 0) { + dev_warn(dev, "i2c read data cmd failed\n"); + return ret; + } + ret = (i2c_data[0] << 8) | i2c_data[1]; + return sprintf(buf, "%d.%d\n", ret/10, ret%10); +} + + +static DEVICE_ATTR(heading0_input, S_IRUGO, compass_heading_data_show, NULL); +static DEVICE_ATTR(calibration, S_IWUSR, NULL, compass_calibration_store); +static DEVICE_ATTR(power_state, S_IWUSR, NULL, compass_power_mode_store); + +static struct attribute *mid_att_compass[] = { + &dev_attr_heading0_input.attr, + &dev_attr_calibration.attr, + &dev_attr_power_state.attr, + NULL +}; + +static const struct attribute_group m_compass_gr = { + .name = "hmc6352", + .attrs = mid_att_compass +}; + +static int hmc6352_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int res; + + res = sysfs_create_group(&client->dev.kobj, &m_compass_gr); + if (res) { + dev_err(&client->dev, "sysfs_create_group failed\n"); + return res; + } + dev_info(&client->dev, "%s HMC6352 compass chip found\n", + client->name); + return 0; +} + +static int hmc6352_remove(struct i2c_client *client) +{ + sysfs_remove_group(&client->dev.kobj, &m_compass_gr); + return 0; +} + +static struct i2c_device_id hmc6352_id[] = { + { "hmc6352", 0 }, + { } +}; + +MODULE_DEVICE_TABLE(i2c, hmc6352_id); + +static struct i2c_driver hmc6352_driver = { + .driver = { + .name = "hmc6352", + }, + .probe = hmc6352_probe, + .remove = hmc6352_remove, + .id_table = hmc6352_id, +}; + +module_i2c_driver(hmc6352_driver); + +MODULE_AUTHOR("Kalhan Trisal <kalhan.trisal@intel.com>"); +MODULE_DESCRIPTION("hmc6352 Compass Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/misc/hpilo.c b/kernel/drivers/misc/hpilo.c new file mode 100644 index 000000000..b83e3ca12 --- /dev/null +++ b/kernel/drivers/misc/hpilo.c @@ -0,0 +1,912 @@ +/* + * 
Driver for the HP iLO management processor. + * + * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. + * David Altobelli <david.altobelli@hp.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/device.h> +#include <linux/file.h> +#include <linux/cdev.h> +#include <linux/sched.h> +#include <linux/spinlock.h> +#include <linux/delay.h> +#include <linux/uaccess.h> +#include <linux/io.h> +#include <linux/wait.h> +#include <linux/poll.h> +#include <linux/slab.h> +#include "hpilo.h" + +static struct class *ilo_class; +static unsigned int ilo_major; +static unsigned int max_ccb = 16; +static char ilo_hwdev[MAX_ILO_DEV]; + +static inline int get_entry_id(int entry) +{ + return (entry & ENTRY_MASK_DESCRIPTOR) >> ENTRY_BITPOS_DESCRIPTOR; +} + +static inline int get_entry_len(int entry) +{ + return ((entry & ENTRY_MASK_QWORDS) >> ENTRY_BITPOS_QWORDS) << 3; +} + +static inline int mk_entry(int id, int len) +{ + int qlen = len & 7 ? (len >> 3) + 1 : len >> 3; + return id << ENTRY_BITPOS_DESCRIPTOR | qlen << ENTRY_BITPOS_QWORDS; +} + +static inline int desc_mem_sz(int nr_entry) +{ + return nr_entry << L2_QENTRY_SZ; +} + +/* + * FIFO queues, shared with hardware. + * + * If a queue has empty slots, an entry is added to the queue tail, + * and that entry is marked as occupied. + * Entries can be dequeued from the head of the list, when the device + * has marked the entry as consumed. + * + * Returns true on successful queue/dequeue, false on failure. 
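+ *
+ * Entry encoding, as defined by the masks in hpilo.h (worked example,
+ * illustrative only): bits 9:0 hold the length in qwords, bits 21:10
+ * the descriptor id, bit 22 the Consumed flag and bit 23 the Occupied
+ * flag. mk_entry(3, 100) above rounds 100 bytes up to 13 qwords and
+ * returns (3 << 10) | 13 = 0xc0d; get_entry_len() on that entry
+ * yields 13 << 3 = 104 bytes.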
+ */ +static int fifo_enqueue(struct ilo_hwinfo *hw, char *fifobar, int entry) +{ + struct fifo *fifo_q = FIFOBARTOHANDLE(fifobar); + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&hw->fifo_lock, flags); + if (!(fifo_q->fifobar[(fifo_q->tail + 1) & fifo_q->imask] + & ENTRY_MASK_O)) { + fifo_q->fifobar[fifo_q->tail & fifo_q->imask] |= + (entry & ENTRY_MASK_NOSTATE) | fifo_q->merge; + fifo_q->tail += 1; + ret = 1; + } + spin_unlock_irqrestore(&hw->fifo_lock, flags); + + return ret; +} + +static int fifo_dequeue(struct ilo_hwinfo *hw, char *fifobar, int *entry) +{ + struct fifo *fifo_q = FIFOBARTOHANDLE(fifobar); + unsigned long flags; + int ret = 0; + u64 c; + + spin_lock_irqsave(&hw->fifo_lock, flags); + c = fifo_q->fifobar[fifo_q->head & fifo_q->imask]; + if (c & ENTRY_MASK_C) { + if (entry) + *entry = c & ENTRY_MASK_NOSTATE; + + fifo_q->fifobar[fifo_q->head & fifo_q->imask] = + (c | ENTRY_MASK) + 1; + fifo_q->head += 1; + ret = 1; + } + spin_unlock_irqrestore(&hw->fifo_lock, flags); + + return ret; +} + +static int fifo_check_recv(struct ilo_hwinfo *hw, char *fifobar) +{ + struct fifo *fifo_q = FIFOBARTOHANDLE(fifobar); + unsigned long flags; + int ret = 0; + u64 c; + + spin_lock_irqsave(&hw->fifo_lock, flags); + c = fifo_q->fifobar[fifo_q->head & fifo_q->imask]; + if (c & ENTRY_MASK_C) + ret = 1; + spin_unlock_irqrestore(&hw->fifo_lock, flags); + + return ret; +} + +static int ilo_pkt_enqueue(struct ilo_hwinfo *hw, struct ccb *ccb, + int dir, int id, int len) +{ + char *fifobar; + int entry; + + if (dir == SENDQ) + fifobar = ccb->ccb_u1.send_fifobar; + else + fifobar = ccb->ccb_u3.recv_fifobar; + + entry = mk_entry(id, len); + return fifo_enqueue(hw, fifobar, entry); +} + +static int ilo_pkt_dequeue(struct ilo_hwinfo *hw, struct ccb *ccb, + int dir, int *id, int *len, void **pkt) +{ + char *fifobar, *desc; + int entry = 0, pkt_id = 0; + int ret; + + if (dir == SENDQ) { + fifobar = ccb->ccb_u1.send_fifobar; + desc = ccb->ccb_u2.send_desc; + } else { + fifobar = ccb->ccb_u3.recv_fifobar; + desc = ccb->ccb_u4.recv_desc; + } + + ret = fifo_dequeue(hw, fifobar, &entry); + if (ret) { + pkt_id = get_entry_id(entry); + if (id) + *id = pkt_id; + if (len) + *len = get_entry_len(entry); + if (pkt) + *pkt = (void *)(desc + desc_mem_sz(pkt_id)); + } + + return ret; +} + +static int ilo_pkt_recv(struct ilo_hwinfo *hw, struct ccb *ccb) +{ + char *fifobar = ccb->ccb_u3.recv_fifobar; + + return fifo_check_recv(hw, fifobar); +} + +static inline void doorbell_set(struct ccb *ccb) +{ + iowrite8(1, ccb->ccb_u5.db_base); +} + +static inline void doorbell_clr(struct ccb *ccb) +{ + iowrite8(2, ccb->ccb_u5.db_base); +} + +static inline int ctrl_set(int l2sz, int idxmask, int desclim) +{ + int active = 0, go = 1; + return l2sz << CTRL_BITPOS_L2SZ | + idxmask << CTRL_BITPOS_FIFOINDEXMASK | + desclim << CTRL_BITPOS_DESCLIMIT | + active << CTRL_BITPOS_A | + go << CTRL_BITPOS_G; +} + +static void ctrl_setup(struct ccb *ccb, int nr_desc, int l2desc_sz) +{ + /* for simplicity, use the same parameters for send and recv ctrls */ + ccb->send_ctrl = ctrl_set(l2desc_sz, nr_desc-1, nr_desc-1); + ccb->recv_ctrl = ctrl_set(l2desc_sz, nr_desc-1, nr_desc-1); +} + +static inline int fifo_sz(int nr_entry) +{ + /* size of a fifo is determined by the number of entries it contains */ + return (nr_entry * sizeof(u64)) + FIFOHANDLESIZE; +} + +static void fifo_setup(void *base_addr, int nr_entry) +{ + struct fifo *fifo_q = base_addr; + int i; + + /* set up an empty fifo */ + fifo_q->head = 0; + fifo_q->tail = 0; + 
fifo_q->reset = 0; + fifo_q->nrents = nr_entry; + fifo_q->imask = nr_entry - 1; + fifo_q->merge = ENTRY_MASK_O; + + for (i = 0; i < nr_entry; i++) + fifo_q->fifobar[i] = 0; +} + +static void ilo_ccb_close(struct pci_dev *pdev, struct ccb_data *data) +{ + struct ccb *driver_ccb = &data->driver_ccb; + struct ccb __iomem *device_ccb = data->mapped_ccb; + int retries; + + /* complicated dance to tell the hw we are stopping */ + doorbell_clr(driver_ccb); + iowrite32(ioread32(&device_ccb->send_ctrl) & ~(1 << CTRL_BITPOS_G), + &device_ccb->send_ctrl); + iowrite32(ioread32(&device_ccb->recv_ctrl) & ~(1 << CTRL_BITPOS_G), + &device_ccb->recv_ctrl); + + /* give iLO some time to process stop request */ + for (retries = MAX_WAIT; retries > 0; retries--) { + doorbell_set(driver_ccb); + udelay(WAIT_TIME); + if (!(ioread32(&device_ccb->send_ctrl) & (1 << CTRL_BITPOS_A)) + && + !(ioread32(&device_ccb->recv_ctrl) & (1 << CTRL_BITPOS_A))) + break; + } + if (retries == 0) + dev_err(&pdev->dev, "Closing, but controller still active\n"); + + /* clear the hw ccb */ + memset_io(device_ccb, 0, sizeof(struct ccb)); + + /* free resources used to back send/recv queues */ + pci_free_consistent(pdev, data->dma_size, data->dma_va, data->dma_pa); +} + +static int ilo_ccb_setup(struct ilo_hwinfo *hw, struct ccb_data *data, int slot) +{ + char *dma_va; + dma_addr_t dma_pa; + struct ccb *driver_ccb, *ilo_ccb; + + driver_ccb = &data->driver_ccb; + ilo_ccb = &data->ilo_ccb; + + data->dma_size = 2 * fifo_sz(NR_QENTRY) + + 2 * desc_mem_sz(NR_QENTRY) + + ILO_START_ALIGN + ILO_CACHE_SZ; + + data->dma_va = pci_alloc_consistent(hw->ilo_dev, data->dma_size, + &data->dma_pa); + if (!data->dma_va) + return -ENOMEM; + + dma_va = (char *)data->dma_va; + dma_pa = data->dma_pa; + + memset(dma_va, 0, data->dma_size); + + dma_va = (char *)roundup((unsigned long)dma_va, ILO_START_ALIGN); + dma_pa = roundup(dma_pa, ILO_START_ALIGN); + + /* + * Create two ccb's, one with virt addrs, one with phys addrs. + * Copy the phys addr ccb to device shared mem. 
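+ *
+ * With the defaults from hpilo.h (NR_QENTRY = 4, L2_QENTRY_SZ = 12),
+ * dma_size above works out roughly as: two descriptor areas of
+ * 4 << 12 = 16 KiB each, two fifos of 4 * 8 entry bytes plus the
+ * fifo bookkeeping header each, and ILO_START_ALIGN + ILO_CACHE_SZ
+ * bytes of slack consumed by the roundup() calls below.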
+ */ + ctrl_setup(driver_ccb, NR_QENTRY, L2_QENTRY_SZ); + ctrl_setup(ilo_ccb, NR_QENTRY, L2_QENTRY_SZ); + + fifo_setup(dma_va, NR_QENTRY); + driver_ccb->ccb_u1.send_fifobar = dma_va + FIFOHANDLESIZE; + ilo_ccb->ccb_u1.send_fifobar_pa = dma_pa + FIFOHANDLESIZE; + dma_va += fifo_sz(NR_QENTRY); + dma_pa += fifo_sz(NR_QENTRY); + + dma_va = (char *)roundup((unsigned long)dma_va, ILO_CACHE_SZ); + dma_pa = roundup(dma_pa, ILO_CACHE_SZ); + + fifo_setup(dma_va, NR_QENTRY); + driver_ccb->ccb_u3.recv_fifobar = dma_va + FIFOHANDLESIZE; + ilo_ccb->ccb_u3.recv_fifobar_pa = dma_pa + FIFOHANDLESIZE; + dma_va += fifo_sz(NR_QENTRY); + dma_pa += fifo_sz(NR_QENTRY); + + driver_ccb->ccb_u2.send_desc = dma_va; + ilo_ccb->ccb_u2.send_desc_pa = dma_pa; + dma_pa += desc_mem_sz(NR_QENTRY); + dma_va += desc_mem_sz(NR_QENTRY); + + driver_ccb->ccb_u4.recv_desc = dma_va; + ilo_ccb->ccb_u4.recv_desc_pa = dma_pa; + + driver_ccb->channel = slot; + ilo_ccb->channel = slot; + + driver_ccb->ccb_u5.db_base = hw->db_vaddr + (slot << L2_DB_SIZE); + ilo_ccb->ccb_u5.db_base = NULL; /* hw ccb's doorbell is not used */ + + return 0; +} + +static void ilo_ccb_open(struct ilo_hwinfo *hw, struct ccb_data *data, int slot) +{ + int pkt_id, pkt_sz; + struct ccb *driver_ccb = &data->driver_ccb; + + /* copy the ccb with physical addrs to device memory */ + data->mapped_ccb = (struct ccb __iomem *) + (hw->ram_vaddr + (slot * ILOHW_CCB_SZ)); + memcpy_toio(data->mapped_ccb, &data->ilo_ccb, sizeof(struct ccb)); + + /* put packets on the send and receive queues */ + pkt_sz = 0; + for (pkt_id = 0; pkt_id < NR_QENTRY; pkt_id++) { + ilo_pkt_enqueue(hw, driver_ccb, SENDQ, pkt_id, pkt_sz); + doorbell_set(driver_ccb); + } + + pkt_sz = desc_mem_sz(1); + for (pkt_id = 0; pkt_id < NR_QENTRY; pkt_id++) + ilo_pkt_enqueue(hw, driver_ccb, RECVQ, pkt_id, pkt_sz); + + /* the ccb is ready to use */ + doorbell_clr(driver_ccb); +} + +static int ilo_ccb_verify(struct ilo_hwinfo *hw, struct ccb_data *data) +{ + int pkt_id, i; + struct ccb *driver_ccb = &data->driver_ccb; + + /* make sure iLO is really handling requests */ + for (i = MAX_WAIT; i > 0; i--) { + if (ilo_pkt_dequeue(hw, driver_ccb, SENDQ, &pkt_id, NULL, NULL)) + break; + udelay(WAIT_TIME); + } + + if (i == 0) { + dev_err(&hw->ilo_dev->dev, "Open could not dequeue a packet\n"); + return -EBUSY; + } + + ilo_pkt_enqueue(hw, driver_ccb, SENDQ, pkt_id, 0); + doorbell_set(driver_ccb); + return 0; +} + +static inline int is_channel_reset(struct ccb *ccb) +{ + /* check for this particular channel needing a reset */ + return FIFOBARTOHANDLE(ccb->ccb_u1.send_fifobar)->reset; +} + +static inline void set_channel_reset(struct ccb *ccb) +{ + /* set a flag indicating this channel needs a reset */ + FIFOBARTOHANDLE(ccb->ccb_u1.send_fifobar)->reset = 1; +} + +static inline int get_device_outbound(struct ilo_hwinfo *hw) +{ + return ioread32(&hw->mmio_vaddr[DB_OUT]); +} + +static inline int is_db_reset(int db_out) +{ + return db_out & (1 << DB_RESET); +} + +static inline int is_device_reset(struct ilo_hwinfo *hw) +{ + /* check for global reset condition */ + return is_db_reset(get_device_outbound(hw)); +} + +static inline void clear_pending_db(struct ilo_hwinfo *hw, int clr) +{ + iowrite32(clr, &hw->mmio_vaddr[DB_OUT]); +} + +static inline void clear_device(struct ilo_hwinfo *hw) +{ + /* clear the device (reset bits, pending channel entries) */ + clear_pending_db(hw, -1); +} + +static inline void ilo_enable_interrupts(struct ilo_hwinfo *hw) +{ + iowrite8(ioread8(&hw->mmio_vaddr[DB_IRQ]) | 1, &hw->mmio_vaddr[DB_IRQ]); +} 
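+
+/*
+ * A hypothetical user-space sketch (not part of this driver) of how a
+ * client talks to one channel. It assumes udev maps the device names
+ * created in ilo_probe() below ("hpilo!d%dccb%d") to nodes such as
+ * /dev/hpilo/d0ccb2:
+ *
+ *	int fd = open("/dev/hpilo/d0ccb2", O_RDWR);
+ *	struct pollfd p = { .fd = fd, .events = POLLIN };
+ *
+ *	write(fd, req, req_len);		send a packet to iLO
+ *	poll(&p, 1, 1000);			wait until a response is queued
+ *	n = read(fd, resp, sizeof(resp));	copy the response back
+ *	close(fd);
+ *
+ * A read() failing with errno ENODEV means the device was reset; all
+ * ccbs must then be closed and reopened, see ilo_read() below.
+ */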
+ +static inline void ilo_disable_interrupts(struct ilo_hwinfo *hw) +{ + iowrite8(ioread8(&hw->mmio_vaddr[DB_IRQ]) & ~1, + &hw->mmio_vaddr[DB_IRQ]); +} + +static void ilo_set_reset(struct ilo_hwinfo *hw) +{ + int slot; + + /* + * Mapped memory is zeroed on ilo reset, so set a per ccb flag + * to indicate that this ccb needs to be closed and reopened. + */ + for (slot = 0; slot < max_ccb; slot++) { + if (!hw->ccb_alloc[slot]) + continue; + set_channel_reset(&hw->ccb_alloc[slot]->driver_ccb); + } +} + +static ssize_t ilo_read(struct file *fp, char __user *buf, + size_t len, loff_t *off) +{ + int err, found, cnt, pkt_id, pkt_len; + struct ccb_data *data = fp->private_data; + struct ccb *driver_ccb = &data->driver_ccb; + struct ilo_hwinfo *hw = data->ilo_hw; + void *pkt; + + if (is_channel_reset(driver_ccb)) { + /* + * If the device has been reset, applications + * need to close and reopen all ccbs. + */ + return -ENODEV; + } + + /* + * This function is to be called when data is expected + * in the channel, and will return an error if no packet is found + * during the loop below. The sleep/retry logic is to allow + * applications to call read() immediately post write(), + * and give iLO some time to process the sent packet. + */ + cnt = 20; + do { + /* look for a received packet */ + found = ilo_pkt_dequeue(hw, driver_ccb, RECVQ, &pkt_id, + &pkt_len, &pkt); + if (found) + break; + cnt--; + msleep(100); + } while (!found && cnt); + + if (!found) + return -EAGAIN; + + /* only copy the length of the received packet */ + if (pkt_len < len) + len = pkt_len; + + err = copy_to_user(buf, pkt, len); + + /* return the received packet to the queue */ + ilo_pkt_enqueue(hw, driver_ccb, RECVQ, pkt_id, desc_mem_sz(1)); + + return err ? -EFAULT : len; +} + +static ssize_t ilo_write(struct file *fp, const char __user *buf, + size_t len, loff_t *off) +{ + int err, pkt_id, pkt_len; + struct ccb_data *data = fp->private_data; + struct ccb *driver_ccb = &data->driver_ccb; + struct ilo_hwinfo *hw = data->ilo_hw; + void *pkt; + + if (is_channel_reset(driver_ccb)) + return -ENODEV; + + /* get a packet to send the user command */ + if (!ilo_pkt_dequeue(hw, driver_ccb, SENDQ, &pkt_id, &pkt_len, &pkt)) + return -EBUSY; + + /* limit the length to the length of the packet */ + if (pkt_len < len) + len = pkt_len; + + /* on failure, set the len to 0 to return empty packet to the device */ + err = copy_from_user(pkt, buf, len); + if (err) + len = 0; + + /* send the packet */ + ilo_pkt_enqueue(hw, driver_ccb, SENDQ, pkt_id, len); + doorbell_set(driver_ccb); + + return err ? 
-EFAULT : len; +} + +static unsigned int ilo_poll(struct file *fp, poll_table *wait) +{ + struct ccb_data *data = fp->private_data; + struct ccb *driver_ccb = &data->driver_ccb; + + poll_wait(fp, &data->ccb_waitq, wait); + + if (is_channel_reset(driver_ccb)) + return POLLERR; + else if (ilo_pkt_recv(data->ilo_hw, driver_ccb)) + return POLLIN | POLLRDNORM; + + return 0; +} + +static int ilo_close(struct inode *ip, struct file *fp) +{ + int slot; + struct ccb_data *data; + struct ilo_hwinfo *hw; + unsigned long flags; + + slot = iminor(ip) % max_ccb; + hw = container_of(ip->i_cdev, struct ilo_hwinfo, cdev); + + spin_lock(&hw->open_lock); + + if (hw->ccb_alloc[slot]->ccb_cnt == 1) { + + data = fp->private_data; + + spin_lock_irqsave(&hw->alloc_lock, flags); + hw->ccb_alloc[slot] = NULL; + spin_unlock_irqrestore(&hw->alloc_lock, flags); + + ilo_ccb_close(hw->ilo_dev, data); + + kfree(data); + } else + hw->ccb_alloc[slot]->ccb_cnt--; + + spin_unlock(&hw->open_lock); + + return 0; +} + +static int ilo_open(struct inode *ip, struct file *fp) +{ + int slot, error; + struct ccb_data *data; + struct ilo_hwinfo *hw; + unsigned long flags; + + slot = iminor(ip) % max_ccb; + hw = container_of(ip->i_cdev, struct ilo_hwinfo, cdev); + + /* new ccb allocation */ + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + spin_lock(&hw->open_lock); + + /* each fd private_data holds sw/hw view of ccb */ + if (hw->ccb_alloc[slot] == NULL) { + /* create a channel control block for this minor */ + error = ilo_ccb_setup(hw, data, slot); + if (error) { + kfree(data); + goto out; + } + + data->ccb_cnt = 1; + data->ccb_excl = fp->f_flags & O_EXCL; + data->ilo_hw = hw; + init_waitqueue_head(&data->ccb_waitq); + + /* write the ccb to hw */ + spin_lock_irqsave(&hw->alloc_lock, flags); + ilo_ccb_open(hw, data, slot); + hw->ccb_alloc[slot] = data; + spin_unlock_irqrestore(&hw->alloc_lock, flags); + + /* make sure the channel is functional */ + error = ilo_ccb_verify(hw, data); + if (error) { + + spin_lock_irqsave(&hw->alloc_lock, flags); + hw->ccb_alloc[slot] = NULL; + spin_unlock_irqrestore(&hw->alloc_lock, flags); + + ilo_ccb_close(hw->ilo_dev, data); + + kfree(data); + goto out; + } + + } else { + kfree(data); + if (fp->f_flags & O_EXCL || hw->ccb_alloc[slot]->ccb_excl) { + /* + * The channel exists, and either this open + * or a previous open of this channel wants + * exclusive access. 
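+			 * For example: if an earlier open() of this
+			 * channel used O_EXCL, or this open() passes
+			 * O_EXCL while the channel is already shared,
+			 * the open fails with -EBUSY below.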
+ */ + error = -EBUSY; + } else { + hw->ccb_alloc[slot]->ccb_cnt++; + error = 0; + } + } +out: + spin_unlock(&hw->open_lock); + + if (!error) + fp->private_data = hw->ccb_alloc[slot]; + + return error; +} + +static const struct file_operations ilo_fops = { + .owner = THIS_MODULE, + .read = ilo_read, + .write = ilo_write, + .poll = ilo_poll, + .open = ilo_open, + .release = ilo_close, + .llseek = noop_llseek, +}; + +static irqreturn_t ilo_isr(int irq, void *data) +{ + struct ilo_hwinfo *hw = data; + int pending, i; + + spin_lock(&hw->alloc_lock); + + /* check for ccbs which have data */ + pending = get_device_outbound(hw); + if (!pending) { + spin_unlock(&hw->alloc_lock); + return IRQ_NONE; + } + + if (is_db_reset(pending)) { + /* wake up all ccbs if the device was reset */ + pending = -1; + ilo_set_reset(hw); + } + + for (i = 0; i < max_ccb; i++) { + if (!hw->ccb_alloc[i]) + continue; + if (pending & (1 << i)) + wake_up_interruptible(&hw->ccb_alloc[i]->ccb_waitq); + } + + /* clear the device of the channels that have been handled */ + clear_pending_db(hw, pending); + + spin_unlock(&hw->alloc_lock); + + return IRQ_HANDLED; +} + +static void ilo_unmap_device(struct pci_dev *pdev, struct ilo_hwinfo *hw) +{ + pci_iounmap(pdev, hw->db_vaddr); + pci_iounmap(pdev, hw->ram_vaddr); + pci_iounmap(pdev, hw->mmio_vaddr); +} + +static int ilo_map_device(struct pci_dev *pdev, struct ilo_hwinfo *hw) +{ + int error = -ENOMEM; + + /* map the memory mapped i/o registers */ + hw->mmio_vaddr = pci_iomap(pdev, 1, 0); + if (hw->mmio_vaddr == NULL) { + dev_err(&pdev->dev, "Error mapping mmio\n"); + goto out; + } + + /* map the adapter shared memory region */ + hw->ram_vaddr = pci_iomap(pdev, 2, max_ccb * ILOHW_CCB_SZ); + if (hw->ram_vaddr == NULL) { + dev_err(&pdev->dev, "Error mapping shared mem\n"); + goto mmio_free; + } + + /* map the doorbell aperture */ + hw->db_vaddr = pci_iomap(pdev, 3, max_ccb * ONE_DB_SIZE); + if (hw->db_vaddr == NULL) { + dev_err(&pdev->dev, "Error mapping doorbell\n"); + goto ram_free; + } + + return 0; +ram_free: + pci_iounmap(pdev, hw->ram_vaddr); +mmio_free: + pci_iounmap(pdev, hw->mmio_vaddr); +out: + return error; +} + +static void ilo_remove(struct pci_dev *pdev) +{ + int i, minor; + struct ilo_hwinfo *ilo_hw = pci_get_drvdata(pdev); + + if (!ilo_hw) + return; + + clear_device(ilo_hw); + + minor = MINOR(ilo_hw->cdev.dev); + for (i = minor; i < minor + max_ccb; i++) + device_destroy(ilo_class, MKDEV(ilo_major, i)); + + cdev_del(&ilo_hw->cdev); + ilo_disable_interrupts(ilo_hw); + free_irq(pdev->irq, ilo_hw); + ilo_unmap_device(pdev, ilo_hw); + pci_release_regions(pdev); + /* + * pci_disable_device(pdev) used to be here. But this PCI device has + * two functions with interrupt lines connected to a single pin. The + * other one is a USB host controller. So when we disable the PIN here + * e.g. by rmmod hpilo, the controller stops working. It is because + * the interrupt link is disabled in ACPI since it is not refcounted + * yet. See acpi_pci_link_free_irq called from acpi_pci_irq_disable. 
+ */ + kfree(ilo_hw); + ilo_hwdev[(minor / max_ccb)] = 0; +} + +static int ilo_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int devnum, minor, start, error = 0; + struct ilo_hwinfo *ilo_hw; + + /* Ignore subsystem_device = 0x1979 (set by BIOS) */ + if (pdev->subsystem_device == 0x1979) + return 0; + + if (max_ccb > MAX_CCB) + max_ccb = MAX_CCB; + else if (max_ccb < MIN_CCB) + max_ccb = MIN_CCB; + + /* find a free range for device files */ + for (devnum = 0; devnum < MAX_ILO_DEV; devnum++) { + if (ilo_hwdev[devnum] == 0) { + ilo_hwdev[devnum] = 1; + break; + } + } + + if (devnum == MAX_ILO_DEV) { + dev_err(&pdev->dev, "Error finding free device\n"); + return -ENODEV; + } + + /* track global allocations for this device */ + error = -ENOMEM; + ilo_hw = kzalloc(sizeof(*ilo_hw), GFP_KERNEL); + if (!ilo_hw) + goto out; + + ilo_hw->ilo_dev = pdev; + spin_lock_init(&ilo_hw->alloc_lock); + spin_lock_init(&ilo_hw->fifo_lock); + spin_lock_init(&ilo_hw->open_lock); + + error = pci_enable_device(pdev); + if (error) + goto free; + + pci_set_master(pdev); + + error = pci_request_regions(pdev, ILO_NAME); + if (error) + goto disable; + + error = ilo_map_device(pdev, ilo_hw); + if (error) + goto free_regions; + + pci_set_drvdata(pdev, ilo_hw); + clear_device(ilo_hw); + + error = request_irq(pdev->irq, ilo_isr, IRQF_SHARED, "hpilo", ilo_hw); + if (error) + goto unmap; + + ilo_enable_interrupts(ilo_hw); + + cdev_init(&ilo_hw->cdev, &ilo_fops); + ilo_hw->cdev.owner = THIS_MODULE; + start = devnum * max_ccb; + error = cdev_add(&ilo_hw->cdev, MKDEV(ilo_major, start), max_ccb); + if (error) { + dev_err(&pdev->dev, "Could not add cdev\n"); + goto remove_isr; + } + + for (minor = 0 ; minor < max_ccb; minor++) { + struct device *dev; + dev = device_create(ilo_class, &pdev->dev, + MKDEV(ilo_major, minor), NULL, + "hpilo!d%dccb%d", devnum, minor); + if (IS_ERR(dev)) + dev_err(&pdev->dev, "Could not create files\n"); + } + + return 0; +remove_isr: + ilo_disable_interrupts(ilo_hw); + free_irq(pdev->irq, ilo_hw); +unmap: + ilo_unmap_device(pdev, ilo_hw); +free_regions: + pci_release_regions(pdev); +disable: +/* pci_disable_device(pdev); see comment in ilo_remove */ +free: + kfree(ilo_hw); +out: + ilo_hwdev[devnum] = 0; + return error; +} + +static struct pci_device_id ilo_devices[] = { + { PCI_DEVICE(PCI_VENDOR_ID_COMPAQ, 0xB204) }, + { PCI_DEVICE(PCI_VENDOR_ID_HP, 0x3307) }, + { } +}; +MODULE_DEVICE_TABLE(pci, ilo_devices); + +static struct pci_driver ilo_driver = { + .name = ILO_NAME, + .id_table = ilo_devices, + .probe = ilo_probe, + .remove = ilo_remove, +}; + +static int __init ilo_init(void) +{ + int error; + dev_t dev; + + ilo_class = class_create(THIS_MODULE, "iLO"); + if (IS_ERR(ilo_class)) { + error = PTR_ERR(ilo_class); + goto out; + } + + error = alloc_chrdev_region(&dev, 0, MAX_OPEN, ILO_NAME); + if (error) + goto class_destroy; + + ilo_major = MAJOR(dev); + + error = pci_register_driver(&ilo_driver); + if (error) + goto chr_remove; + + return 0; +chr_remove: + unregister_chrdev_region(dev, MAX_OPEN); +class_destroy: + class_destroy(ilo_class); +out: + return error; +} + +static void __exit ilo_exit(void) +{ + pci_unregister_driver(&ilo_driver); + unregister_chrdev_region(MKDEV(ilo_major, 0), MAX_OPEN); + class_destroy(ilo_class); +} + +MODULE_VERSION("1.4.1"); +MODULE_ALIAS(ILO_NAME); +MODULE_DESCRIPTION(ILO_NAME); +MODULE_AUTHOR("David Altobelli <david.altobelli@hp.com>"); +MODULE_LICENSE("GPL v2"); + +module_param(max_ccb, uint, 0444); +MODULE_PARM_DESC(max_ccb, "Maximum number of 
HP iLO channels to attach (16)"); + +module_init(ilo_init); +module_exit(ilo_exit); diff --git a/kernel/drivers/misc/hpilo.h b/kernel/drivers/misc/hpilo.h new file mode 100644 index 000000000..b97672e0c --- /dev/null +++ b/kernel/drivers/misc/hpilo.h @@ -0,0 +1,214 @@ +/* + * linux/drivers/char/hpilo.h + * + * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. + * David Altobelli <david.altobelli@hp.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __HPILO_H +#define __HPILO_H + +#define ILO_NAME "hpilo" + +/* max number of open channel control blocks per device, hw limited to 32 */ +#define MAX_CCB 24 +/* min number of open channel control blocks per device, hw limited to 32 */ +#define MIN_CCB 8 +/* max number of supported devices */ +#define MAX_ILO_DEV 1 +/* max number of files */ +#define MAX_OPEN (MAX_CCB * MAX_ILO_DEV) +/* total wait time in usec */ +#define MAX_WAIT_TIME 10000 +/* per spin wait time in usec */ +#define WAIT_TIME 10 +/* spin counter for open/close delay */ +#define MAX_WAIT (MAX_WAIT_TIME / WAIT_TIME) + +/* + * Per device, used to track global memory allocations. + */ +struct ilo_hwinfo { + /* mmio registers on device */ + char __iomem *mmio_vaddr; + + /* doorbell registers on device */ + char __iomem *db_vaddr; + + /* shared memory on device used for channel control blocks */ + char __iomem *ram_vaddr; + + /* files corresponding to this device */ + struct ccb_data *ccb_alloc[MAX_CCB]; + + struct pci_dev *ilo_dev; + + /* + * open_lock serializes ccb_cnt during open and close + * [ irq disabled ] + * -> alloc_lock used when adding/removing/searching ccb_alloc, + * which represents all ccbs open on the device + * --> fifo_lock controls access to fifo queues shared with hw + * + * Locks must be taken in this order, but open_lock and alloc_lock + * are optional, they do not need to be held in order to take a + * lower level lock. + */ + spinlock_t open_lock; + spinlock_t alloc_lock; + spinlock_t fifo_lock; + + struct cdev cdev; +}; + +/* offset from mmio_vaddr for enabling doorbell interrupts */ +#define DB_IRQ 0xB2 +/* offset from mmio_vaddr for outbound communications */ +#define DB_OUT 0xD4 +/* DB_OUT reset bit */ +#define DB_RESET 26 + +/* + * Channel control block. Used to manage hardware queues. + * The format must match hw's version. The hw ccb is 128 bytes, + * but the context area shouldn't be touched by the driver. + */ +#define ILOSW_CCB_SZ 64 +#define ILOHW_CCB_SZ 128 +struct ccb { + union { + char *send_fifobar; + u64 send_fifobar_pa; + } ccb_u1; + union { + char *send_desc; + u64 send_desc_pa; + } ccb_u2; + u64 send_ctrl; + + union { + char *recv_fifobar; + u64 recv_fifobar_pa; + } ccb_u3; + union { + char *recv_desc; + u64 recv_desc_pa; + } ccb_u4; + u64 recv_ctrl; + + union { + char __iomem *db_base; + u64 padding5; + } ccb_u5; + + u64 channel; + + /* unused context area (64 bytes) */ +}; + +/* ccb queue parameters */ +#define SENDQ 1 +#define RECVQ 2 +#define NR_QENTRY 4 +#define L2_QENTRY_SZ 12 + +/* ccb ctrl bitfields */ +#define CTRL_BITPOS_L2SZ 0 +#define CTRL_BITPOS_FIFOINDEXMASK 4 +#define CTRL_BITPOS_DESCLIMIT 18 +#define CTRL_BITPOS_A 30 +#define CTRL_BITPOS_G 31 + +/* ccb doorbell macros */ +#define L2_DB_SIZE 14 +#define ONE_DB_SIZE (1 << L2_DB_SIZE) + +/* + * Per fd structure used to track the ccb allocated to that dev file. 
+ */ +struct ccb_data { + /* software version of ccb, using virtual addrs */ + struct ccb driver_ccb; + + /* hardware version of ccb, using physical addrs */ + struct ccb ilo_ccb; + + /* hardware ccb is written to this shared mapped device memory */ + struct ccb __iomem *mapped_ccb; + + /* dma'able memory used for send/recv queues */ + void *dma_va; + dma_addr_t dma_pa; + size_t dma_size; + + /* pointer to hardware device info */ + struct ilo_hwinfo *ilo_hw; + + /* queue for this ccb to wait for recv data */ + wait_queue_head_t ccb_waitq; + + /* usage count, to allow for shared ccb's */ + int ccb_cnt; + + /* open wanted exclusive access to this ccb */ + int ccb_excl; +}; + +/* + * FIFO queue structure, shared with hw. + */ +#define ILO_START_ALIGN 4096 +#define ILO_CACHE_SZ 128 +struct fifo { + u64 nrents; /* user requested number of fifo entries */ + u64 imask; /* mask to extract valid fifo index */ + u64 merge; /* O/C bits to merge in during enqueue operation */ + u64 reset; /* set to non-zero when the target device resets */ + u8 pad_0[ILO_CACHE_SZ - (sizeof(u64) * 4)]; + + u64 head; + u8 pad_1[ILO_CACHE_SZ - (sizeof(u64))]; + + u64 tail; + u8 pad_2[ILO_CACHE_SZ - (sizeof(u64))]; + + u64 fifobar[1]; +}; + +/* convert between struct fifo, and the fifobar, which is saved in the ccb */ +#define FIFOHANDLESIZE (sizeof(struct fifo) - sizeof(u64)) +#define FIFOBARTOHANDLE(_fifo) \ + ((struct fifo *)(((char *)(_fifo)) - FIFOHANDLESIZE)) + +/* the number of qwords to consume from the entry descriptor */ +#define ENTRY_BITPOS_QWORDS 0 +/* descriptor index number (within a specified queue) */ +#define ENTRY_BITPOS_DESCRIPTOR 10 +/* state bit, fifo entry consumed by consumer */ +#define ENTRY_BITPOS_C 22 +/* state bit, fifo entry is occupied */ +#define ENTRY_BITPOS_O 23 + +#define ENTRY_BITS_QWORDS 10 +#define ENTRY_BITS_DESCRIPTOR 12 +#define ENTRY_BITS_C 1 +#define ENTRY_BITS_O 1 +#define ENTRY_BITS_TOTAL \ + (ENTRY_BITS_C + ENTRY_BITS_O + \ + ENTRY_BITS_QWORDS + ENTRY_BITS_DESCRIPTOR) + +/* extract various entry fields */ +#define ENTRY_MASK ((1 << ENTRY_BITS_TOTAL) - 1) +#define ENTRY_MASK_C (((1 << ENTRY_BITS_C) - 1) << ENTRY_BITPOS_C) +#define ENTRY_MASK_O (((1 << ENTRY_BITS_O) - 1) << ENTRY_BITPOS_O) +#define ENTRY_MASK_QWORDS \ + (((1 << ENTRY_BITS_QWORDS) - 1) << ENTRY_BITPOS_QWORDS) +#define ENTRY_MASK_DESCRIPTOR \ + (((1 << ENTRY_BITS_DESCRIPTOR) - 1) << ENTRY_BITPOS_DESCRIPTOR) + +#define ENTRY_MASK_NOSTATE (ENTRY_MASK >> (ENTRY_BITS_C + ENTRY_BITS_O)) + +#endif /* __HPILO_H */
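The ENTRY_* constants above pack the qword count, descriptor index, and O/C state bits into the low 24 bits of a fifo entry. A worked illustration using only the masks defined in this header (the helper names here are hypothetical, not from the driver, which open-codes this packing):

	/* Illustrative only: compose an entry for descriptor 3 occupying 2
	 * qwords, mark it occupied, then recover the descriptor index. */
	static inline u64 entry_pack(unsigned int qwords, unsigned int desc)
	{
		return ((u64)qwords << ENTRY_BITPOS_QWORDS) |
		       ((u64)desc << ENTRY_BITPOS_DESCRIPTOR) |
		       ENTRY_MASK_O;	/* entry is now marked occupied */
	}

	static inline unsigned int entry_desc(u64 entry)
	{
		return (entry & ENTRY_MASK_DESCRIPTOR) >> ENTRY_BITPOS_DESCRIPTOR;
	}

	/* entry_desc(entry_pack(2, 3)) == 3 */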
diff --git a/kernel/drivers/misc/hwlat_detector.c b/kernel/drivers/misc/hwlat_detector.c new file mode 100644 index 000000000..2429c4331 --- /dev/null +++ b/kernel/drivers/misc/hwlat_detector.c @@ -0,0 +1,1240 @@ +/* + * hwlat_detector.c - A simple Hardware Latency detector. + * + * Use this module to detect large system latencies induced by the behavior of + * certain underlying system hardware or firmware, independent of Linux itself. + * The code was developed originally to detect the presence of SMIs on Intel + * and AMD systems, although there is no dependency upon x86 herein. + * + * The classical example usage of this module is in detecting the presence of + * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a + * somewhat special form of hardware interrupt spawned from earlier CPU debug + * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge + * LPC (or other device) to generate a special interrupt under certain + * circumstances, for example, upon expiration of a special SMI timer device, + * due to certain external thermal readings, on certain I/O address accesses, + * and other situations. An SMI hits a special CPU pin, triggers a special + * SMI mode (complete with special memory map), and the OS is unaware. + * + * Although certain hardware-induced latencies are necessary (for example, + * a modern system often requires an SMI handler for correct thermal control + * and remote management) they can wreak havoc upon any OS-level + * low-latency performance guarantees, especially when the OS is not even made + * aware of the presence of these interrupts. For this reason, we need a + * somewhat brute force mechanism to detect these interrupts. In this case, + * we do it by hogging all of the CPU(s) for configurable timer intervals, + * sampling the built-in CPU timer, looking for discontiguous readings. + * + * WARNING: This implementation necessarily introduces latencies. Therefore, + * you should NEVER use this module in a production environment + * requiring any kind of low-latency performance guarantee(s). + * + * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com> + * + * Includes useful feedback from Clark Williams <clark@redhat.com> + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/ring_buffer.h> +#include <linux/time.h> +#include <linux/hrtimer.h> +#include <linux/kthread.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/uaccess.h> +#include <linux/version.h> +#include <linux/delay.h> +#include <linux/slab.h> +#include <linux/trace_clock.h> + +#define BUF_SIZE_DEFAULT 262144UL /* 8K*(sizeof(entry)) */ +#define BUF_FLAGS (RB_FL_OVERWRITE) /* no block on full */ +#define U64STR_SIZE 22 /* 20 digits max */ + +#define VERSION "1.0.0" +#define BANNER "hwlat_detector: " +#define DRVNAME "hwlat_detector" +#define DEFAULT_SAMPLE_WINDOW 1000000 /* 1s */ +#define DEFAULT_SAMPLE_WIDTH 500000 /* 0.5s */ +#define DEFAULT_LAT_THRESHOLD 10 /* 10us */ + +/* Module metadata */ + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jon Masters <jcm@redhat.com>"); +MODULE_DESCRIPTION("A simple hardware latency detector"); +MODULE_VERSION(VERSION); + +/* Module parameters */ + +static int debug; +static int enabled; +static int threshold; + +module_param(debug, int, 0); /* enable debug */ +module_param(enabled, int, 0); /* enable detector */ +module_param(threshold, int, 0); /* latency threshold */ + +/* Buffering and sampling */ + +static struct ring_buffer *ring_buffer; /* sample buffer */ +static DEFINE_MUTEX(ring_buffer_mutex); /* lock changes */ +static unsigned long buf_size = BUF_SIZE_DEFAULT; +static struct task_struct *kthread; /* sampling thread */ + +/* DebugFS filesystem entries */ + +static struct dentry *debug_dir; /* debugfs directory */ +static struct dentry *debug_max; /* maximum TSC delta */ +static struct dentry *debug_count; /* total detect count */ +static struct dentry *debug_sample_width; /* sample width us */ +static struct dentry *debug_sample_window; /* sample window us */ +static struct dentry *debug_sample; /* raw samples us */ +static struct dentry *debug_threshold; /* threshold us */ +static struct dentry 
*debug_enable; /* enable/disable */ + +/* Individual samples and global state */ + +struct sample; /* latency sample */ +struct data; /* Global state */ + +/* Sampling functions */ +static int __buffer_add_sample(struct sample *sample); +static struct sample *buffer_get_sample(struct sample *sample); + +/* Threading and state */ +static int kthread_fn(void *unused); +static int start_kthread(void); +static int stop_kthread(void); +static void __reset_stats(void); +static int init_stats(void); + +/* Debugfs interface */ +static ssize_t simple_data_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos, const u64 *entry); +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos, u64 *entry); +static int debug_sample_fopen(struct inode *inode, struct file *filp); +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos); +static int debug_sample_release(struct inode *inode, struct file *filp); +static int debug_enable_fopen(struct inode *inode, struct file *filp); +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos); +static ssize_t debug_enable_fwrite(struct file *file, + const char __user *user_buffer, + size_t user_size, loff_t *offset); + +/* Initialization functions */ +static int init_debugfs(void); +static void free_debugfs(void); +static int detector_init(void); +static void detector_exit(void); + +/* Individual latency samples are stored here when detected and packed into + * the ring_buffer circular buffer, where they are overwritten when + * more than buf_size/sizeof(sample) samples are received. */ +struct sample { + u64 seqnum; /* unique sequence */ + u64 duration; /* ktime delta */ + u64 outer_duration; /* ktime delta (outer loop) */ + struct timespec timestamp; /* wall time */ + unsigned long lost; +}; + +/* keep the global state somewhere. */ +static struct data { + + struct mutex lock; /* protect changes */ + + u64 count; /* total since reset */ + u64 max_sample; /* max hardware latency */ + u64 threshold; /* sample threshold level */ + + u64 sample_window; /* total sampling window (on+off) */ + u64 sample_width; /* active sampling portion of window */ + + atomic_t sample_open; /* whether the sample file is open */ + + wait_queue_head_t wq; /* waitqueue for new sample values */ + +} data; + +/** + * __buffer_add_sample - add a new latency sample recording to the ring buffer + * @sample: The new latency sample value + * + * This receives a new latency sample and records it in a global ring buffer. + * No additional locking is used in this case. 
+ */ +static int __buffer_add_sample(struct sample *sample) +{ + return ring_buffer_write(ring_buffer, + sizeof(struct sample), sample); +} + +/** + * buffer_get_sample - remove a hardware latency sample from the ring buffer + * @sample: Pre-allocated storage for the sample + * + * This retrieves a hardware latency sample from the global circular buffer + */ +static struct sample *buffer_get_sample(struct sample *sample) +{ + struct ring_buffer_event *e = NULL; + struct sample *s = NULL; + unsigned int cpu = 0; + + if (!sample) + return NULL; + + mutex_lock(&ring_buffer_mutex); + for_each_online_cpu(cpu) { + e = ring_buffer_consume(ring_buffer, cpu, NULL, &sample->lost); + if (e) + break; + } + + if (e) { + s = ring_buffer_event_data(e); + memcpy(sample, s, sizeof(struct sample)); + } else + sample = NULL; + mutex_unlock(&ring_buffer_mutex); + + return sample; +} + +#ifndef CONFIG_TRACING +#define time_type ktime_t +#define time_get() ktime_get() +#define time_to_us(x) ktime_to_us(x) +#define time_sub(a, b) ktime_sub(a, b) +#define init_time(a, b) (a).tv64 = b +#define time_u64(a) ((a).tv64) +#else +#define time_type u64 +#define time_get() trace_clock_local() +#define time_to_us(x) div_u64(x, 1000) +#define time_sub(a, b) ((a) - (b)) +#define init_time(a, b) (a = b) +#define time_u64(a) a +#endif +/** + * get_sample - sample the CPU TSC and look for likely hardware latencies + * + * Used to repeatedly capture the CPU TSC (or similar), looking for potential + * hardware-induced latency. Called with interrupts disabled and with + * data.lock held. + */ +static int get_sample(void) +{ + time_type start, t1, t2, last_t2; + s64 diff, total = 0; + u64 sample = 0; + u64 outer_sample = 0; + int ret = -1; + + init_time(last_t2, 0); + start = time_get(); /* start timestamp */ + + do { + + t1 = time_get(); /* we'll look for a discontinuity */ + t2 = time_get(); + + if (time_u64(last_t2)) { + /* Check the delta from outer loop (t2 to next t1) */ + diff = time_to_us(time_sub(t1, last_t2)); + /* This shouldn't happen */ + if (diff < 0) { + pr_err(BANNER "time running backwards\n"); + goto out; + } + if (diff > outer_sample) + outer_sample = diff; + } + last_t2 = t2; + + total = time_to_us(time_sub(t2, start)); /* sample width */ + + /* This checks the inner loop (t1 to t2) */ + diff = time_to_us(time_sub(t2, t1)); /* current diff */ + + /* This shouldn't happen */ + if (diff < 0) { + pr_err(BANNER "time running backwards\n"); + goto out; + } + + if (diff > sample) + sample = diff; /* only want highest value */ + + } while (total <= data.sample_width); + + ret = 0; + + /* If we exceed the threshold value, we have found a hardware latency */ + if (sample > data.threshold || outer_sample > data.threshold) { + struct sample s; + + ret = 1; + + data.count++; + s.seqnum = data.count; + s.duration = sample; + s.outer_duration = outer_sample; + s.timestamp = CURRENT_TIME; + __buffer_add_sample(&s); + + /* Keep a running maximum ever recorded hardware latency */ + if (sample > data.max_sample) + data.max_sample = sample; + } + +out: + return ret; +} + +/* + * kthread_fn - The CPU time sampling/hardware latency detection kernel thread + * @unused: A required part of the kthread API. + * + * Used to periodically sample the CPU TSC via a call to get_sample. We + * disable interrupts, which does (intentionally) introduce latency since we + * need to ensure nothing else might be running (and thus pre-empting). + * Obviously this should never be used in production environments. 
+ * + * Currently this runs on whichever CPU it was scheduled on, but most + * real-world hardware latency situations occur across several CPUs, + * so we might later generalize this if we find there are any actual + * systems with alternate SMI delivery or other hardware latencies. + */ +static int kthread_fn(void *unused) +{ + int ret; + u64 interval; + + while (!kthread_should_stop()) { + + mutex_lock(&data.lock); + + local_irq_disable(); + ret = get_sample(); + local_irq_enable(); + + if (ret > 0) + wake_up(&data.wq); /* wake up reader(s) */ + + interval = data.sample_window - data.sample_width; + do_div(interval, USEC_PER_MSEC); /* modifies interval value */ + + mutex_unlock(&data.lock); + + if (msleep_interruptible(interval)) + break; + } + + return 0; +} + +/** + * start_kthread - Kick off the hardware latency sampling/detector kthread + * + * This starts a kernel thread that will sit and sample the CPU timestamp + * counter (TSC or similar) and look for potential hardware latencies. + */ +static int start_kthread(void) +{ + kthread = kthread_run(kthread_fn, NULL, + DRVNAME); + if (IS_ERR(kthread)) { + pr_err(BANNER "could not start sampling thread\n"); + enabled = 0; + return -ENOMEM; + } + + return 0; +} + +/** + * stop_kthread - Inform the hardware latency sampling/detector kthread to stop + * + * This kicks the running hardware latency sampling/detector kernel thread and + * tells it to stop sampling now. Use this on unload and at system shutdown. + */ +static int stop_kthread(void) +{ + int ret; + + ret = kthread_stop(kthread); + + return ret; +} + +/** + * __reset_stats - Reset statistics for the hardware latency detector + * + * We use data to store various statistics and global state. We call this + * function in order to reset those when "enable" is toggled on or off, and + * also at initialization. Should be called with data.lock held. + */ +static void __reset_stats(void) +{ + data.count = 0; + data.max_sample = 0; + ring_buffer_reset(ring_buffer); /* flush out old sample entries */ +} + +/** + * init_stats - Setup global state statistics for the hardware latency detector + * + * We use data to store various statistics and global state. We also use + * a global ring buffer (ring_buffer) to keep raw samples of detected hardware + * induced system latencies. This function initializes these structures and + * allocates the global ring buffer also. + */ +static int init_stats(void) +{ + int ret = -ENOMEM; + + mutex_init(&data.lock); + init_waitqueue_head(&data.wq); + atomic_set(&data.sample_open, 0); + + ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS); + + if (WARN(!ring_buffer, KERN_ERR BANNER + "failed to allocate ring buffer!\n")) + goto out; + + __reset_stats(); + data.threshold = threshold ?: DEFAULT_LAT_THRESHOLD; /* threshold us */ + data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */ + data.sample_width = DEFAULT_SAMPLE_WIDTH; /* width us */ + + ret = 0; + +out: + return ret; + +}
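To make the window arithmetic in kthread_fn concrete, here is a standalone worked example using the default values defined earlier in this file (a sketch for illustration, not driver code):

	/* With DEFAULT_SAMPLE_WINDOW and DEFAULT_SAMPLE_WIDTH, the thread
	 * samples with IRQs off for 0.5 s, then sleeps for the remainder
	 * of the 1 s window before starting the next pass. */
	#include <stdio.h>

	int main(void)
	{
		unsigned long long window_us = 1000000;	/* DEFAULT_SAMPLE_WINDOW */
		unsigned long long width_us = 500000;	/* DEFAULT_SAMPLE_WIDTH */
		unsigned long long sleep_ms = (window_us - width_us) / 1000;

		/* prints: sample 500000 us, then msleep 500 ms */
		printf("sample %llu us, then msleep %llu ms\n", width_us, sleep_ms);
		return 0;
	}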
+ +/* + * simple_data_read - Wrapper read function for global state debugfs entries + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * @entry: The entry to read from + * + * This function provides a generic read implementation for the global state + * "data" structure debugfs filesystem entries. It would be nice to use + * simple_attr_read directly, but we need to make sure that the data.lock + * is held during the actual read. + */ +static ssize_t simple_data_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos, const u64 *entry) +{ + char buf[U64STR_SIZE]; + u64 val = 0; + int len = 0; + + memset(buf, 0, sizeof(buf)); + + if (!entry) + return -EFAULT; + + mutex_lock(&data.lock); + val = *entry; + mutex_unlock(&data.lock); + + len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); + +} + +/* + * simple_data_write - Wrapper write function for global state debugfs entries + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to write value from + * @cnt: The maximum number of bytes to write + * @ppos: The current "file" position + * @entry: The entry to write to + * + * This function provides a generic write implementation for the global state + * "data" structure debugfs filesystem entries. It would be nice to use + * simple_attr_write directly, but we need to make sure that the data.lock + * is held during the actual write. + */ +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos, u64 *entry) +{ + char buf[U64STR_SIZE]; + int csize = min(cnt, sizeof(buf)); + u64 val = 0; + int err = 0; + + memset(buf, '\0', sizeof(buf)); + if (copy_from_user(buf, ubuf, csize)) + return -EFAULT; + + buf[U64STR_SIZE-1] = '\0'; /* just in case */ + err = kstrtoull(buf, 10, &val); + if (err) + return -EINVAL; + + mutex_lock(&data.lock); + *entry = val; + mutex_unlock(&data.lock); + + return csize; +} + +/** + * debug_count_fopen - Open function for "count" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "count" debugfs + * interface to the hardware latency detector. + */ +static int debug_count_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_count_fread - Read function for "count" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "count" debugfs + * interface to the hardware latency detector. Can be used to read the + * number of latency readings exceeding the configured threshold since + * the detector was last reset (e.g. by writing a zero into "count"). + */ +static ssize_t debug_count_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.count); +} + +/** + * debug_count_fwrite - Write function for "count" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "count" debugfs + * interface to the hardware latency detector. Can be used to write a + * desired value, especially to zero the total count. 
+ */ +static ssize_t debug_count_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + return simple_data_write(filp, ubuf, cnt, ppos, &data.count); +} + +/** + * debug_enable_fopen - Dummy open function for "enable" debugfs interface + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "enable" debugfs + * interface to the hardware latency detector. + */ +static int debug_enable_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_enable_fread - Read function for "enable" debugfs interface + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "enable" debugfs + * interface to the hardware latency detector. Can be used to determine + * whether the detector is currently enabled ("0\n" or "1\n" returned). + */ +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[4]; + + if ((cnt < sizeof(buf)) || (*ppos)) + return 0; + + buf[0] = enabled ? '1' : '0'; + buf[1] = '\n'; + buf[2] = '\0'; + if (copy_to_user(ubuf, buf, strlen(buf))) + return -EFAULT; + return *ppos = strlen(buf); +} + +/** + * debug_enable_fwrite - Write function for "enable" debugfs interface + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "enable" debugfs + * interface to the hardware latency detector. Can be used to enable or + * disable the detector, which will have the side-effect of possibly + * also resetting the global stats and kicking off the measuring + * kthread (on an enable) or the converse (upon a disable). + */ +static ssize_t debug_enable_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + char buf[4]; + int csize = min(cnt, sizeof(buf)); + long val = 0; + int err = 0; + + memset(buf, '\0', sizeof(buf)); + if (copy_from_user(buf, ubuf, csize)) + return -EFAULT; + + buf[sizeof(buf)-1] = '\0'; /* just in case */ + err = kstrtoul(buf, 10, &val); + if (0 != err) + return -EINVAL; + + if (val) { + if (enabled) + goto unlock; + enabled = 1; + __reset_stats(); + if (start_kthread()) + return -EFAULT; + } else { + if (!enabled) + goto unlock; + enabled = 0; + err = stop_kthread(); + if (err) { + pr_err(BANNER "cannot stop kthread\n"); + return -EFAULT; + } + wake_up(&data.wq); /* reader(s) should return */ + } +unlock: + return csize; +} + +/** + * debug_max_fopen - Open function for "max" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "max" debugfs + * interface to the hardware latency detector. 
+ */ +static int debug_max_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_max_fread - Read function for "max" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "max" debugfs + * interface to the hardware latency detector. Can be used to determine + * the maximum latency value observed since it was last reset. + */ +static ssize_t debug_max_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample); +} + +/** + * debug_max_fwrite - Write function for "max" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "max" debugfs + * interface to the hardware latency detector. Can be used to reset the + * maximum, or to set it to some other desired value; if subsequent + * measurements then exceed this value, the maximum will be updated. + */ +static ssize_t debug_max_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample); +} + + +/** + * debug_sample_fopen - An open function for "sample" debugfs interface + * @inode: The in-kernel inode representation of this debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function handles opening the "sample" file within the hardware + * latency detector debugfs directory interface. This file is used to read + * raw samples from the global ring_buffer and allows the user to see a + * running latency history. Can be opened blocking or non-blocking, + * which determines whether reads behave as a blocking pipe or return + * immediately when no sample is available. + * Implements simple locking to prevent multiple simultaneous use. + */ +static int debug_sample_fopen(struct inode *inode, struct file *filp) +{ + if (!atomic_add_unless(&data.sample_open, 1, 1)) + return -EBUSY; + + return 0; +} + +/** + * debug_sample_fread - A read function for "sample" debugfs interface + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that will contain the samples read + * @cnt: The maximum bytes to read from the debugfs "file" + * @ppos: The current position in the debugfs "file" + * + * This function handles reading from the "sample" file within the hardware + * latency detector debugfs directory interface. This file is used to read + * raw samples from the global ring_buffer and allows the user to see a + * running latency history. By default this will block pending a new + * value written into the sample buffer, unless there are already a + * number of value(s) waiting in the buffer, or the sample file was + * previously opened in a non-blocking mode of operation. 
+ */ +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int len = 0; + char buf[64]; + struct sample *sample = NULL; + + if (!enabled) + return 0; + + sample = kzalloc(sizeof(struct sample), GFP_KERNEL); + if (!sample) + return -ENOMEM; + + while (!buffer_get_sample(sample)) { + + DEFINE_WAIT(wait); + + if (filp->f_flags & O_NONBLOCK) { + len = -EAGAIN; + goto out; + } + + prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE); + schedule(); + finish_wait(&data.wq, &wait); + + if (signal_pending(current)) { + len = -EINTR; + goto out; + } + + if (!enabled) { /* enable was toggled */ + len = 0; + goto out; + } + } + + len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\t%llu\n", + sample->timestamp.tv_sec, + sample->timestamp.tv_nsec, + sample->duration, + sample->outer_duration); + + + /* handling partial reads is more trouble than it's worth */ + if (len > cnt) + goto out; + + if (copy_to_user(ubuf, buf, len)) + len = -EFAULT; + +out: + kfree(sample); + return len; +} + +/** + * debug_sample_release - Release function for "sample" debugfs interface + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function completes the close of the debugfs interface "sample" file. + * Frees the sample_open "lock" so that other users may open the interface. + */ +static int debug_sample_release(struct inode *inode, struct file *filp) +{ + atomic_dec(&data.sample_open); + + return 0; +} + +/** + * debug_threshold_fopen - Open function for "threshold" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "threshold" debugfs + * interface to the hardware latency detector. + */ +static int debug_threshold_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_threshold_fread - Read function for "threshold" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "threshold" debugfs + * interface to the hardware latency detector. It can be used to determine + * the current threshold level at which a latency will be recorded in the + * global ring buffer, typically on the order of 10us. + */ +static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold); +} + +/** + * debug_threshold_fwrite - Write function for "threshold" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "threshold" debugfs + * interface to the hardware latency detector. It can be used to configure + * the threshold level at which any subsequently detected latencies will + * be recorded into the global ring buffer. 
+ */ +static ssize_t debug_threshold_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + int ret; + + ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold); + + if (enabled) + wake_up_process(kthread); + + return ret; +} + +/** + * debug_width_fopen - Open function for "width" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "width" debugfs + * interface to the hardware latency detector. + */ +static int debug_width_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_width_fread - Read function for "width" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "width" debugfs + * interface to the hardware latency detector. It can be used to determine + * for how many us of the total window us we will actively sample for any + * hardware-induced latency periods. Obviously, it is not possible to + * sample constantly and have the system respond to a sample reader, or, + * worse, without having the system appear to have gone out to lunch. + */ +static ssize_t debug_width_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width); +} + +/** + * debug_width_fwrite - Write function for "width" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "width" debugfs + * interface to the hardware latency detector. It can be used to configure + * for how many us of the total window us we will actively sample for any + * hardware-induced latency periods. Obviously, it is not possible to + * sample constantly and have the system respond to a sample reader, or, + * worse, without having the system appear to have gone out to lunch. It + * is enforced that width is less than the total window size. + */ +static ssize_t debug_width_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + char buf[U64STR_SIZE]; + int csize = min(cnt, sizeof(buf)); + u64 val = 0; + int err = 0; + + memset(buf, '\0', sizeof(buf)); + if (copy_from_user(buf, ubuf, csize)) + return -EFAULT; + + buf[U64STR_SIZE-1] = '\0'; /* just in case */ + err = kstrtoull(buf, 10, &val); + if (0 != err) + return -EINVAL; + + mutex_lock(&data.lock); + if (val < data.sample_window) + data.sample_width = val; + else { + mutex_unlock(&data.lock); + return -EINVAL; + } + mutex_unlock(&data.lock); + + if (enabled) + wake_up_process(kthread); + + return csize; +} + +/** + * debug_window_fopen - Open function for "window" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "window" debugfs + * interface to the hardware latency detector. The window is the total time + * in us that will be considered one sample period. 
Conceptually, windows + * occur back-to-back and contain a sample width period during which + * actual sampling occurs. + */ +static int debug_window_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_window_fread - Read function for "window" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "window" debugfs + * interface to the hardware latency detector. The window is the total time + * in us that will be considered one sample period. Conceptually, windows + * occur back-to-back and contain a sample width period during which + * actual sampling occurs. Can be used to read the total window size. + */ +static ssize_t debug_window_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window); +}
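Taken together, the window, width, threshold, and enable files form the whole control surface of the detector. A user-space sketch of a typical configuration sequence (illustrative only; it assumes debugfs is mounted at /sys/kernel/debug, and it enlarges the window before the width because the write handlers reject width >= window):

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	static int write_str(const char *path, const char *val)
	{
		int fd = open(path, O_WRONLY);

		if (fd < 0)
			return -1;
		if (write(fd, val, strlen(val)) < 0) {
			close(fd);
			return -1;
		}
		return close(fd);
	}

	int main(void)
	{
		const char *dir = "/sys/kernel/debug/hwlat_detector";
		char path[128];

		snprintf(path, sizeof(path), "%s/window", dir);
		write_str(path, "2000000");	/* 2 s window ...            */
		snprintf(path, sizeof(path), "%s/width", dir);
		write_str(path, "1000000");	/* ... sampling 1 s of it    */
		snprintf(path, sizeof(path), "%s/threshold", dir);
		write_str(path, "10");		/* record latencies > 10 us  */
		snprintf(path, sizeof(path), "%s/enable", dir);
		return write_str(path, "1");	/* start the sampling kthread */
	}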
+ +/** + * debug_window_fwrite - Write function for "window" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "window" debugfs + * interface to the hardware latency detector. The window is the total time + * in us that will be considered one sample period. Conceptually, windows + * occur back-to-back and contain a sample width period during which + * actual sampling occurs. Can be used to write a new total window size. It + * is enforced that any value written must be greater than the sample width + * size, or an error results. + */ +static ssize_t debug_window_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + char buf[U64STR_SIZE]; + int csize = min(cnt, sizeof(buf)); + u64 val = 0; + int err = 0; + + memset(buf, '\0', sizeof(buf)); + if (copy_from_user(buf, ubuf, csize)) + return -EFAULT; + + buf[U64STR_SIZE-1] = '\0'; /* just in case */ + err = kstrtoull(buf, 10, &val); + if (0 != err) + return -EINVAL; + + mutex_lock(&data.lock); + if (data.sample_width < val) + data.sample_window = val; + else { + mutex_unlock(&data.lock); + return -EINVAL; + } + mutex_unlock(&data.lock); + + return csize; +} + +/* + * Function pointers for the "count" debugfs file operations + */ +static const struct file_operations count_fops = { + .open = debug_count_fopen, + .read = debug_count_fread, + .write = debug_count_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "enable" debugfs file operations + */ +static const struct file_operations enable_fops = { + .open = debug_enable_fopen, + .read = debug_enable_fread, + .write = debug_enable_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "max" debugfs file operations + */ +static const struct file_operations max_fops = { + .open = debug_max_fopen, + .read = debug_max_fread, + .write = debug_max_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "sample" debugfs file operations + */ +static const struct file_operations sample_fops = { + .open = debug_sample_fopen, + .read = debug_sample_fread, + .release = debug_sample_release, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "threshold" debugfs file operations + */ +static const struct file_operations threshold_fops = { + .open = debug_threshold_fopen, + .read = debug_threshold_fread, + .write = debug_threshold_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "width" debugfs file operations + */ +static const struct file_operations width_fops = { + .open = debug_width_fopen, + .read = debug_width_fread, + .write = debug_width_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "window" debugfs file operations + */ +static const struct file_operations window_fops = { + .open = debug_window_fopen, + .read = debug_window_fread, + .write = debug_window_fwrite, + .owner = THIS_MODULE, +}; + +/** + * init_debugfs - A function to initialize the debugfs interface files + * + * This function creates entries in debugfs for "hwlat_detector", including + * files to read values from the detector, current samples, and the + * maximum sample that has been captured since the hardware latency + * detector was started. 
+ */ +static int init_debugfs(void) +{ + int ret = -ENOMEM; + + debug_dir = debugfs_create_dir(DRVNAME, NULL); + if (!debug_dir) + goto err_debug_dir; + + debug_sample = debugfs_create_file("sample", 0444, + debug_dir, NULL, + &sample_fops); + if (!debug_sample) + goto err_sample; + + debug_count = debugfs_create_file("count", 0444, + debug_dir, NULL, + &count_fops); + if (!debug_count) + goto err_count; + + debug_max = debugfs_create_file("max", 0444, + debug_dir, NULL, + &max_fops); + if (!debug_max) + goto err_max; + + debug_sample_window = debugfs_create_file("window", 0644, + debug_dir, NULL, + &window_fops); + if (!debug_sample_window) + goto err_window; + + debug_sample_width = debugfs_create_file("width", 0644, + debug_dir, NULL, + &width_fops); + if (!debug_sample_width) + goto err_width; + + debug_threshold = debugfs_create_file("threshold", 0644, + debug_dir, NULL, + &threshold_fops); + if (!debug_threshold) + goto err_threshold; + + debug_enable = debugfs_create_file("enable", 0644, + debug_dir, &enabled, + &enable_fops); + if (!debug_enable) + goto err_enable; + + ret = 0; + goto out; + +err_enable: + debugfs_remove(debug_threshold); +err_threshold: + debugfs_remove(debug_sample_width); +err_width: + debugfs_remove(debug_sample_window); +err_window: + debugfs_remove(debug_max); +err_max: + debugfs_remove(debug_count); +err_count: + debugfs_remove(debug_sample); +err_sample: + debugfs_remove(debug_dir); +err_debug_dir: +out: + return ret; +} + +/** + * free_debugfs - A function to cleanup the debugfs file interface + */ +static void free_debugfs(void) +{ + /* could also use a debugfs_remove_recursive */ + debugfs_remove(debug_enable); + debugfs_remove(debug_threshold); + debugfs_remove(debug_sample_width); + debugfs_remove(debug_sample_window); + debugfs_remove(debug_max); + debugfs_remove(debug_count); + debugfs_remove(debug_sample); + debugfs_remove(debug_dir); +} + +/** + * detector_init - Standard module initialization code + */ +static int detector_init(void) +{ + int ret = -ENOMEM; + + pr_info(BANNER "version %s\n", VERSION); + + ret = init_stats(); + if (0 != ret) + goto out; + + ret = init_debugfs(); + if (0 != ret) + goto err_stats; + + if (enabled) + ret = start_kthread(); + + goto out; + +err_stats: + ring_buffer_free(ring_buffer); +out: + return ret; + +} + +/** + * detector_exit - Standard module cleanup code + */ +static void detector_exit(void) +{ + int err; + + if (enabled) { + enabled = 0; + err = stop_kthread(); + if (err) + pr_err(BANNER "cannot stop kthread\n"); + } + + free_debugfs(); + ring_buffer_free(ring_buffer); /* free up the ring buffer */ + +} + +module_init(detector_init); +module_exit(detector_exit); diff --git a/kernel/drivers/misc/ibmasm/Makefile b/kernel/drivers/misc/ibmasm/Makefile new file mode 100644 index 000000000..9e63ade5f --- /dev/null +++ b/kernel/drivers/misc/ibmasm/Makefile @@ -0,0 +1,15 @@ + +obj-$(CONFIG_IBM_ASM) := ibmasm.o + +ibmasm-y := module.o \ + ibmasmfs.o \ + event.o \ + command.o \ + remote.o \ + heartbeat.o \ + r_heartbeat.o \ + dot_command.o \ + lowlevel.o + +ibmasm-$(CONFIG_SERIAL_8250) += uart.o + diff --git a/kernel/drivers/misc/ibmasm/command.c b/kernel/drivers/misc/ibmasm/command.c new file mode 100644 index 000000000..7d56f45de --- /dev/null +++ b/kernel/drivers/misc/ibmasm/command.c @@ -0,0 +1,187 @@ + +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * 
the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + */ + +#include <linux/sched.h> +#include <linux/slab.h> +#include "ibmasm.h" +#include "lowlevel.h" + +static void exec_next_command(struct service_processor *sp); + +static atomic_t command_count = ATOMIC_INIT(0); + +struct command *ibmasm_new_command(struct service_processor *sp, size_t buffer_size) +{ + struct command *cmd; + + if (buffer_size > IBMASM_CMD_MAX_BUFFER_SIZE) + return NULL; + + cmd = kzalloc(sizeof(struct command), GFP_KERNEL); + if (cmd == NULL) + return NULL; + + + cmd->buffer = kzalloc(buffer_size, GFP_KERNEL); + if (cmd->buffer == NULL) { + kfree(cmd); + return NULL; + } + cmd->buffer_size = buffer_size; + + kref_init(&cmd->kref); + cmd->lock = &sp->lock; + + cmd->status = IBMASM_CMD_PENDING; + init_waitqueue_head(&cmd->wait); + INIT_LIST_HEAD(&cmd->queue_node); + + atomic_inc(&command_count); + dbg("command count: %d\n", atomic_read(&command_count)); + + return cmd; +} + +void ibmasm_free_command(struct kref *kref) +{ + struct command *cmd = to_command(kref); + + list_del(&cmd->queue_node); + atomic_dec(&command_count); + dbg("command count: %d\n", atomic_read(&command_count)); + kfree(cmd->buffer); + kfree(cmd); +} + +static void enqueue_command(struct service_processor *sp, struct command *cmd) +{ + list_add_tail(&cmd->queue_node, &sp->command_queue); +} + +static struct command *dequeue_command(struct service_processor *sp) +{ + struct command *cmd; + struct list_head *next; + + if (list_empty(&sp->command_queue)) + return NULL; + + next = sp->command_queue.next; + list_del_init(next); + cmd = list_entry(next, struct command, queue_node); + + return cmd; +} + +static inline void do_exec_command(struct service_processor *sp) +{ + char tsbuf[32]; + + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); + + if (ibmasm_send_i2o_message(sp)) { + sp->current_command->status = IBMASM_CMD_FAILED; + wake_up(&sp->current_command->wait); + command_put(sp->current_command); + exec_next_command(sp); + } +} + +/** + * exec_command + * send a command to a service processor + * Commands are executed sequentially. One command (sp->current_command) + * is sent to the service processor. 
Once the interrupt handler gets a + * message of type command_response, the message is copied into + * the current command's buffer. + */ +void ibmasm_exec_command(struct service_processor *sp, struct command *cmd) +{ + unsigned long flags; + char tsbuf[32]; + + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); + + spin_lock_irqsave(&sp->lock, flags); + + if (!sp->current_command) { + sp->current_command = cmd; + command_get(sp->current_command); + spin_unlock_irqrestore(&sp->lock, flags); + do_exec_command(sp); + } else { + enqueue_command(sp, cmd); + spin_unlock_irqrestore(&sp->lock, flags); + } +} + +static void exec_next_command(struct service_processor *sp) +{ + unsigned long flags; + char tsbuf[32]; + + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); + + spin_lock_irqsave(&sp->lock, flags); + sp->current_command = dequeue_command(sp); + if (sp->current_command) { + command_get(sp->current_command); + spin_unlock_irqrestore(&sp->lock, flags); + do_exec_command(sp); + } else { + spin_unlock_irqrestore(&sp->lock, flags); + } +} + +/** + * Sleep until a command has failed or a response has been received + * and the command status has been updated by the interrupt handler. + * (see receive_response). + */ +void ibmasm_wait_for_response(struct command *cmd, int timeout) +{ + wait_event_interruptible_timeout(cmd->wait, + cmd->status == IBMASM_CMD_COMPLETE || + cmd->status == IBMASM_CMD_FAILED, + timeout * HZ); +} + +/** + * receive_command_response + * called by the interrupt handler when a dot command of type command_response + * was received. + */ +void ibmasm_receive_command_response(struct service_processor *sp, void *response, size_t size) +{ + struct command *cmd = sp->current_command; + + if (!sp->current_command) + return; + + memcpy_fromio(cmd->buffer, response, min(size, cmd->buffer_size)); + cmd->status = IBMASM_CMD_COMPLETE; + wake_up(&sp->current_command->wait); + command_put(sp->current_command); + exec_next_command(sp); +}
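The functions above define the canonical command lifecycle: allocate, execute, wait, check status, drop the reference. A usage sketch (illustrative; it assumes a valid struct service_processor *sp, and mirrors the pattern that dot_command.c, which follows, uses for real commands):

	static int example_send(struct service_processor *sp)
	{
		int result = 0;
		struct command *cmd = ibmasm_new_command(sp, 32);	/* 32-byte buffer */

		if (cmd == NULL)
			return -ENOMEM;

		/* ... fill cmd->buffer with a dot command here ... */

		ibmasm_exec_command(sp, cmd);	/* runs now, or is queued */
		ibmasm_wait_for_response(cmd, IBMASM_CMD_TIMEOUT_NORMAL);
		if (cmd->status != IBMASM_CMD_COMPLETE)
			result = -ENODEV;
		command_put(cmd);		/* drop the caller's reference */
		return result;
	}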
diff --git a/kernel/drivers/misc/ibmasm/dot_command.c b/kernel/drivers/misc/ibmasm/dot_command.c new file mode 100644 index 000000000..d7b2ca358 --- /dev/null +++ b/kernel/drivers/misc/ibmasm/dot_command.c @@ -0,0 +1,152 @@ +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + */ + +#include "ibmasm.h" +#include "dot_command.h" + +/** + * Dispatch an incoming message to the specific handler for the message. + * Called from interrupt context. + */ +void ibmasm_receive_message(struct service_processor *sp, void *message, int message_size) +{ + u32 size; + struct dot_command_header *header = (struct dot_command_header *)message; + + if (message_size == 0) + return; + + size = get_dot_command_size(message); + if (size == 0) + return; + + if (size > message_size) + size = message_size; + + switch (header->type) { + case sp_event: + ibmasm_receive_event(sp, message, size); + break; + case sp_command_response: + ibmasm_receive_command_response(sp, message, size); + break; + case sp_heartbeat: + ibmasm_receive_heartbeat(sp, message, size); + break; + default: + dev_err(sp->dev, "Received unknown message from service processor\n"); + } +} + + +#define INIT_BUFFER_SIZE 32 + + +/** + * send the 4.3.5.10 dot command (driver VPD) to the service processor + */ +int ibmasm_send_driver_vpd(struct service_processor *sp) +{ + struct command *command; + struct dot_command_header *header; + u8 *vpd_command; + u8 *vpd_data; + int result = 0; + + command = ibmasm_new_command(sp, INIT_BUFFER_SIZE); + if (command == NULL) + return -ENOMEM; + + header = (struct dot_command_header *)command->buffer; + header->type = sp_write; + header->command_size = 4; + header->data_size = 16; + header->status = 0; + header->reserved = 0; + + vpd_command = command->buffer + sizeof(struct dot_command_header); + vpd_command[0] = 0x4; + vpd_command[1] = 0x3; + vpd_command[2] = 0x5; + vpd_command[3] = 0xa; + + vpd_data = vpd_command + header->command_size; + vpd_data[0] = 0; + strcat(vpd_data, IBMASM_DRIVER_VPD); + vpd_data[10] = 0; + vpd_data[15] = 0; + + ibmasm_exec_command(sp, command); + ibmasm_wait_for_response(command, IBMASM_CMD_TIMEOUT_NORMAL); + + if (command->status != IBMASM_CMD_COMPLETE) + result = -ENODEV; + + command_put(command); + + return result; +} + +struct os_state_command { + struct dot_command_header header; + unsigned char command[3]; + unsigned char data; +}; + +/** + * send the 4.3.6 dot command (os state) to the service processor + * During driver init this function is called with os state "up". + * This causes the service processor to start sending heartbeats to the + * driver. + * During driver exit the function is called with os state "down", + * causing the service processor to stop the heartbeats. 
+ */ +int ibmasm_send_os_state(struct service_processor *sp, int os_state) +{ + struct command *cmd; + struct os_state_command *os_state_cmd; + int result = 0; + + cmd = ibmasm_new_command(sp, sizeof(struct os_state_command)); + if (cmd == NULL) + return -ENOMEM; + + os_state_cmd = (struct os_state_command *)cmd->buffer; + os_state_cmd->header.type = sp_write; + os_state_cmd->header.command_size = 3; + os_state_cmd->header.data_size = 1; + os_state_cmd->header.status = 0; + os_state_cmd->command[0] = 4; + os_state_cmd->command[1] = 3; + os_state_cmd->command[2] = 6; + os_state_cmd->data = os_state; + + ibmasm_exec_command(sp, cmd); + ibmasm_wait_for_response(cmd, IBMASM_CMD_TIMEOUT_NORMAL); + + if (cmd->status != IBMASM_CMD_COMPLETE) + result = -ENODEV; + + command_put(cmd); + return result; +} diff --git a/kernel/drivers/misc/ibmasm/dot_command.h b/kernel/drivers/misc/ibmasm/dot_command.h new file mode 100644 index 000000000..fc9fc9d4e --- /dev/null +++ b/kernel/drivers/misc/ibmasm/dot_command.h @@ -0,0 +1,78 @@ +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + */ + +#ifndef __DOT_COMMAND_H__ +#define __DOT_COMMAND_H__ + +/* + * dot commands are the protocol used to communicate with the service + * processor. + * They consist of a header, a command of variable length, and data of + * variable length. + */
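As a worked illustration of this framing (a sketch; the header struct and type constants referenced here are the ones defined just below): the driver-VPD command built in dot_command.c carries four command bytes and sixteen data bytes, so with the packed 6-byte header, get_dot_command_size() yields 6 + 4 + 16 = 26 bytes.

	/* Hypothetical framing example for the 4.3.5.10 (driver VPD) command. */
	struct dot_command_header vpd_hdr = {
		.type		= sp_write,	/* host-to-SP write            */
		.command_size	= 4,		/* bytes 4, 3, 5, 0xa follow   */
		.data_size	= 16,		/* then 16 bytes of VPD payload */
	};
	/* get_dot_command_size(&vpd_hdr) == 6 + 4 + 16 == 26 bytes */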
+ +/* dot command types */ +#define sp_write 0 +#define sp_write_next 1 +#define sp_read 2 +#define sp_read_next 3 +#define sp_command_response 4 +#define sp_event 5 +#define sp_heartbeat 6 + +#pragma pack(1) +struct dot_command_header { + u8 type; + u8 command_size; + u16 data_size; + u8 status; + u8 reserved; +}; +#pragma pack() + +static inline size_t get_dot_command_size(void *buffer) +{ + struct dot_command_header *cmd = (struct dot_command_header *)buffer; + return sizeof(struct dot_command_header) + cmd->command_size + cmd->data_size; +} + +static inline unsigned int get_dot_command_timeout(void *buffer) +{ + struct dot_command_header *header = (struct dot_command_header *)buffer; + unsigned char *cmd = buffer + sizeof(struct dot_command_header); + + /* dot commands 6.3.1, 7.1 and 8.x need a longer timeout */ + + if (header->command_size == 3) { + if ((cmd[0] == 6) && (cmd[1] == 3) && (cmd[2] == 1)) + return IBMASM_CMD_TIMEOUT_EXTRA; + } else if (header->command_size == 2) { + if ((cmd[0] == 7) && (cmd[1] == 1)) + return IBMASM_CMD_TIMEOUT_EXTRA; + if (cmd[0] == 8) + return IBMASM_CMD_TIMEOUT_EXTRA; + } + return IBMASM_CMD_TIMEOUT_NORMAL; +} + +#endif /* __DOT_COMMAND_H__ */ diff --git a/kernel/drivers/misc/ibmasm/event.c b/kernel/drivers/misc/ibmasm/event.c new file mode 100644 index 000000000..8e540f4e9 --- /dev/null +++ b/kernel/drivers/misc/ibmasm/event.c @@ -0,0 +1,177 @@ + +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + */ + +#include <linux/sched.h> +#include <linux/slab.h> +#include "ibmasm.h" +#include "lowlevel.h" + +/* + * ASM service processor event handling routines. + * + * Events are signalled to the device drivers through interrupts. + * They have the format of dot commands, with the type field set to + * sp_event. + * The driver does not interpret the events, it simply stores them in a + * circular buffer. + */ + +static void wake_up_event_readers(struct service_processor *sp) +{ + struct event_reader *reader; + + list_for_each_entry(reader, &sp->event_buffer->readers, node) + wake_up_interruptible(&reader->wait); +}
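A worked example of this lossy-ring behavior (illustrative numbers; IBMASM_NUM_EVENTS is defined in ibmasm.h):

	/* If the buffer holds IBMASM_NUM_EVENTS == N slots and the writer's
	 * next serial number is s, only events s - N .. s - 1 survive. A
	 * reader still waiting on an older serial has been lapped; the
	 * catch-up loop in ibmasm_get_next_event() below starts at
	 * buffer->next_index, which (once the buffer has wrapped) holds the
	 * oldest surviving event, and delivers that one (serial s - N). */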
+ +/** + * receive_event + * Called by the interrupt handler when a dot command of type sp_event is + * received. + * Store the event in the circular event buffer, wake up any sleeping + * event readers. + * There is no reader marker in the buffer, therefore readers are + * responsible for keeping up with the writer, or they will lose events. + */ +void ibmasm_receive_event(struct service_processor *sp, void *data, unsigned int data_size) +{ + struct event_buffer *buffer = sp->event_buffer; + struct ibmasm_event *event; + unsigned long flags; + + data_size = min(data_size, IBMASM_EVENT_MAX_SIZE); + + spin_lock_irqsave(&sp->lock, flags); + /* copy the event into the next slot in the circular buffer */ + event = &buffer->events[buffer->next_index]; + memcpy_fromio(event->data, data, data_size); + event->data_size = data_size; + event->serial_number = buffer->next_serial_number; + + /* advance indices in the buffer */ + buffer->next_index = (buffer->next_index + 1) % IBMASM_NUM_EVENTS; + buffer->next_serial_number++; + spin_unlock_irqrestore(&sp->lock, flags); + + wake_up_event_readers(sp); +} + +static inline int event_available(struct event_buffer *b, struct event_reader *r) +{ + return (r->next_serial_number < b->next_serial_number); +} + +/** + * get_next_event + * Called by event readers (initiated from user space through the file + * system). + * Sleeps until a new event is available. + */ +int ibmasm_get_next_event(struct service_processor *sp, struct event_reader *reader) +{ + struct event_buffer *buffer = sp->event_buffer; + struct ibmasm_event *event; + unsigned int index; + unsigned long flags; + + reader->cancelled = 0; + + if (wait_event_interruptible(reader->wait, + event_available(buffer, reader) || reader->cancelled)) + return -ERESTARTSYS; + + if (!event_available(buffer, reader)) + return 0; + + spin_lock_irqsave(&sp->lock, flags); + + index = buffer->next_index; + event = &buffer->events[index]; + while (event->serial_number < reader->next_serial_number) { + index = (index + 1) % IBMASM_NUM_EVENTS; + event = &buffer->events[index]; + } + memcpy(reader->data, event->data, event->data_size); + reader->data_size = event->data_size; + reader->next_serial_number = event->serial_number + 1; + + spin_unlock_irqrestore(&sp->lock, flags); + + return event->data_size; +} + +void ibmasm_cancel_next_event(struct event_reader *reader) +{ + reader->cancelled = 1; + wake_up_interruptible(&reader->wait); +} + +void ibmasm_event_reader_register(struct service_processor *sp, struct event_reader *reader) +{ + unsigned long flags; + + reader->next_serial_number = sp->event_buffer->next_serial_number; + init_waitqueue_head(&reader->wait); + spin_lock_irqsave(&sp->lock, flags); + list_add(&reader->node, &sp->event_buffer->readers); + spin_unlock_irqrestore(&sp->lock, flags); +} + +void ibmasm_event_reader_unregister(struct service_processor *sp, struct event_reader *reader) +{ + unsigned long flags; + + spin_lock_irqsave(&sp->lock, flags); + list_del(&reader->node); + spin_unlock_irqrestore(&sp->lock, flags); +} + +int ibmasm_event_buffer_init(struct service_processor *sp) +{ + struct event_buffer *buffer; + struct ibmasm_event *event; + int i; + + buffer = kmalloc(sizeof(struct event_buffer), GFP_KERNEL); + if (!buffer) + return 1; + + buffer->next_index = 0; + buffer->next_serial_number = 1; + + event = buffer->events; + for (i = 0; i < IBMASM_NUM_EVENTS; i++, event++) + event->serial_number = 0; + + INIT_LIST_HEAD(&buffer->readers); + + sp->event_buffer = buffer; + + return 0; +} + +void ibmasm_event_buffer_exit(struct service_processor *sp) +{ + kfree(sp->event_buffer); +} diff --git a/kernel/drivers/misc/ibmasm/heartbeat.c b/kernel/drivers/misc/ibmasm/heartbeat.c new file mode 100644 index 000000000..90746378f --- /dev/null +++ b/kernel/drivers/misc/ibmasm/heartbeat.c @@ -0,0 +1,101 @@ + +/* + * IBM ASM Service Processor Device 
Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + */ + +#include <linux/notifier.h> +#include "ibmasm.h" +#include "dot_command.h" +#include "lowlevel.h" + +static int suspend_heartbeats = 0; + +/* + * Once the driver indicates to the service processor that it is running + * - see send_os_state() - the service processor sends periodic heartbeats + * to the driver. The driver must respond to the heartbeats or else the OS + * will be rebooted. + * In the case of a panic the interrupt handler continues to work and thus + * continues to respond to heartbeats, making the service processor believe + * the OS is still running and thus preventing a reboot. + * To prevent this from happening a callback is added to the panic_notifier_list. + * Before responding to a heartbeat the driver checks if a panic has happened; + * if so, it suspends heartbeats, causing the service processor to reboot as + * expected. + */ +static int panic_happened(struct notifier_block *n, unsigned long val, void *v) +{ + suspend_heartbeats = 1; + return 0; +} + +static struct notifier_block panic_notifier = { panic_happened, NULL, 1 }; + +void ibmasm_register_panic_notifier(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, &panic_notifier); +} + +void ibmasm_unregister_panic_notifier(void) +{ + atomic_notifier_chain_unregister(&panic_notifier_list, + &panic_notifier); +} + + +int ibmasm_heartbeat_init(struct service_processor *sp) +{ + sp->heartbeat = ibmasm_new_command(sp, HEARTBEAT_BUFFER_SIZE); + if (sp->heartbeat == NULL) + return -ENOMEM; + + return 0; +} + +void ibmasm_heartbeat_exit(struct service_processor *sp) +{ + char tsbuf[32]; + + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); + ibmasm_wait_for_response(sp->heartbeat, IBMASM_CMD_TIMEOUT_NORMAL); + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); + suspend_heartbeats = 1; + command_put(sp->heartbeat); +} + +void ibmasm_receive_heartbeat(struct service_processor *sp, void *message, size_t size) +{ + struct command *cmd = sp->heartbeat; + struct dot_command_header *header = (struct dot_command_header *)cmd->buffer; + char tsbuf[32]; + + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); + if (suspend_heartbeats) + return; + + /* return the received dot command to sender */ + cmd->status = IBMASM_CMD_PENDING; + size = min(size, cmd->buffer_size); + memcpy_fromio(cmd->buffer, message, size); + header->type = sp_write; + ibmasm_exec_command(sp, cmd); +} diff --git a/kernel/drivers/misc/ibmasm/i2o.h b/kernel/drivers/misc/ibmasm/i2o.h new file mode 100644 index 000000000..2e9566dab --- /dev/null +++ b/kernel/drivers/misc/ibmasm/i2o.h @@ -0,0 +1,77 @@ +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can 
redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + */ + +#pragma pack(1) +struct i2o_header { + u8 version; + u8 message_flags; + u16 message_size; + u8 target; + u8 initiator_and_target; + u8 initiator; + u8 function; + u32 initiator_context; +}; +#pragma pack() + +#define I2O_HEADER_TEMPLATE \ + { .version = 0x01, \ + .message_flags = 0x00, \ + .function = 0xFF, \ + .initiator = 0x00, \ + .initiator_and_target = 0x40, \ + .target = 0x00, \ + .initiator_context = 0x0 } + +#define I2O_MESSAGE_SIZE 0x1000 +#define I2O_COMMAND_SIZE (I2O_MESSAGE_SIZE - sizeof(struct i2o_header)) + +#pragma pack(1) +struct i2o_message { + struct i2o_header header; + void *data; +}; +#pragma pack() + +static inline unsigned short outgoing_message_size(unsigned int data_size) +{ + unsigned int size; + unsigned short i2o_size; + + if (data_size > I2O_COMMAND_SIZE) + data_size = I2O_COMMAND_SIZE; + + size = sizeof(struct i2o_header) + data_size; + + i2o_size = size / sizeof(u32); + + if (size % sizeof(u32)) + i2o_size++; + + return i2o_size; +} + +static inline u32 incoming_data_size(struct i2o_message *i2o_message) +{ + return (sizeof(u32) * i2o_message->header.message_size); +} diff --git a/kernel/drivers/misc/ibmasm/ibmasm.h b/kernel/drivers/misc/ibmasm/ibmasm.h new file mode 100644 index 000000000..9b0834488 --- /dev/null +++ b/kernel/drivers/misc/ibmasm/ibmasm.h @@ -0,0 +1,220 @@ + +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/list.h> +#include <linux/wait.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/kref.h> +#include <linux/device.h> +#include <linux/input.h> + +/* Driver identification */ +#define DRIVER_NAME "ibmasm" +#define DRIVER_VERSION "1.0" +#define DRIVER_AUTHOR "Max Asbock <masbock@us.ibm.com>, Vernon Mauery <vernux@us.ibm.com>" +#define DRIVER_DESC "IBM ASM Service Processor Driver" + +#define err(msg) printk(KERN_ERR "%s: " msg "\n", DRIVER_NAME) +#define info(msg) printk(KERN_INFO "%s: " msg "\n", DRIVER_NAME) + +extern int ibmasm_debug; +#define dbg(STR, ARGS...) \ + do { \ + if (ibmasm_debug) \ + printk(KERN_DEBUG STR , ##ARGS); \ + } while (0) + +static inline char *get_timestamp(char *buf) +{ + struct timeval now; + do_gettimeofday(&now); + sprintf(buf, "%lu.%lu", now.tv_sec, now.tv_usec); + return buf; +} + +#define IBMASM_CMD_PENDING 0 +#define IBMASM_CMD_COMPLETE 1 +#define IBMASM_CMD_FAILED 2 + +#define IBMASM_CMD_TIMEOUT_NORMAL 45 +#define IBMASM_CMD_TIMEOUT_EXTRA 240 + +#define IBMASM_CMD_MAX_BUFFER_SIZE 0x8000 + +#define REVERSE_HEARTBEAT_TIMEOUT 120 + +#define HEARTBEAT_BUFFER_SIZE 0x400 + +#ifdef IA64 +#define IBMASM_DRIVER_VPD "Lin64 6.08 " +#else +#define IBMASM_DRIVER_VPD "Lin32 6.08 " +#endif + +#define SYSTEM_STATE_OS_UP 5 +#define SYSTEM_STATE_OS_DOWN 4 + +#define IBMASM_NAME_SIZE 16 + +#define IBMASM_NUM_EVENTS 10 +#define IBMASM_EVENT_MAX_SIZE 2048u + + +struct command { + struct list_head queue_node; + wait_queue_head_t wait; + unsigned char *buffer; + size_t buffer_size; + int status; + struct kref kref; + spinlock_t *lock; +}; +#define to_command(c) container_of(c, struct command, kref) + +void ibmasm_free_command(struct kref *kref); +static inline void command_put(struct command *cmd) +{ + unsigned long flags; + spinlock_t *lock = cmd->lock; + + spin_lock_irqsave(lock, flags); + kref_put(&cmd->kref, ibmasm_free_command); + spin_unlock_irqrestore(lock, flags); +} + +static inline void command_get(struct command *cmd) +{ + kref_get(&cmd->kref); +} + + +struct ibmasm_event { + unsigned int serial_number; + unsigned int data_size; + unsigned char data[IBMASM_EVENT_MAX_SIZE]; +}; + +struct event_buffer { + struct ibmasm_event events[IBMASM_NUM_EVENTS]; + unsigned int next_serial_number; + unsigned int next_index; + struct list_head readers; +}; + +struct event_reader { + int cancelled; + unsigned int next_serial_number; + wait_queue_head_t wait; + struct list_head node; + unsigned int data_size; + unsigned char data[IBMASM_EVENT_MAX_SIZE]; +}; + +struct reverse_heartbeat { + wait_queue_head_t wait; + unsigned int stopped; +}; + +struct ibmasm_remote { + struct input_dev *keybd_dev; + struct input_dev *mouse_dev; +}; + +struct service_processor { + struct list_head node; + spinlock_t lock; + void __iomem *base_address; + unsigned int irq; + struct command *current_command; + struct command *heartbeat; + struct list_head command_queue; + struct event_buffer *event_buffer; + char dirname[IBMASM_NAME_SIZE]; + char devname[IBMASM_NAME_SIZE]; + unsigned int number; + struct ibmasm_remote remote; + int serial_line; + struct device *dev; +}; + +/* command processing */ +struct command *ibmasm_new_command(struct service_processor *sp, size_t buffer_size); +void ibmasm_exec_command(struct service_processor *sp, 
struct command *cmd); +void ibmasm_wait_for_response(struct command *cmd, int timeout); +void ibmasm_receive_command_response(struct service_processor *sp, void *response, size_t size); + +/* event processing */ +int ibmasm_event_buffer_init(struct service_processor *sp); +void ibmasm_event_buffer_exit(struct service_processor *sp); +void ibmasm_receive_event(struct service_processor *sp, void *data, unsigned int data_size); +void ibmasm_event_reader_register(struct service_processor *sp, struct event_reader *reader); +void ibmasm_event_reader_unregister(struct service_processor *sp, struct event_reader *reader); +int ibmasm_get_next_event(struct service_processor *sp, struct event_reader *reader); +void ibmasm_cancel_next_event(struct event_reader *reader); + +/* heartbeat - from SP to OS */ +void ibmasm_register_panic_notifier(void); +void ibmasm_unregister_panic_notifier(void); +int ibmasm_heartbeat_init(struct service_processor *sp); +void ibmasm_heartbeat_exit(struct service_processor *sp); +void ibmasm_receive_heartbeat(struct service_processor *sp, void *message, size_t size); + +/* reverse heartbeat - from OS to SP */ +void ibmasm_init_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb); +int ibmasm_start_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb); +void ibmasm_stop_reverse_heartbeat(struct reverse_heartbeat *rhb); + +/* dot commands */ +void ibmasm_receive_message(struct service_processor *sp, void *data, int data_size); +int ibmasm_send_driver_vpd(struct service_processor *sp); +int ibmasm_send_os_state(struct service_processor *sp, int os_state); + +/* low level message processing */ +int ibmasm_send_i2o_message(struct service_processor *sp); +irqreturn_t ibmasm_interrupt_handler(int irq, void * dev_id); + +/* remote console */ +void ibmasm_handle_mouse_interrupt(struct service_processor *sp); +int ibmasm_init_remote_input_dev(struct service_processor *sp); +void ibmasm_free_remote_input_dev(struct service_processor *sp); + +/* file system */ +int ibmasmfs_register(void); +void ibmasmfs_unregister(void); +void ibmasmfs_add_sp(struct service_processor *sp); + +/* uart */ +#ifdef CONFIG_SERIAL_8250 +void ibmasm_register_uart(struct service_processor *sp); +void ibmasm_unregister_uart(struct service_processor *sp); +#else +#define ibmasm_register_uart(sp) do { } while(0) +#define ibmasm_unregister_uart(sp) do { } while(0) +#endif diff --git a/kernel/drivers/misc/ibmasm/ibmasmfs.c b/kernel/drivers/misc/ibmasm/ibmasmfs.c new file mode 100644 index 000000000..e8b933111 --- /dev/null +++ b/kernel/drivers/misc/ibmasm/ibmasmfs.c @@ -0,0 +1,629 @@ +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + */ + +/* + * Parts of this code are based on an article by Jonathan Corbet + * that appeared in Linux Weekly News. + */ + + +/* + * The IBMASM file virtual filesystem. It creates the following hierarchy + * dynamically when mounted from user space: + * + * /ibmasm + * |-- 0 + * | |-- command + * | |-- event + * | |-- reverse_heartbeat + * | `-- remote_video + * | |-- depth + * | |-- height + * | `-- width + * . + * . + * . + * `-- n + * |-- command + * |-- event + * |-- reverse_heartbeat + * `-- remote_video + * |-- depth + * |-- height + * `-- width + * + * For each service processor the following files are created: + * + * command: execute dot commands + * write: execute a dot command on the service processor + * read: return the result of a previously executed dot command + * + * event: listen for service processor events + * read: sleep (interruptible) until an event occurs + * write: wake up a sleeping event listener + * + * reverse_heartbeat: send a heartbeat to the service processor + * read: sleep (interruptible) until the reverse heartbeat fails + * write: wake up a sleeping heartbeat listener + * + * remote_video/width + * remote_video/height + * remote_video/depth: control remote display settings + * write: set value + * read: read value + */ + +#include <linux/fs.h> +#include <linux/pagemap.h> +#include <linux/slab.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include "ibmasm.h" +#include "remote.h" +#include "dot_command.h" + +#define IBMASMFS_MAGIC 0x66726f67 + +static LIST_HEAD(service_processors); + +static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode); +static void ibmasmfs_create_files (struct super_block *sb); +static int ibmasmfs_fill_super (struct super_block *sb, void *data, int silent); + + +static struct dentry *ibmasmfs_mount(struct file_system_type *fst, + int flags, const char *name, void *data) +{ + return mount_single(fst, flags, data, ibmasmfs_fill_super); +} + +static const struct super_operations ibmasmfs_s_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, +}; + +static const struct file_operations *ibmasmfs_dir_ops = &simple_dir_operations; + +static struct file_system_type ibmasmfs_type = { + .owner = THIS_MODULE, + .name = "ibmasmfs", + .mount = ibmasmfs_mount, + .kill_sb = kill_litter_super, +}; +MODULE_ALIAS_FS("ibmasmfs"); + +static int ibmasmfs_fill_super (struct super_block *sb, void *data, int silent) +{ + struct inode *root; + + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = IBMASMFS_MAGIC; + sb->s_op = &ibmasmfs_s_ops; + sb->s_time_gran = 1; + + root = ibmasmfs_make_inode (sb, S_IFDIR | 0500); + if (!root) + return -ENOMEM; + + root->i_op = &simple_dir_inode_operations; + root->i_fop = ibmasmfs_dir_ops; + + sb->s_root = d_make_root(root); + if (!sb->s_root) + return -ENOMEM; + + ibmasmfs_create_files(sb); + return 0; +} + +static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode) +{ + struct inode *ret = new_inode(sb); + + if (ret) { + ret->i_ino = get_next_ino(); + ret->i_mode = mode; + ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; + } + return ret; +} + +static struct dentry *ibmasmfs_create_file(struct dentry *parent, + const char *name, + const struct file_operations *fops, + void *data, + int mode) +{ + struct dentry *dentry; + struct inode *inode; + + dentry = d_alloc_name(parent, name); + if (!dentry) + return NULL; + + inode 
= ibmasmfs_make_inode(parent->d_sb, S_IFREG | mode); + if (!inode) { + dput(dentry); + return NULL; + } + + inode->i_fop = fops; + inode->i_private = data; + + d_add(dentry, inode); + return dentry; +} + +static struct dentry *ibmasmfs_create_dir(struct dentry *parent, + const char *name) +{ + struct dentry *dentry; + struct inode *inode; + + dentry = d_alloc_name(parent, name); + if (!dentry) + return NULL; + + inode = ibmasmfs_make_inode(parent->d_sb, S_IFDIR | 0500); + if (!inode) { + dput(dentry); + return NULL; + } + + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = ibmasmfs_dir_ops; + + d_add(dentry, inode); + return dentry; +} + +int ibmasmfs_register(void) +{ + return register_filesystem(&ibmasmfs_type); +} + +void ibmasmfs_unregister(void) +{ + unregister_filesystem(&ibmasmfs_type); +} + +void ibmasmfs_add_sp(struct service_processor *sp) +{ + list_add(&sp->node, &service_processors); +} + +/* struct to save state between command file operations */ +struct ibmasmfs_command_data { + struct service_processor *sp; + struct command *command; +}; + +/* struct to save state between event file operations */ +struct ibmasmfs_event_data { + struct service_processor *sp; + struct event_reader reader; + int active; +}; + +/* struct to save state between reverse heartbeat file operations */ +struct ibmasmfs_heartbeat_data { + struct service_processor *sp; + struct reverse_heartbeat heartbeat; + int active; +}; + +static int command_file_open(struct inode *inode, struct file *file) +{ + struct ibmasmfs_command_data *command_data; + + if (!inode->i_private) + return -ENODEV; + + command_data = kmalloc(sizeof(struct ibmasmfs_command_data), GFP_KERNEL); + if (!command_data) + return -ENOMEM; + + command_data->command = NULL; + command_data->sp = inode->i_private; + file->private_data = command_data; + return 0; +} + +static int command_file_close(struct inode *inode, struct file *file) +{ + struct ibmasmfs_command_data *command_data = file->private_data; + + if (command_data->command) + command_put(command_data->command); + + kfree(command_data); + return 0; +} + +static ssize_t command_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + struct ibmasmfs_command_data *command_data = file->private_data; + struct command *cmd; + int len; + unsigned long flags; + + if (*offset < 0) + return -EINVAL; + if (count == 0 || count > IBMASM_CMD_MAX_BUFFER_SIZE) + return 0; + if (*offset != 0) + return 0; + + spin_lock_irqsave(&command_data->sp->lock, flags); + cmd = command_data->command; + if (cmd == NULL) { + spin_unlock_irqrestore(&command_data->sp->lock, flags); + return 0; + } + command_data->command = NULL; + spin_unlock_irqrestore(&command_data->sp->lock, flags); + + if (cmd->status != IBMASM_CMD_COMPLETE) { + command_put(cmd); + return -EIO; + } + len = min(count, cmd->buffer_size); + if (copy_to_user(buf, cmd->buffer, len)) { + command_put(cmd); + return -EFAULT; + } + command_put(cmd); + + return len; +} + +static ssize_t command_file_write(struct file *file, const char __user *ubuff, size_t count, loff_t *offset) +{ + struct ibmasmfs_command_data *command_data = file->private_data; + struct command *cmd; + unsigned long flags; + + if (*offset < 0) + return -EINVAL; + if (count == 0 || count > IBMASM_CMD_MAX_BUFFER_SIZE) + return 0; + if (*offset != 0) + return 0; + + /* commands are executed sequentially, only one command at a time */ + if (command_data->command) + return -EAGAIN; + + cmd = ibmasm_new_command(command_data->sp, count); + if (!cmd) + 
return -ENOMEM; + + if (copy_from_user(cmd->buffer, ubuff, count)) { + command_put(cmd); + return -EFAULT; + } + + spin_lock_irqsave(&command_data->sp->lock, flags); + if (command_data->command) { + spin_unlock_irqrestore(&command_data->sp->lock, flags); + command_put(cmd); + return -EAGAIN; + } + command_data->command = cmd; + spin_unlock_irqrestore(&command_data->sp->lock, flags); + + ibmasm_exec_command(command_data->sp, cmd); + ibmasm_wait_for_response(cmd, get_dot_command_timeout(cmd->buffer)); + + return count; +} + +static int event_file_open(struct inode *inode, struct file *file) +{ + struct ibmasmfs_event_data *event_data; + struct service_processor *sp; + + if (!inode->i_private) + return -ENODEV; + + sp = inode->i_private; + + event_data = kmalloc(sizeof(struct ibmasmfs_event_data), GFP_KERNEL); + if (!event_data) + return -ENOMEM; + + ibmasm_event_reader_register(sp, &event_data->reader); + + event_data->sp = sp; + event_data->active = 0; + file->private_data = event_data; + return 0; +} + +static int event_file_close(struct inode *inode, struct file *file) +{ + struct ibmasmfs_event_data *event_data = file->private_data; + + ibmasm_event_reader_unregister(event_data->sp, &event_data->reader); + kfree(event_data); + return 0; +} + +static ssize_t event_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + struct ibmasmfs_event_data *event_data = file->private_data; + struct event_reader *reader = &event_data->reader; + struct service_processor *sp = event_data->sp; + int ret; + unsigned long flags; + + if (*offset < 0) + return -EINVAL; + if (count == 0 || count > IBMASM_EVENT_MAX_SIZE) + return 0; + if (*offset != 0) + return 0; + + spin_lock_irqsave(&sp->lock, flags); + if (event_data->active) { + spin_unlock_irqrestore(&sp->lock, flags); + return -EBUSY; + } + event_data->active = 1; + spin_unlock_irqrestore(&sp->lock, flags); + + ret = ibmasm_get_next_event(sp, reader); + if (ret <= 0) + goto out; + + if (count < reader->data_size) { + ret = -EINVAL; + goto out; + } + + if (copy_to_user(buf, reader->data, reader->data_size)) { + ret = -EFAULT; + goto out; + } + ret = reader->data_size; + +out: + event_data->active = 0; + return ret; +} + +static ssize_t event_file_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) +{ + struct ibmasmfs_event_data *event_data = file->private_data; + + if (*offset < 0) + return -EINVAL; + if (count != 1) + return 0; + if (*offset != 0) + return 0; + + ibmasm_cancel_next_event(&event_data->reader); + return 0; +} + +static int r_heartbeat_file_open(struct inode *inode, struct file *file) +{ + struct ibmasmfs_heartbeat_data *rhbeat; + + if (!inode->i_private) + return -ENODEV; + + rhbeat = kmalloc(sizeof(struct ibmasmfs_heartbeat_data), GFP_KERNEL); + if (!rhbeat) + return -ENOMEM; + + rhbeat->sp = inode->i_private; + rhbeat->active = 0; + ibmasm_init_reverse_heartbeat(rhbeat->sp, &rhbeat->heartbeat); + file->private_data = rhbeat; + return 0; +} + +static int r_heartbeat_file_close(struct inode *inode, struct file *file) +{ + struct ibmasmfs_heartbeat_data *rhbeat = file->private_data; + + kfree(rhbeat); + return 0; +} + +static ssize_t r_heartbeat_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + struct ibmasmfs_heartbeat_data *rhbeat = file->private_data; + unsigned long flags; + int result; + + if (*offset < 0) + return -EINVAL; + if (count == 0 || count > 1024) + return 0; + if (*offset != 0) + return 0; + + /* allow only one reverse heartbeat per 
process */ + spin_lock_irqsave(&rhbeat->sp->lock, flags); + if (rhbeat->active) { + spin_unlock_irqrestore(&rhbeat->sp->lock, flags); + return -EBUSY; + } + rhbeat->active = 1; + spin_unlock_irqrestore(&rhbeat->sp->lock, flags); + + result = ibmasm_start_reverse_heartbeat(rhbeat->sp, &rhbeat->heartbeat); + rhbeat->active = 0; + + return result; +} + +static ssize_t r_heartbeat_file_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) +{ + struct ibmasmfs_heartbeat_data *rhbeat = file->private_data; + + if (*offset < 0) + return -EINVAL; + if (count != 1) + return 0; + if (*offset != 0) + return 0; + + if (rhbeat->active) + ibmasm_stop_reverse_heartbeat(&rhbeat->heartbeat); + + return 1; +} + +static int remote_settings_file_close(struct inode *inode, struct file *file) +{ + return 0; +} + +static ssize_t remote_settings_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + void __iomem *address = (void __iomem *)file->private_data; + unsigned char *page; + int retval; + int len = 0; + unsigned int value; + + if (*offset < 0) + return -EINVAL; + if (count == 0 || count > 1024) + return 0; + if (*offset != 0) + return 0; + + page = (unsigned char *)__get_free_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + value = readl(address); + len = sprintf(page, "%d\n", value); + + if (copy_to_user(buf, page, len)) { + retval = -EFAULT; + goto exit; + } + *offset += len; + retval = len; + +exit: + free_page((unsigned long)page); + return retval; +} + +static ssize_t remote_settings_file_write(struct file *file, const char __user *ubuff, size_t count, loff_t *offset) +{ + void __iomem *address = (void __iomem *)file->private_data; + char *buff; + unsigned int value; + + if (*offset < 0) + return -EINVAL; + if (count == 0 || count > 1024) + return 0; + if (*offset != 0) + return 0; + + buff = kzalloc (count + 1, GFP_KERNEL); + if (!buff) + return -ENOMEM; + + + if (copy_from_user(buff, ubuff, count)) { + kfree(buff); + return -EFAULT; + } + + value = simple_strtoul(buff, NULL, 10); + writel(value, address); + kfree(buff); + + return count; +} + +static const struct file_operations command_fops = { + .open = command_file_open, + .release = command_file_close, + .read = command_file_read, + .write = command_file_write, + .llseek = generic_file_llseek, +}; + +static const struct file_operations event_fops = { + .open = event_file_open, + .release = event_file_close, + .read = event_file_read, + .write = event_file_write, + .llseek = generic_file_llseek, +}; + +static const struct file_operations r_heartbeat_fops = { + .open = r_heartbeat_file_open, + .release = r_heartbeat_file_close, + .read = r_heartbeat_file_read, + .write = r_heartbeat_file_write, + .llseek = generic_file_llseek, +}; + +static const struct file_operations remote_settings_fops = { + .open = simple_open, + .release = remote_settings_file_close, + .read = remote_settings_file_read, + .write = remote_settings_file_write, + .llseek = generic_file_llseek, +}; + + +static void ibmasmfs_create_files (struct super_block *sb) +{ + struct list_head *entry; + struct service_processor *sp; + + list_for_each(entry, &service_processors) { + struct dentry *dir; + struct dentry *remote_dir; + sp = list_entry(entry, struct service_processor, node); + dir = ibmasmfs_create_dir(sb->s_root, sp->dirname); + if (!dir) + continue; + + ibmasmfs_create_file(dir, "command", &command_fops, sp, S_IRUSR|S_IWUSR); + ibmasmfs_create_file(dir, "event", &event_fops, sp, S_IRUSR|S_IWUSR); + 
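/* editorial note (reviewer addition): besides the command and event files created above, each service processor directory gets a reverse_heartbeat file and a remote_video directory whose width, height and depth files are backed directly by display registers in the service processor's MMIO space (see remote.h) */ +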
ibmasmfs_create_file(dir, "reverse_heartbeat", &r_heartbeat_fops, sp, S_IRUSR|S_IWUSR); + + remote_dir = ibmasmfs_create_dir(dir, "remote_video"); + if (!remote_dir) + continue; + + ibmasmfs_create_file(remote_dir, "width", &remote_settings_fops, (void *)display_width(sp), S_IRUSR|S_IWUSR); + ibmasmfs_create_file(remote_dir, "height", &remote_settings_fops, (void *)display_height(sp), S_IRUSR|S_IWUSR); + ibmasmfs_create_file(remote_dir, "depth", &remote_settings_fops, (void *)display_depth(sp), S_IRUSR|S_IWUSR); + } +} diff --git a/kernel/drivers/misc/ibmasm/lowlevel.c b/kernel/drivers/misc/ibmasm/lowlevel.c new file mode 100644 index 000000000..5319ea261 --- /dev/null +++ b/kernel/drivers/misc/ibmasm/lowlevel.c @@ -0,0 +1,85 @@ +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + */ + +#include "ibmasm.h" +#include "lowlevel.h" +#include "i2o.h" +#include "dot_command.h" +#include "remote.h" + +static struct i2o_header header = I2O_HEADER_TEMPLATE; + + +int ibmasm_send_i2o_message(struct service_processor *sp) +{ + u32 mfa; + unsigned int command_size; + struct i2o_message *message; + struct command *command = sp->current_command; + + mfa = get_mfa_inbound(sp->base_address); + if (!mfa) + return 1; + + command_size = get_dot_command_size(command->buffer); + header.message_size = outgoing_message_size(command_size); + + message = get_i2o_message(sp->base_address, mfa); + + memcpy_toio(&message->header, &header, sizeof(struct i2o_header)); + memcpy_toio(&message->data, command->buffer, command_size); + + set_mfa_inbound(sp->base_address, mfa); + + return 0; +} + +irqreturn_t ibmasm_interrupt_handler(int irq, void * dev_id) +{ + u32 mfa; + struct service_processor *sp = (struct service_processor *)dev_id; + void __iomem *base_address = sp->base_address; + char tsbuf[32]; + + if (!sp_interrupt_pending(base_address)) + return IRQ_NONE; + + dbg("respond to interrupt at %s\n", get_timestamp(tsbuf)); + + if (mouse_interrupt_pending(sp)) { + ibmasm_handle_mouse_interrupt(sp); + clear_mouse_interrupt(sp); + } + + mfa = get_mfa_outbound(base_address); + if (valid_mfa(mfa)) { + struct i2o_message *msg = get_i2o_message(base_address, mfa); + ibmasm_receive_message(sp, &msg->data, incoming_data_size(msg)); + } else + dbg("didn't get a valid MFA\n"); + + set_mfa_outbound(base_address, mfa); + dbg("finished interrupt at %s\n", get_timestamp(tsbuf)); + + return IRQ_HANDLED; +} diff --git a/kernel/drivers/misc/ibmasm/lowlevel.h b/kernel/drivers/misc/ibmasm/lowlevel.h new file mode 100644 index 000000000..e97848f51 --- /dev/null +++ b/kernel/drivers/misc/ibmasm/lowlevel.h @@ -0,0 +1,137 @@ +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it 
and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + */ + +/* Condor service processor specific hardware definitions */ + +#ifndef __IBMASM_CONDOR_H__ +#define __IBMASM_CONDOR_H__ + +#include <asm/io.h> + +#define VENDORID_IBM 0x1014 +#define DEVICEID_RSA 0x010F + +#define GET_MFA_ADDR(x) (x & 0xFFFFFF00) + +#define MAILBOX_FULL(x) (x & 0x00000001) + +#define NO_MFAS_AVAILABLE 0xFFFFFFFF + + +#define INBOUND_QUEUE_PORT 0x40 /* contains address of next free MFA */ +#define OUTBOUND_QUEUE_PORT 0x44 /* contains address of posted MFA */ + +#define SP_INTR_MASK 0x00000008 +#define UART_INTR_MASK 0x00000010 + +#define INTR_STATUS_REGISTER 0x13A0 +#define INTR_CONTROL_REGISTER 0x13A4 + +#define SCOUT_COM_A_BASE 0x0000 +#define SCOUT_COM_B_BASE 0x0100 +#define SCOUT_COM_C_BASE 0x0200 +#define SCOUT_COM_D_BASE 0x0300 + +static inline int sp_interrupt_pending(void __iomem *base_address) +{ + return SP_INTR_MASK & readl(base_address + INTR_STATUS_REGISTER); +} + +static inline int uart_interrupt_pending(void __iomem *base_address) +{ + return UART_INTR_MASK & readl(base_address + INTR_STATUS_REGISTER); +} + +static inline void ibmasm_enable_interrupts(void __iomem *base_address, int mask) +{ + void __iomem *ctrl_reg = base_address + INTR_CONTROL_REGISTER; + writel( readl(ctrl_reg) & ~mask, ctrl_reg); +} + +static inline void ibmasm_disable_interrupts(void __iomem *base_address, int mask) +{ + void __iomem *ctrl_reg = base_address + INTR_CONTROL_REGISTER; + writel( readl(ctrl_reg) | mask, ctrl_reg); +} + +static inline void enable_sp_interrupts(void __iomem *base_address) +{ + ibmasm_enable_interrupts(base_address, SP_INTR_MASK); +} + +static inline void disable_sp_interrupts(void __iomem *base_address) +{ + ibmasm_disable_interrupts(base_address, SP_INTR_MASK); +} + +static inline void enable_uart_interrupts(void __iomem *base_address) +{ + ibmasm_enable_interrupts(base_address, UART_INTR_MASK); +} + +static inline void disable_uart_interrupts(void __iomem *base_address) +{ + ibmasm_disable_interrupts(base_address, UART_INTR_MASK); +} + +#define valid_mfa(mfa) ( (mfa) != NO_MFAS_AVAILABLE ) + +static inline u32 get_mfa_outbound(void __iomem *base_address) +{ + int retry; + u32 mfa; + + for (retry=0; retry<=10; retry++) { + mfa = readl(base_address + OUTBOUND_QUEUE_PORT); + if (valid_mfa(mfa)) + break; + } + return mfa; +} + +static inline void set_mfa_outbound(void __iomem *base_address, u32 mfa) +{ + writel(mfa, base_address + OUTBOUND_QUEUE_PORT); +} + +static inline u32 get_mfa_inbound(void __iomem *base_address) +{ + u32 mfa = readl(base_address + INBOUND_QUEUE_PORT); + + if (MAILBOX_FULL(mfa)) + return 0; + + return mfa; +} + +static inline void set_mfa_inbound(void __iomem *base_address, u32 mfa) +{ + writel(mfa, base_address + INBOUND_QUEUE_PORT); +} + +static inline struct i2o_message 
*get_i2o_message(void __iomem *base_address, u32 mfa) +{ + return (struct i2o_message *)(GET_MFA_ADDR(mfa) + base_address); +} + +#endif /* __IBMASM_CONDOR_H__ */ diff --git a/kernel/drivers/misc/ibmasm/module.c b/kernel/drivers/misc/ibmasm/module.c new file mode 100644 index 000000000..6b3bf9ab0 --- /dev/null +++ b/kernel/drivers/misc/ibmasm/module.c @@ -0,0 +1,235 @@ + +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + * This driver is based on code originally written by Pete Reynolds + * and others. + * + */ + +/* + * The ASM device driver does the following things: + * + * 1) When loaded it sends a message to the service processor, + * indicating that an OS is running. This causes the service processor + * to send periodic heartbeats to the OS. + * + * 2) Answers the periodic heartbeats sent by the service processor. + * Failure to do so would result in a system reboot. + * + * 3) Acts as a pass-through for dot commands sent from user applications. + * The interface for this is the ibmasmfs file system. + * + * 4) Allows user applications to register for event notification. Events + * are sent to the driver through interrupts. They can be read from user + * space through the ibmasmfs file system. + * + * 5) Allows user space applications to send heartbeats to the service + * processor (aka reverse heartbeats). Again this happens through ibmasmfs. + * + * 6) Handles remote mouse and keyboard event interrupts and makes them + * available to user applications through ibmasmfs. 
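+ *
+ * Editorial aside (reviewer sketch, not part of the original comment):
+ * points 3) to 6) are all reached through the same ibmasmfs mount.
+ * Assuming a single RSA card and a mount point of /ibmasm, user space
+ * sees:
+ *
+ *	mount -t ibmasmfs none /ibmasm
+ *	/ibmasm/0/command		- write a dot command, read the response
+ *	/ibmasm/0/event			- blocking read for service processor events
+ *	/ibmasm/0/reverse_heartbeat	- blocking read; returns if the SP stops responding
+ *	/ibmasm/0/remote_video/{width,height,depth}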
+ * + */ + +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/slab.h> +#include "ibmasm.h" +#include "lowlevel.h" +#include "remote.h" + +int ibmasm_debug = 0; +module_param(ibmasm_debug, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(ibmasm_debug, "Set debug mode on or off"); + + +static int ibmasm_init_one(struct pci_dev *pdev, const struct pci_device_id *id) +{ + int result; + struct service_processor *sp; + + if ((result = pci_enable_device(pdev))) { + dev_err(&pdev->dev, "Failed to enable PCI device\n"); + return result; + } + if ((result = pci_request_regions(pdev, DRIVER_NAME))) { + dev_err(&pdev->dev, "Failed to allocate PCI resources\n"); + goto error_resources; + } + /* vnc client won't work without bus-mastering */ + pci_set_master(pdev); + + sp = kzalloc(sizeof(struct service_processor), GFP_KERNEL); + if (sp == NULL) { + dev_err(&pdev->dev, "Failed to allocate memory\n"); + result = -ENOMEM; + goto error_kmalloc; + } + + spin_lock_init(&sp->lock); + INIT_LIST_HEAD(&sp->command_queue); + + pci_set_drvdata(pdev, (void *)sp); + sp->dev = &pdev->dev; + sp->number = pdev->bus->number; + snprintf(sp->dirname, IBMASM_NAME_SIZE, "%d", sp->number); + snprintf(sp->devname, IBMASM_NAME_SIZE, "%s%d", DRIVER_NAME, sp->number); + + if (ibmasm_event_buffer_init(sp)) { + dev_err(sp->dev, "Failed to allocate event buffer\n"); + goto error_eventbuffer; + } + + if (ibmasm_heartbeat_init(sp)) { + dev_err(sp->dev, "Failed to allocate heartbeat command\n"); + goto error_heartbeat; + } + + sp->irq = pdev->irq; + sp->base_address = pci_ioremap_bar(pdev, 0); + if (!sp->base_address) { + dev_err(sp->dev, "Failed to ioremap pci memory\n"); + result = -ENODEV; + goto error_ioremap; + } + + result = request_irq(sp->irq, ibmasm_interrupt_handler, IRQF_SHARED, sp->devname, (void*)sp); + if (result) { + dev_err(sp->dev, "Failed to register interrupt handler\n"); + goto error_request_irq; + } + + enable_sp_interrupts(sp->base_address); + + result = ibmasm_init_remote_input_dev(sp); + if (result) { + dev_err(sp->dev, "Failed to initialize remote queue\n"); + goto error_send_message; + } + + result = ibmasm_send_driver_vpd(sp); + if (result) { + dev_err(sp->dev, "Failed to send driver VPD to service processor\n"); + goto error_send_message; + } + result = ibmasm_send_os_state(sp, SYSTEM_STATE_OS_UP); + if (result) { + dev_err(sp->dev, "Failed to send OS state to service processor\n"); + goto error_send_message; + } + ibmasmfs_add_sp(sp); + + ibmasm_register_uart(sp); + + return 0; + +error_send_message: + disable_sp_interrupts(sp->base_address); + ibmasm_free_remote_input_dev(sp); + free_irq(sp->irq, (void *)sp); +error_request_irq: + iounmap(sp->base_address); +error_ioremap: + ibmasm_heartbeat_exit(sp); +error_heartbeat: + ibmasm_event_buffer_exit(sp); +error_eventbuffer: + kfree(sp); +error_kmalloc: + pci_release_regions(pdev); +error_resources: + pci_disable_device(pdev); + + return result; +} + +static void ibmasm_remove_one(struct pci_dev *pdev) +{ + struct service_processor *sp = pci_get_drvdata(pdev); + + dbg("Unregistering UART\n"); + ibmasm_unregister_uart(sp); + dbg("Sending OS down message\n"); + if (ibmasm_send_os_state(sp, SYSTEM_STATE_OS_DOWN)) + err("failed to get response to 'Send OS State' command\n"); + dbg("Disabling heartbeats\n"); + ibmasm_heartbeat_exit(sp); + dbg("Disabling interrupts\n"); + disable_sp_interrupts(sp->base_address); + dbg("Freeing SP irq\n"); + free_irq(sp->irq, (void *)sp); + dbg("Cleaning up\n"); + ibmasm_free_remote_input_dev(sp); + 
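/* editorial note (reviewer addition): the teardown below mirrors ibmasm_init_one() in reverse - iounmap, event buffer, the service_processor structure, then the PCI regions and device */ +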
iounmap(sp->base_address); + ibmasm_event_buffer_exit(sp); + kfree(sp); + pci_release_regions(pdev); + pci_disable_device(pdev); +} + +static struct pci_device_id ibmasm_pci_table[] = +{ + { PCI_DEVICE(VENDORID_IBM, DEVICEID_RSA) }, + {}, +}; + +static struct pci_driver ibmasm_driver = { + .name = DRIVER_NAME, + .id_table = ibmasm_pci_table, + .probe = ibmasm_init_one, + .remove = ibmasm_remove_one, +}; + +static void __exit ibmasm_exit (void) +{ + ibmasm_unregister_panic_notifier(); + ibmasmfs_unregister(); + pci_unregister_driver(&ibmasm_driver); + info(DRIVER_DESC " version " DRIVER_VERSION " unloaded"); +} + +static int __init ibmasm_init(void) +{ + int result = pci_register_driver(&ibmasm_driver); + if (result) + return result; + + result = ibmasmfs_register(); + if (result) { + pci_unregister_driver(&ibmasm_driver); + err("Failed to register ibmasmfs file system"); + return result; + } + + ibmasm_register_panic_notifier(); + info(DRIVER_DESC " version " DRIVER_VERSION " loaded"); + return 0; +} + +module_init(ibmasm_init); +module_exit(ibmasm_exit); + +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, ibmasm_pci_table); + diff --git a/kernel/drivers/misc/ibmasm/r_heartbeat.c b/kernel/drivers/misc/ibmasm/r_heartbeat.c new file mode 100644 index 000000000..232034f5d --- /dev/null +++ b/kernel/drivers/misc/ibmasm/r_heartbeat.c @@ -0,0 +1,99 @@ + +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + */ + +#include <linux/sched.h> +#include "ibmasm.h" +#include "dot_command.h" + +/* + * Reverse Heartbeat, i.e. heartbeats sent from the driver to the + * service processor. + * These heartbeats are initiated by user level programs. + */ + +/* the reverse heartbeat dot command */ +#pragma pack(1) +static struct { + struct dot_command_header header; + unsigned char command[3]; +} rhb_dot_cmd = { + .header = { + .type = sp_read, + .command_size = 3, + .data_size = 0, + .status = 0 + }, + .command = { 4, 3, 6 } +}; +#pragma pack() + +void ibmasm_init_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb) +{ + init_waitqueue_head(&rhb->wait); + rhb->stopped = 0; +} + +/** + * start_reverse_heartbeat + * Loop forever, sending a reverse heartbeat dot command to the service + * processor, then sleeping. The loop comes to an end if the service + * processor fails to respond 3 times or we were interrupted. 
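+ *
+ * Editorial aside (reviewer sketch, not part of the original comment):
+ * user space drives this loop through the ibmasmfs reverse_heartbeat
+ * file; assuming a mount on /ibmasm, a watchdog process would simply do:
+ *
+ *	int fd = open("/ibmasm/0/reverse_heartbeat", O_RDONLY);
+ *	char c;
+ *	read(fd, &c, 1);	// returns only on SP failure or a signal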
+ */ +int ibmasm_start_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb) +{ + struct command *cmd; + int times_failed = 0; + int result = 1; + + cmd = ibmasm_new_command(sp, sizeof rhb_dot_cmd); + if (!cmd) + return -ENOMEM; + + while (times_failed < 3) { + memcpy(cmd->buffer, (void *)&rhb_dot_cmd, sizeof rhb_dot_cmd); + cmd->status = IBMASM_CMD_PENDING; + ibmasm_exec_command(sp, cmd); + ibmasm_wait_for_response(cmd, IBMASM_CMD_TIMEOUT_NORMAL); + + if (cmd->status != IBMASM_CMD_COMPLETE) + times_failed++; + + wait_event_interruptible_timeout(rhb->wait, + rhb->stopped, + REVERSE_HEARTBEAT_TIMEOUT * HZ); + + if (signal_pending(current) || rhb->stopped) { + result = -EINTR; + break; + } + } + command_put(cmd); + rhb->stopped = 0; + + return result; +} + +void ibmasm_stop_reverse_heartbeat(struct reverse_heartbeat *rhb) +{ + rhb->stopped = 1; + wake_up_interruptible(&rhb->wait); +} diff --git a/kernel/drivers/misc/ibmasm/remote.c b/kernel/drivers/misc/ibmasm/remote.c new file mode 100644 index 000000000..477bb43c8 --- /dev/null +++ b/kernel/drivers/misc/ibmasm/remote.c @@ -0,0 +1,282 @@ +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation, 2004 + * + * Authors: Max Asböck <amax@us.ibm.com> + * Vernon Mauery <vernux@us.ibm.com> + * + */ + +/* Remote mouse and keyboard event handling functions */ + +#include <linux/pci.h> +#include "ibmasm.h" +#include "remote.h" + +#define MOUSE_X_MAX 1600 +#define MOUSE_Y_MAX 1200 + +static const unsigned short xlate_high[XLATE_SIZE] = { + [KEY_SYM_ENTER & 0xff] = KEY_ENTER, + [KEY_SYM_KPSLASH & 0xff] = KEY_KPSLASH, + [KEY_SYM_KPSTAR & 0xff] = KEY_KPASTERISK, + [KEY_SYM_KPMINUS & 0xff] = KEY_KPMINUS, + [KEY_SYM_KPDOT & 0xff] = KEY_KPDOT, + [KEY_SYM_KPPLUS & 0xff] = KEY_KPPLUS, + [KEY_SYM_KP0 & 0xff] = KEY_KP0, + [KEY_SYM_KP1 & 0xff] = KEY_KP1, + [KEY_SYM_KP2 & 0xff] = KEY_KP2, [KEY_SYM_KPDOWN & 0xff] = KEY_KP2, + [KEY_SYM_KP3 & 0xff] = KEY_KP3, + [KEY_SYM_KP4 & 0xff] = KEY_KP4, [KEY_SYM_KPLEFT & 0xff] = KEY_KP4, + [KEY_SYM_KP5 & 0xff] = KEY_KP5, + [KEY_SYM_KP6 & 0xff] = KEY_KP6, [KEY_SYM_KPRIGHT & 0xff] = KEY_KP6, + [KEY_SYM_KP7 & 0xff] = KEY_KP7, + [KEY_SYM_KP8 & 0xff] = KEY_KP8, [KEY_SYM_KPUP & 0xff] = KEY_KP8, + [KEY_SYM_KP9 & 0xff] = KEY_KP9, + [KEY_SYM_BK_SPC & 0xff] = KEY_BACKSPACE, + [KEY_SYM_TAB & 0xff] = KEY_TAB, + [KEY_SYM_CTRL & 0xff] = KEY_LEFTCTRL, + [KEY_SYM_ALT & 0xff] = KEY_LEFTALT, + [KEY_SYM_INSERT & 0xff] = KEY_INSERT, + [KEY_SYM_DELETE & 0xff] = KEY_DELETE, + [KEY_SYM_SHIFT & 0xff] = KEY_LEFTSHIFT, + [KEY_SYM_UARROW & 0xff] = KEY_UP, + [KEY_SYM_DARROW & 0xff] = KEY_DOWN, + [KEY_SYM_LARROW & 0xff] = KEY_LEFT, + [KEY_SYM_RARROW & 0xff] = KEY_RIGHT, + [KEY_SYM_ESCAPE & 0xff] = KEY_ESC, + [KEY_SYM_PAGEUP & 0xff] = KEY_PAGEUP, + [KEY_SYM_PAGEDOWN & 0xff] = KEY_PAGEDOWN, + [KEY_SYM_HOME & 0xff] = KEY_HOME, + [KEY_SYM_END & 0xff] = KEY_END, + [KEY_SYM_F1 & 0xff] = KEY_F1, + [KEY_SYM_F2 & 0xff] = KEY_F2, + [KEY_SYM_F3 & 0xff] = KEY_F3, + [KEY_SYM_F4 & 0xff] = KEY_F4, + [KEY_SYM_F5 & 0xff] = KEY_F5, + [KEY_SYM_F6 & 0xff] = KEY_F6, + [KEY_SYM_F7 & 0xff] = KEY_F7, + [KEY_SYM_F8 & 0xff] = KEY_F8, + [KEY_SYM_F9 & 0xff] = KEY_F9, + [KEY_SYM_F10 & 0xff] = KEY_F10, + [KEY_SYM_F11 & 0xff] = KEY_F11, + [KEY_SYM_F12 & 0xff] = KEY_F12, + [KEY_SYM_CAP_LOCK & 0xff] = KEY_CAPSLOCK, + [KEY_SYM_NUM_LOCK & 0xff] = KEY_NUMLOCK, + [KEY_SYM_SCR_LOCK & 0xff] = KEY_SCROLLLOCK, +}; + +static const unsigned short xlate[XLATE_SIZE] = { + [NO_KEYCODE] = KEY_RESERVED, + [KEY_SYM_SPACE] = KEY_SPACE, + [KEY_SYM_TILDE] = KEY_GRAVE, [KEY_SYM_BKTIC] = KEY_GRAVE, + [KEY_SYM_ONE] = KEY_1, [KEY_SYM_BANG] = KEY_1, + [KEY_SYM_TWO] = KEY_2, [KEY_SYM_AT] = KEY_2, + [KEY_SYM_THREE] = KEY_3, [KEY_SYM_POUND] = KEY_3, + [KEY_SYM_FOUR] = KEY_4, [KEY_SYM_DOLLAR] = KEY_4, + [KEY_SYM_FIVE] = KEY_5, [KEY_SYM_PERCENT] = KEY_5, + [KEY_SYM_SIX] = KEY_6, [KEY_SYM_CARAT] = KEY_6, + [KEY_SYM_SEVEN] = KEY_7, [KEY_SYM_AMPER] = KEY_7, + [KEY_SYM_EIGHT] = KEY_8, [KEY_SYM_STAR] = KEY_8, + [KEY_SYM_NINE] = KEY_9, [KEY_SYM_LPAREN] = KEY_9, + [KEY_SYM_ZERO] = KEY_0, [KEY_SYM_RPAREN] = KEY_0, + [KEY_SYM_MINUS] = KEY_MINUS, [KEY_SYM_USCORE] = KEY_MINUS, + [KEY_SYM_EQUAL] = KEY_EQUAL, [KEY_SYM_PLUS] = KEY_EQUAL, + [KEY_SYM_LBRKT] = KEY_LEFTBRACE, [KEY_SYM_LCURLY] = KEY_LEFTBRACE, + [KEY_SYM_RBRKT] = KEY_RIGHTBRACE, [KEY_SYM_RCURLY] = KEY_RIGHTBRACE, + [KEY_SYM_SLASH] = KEY_BACKSLASH, [KEY_SYM_PIPE] = KEY_BACKSLASH, + [KEY_SYM_TIC] = KEY_APOSTROPHE, [KEY_SYM_QUOTE] = KEY_APOSTROPHE, + [KEY_SYM_SEMIC] = KEY_SEMICOLON, [KEY_SYM_COLON] = KEY_SEMICOLON, + [KEY_SYM_COMMA] = KEY_COMMA, [KEY_SYM_LT] = KEY_COMMA, + [KEY_SYM_PERIOD] = KEY_DOT, [KEY_SYM_GT] = KEY_DOT, + [KEY_SYM_BSLASH] = KEY_SLASH, [KEY_SYM_QMARK] = 
KEY_SLASH, + [KEY_SYM_A] = KEY_A, [KEY_SYM_a] = KEY_A, + [KEY_SYM_B] = KEY_B, [KEY_SYM_b] = KEY_B, + [KEY_SYM_C] = KEY_C, [KEY_SYM_c] = KEY_C, + [KEY_SYM_D] = KEY_D, [KEY_SYM_d] = KEY_D, + [KEY_SYM_E] = KEY_E, [KEY_SYM_e] = KEY_E, + [KEY_SYM_F] = KEY_F, [KEY_SYM_f] = KEY_F, + [KEY_SYM_G] = KEY_G, [KEY_SYM_g] = KEY_G, + [KEY_SYM_H] = KEY_H, [KEY_SYM_h] = KEY_H, + [KEY_SYM_I] = KEY_I, [KEY_SYM_i] = KEY_I, + [KEY_SYM_J] = KEY_J, [KEY_SYM_j] = KEY_J, + [KEY_SYM_K] = KEY_K, [KEY_SYM_k] = KEY_K, + [KEY_SYM_L] = KEY_L, [KEY_SYM_l] = KEY_L, + [KEY_SYM_M] = KEY_M, [KEY_SYM_m] = KEY_M, + [KEY_SYM_N] = KEY_N, [KEY_SYM_n] = KEY_N, + [KEY_SYM_O] = KEY_O, [KEY_SYM_o] = KEY_O, + [KEY_SYM_P] = KEY_P, [KEY_SYM_p] = KEY_P, + [KEY_SYM_Q] = KEY_Q, [KEY_SYM_q] = KEY_Q, + [KEY_SYM_R] = KEY_R, [KEY_SYM_r] = KEY_R, + [KEY_SYM_S] = KEY_S, [KEY_SYM_s] = KEY_S, + [KEY_SYM_T] = KEY_T, [KEY_SYM_t] = KEY_T, + [KEY_SYM_U] = KEY_U, [KEY_SYM_u] = KEY_U, + [KEY_SYM_V] = KEY_V, [KEY_SYM_v] = KEY_V, + [KEY_SYM_W] = KEY_W, [KEY_SYM_w] = KEY_W, + [KEY_SYM_X] = KEY_X, [KEY_SYM_x] = KEY_X, + [KEY_SYM_Y] = KEY_Y, [KEY_SYM_y] = KEY_Y, + [KEY_SYM_Z] = KEY_Z, [KEY_SYM_z] = KEY_Z, +}; + +static void print_input(struct remote_input *input) +{ + if (input->type == INPUT_TYPE_MOUSE) { + unsigned char buttons = input->mouse_buttons; + dbg("remote mouse movement: (x,y)=(%d,%d)%s%s%s%s\n", + input->data.mouse.x, input->data.mouse.y, + (buttons) ? " -- buttons:" : "", + (buttons & REMOTE_BUTTON_LEFT) ? "left " : "", + (buttons & REMOTE_BUTTON_MIDDLE) ? "middle " : "", + (buttons & REMOTE_BUTTON_RIGHT) ? "right" : "" + ); + } else { + dbg("remote keypress (code, flag, down):" + "%d (0x%x) [0x%x] [0x%x]\n", + input->data.keyboard.key_code, + input->data.keyboard.key_code, + input->data.keyboard.key_flag, + input->data.keyboard.key_down + ); + } +} + +static void send_mouse_event(struct input_dev *dev, struct remote_input *input) +{ + unsigned char buttons = input->mouse_buttons; + + input_report_abs(dev, ABS_X, input->data.mouse.x); + input_report_abs(dev, ABS_Y, input->data.mouse.y); + input_report_key(dev, BTN_LEFT, buttons & REMOTE_BUTTON_LEFT); + input_report_key(dev, BTN_MIDDLE, buttons & REMOTE_BUTTON_MIDDLE); + input_report_key(dev, BTN_RIGHT, buttons & REMOTE_BUTTON_RIGHT); + input_sync(dev); +} + +static void send_keyboard_event(struct input_dev *dev, + struct remote_input *input) +{ + unsigned int key; + unsigned short code = input->data.keyboard.key_code; + + if (code & 0xff00) + key = xlate_high[code & 0xff]; + else + key = xlate[code]; + input_report_key(dev, key, input->data.keyboard.key_down); + input_sync(dev); +} + +void ibmasm_handle_mouse_interrupt(struct service_processor *sp) +{ + unsigned long reader; + unsigned long writer; + struct remote_input input; + + reader = get_queue_reader(sp); + writer = get_queue_writer(sp); + + while (reader != writer) { + memcpy_fromio(&input, get_queue_entry(sp, reader), + sizeof(struct remote_input)); + + print_input(&input); + if (input.type == INPUT_TYPE_MOUSE) { + send_mouse_event(sp->remote.mouse_dev, &input); + } else if (input.type == INPUT_TYPE_KEYBOARD) { + send_keyboard_event(sp->remote.keybd_dev, &input); + } else + break; + + reader = advance_queue_reader(sp, reader); + writer = get_queue_writer(sp); + } +} + +int ibmasm_init_remote_input_dev(struct service_processor *sp) +{ + /* set up the mouse input device */ + struct input_dev *mouse_dev, *keybd_dev; + struct pci_dev *pdev = to_pci_dev(sp->dev); + int error = -ENOMEM; + int i; + + sp->remote.mouse_dev = mouse_dev = 
input_allocate_device(); + sp->remote.keybd_dev = keybd_dev = input_allocate_device(); + + if (!mouse_dev || !keybd_dev) + goto err_free_devices; + + mouse_dev->id.bustype = BUS_PCI; + mouse_dev->id.vendor = pdev->vendor; + mouse_dev->id.product = pdev->device; + mouse_dev->id.version = 1; + mouse_dev->dev.parent = sp->dev; + mouse_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); + mouse_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) | + BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE); + set_bit(BTN_TOUCH, mouse_dev->keybit); + mouse_dev->name = "ibmasm RSA I remote mouse"; + input_set_abs_params(mouse_dev, ABS_X, 0, MOUSE_X_MAX, 0, 0); + input_set_abs_params(mouse_dev, ABS_Y, 0, MOUSE_Y_MAX, 0, 0); + + keybd_dev->id.bustype = BUS_PCI; + keybd_dev->id.vendor = pdev->vendor; + keybd_dev->id.product = pdev->device; + keybd_dev->id.version = 2; + keybd_dev->dev.parent = sp->dev; + keybd_dev->evbit[0] = BIT_MASK(EV_KEY); + keybd_dev->name = "ibmasm RSA I remote keyboard"; + + for (i = 0; i < XLATE_SIZE; i++) { + if (xlate_high[i]) + set_bit(xlate_high[i], keybd_dev->keybit); + if (xlate[i]) + set_bit(xlate[i], keybd_dev->keybit); + } + + error = input_register_device(mouse_dev); + if (error) + goto err_free_devices; + + error = input_register_device(keybd_dev); + if (error) + goto err_unregister_mouse_dev; + + enable_mouse_interrupts(sp); + + printk(KERN_INFO "ibmasm remote responding to events on RSA card %d\n", sp->number); + + return 0; + + err_unregister_mouse_dev: + input_unregister_device(mouse_dev); + mouse_dev = NULL; /* so we don't try to free it again below */ + err_free_devices: + input_free_device(mouse_dev); + input_free_device(keybd_dev); + + return error; +} + +void ibmasm_free_remote_input_dev(struct service_processor *sp) +{ + disable_mouse_interrupts(sp); + input_unregister_device(sp->remote.mouse_dev); + input_unregister_device(sp->remote.keybd_dev); +} + diff --git a/kernel/drivers/misc/ibmasm/remote.h b/kernel/drivers/misc/ibmasm/remote.h new file mode 100644 index 000000000..a7729ef76 --- /dev/null +++ b/kernel/drivers/misc/ibmasm/remote.h @@ -0,0 +1,270 @@ + +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + * Originally written by Pete Reynolds + */ + +#ifndef _IBMASM_REMOTE_H_ +#define _IBMASM_REMOTE_H_ + +#include <asm/io.h> + +/* pci offsets */ +#define CONDOR_MOUSE_DATA 0x000AC000 +#define CONDOR_MOUSE_ISR_CONTROL 0x00 +#define CONDOR_MOUSE_ISR_STATUS 0x04 +#define CONDOR_MOUSE_Q_READER 0x08 +#define CONDOR_MOUSE_Q_WRITER 0x0C +#define CONDOR_MOUSE_Q_BEGIN 0x10 +#define CONDOR_MOUSE_MAX_X 0x14 +#define CONDOR_MOUSE_MAX_Y 0x18 + +#define CONDOR_INPUT_DESKTOP_INFO 0x1F0 +#define CONDOR_INPUT_DISPLAY_RESX 0x1F4 +#define CONDOR_INPUT_DISPLAY_RESY 0x1F8 +#define CONDOR_INPUT_DISPLAY_BITS 0x1FC +#define CONDOR_OUTPUT_VNC_STATUS 0x200 + +#define CONDOR_MOUSE_INTR_STATUS_MASK 0x00000001 + +#define INPUT_TYPE_MOUSE 0x1 +#define INPUT_TYPE_KEYBOARD 0x2 + + +/* mouse button states received from SP */ +#define REMOTE_DOUBLE_CLICK 0xF0 +#define REMOTE_BUTTON_LEFT 0x01 +#define REMOTE_BUTTON_MIDDLE 0x02 +#define REMOTE_BUTTON_RIGHT 0x04 + +/* size of keysym/keycode translation matrices */ +#define XLATE_SIZE 256 + +struct mouse_input { + unsigned short y; + unsigned short x; +}; + + +struct keyboard_input { + unsigned short key_code; + unsigned char key_flag; + unsigned char key_down; +}; + + + +struct remote_input { + union { + struct mouse_input mouse; + struct keyboard_input keyboard; + } data; + + unsigned char type; + unsigned char pad1; + unsigned char mouse_buttons; + unsigned char pad3; +}; + +#define mouse_addr(sp) (sp->base_address + CONDOR_MOUSE_DATA) +#define display_width(sp) (mouse_addr(sp) + CONDOR_INPUT_DISPLAY_RESX) +#define display_height(sp) (mouse_addr(sp) + CONDOR_INPUT_DISPLAY_RESY) +#define display_depth(sp) (mouse_addr(sp) + CONDOR_INPUT_DISPLAY_BITS) +#define desktop_info(sp) (mouse_addr(sp) + CONDOR_INPUT_DESKTOP_INFO) +#define vnc_status(sp) (mouse_addr(sp) + CONDOR_OUTPUT_VNC_STATUS) +#define isr_control(sp) (mouse_addr(sp) + CONDOR_MOUSE_ISR_CONTROL) + +#define mouse_interrupt_pending(sp) readl(mouse_addr(sp) + CONDOR_MOUSE_ISR_STATUS) +#define clear_mouse_interrupt(sp) writel(0, mouse_addr(sp) + CONDOR_MOUSE_ISR_STATUS) +#define enable_mouse_interrupts(sp) writel(1, mouse_addr(sp) + CONDOR_MOUSE_ISR_CONTROL) +#define disable_mouse_interrupts(sp) writel(0, mouse_addr(sp) + CONDOR_MOUSE_ISR_CONTROL) + +/* remote input queue operations */ +#define REMOTE_QUEUE_SIZE 60 + +#define get_queue_writer(sp) readl(mouse_addr(sp) + CONDOR_MOUSE_Q_WRITER) +#define get_queue_reader(sp) readl(mouse_addr(sp) + CONDOR_MOUSE_Q_READER) +#define set_queue_reader(sp, reader) writel(reader, mouse_addr(sp) + CONDOR_MOUSE_Q_READER) + +#define queue_begin (mouse_addr(sp) + CONDOR_MOUSE_Q_BEGIN) + +#define get_queue_entry(sp, read_index) \ + ((void*)(queue_begin + read_index * sizeof(struct remote_input))) + +static inline int advance_queue_reader(struct service_processor *sp, unsigned long reader) +{ + reader++; + if (reader == REMOTE_QUEUE_SIZE) + reader = 0; + + set_queue_reader(sp, reader); + return reader; +} + +#define NO_KEYCODE 0 +#define KEY_SYM_BK_SPC 0xFF08 +#define KEY_SYM_TAB 0xFF09 +#define KEY_SYM_ENTER 0xFF0D +#define KEY_SYM_SCR_LOCK 0xFF14 +#define KEY_SYM_ESCAPE 0xFF1B +#define KEY_SYM_HOME 0xFF50 +#define KEY_SYM_LARROW 0xFF51 +#define KEY_SYM_UARROW 0xFF52 +#define KEY_SYM_RARROW 0xFF53 +#define KEY_SYM_DARROW 0xFF54 +#define KEY_SYM_PAGEUP 0xFF55 +#define KEY_SYM_PAGEDOWN 0xFF56 +#define KEY_SYM_END 0xFF57 +#define KEY_SYM_INSERT 0xFF63 +#define KEY_SYM_NUM_LOCK 0xFF7F +#define 
KEY_SYM_KPSTAR 0xFFAA +#define KEY_SYM_KPPLUS 0xFFAB +#define KEY_SYM_KPMINUS 0xFFAD +#define KEY_SYM_KPDOT 0xFFAE +#define KEY_SYM_KPSLASH 0xFFAF +#define KEY_SYM_KPRIGHT 0xFF96 +#define KEY_SYM_KPUP 0xFF97 +#define KEY_SYM_KPLEFT 0xFF98 +#define KEY_SYM_KPDOWN 0xFF99 +#define KEY_SYM_KP0 0xFFB0 +#define KEY_SYM_KP1 0xFFB1 +#define KEY_SYM_KP2 0xFFB2 +#define KEY_SYM_KP3 0xFFB3 +#define KEY_SYM_KP4 0xFFB4 +#define KEY_SYM_KP5 0xFFB5 +#define KEY_SYM_KP6 0xFFB6 +#define KEY_SYM_KP7 0xFFB7 +#define KEY_SYM_KP8 0xFFB8 +#define KEY_SYM_KP9 0xFFB9 +#define KEY_SYM_F1 0xFFBE // 1B 5B 5B 41 +#define KEY_SYM_F2 0xFFBF // 1B 5B 5B 42 +#define KEY_SYM_F3 0xFFC0 // 1B 5B 5B 43 +#define KEY_SYM_F4 0xFFC1 // 1B 5B 5B 44 +#define KEY_SYM_F5 0xFFC2 // 1B 5B 5B 45 +#define KEY_SYM_F6 0xFFC3 // 1B 5B 31 37 7E +#define KEY_SYM_F7 0xFFC4 // 1B 5B 31 38 7E +#define KEY_SYM_F8 0xFFC5 // 1B 5B 31 39 7E +#define KEY_SYM_F9 0xFFC6 // 1B 5B 32 30 7E +#define KEY_SYM_F10 0xFFC7 // 1B 5B 32 31 7E +#define KEY_SYM_F11 0xFFC8 // 1B 5B 32 33 7E +#define KEY_SYM_F12 0xFFC9 // 1B 5B 32 34 7E +#define KEY_SYM_SHIFT 0xFFE1 +#define KEY_SYM_CTRL 0xFFE3 +#define KEY_SYM_ALT 0xFFE9 +#define KEY_SYM_CAP_LOCK 0xFFE5 +#define KEY_SYM_DELETE 0xFFFF +#define KEY_SYM_TILDE 0x60 +#define KEY_SYM_BKTIC 0x7E +#define KEY_SYM_ONE 0x31 +#define KEY_SYM_BANG 0x21 +#define KEY_SYM_TWO 0x32 +#define KEY_SYM_AT 0x40 +#define KEY_SYM_THREE 0x33 +#define KEY_SYM_POUND 0x23 +#define KEY_SYM_FOUR 0x34 +#define KEY_SYM_DOLLAR 0x24 +#define KEY_SYM_FIVE 0x35 +#define KEY_SYM_PERCENT 0x25 +#define KEY_SYM_SIX 0x36 +#define KEY_SYM_CARAT 0x5E +#define KEY_SYM_SEVEN 0x37 +#define KEY_SYM_AMPER 0x26 +#define KEY_SYM_EIGHT 0x38 +#define KEY_SYM_STAR 0x2A +#define KEY_SYM_NINE 0x39 +#define KEY_SYM_LPAREN 0x28 +#define KEY_SYM_ZERO 0x30 +#define KEY_SYM_RPAREN 0x29 +#define KEY_SYM_MINUS 0x2D +#define KEY_SYM_USCORE 0x5F +#define KEY_SYM_EQUAL 0x2B +#define KEY_SYM_PLUS 0x3D +#define KEY_SYM_LBRKT 0x5B +#define KEY_SYM_LCURLY 0x7B +#define KEY_SYM_RBRKT 0x5D +#define KEY_SYM_RCURLY 0x7D +#define KEY_SYM_SLASH 0x5C +#define KEY_SYM_PIPE 0x7C +#define KEY_SYM_TIC 0x27 +#define KEY_SYM_QUOTE 0x22 +#define KEY_SYM_SEMIC 0x3B +#define KEY_SYM_COLON 0x3A +#define KEY_SYM_COMMA 0x2C +#define KEY_SYM_LT 0x3C +#define KEY_SYM_PERIOD 0x2E +#define KEY_SYM_GT 0x3E +#define KEY_SYM_BSLASH 0x2F +#define KEY_SYM_QMARK 0x3F +#define KEY_SYM_A 0x41 +#define KEY_SYM_B 0x42 +#define KEY_SYM_C 0x43 +#define KEY_SYM_D 0x44 +#define KEY_SYM_E 0x45 +#define KEY_SYM_F 0x46 +#define KEY_SYM_G 0x47 +#define KEY_SYM_H 0x48 +#define KEY_SYM_I 0x49 +#define KEY_SYM_J 0x4A +#define KEY_SYM_K 0x4B +#define KEY_SYM_L 0x4C +#define KEY_SYM_M 0x4D +#define KEY_SYM_N 0x4E +#define KEY_SYM_O 0x4F +#define KEY_SYM_P 0x50 +#define KEY_SYM_Q 0x51 +#define KEY_SYM_R 0x52 +#define KEY_SYM_S 0x53 +#define KEY_SYM_T 0x54 +#define KEY_SYM_U 0x55 +#define KEY_SYM_V 0x56 +#define KEY_SYM_W 0x57 +#define KEY_SYM_X 0x58 +#define KEY_SYM_Y 0x59 +#define KEY_SYM_Z 0x5A +#define KEY_SYM_a 0x61 +#define KEY_SYM_b 0x62 +#define KEY_SYM_c 0x63 +#define KEY_SYM_d 0x64 +#define KEY_SYM_e 0x65 +#define KEY_SYM_f 0x66 +#define KEY_SYM_g 0x67 +#define KEY_SYM_h 0x68 +#define KEY_SYM_i 0x69 +#define KEY_SYM_j 0x6A +#define KEY_SYM_k 0x6B +#define KEY_SYM_l 0x6C +#define KEY_SYM_m 0x6D +#define KEY_SYM_n 0x6E +#define KEY_SYM_o 0x6F +#define KEY_SYM_p 0x70 +#define KEY_SYM_q 0x71 +#define KEY_SYM_r 0x72 +#define KEY_SYM_s 0x73 +#define KEY_SYM_t 0x74 +#define KEY_SYM_u 0x75 +#define KEY_SYM_v 0x76 +#define 
KEY_SYM_w 0x77 +#define KEY_SYM_x 0x78 +#define KEY_SYM_y 0x79 +#define KEY_SYM_z 0x7A +#define KEY_SYM_SPACE 0x20 +#endif /* _IBMASM_REMOTE_H_ */ diff --git a/kernel/drivers/misc/ibmasm/uart.c b/kernel/drivers/misc/ibmasm/uart.c new file mode 100644 index 000000000..01e2b0d7e --- /dev/null +++ b/kernel/drivers/misc/ibmasm/uart.c @@ -0,0 +1,72 @@ + +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck <amax@us.ibm.com> + * + */ + +#include <linux/termios.h> +#include <linux/tty.h> +#include <linux/serial_core.h> +#include <linux/serial_reg.h> +#include <linux/serial_8250.h> +#include "ibmasm.h" +#include "lowlevel.h" + + +void ibmasm_register_uart(struct service_processor *sp) +{ + struct uart_8250_port uart; + void __iomem *iomem_base; + + iomem_base = sp->base_address + SCOUT_COM_B_BASE; + + /* read the uart scratch register to determine if the UART + * is dedicated to the service processor or if the OS can use it + */ + if (0 == readl(iomem_base + UART_SCR)) { + dev_info(sp->dev, "IBM SP UART not registered, owned by service processor\n"); + sp->serial_line = -1; + return; + } + + memset(&uart, 0, sizeof(uart)); + uart.port.irq = sp->irq; + uart.port.uartclk = 3686400; + uart.port.flags = UPF_SHARE_IRQ; + uart.port.iotype = UPIO_MEM; + uart.port.membase = iomem_base; + + sp->serial_line = serial8250_register_8250_port(&uart); + if (sp->serial_line < 0) { + dev_err(sp->dev, "Failed to register serial port\n"); + return; + } + enable_uart_interrupts(sp->base_address); +} + +void ibmasm_unregister_uart(struct service_processor *sp) +{ + if (sp->serial_line < 0) + return; + + disable_uart_interrupts(sp->base_address); + serial8250_unregister_port(sp->serial_line); +} diff --git a/kernel/drivers/misc/ics932s401.c b/kernel/drivers/misc/ics932s401.c new file mode 100644 index 000000000..28f51e01f --- /dev/null +++ b/kernel/drivers/misc/ics932s401.c @@ -0,0 +1,495 @@ +/* + * A driver for the Integrated Circuits ICS932S401 + * Copyright (C) 2008 IBM + * + * Author: Darrick J. Wong <darrick.wong@oracle.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/module.h> +#include <linux/jiffies.h> +#include <linux/i2c.h> +#include <linux/err.h> +#include <linux/mutex.h> +#include <linux/delay.h> +#include <linux/log2.h> +#include <linux/slab.h> + +/* Addresses to scan */ +static const unsigned short normal_i2c[] = { 0x69, I2C_CLIENT_END }; + +/* ICS932S401 registers */ +#define ICS932S401_REG_CFG2 0x01 +#define ICS932S401_CFG1_SPREAD 0x01 +#define ICS932S401_REG_CFG7 0x06 +#define ICS932S401_FS_MASK 0x07 +#define ICS932S401_REG_VENDOR_REV 0x07 +#define ICS932S401_VENDOR 1 +#define ICS932S401_VENDOR_MASK 0x0F +#define ICS932S401_REV 4 +#define ICS932S401_REV_SHIFT 4 +#define ICS932S401_REG_DEVICE 0x09 +#define ICS932S401_DEVICE 11 +#define ICS932S401_REG_CTRL 0x0A +#define ICS932S401_MN_ENABLED 0x80 +#define ICS932S401_CPU_ALT 0x04 +#define ICS932S401_SRC_ALT 0x08 +#define ICS932S401_REG_CPU_M_CTRL 0x0B +#define ICS932S401_M_MASK 0x3F +#define ICS932S401_REG_CPU_N_CTRL 0x0C +#define ICS932S401_REG_CPU_SPREAD1 0x0D +#define ICS932S401_REG_CPU_SPREAD2 0x0E +#define ICS932S401_SPREAD_MASK 0x7FFF +#define ICS932S401_REG_SRC_M_CTRL 0x0F +#define ICS932S401_REG_SRC_N_CTRL 0x10 +#define ICS932S401_REG_SRC_SPREAD1 0x11 +#define ICS932S401_REG_SRC_SPREAD2 0x12 +#define ICS932S401_REG_CPU_DIVISOR 0x13 +#define ICS932S401_CPU_DIVISOR_SHIFT 4 +#define ICS932S401_REG_PCISRC_DIVISOR 0x14 +#define ICS932S401_SRC_DIVISOR_MASK 0x0F +#define ICS932S401_PCI_DIVISOR_SHIFT 4 + +/* Base clock is 14.318MHz */ +#define BASE_CLOCK 14318 + +#define NUM_REGS 21 +#define NUM_MIRRORED_REGS 15 + +static int regs_to_copy[NUM_MIRRORED_REGS] = { + ICS932S401_REG_CFG2, + ICS932S401_REG_CFG7, + ICS932S401_REG_VENDOR_REV, + ICS932S401_REG_DEVICE, + ICS932S401_REG_CTRL, + ICS932S401_REG_CPU_M_CTRL, + ICS932S401_REG_CPU_N_CTRL, + ICS932S401_REG_CPU_SPREAD1, + ICS932S401_REG_CPU_SPREAD2, + ICS932S401_REG_SRC_M_CTRL, + ICS932S401_REG_SRC_N_CTRL, + ICS932S401_REG_SRC_SPREAD1, + ICS932S401_REG_SRC_SPREAD2, + ICS932S401_REG_CPU_DIVISOR, + ICS932S401_REG_PCISRC_DIVISOR, +}; + +/* How often do we reread sensors values? (In jiffies) */ +#define SENSOR_REFRESH_INTERVAL (2 * HZ) + +/* How often do we reread sensor limit values? 
(In jiffies) */ +#define LIMIT_REFRESH_INTERVAL (60 * HZ) + +struct ics932s401_data { + struct attribute_group attrs; + struct mutex lock; + char sensors_valid; + unsigned long sensors_last_updated; /* In jiffies */ + + u8 regs[NUM_REGS]; +}; + +static int ics932s401_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int ics932s401_detect(struct i2c_client *client, + struct i2c_board_info *info); +static int ics932s401_remove(struct i2c_client *client); + +static const struct i2c_device_id ics932s401_id[] = { + { "ics932s401", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, ics932s401_id); + +static struct i2c_driver ics932s401_driver = { + .class = I2C_CLASS_HWMON, + .driver = { + .name = "ics932s401", + }, + .probe = ics932s401_probe, + .remove = ics932s401_remove, + .id_table = ics932s401_id, + .detect = ics932s401_detect, + .address_list = normal_i2c, +}; + +static struct ics932s401_data *ics932s401_update_device(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ics932s401_data *data = i2c_get_clientdata(client); + unsigned long local_jiffies = jiffies; + int i, temp; + + mutex_lock(&data->lock); + if (time_before(local_jiffies, data->sensors_last_updated + + SENSOR_REFRESH_INTERVAL) + && data->sensors_valid) + goto out; + + /* + * Each register must be read as a word and then right shifted 8 bits. + * Not really sure why this is; setting the "byte count programming" + * register to 1 does not fix this problem. + */ + for (i = 0; i < NUM_MIRRORED_REGS; i++) { + temp = i2c_smbus_read_word_data(client, regs_to_copy[i]); + data->regs[regs_to_copy[i]] = temp >> 8; + } + + data->sensors_last_updated = local_jiffies; + data->sensors_valid = 1; + +out: + mutex_unlock(&data->lock); + return data; +} + +static ssize_t show_spread_enabled(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + + if (data->regs[ICS932S401_REG_CFG2] & ICS932S401_CFG1_SPREAD) + return sprintf(buf, "1\n"); + + return sprintf(buf, "0\n"); +} + +/* bit to cpu khz map */ +static const int fs_speeds[] = { + 266666, + 133333, + 200000, + 166666, + 333333, + 100000, + 400000, + 0, +}; + +/* clock divisor map */ +static const int divisors[] = {2, 3, 5, 15, 4, 6, 10, 30, 8, 12, 20, 60, 16, + 24, 40, 120}; + +/* Calculate CPU frequency from the M/N registers. 
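+ *
+ * N is 10 bits wide: its low 8 bits live in the N register, and bits
+ * 8 and 9 are pulled from bits 7 and 6 of the M register.  Worked
+ * example with hypothetical register contents (illustrative only, not
+ * from real hardware): M = 5, N = 90, CPU divisor field selecting 2:
+ *
+ *   freq = 14318 * (90 + 8) / (5 + 2) = 200452 kHz
+ *   200452 / 2 = 100226 kHz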
*/ +static int calculate_cpu_freq(struct ics932s401_data *data) +{ + int m, n, freq; + + m = data->regs[ICS932S401_REG_CPU_M_CTRL] & ICS932S401_M_MASK; + n = data->regs[ICS932S401_REG_CPU_N_CTRL]; + + /* Pull in bits 8 & 9 from the M register */ + n |= ((int)data->regs[ICS932S401_REG_CPU_M_CTRL] & 0x80) << 1; + n |= ((int)data->regs[ICS932S401_REG_CPU_M_CTRL] & 0x40) << 3; + + freq = BASE_CLOCK * (n + 8) / (m + 2); + freq /= divisors[data->regs[ICS932S401_REG_CPU_DIVISOR] >> + ICS932S401_CPU_DIVISOR_SHIFT]; + + return freq; +} + +static ssize_t show_cpu_clock(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + + return sprintf(buf, "%d\n", calculate_cpu_freq(data)); +} + +static ssize_t show_cpu_clock_sel(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + int freq; + + if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_MN_ENABLED) + freq = calculate_cpu_freq(data); + else { + /* Freq is neatly wrapped up for us */ + int fid = data->regs[ICS932S401_REG_CFG7] & ICS932S401_FS_MASK; + freq = fs_speeds[fid]; + if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_CPU_ALT) { + switch (freq) { + case 166666: + freq = 160000; + break; + case 333333: + freq = 320000; + break; + } + } + } + + return sprintf(buf, "%d\n", freq); +} + +/* Calculate SRC frequency from the M/N registers. */ +static int calculate_src_freq(struct ics932s401_data *data) +{ + int m, n, freq; + + m = data->regs[ICS932S401_REG_SRC_M_CTRL] & ICS932S401_M_MASK; + n = data->regs[ICS932S401_REG_SRC_N_CTRL]; + + /* Pull in bits 8 & 9 from the M register */ + n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x80) << 1; + n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x40) << 3; + + freq = BASE_CLOCK * (n + 8) / (m + 2); + freq /= divisors[data->regs[ICS932S401_REG_PCISRC_DIVISOR] & + ICS932S401_SRC_DIVISOR_MASK]; + + return freq; +} + +static ssize_t show_src_clock(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + + return sprintf(buf, "%d\n", calculate_src_freq(data)); +} + +static ssize_t show_src_clock_sel(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + int freq; + + if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_MN_ENABLED) + freq = calculate_src_freq(data); + else + /* Freq is neatly wrapped up for us */ + if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_CPU_ALT && + data->regs[ICS932S401_REG_CTRL] & ICS932S401_SRC_ALT) + freq = 96000; + else + freq = 100000; + + return sprintf(buf, "%d\n", freq); +} + +/* Calculate PCI frequency from the SRC M/N registers. 
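+ *
+ * The PCI clock shares the SRC M/N pair; only the post-divider
+ * differs (the SRC divisor sits in the low nibble of the PCISRC
+ * divisor register, the PCI divisor in the high nibble).  A hedged
+ * usage sketch -- the sysfs path is illustrative, the 33333 value is
+ * the kHz fallback this driver reports when M/N is disabled:
+ *
+ *   $ cat /sys/bus/i2c/devices/0-0069/pci_clock
+ *   33333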
*/ +static int calculate_pci_freq(struct ics932s401_data *data) +{ + int m, n, freq; + + m = data->regs[ICS932S401_REG_SRC_M_CTRL] & ICS932S401_M_MASK; + n = data->regs[ICS932S401_REG_SRC_N_CTRL]; + + /* Pull in bits 8 & 9 from the M register */ + n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x80) << 1; + n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x40) << 3; + + freq = BASE_CLOCK * (n + 8) / (m + 2); + freq /= divisors[data->regs[ICS932S401_REG_PCISRC_DIVISOR] >> + ICS932S401_PCI_DIVISOR_SHIFT]; + + return freq; +} + +static ssize_t show_pci_clock(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + + return sprintf(buf, "%d\n", calculate_pci_freq(data)); +} + +static ssize_t show_pci_clock_sel(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + int freq; + + if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_MN_ENABLED) + freq = calculate_pci_freq(data); + else + freq = 33333; + + return sprintf(buf, "%d\n", freq); +} + +static ssize_t show_value(struct device *dev, + struct device_attribute *devattr, + char *buf); + +static ssize_t show_spread(struct device *dev, + struct device_attribute *devattr, + char *buf); + +static DEVICE_ATTR(spread_enabled, S_IRUGO, show_spread_enabled, NULL); +static DEVICE_ATTR(cpu_clock_selection, S_IRUGO, show_cpu_clock_sel, NULL); +static DEVICE_ATTR(cpu_clock, S_IRUGO, show_cpu_clock, NULL); +static DEVICE_ATTR(src_clock_selection, S_IRUGO, show_src_clock_sel, NULL); +static DEVICE_ATTR(src_clock, S_IRUGO, show_src_clock, NULL); +static DEVICE_ATTR(pci_clock_selection, S_IRUGO, show_pci_clock_sel, NULL); +static DEVICE_ATTR(pci_clock, S_IRUGO, show_pci_clock, NULL); +static DEVICE_ATTR(usb_clock, S_IRUGO, show_value, NULL); +static DEVICE_ATTR(ref_clock, S_IRUGO, show_value, NULL); +static DEVICE_ATTR(cpu_spread, S_IRUGO, show_spread, NULL); +static DEVICE_ATTR(src_spread, S_IRUGO, show_spread, NULL); + +static struct attribute *ics932s401_attr[] = +{ + &dev_attr_spread_enabled.attr, + &dev_attr_cpu_clock_selection.attr, + &dev_attr_cpu_clock.attr, + &dev_attr_src_clock_selection.attr, + &dev_attr_src_clock.attr, + &dev_attr_pci_clock_selection.attr, + &dev_attr_pci_clock.attr, + &dev_attr_usb_clock.attr, + &dev_attr_ref_clock.attr, + &dev_attr_cpu_spread.attr, + &dev_attr_src_spread.attr, + NULL +}; + +static ssize_t show_value(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + int x; + + if (devattr == &dev_attr_usb_clock) + x = 48000; + else if (devattr == &dev_attr_ref_clock) + x = BASE_CLOCK; + else + BUG(); + + return sprintf(buf, "%d\n", x); +} + +static ssize_t show_spread(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + int reg; + unsigned long val; + + if (!(data->regs[ICS932S401_REG_CFG2] & ICS932S401_CFG1_SPREAD)) + return sprintf(buf, "0%%\n"); + + if (devattr == &dev_attr_src_spread) + reg = ICS932S401_REG_SRC_SPREAD1; + else if (devattr == &dev_attr_cpu_spread) + reg = ICS932S401_REG_CPU_SPREAD1; + else + BUG(); + + val = data->regs[reg] | (data->regs[reg + 1] << 8); + val &= ICS932S401_SPREAD_MASK; + + /* Scale 0..2^14 to -0.5. 
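+	 *
+	 * That is, the spread field maps linearly onto a 0..0.5%
+	 * down-spread.  Worked example with a hypothetical register
+	 * value: val = 8192 gives 500000 * 8192 / 16384 = 250000,
+	 * printed as "-0.250000%", i.e. a 0.25% down-spread.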
*/ + val = 500000 * val / 16384; + return sprintf(buf, "-0.%lu%%\n", val); +} + +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int ics932s401_detect(struct i2c_client *client, + struct i2c_board_info *info) +{ + struct i2c_adapter *adapter = client->adapter; + int vendor, device, revision; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) + return -ENODEV; + + vendor = i2c_smbus_read_word_data(client, ICS932S401_REG_VENDOR_REV); + vendor >>= 8; + revision = vendor >> ICS932S401_REV_SHIFT; + vendor &= ICS932S401_VENDOR_MASK; + if (vendor != ICS932S401_VENDOR) + return -ENODEV; + + device = i2c_smbus_read_word_data(client, ICS932S401_REG_DEVICE); + device >>= 8; + if (device != ICS932S401_DEVICE) + return -ENODEV; + + if (revision != ICS932S401_REV) + dev_info(&adapter->dev, "Unknown revision %d\n", revision); + + strlcpy(info->type, "ics932s401", I2C_NAME_SIZE); + + return 0; +} + +static int ics932s401_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct ics932s401_data *data; + int err; + + data = kzalloc(sizeof(struct ics932s401_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(client, data); + mutex_init(&data->lock); + + dev_info(&client->dev, "%s chip found\n", client->name); + + /* Register sysfs hooks */ + data->attrs.attrs = ics932s401_attr; + err = sysfs_create_group(&client->dev.kobj, &data->attrs); + if (err) + goto exit_free; + + return 0; + +exit_free: + kfree(data); +exit: + return err; +} + +static int ics932s401_remove(struct i2c_client *client) +{ + struct ics932s401_data *data = i2c_get_clientdata(client); + + sysfs_remove_group(&client->dev.kobj, &data->attrs); + kfree(data); + return 0; +} + +module_i2c_driver(ics932s401_driver); + +MODULE_AUTHOR("Darrick J. Wong <darrick.wong@oracle.com>"); +MODULE_DESCRIPTION("ICS932S401 driver"); +MODULE_LICENSE("GPL"); + +/* IBM IntelliStation Z30 */ +MODULE_ALIAS("dmi:bvnIBM:*:rn9228:*"); +MODULE_ALIAS("dmi:bvnIBM:*:rn9232:*"); + +/* IBM x3650/x3550 */ +MODULE_ALIAS("dmi:bvnIBM:*:pnIBMSystemx3650*"); +MODULE_ALIAS("dmi:bvnIBM:*:pnIBMSystemx3550*"); diff --git a/kernel/drivers/misc/ioc4.c b/kernel/drivers/misc/ioc4.c new file mode 100644 index 000000000..8758d033d --- /dev/null +++ b/kernel/drivers/misc/ioc4.c @@ -0,0 +1,500 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2005-2006 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* This file contains the master driver module for use by SGI IOC4 subdrivers. + * + * It allocates any resources shared between multiple subdevices, and + * provides accessor functions (where needed) and the like for those + * resources. It also provides a mechanism for the subdevice modules + * to support loading and unloading. + * + * Non-shared resources (e.g. external interrupt A_INT_OUT register page + * alias, serial port and UART registers) are handled by the subdevice + * modules themselves. + * + * This is all necessary because IOC4 is not implemented as a multi-function + * PCI device, but an amalgamation of disparate registers for several + * types of device (ATA, serial, external interrupts). The normal + * resource management in the kernel doesn't have quite the right interfaces + * to handle this situation (e.g. multiple modules can't claim the same + * PCI ID), thus this IOC4 master module. 
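+ *
+ * A minimal sketch of how a subdriver attaches (illustrative only;
+ * foo_probe/foo_remove are hypothetical, the fields are those of
+ * struct ioc4_submodule as used below):
+ *
+ *	static struct ioc4_submodule foo_submodule = {
+ *		.is_owner  = THIS_MODULE,
+ *		.is_probe  = foo_probe,   /* called once per IOC4 card */
+ *		.is_remove = foo_remove,
+ *	};
+ *
+ *	ioc4_register_submodule(&foo_submodule);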
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/ioc4.h>
+#include <linux/ktime.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/time.h>
+#include <asm/io.h>
+
+/***************
+ * Definitions *
+ ***************/
+
+/* Tweakable values */
+
+/* PCI bus speed detection/calibration */
+#define IOC4_CALIBRATE_COUNT 63		/* Calibration cycle period */
+#define IOC4_CALIBRATE_CYCLES 256	/* Average over this many cycles */
+#define IOC4_CALIBRATE_DISCARD 2	/* Discard first few cycles */
+#define IOC4_CALIBRATE_LOW_MHZ 25	/* Lower bound on bus speed sanity */
+#define IOC4_CALIBRATE_HIGH_MHZ 75	/* Upper bound on bus speed sanity */
+#define IOC4_CALIBRATE_DEFAULT_MHZ 66	/* Assumed if sanity check fails */
+
+/************************
+ * Submodule management *
+ ************************/
+
+static DEFINE_MUTEX(ioc4_mutex);
+
+static LIST_HEAD(ioc4_devices);
+static LIST_HEAD(ioc4_submodules);
+
+/* Register an IOC4 submodule */
+int
+ioc4_register_submodule(struct ioc4_submodule *is)
+{
+	struct ioc4_driver_data *idd;
+
+	mutex_lock(&ioc4_mutex);
+	list_add(&is->is_list, &ioc4_submodules);
+
+	/* Initialize submodule for each IOC4 */
+	if (!is->is_probe)
+		goto out;
+
+	list_for_each_entry(idd, &ioc4_devices, idd_list) {
+		if (is->is_probe(idd)) {
+			printk(KERN_WARNING
+			       "%s: IOC4 submodule %s probe failed "
+			       "for pci_dev %s.\n",
+			       __func__, module_name(is->is_owner),
+			       pci_name(idd->idd_pdev));
+		}
+	}
+ out:
+	mutex_unlock(&ioc4_mutex);
+	return 0;
+}
+
+/* Unregister an IOC4 submodule */
+void
+ioc4_unregister_submodule(struct ioc4_submodule *is)
+{
+	struct ioc4_driver_data *idd;
+
+	mutex_lock(&ioc4_mutex);
+	list_del(&is->is_list);
+
+	/* Remove submodule for each IOC4 */
+	if (!is->is_remove)
+		goto out;
+
+	list_for_each_entry(idd, &ioc4_devices, idd_list) {
+		if (is->is_remove(idd)) {
+			printk(KERN_WARNING
+			       "%s: IOC4 submodule %s remove failed "
+			       "for pci_dev %s.\n",
+			       __func__, module_name(is->is_owner),
+			       pci_name(idd->idd_pdev));
+		}
+	}
+ out:
+	mutex_unlock(&ioc4_mutex);
+}
+
+/*********************
+ * Device management *
+ *********************/
+
+#define IOC4_CALIBRATE_LOW_LIMIT \
+	(1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_LOW_MHZ)
+#define IOC4_CALIBRATE_HIGH_LIMIT \
+	(1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_HIGH_MHZ)
+#define IOC4_CALIBRATE_DEFAULT \
+	(1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_DEFAULT_MHZ)
+
+#define IOC4_CALIBRATE_END \
+	(IOC4_CALIBRATE_CYCLES + IOC4_CALIBRATE_DISCARD)
+
+#define IOC4_INT_OUT_MODE_TOGGLE 0x7	/* Toggle INT_OUT every COUNT+1 ticks */
+
+/* Determines external interrupt output clock period of the PCI bus an
+ * IOC4 is attached to.  This value can be used to determine the PCI
+ * bus speed.
+ *
+ * IOC4 has a design feature that various internal timers are derived from
+ * the PCI bus clock.  This causes IOC4 device drivers to need to take the
+ * bus speed into account when setting various register values (e.g. INT_OUT
+ * register COUNT field, UART divisors, etc).  Since this information is
+ * needed by several subdrivers, it is determined by the main IOC4 driver,
+ * even though the following code utilizes external interrupt registers
+ * to perform the speed calculation.
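+ *
+ * Worked example with hypothetical timings: with COUNT = 63 each
+ * square-wave cycle spans 2 * 64 INT_OUT counts, so if the 256
+ * measured cycles take 1966080 ns in total,
+ *
+ *   period = 1966080 / (256 * 2 * 64) = 60 ns per INT_OUT count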
+ */ +static void +ioc4_clock_calibrate(struct ioc4_driver_data *idd) +{ + union ioc4_int_out int_out; + union ioc4_gpcr gpcr; + unsigned int state, last_state; + uint64_t start, end, period; + unsigned int count; + + /* Enable output */ + gpcr.raw = 0; + gpcr.fields.dir = IOC4_GPCR_DIR_0; + gpcr.fields.int_out_en = 1; + writel(gpcr.raw, &idd->idd_misc_regs->gpcr_s.raw); + + /* Reset to power-on state */ + writel(0, &idd->idd_misc_regs->int_out.raw); + mmiowb(); + + /* Set up square wave */ + int_out.raw = 0; + int_out.fields.count = IOC4_CALIBRATE_COUNT; + int_out.fields.mode = IOC4_INT_OUT_MODE_TOGGLE; + int_out.fields.diag = 0; + writel(int_out.raw, &idd->idd_misc_regs->int_out.raw); + mmiowb(); + + /* Check square wave period averaged over some number of cycles */ + start = ktime_get_ns(); + state = 1; /* make sure the first read isn't a rising edge */ + for (count = 0; count <= IOC4_CALIBRATE_END; count++) { + do { /* wait for a rising edge */ + last_state = state; + int_out.raw = readl(&idd->idd_misc_regs->int_out.raw); + state = int_out.fields.int_out; + } while (last_state || !state); + + /* discard the first few cycles */ + if (count == IOC4_CALIBRATE_DISCARD) + start = ktime_get_ns(); + } + end = ktime_get_ns(); + + /* Calculation rearranged to preserve intermediate precision. + * Logically: + * 1. "end - start" gives us the measurement period over all + * the square wave cycles. + * 2. Divide by number of square wave cycles to get the period + * of a square wave cycle. + * 3. Divide by 2*(int_out.fields.count+1), which is the formula + * by which the IOC4 generates the square wave, to get the + * period of an IOC4 INT_OUT count. + */ + period = (end - start) / + (IOC4_CALIBRATE_CYCLES * 2 * (IOC4_CALIBRATE_COUNT + 1)); + + /* Bounds check the result. */ + if (period > IOC4_CALIBRATE_LOW_LIMIT || + period < IOC4_CALIBRATE_HIGH_LIMIT) { + printk(KERN_INFO + "IOC4 %s: Clock calibration failed. Assuming" + "PCI clock is %d ns.\n", + pci_name(idd->idd_pdev), + IOC4_CALIBRATE_DEFAULT / IOC4_EXTINT_COUNT_DIVISOR); + period = IOC4_CALIBRATE_DEFAULT; + } else { + u64 ns = period; + + do_div(ns, IOC4_EXTINT_COUNT_DIVISOR); + printk(KERN_DEBUG + "IOC4 %s: PCI clock is %llu ns.\n", + pci_name(idd->idd_pdev), (unsigned long long)ns); + } + + /* Remember results. We store the extint clock period rather + * than the PCI clock period so that greater precision is + * retained. Divide by IOC4_EXTINT_COUNT_DIVISOR to get + * PCI clock period. + */ + idd->count_period = period; +} + +/* There are three variants of IOC4 cards: IO9, IO10, and PCI-RT. + * Each brings out different combinations of IOC4 signals, thus. + * the IOC4 subdrivers need to know to which we're attached. + * + * We look for the presence of a SCSI (IO9) or SATA (IO10) controller + * on the same PCI bus at slot number 3 to differentiate IO9 from IO10. + * If neither is present, it's a PCI-RT. + */ +static unsigned int +ioc4_variant(struct ioc4_driver_data *idd) +{ + struct pci_dev *pdev = NULL; + int found = 0; + + /* IO9: Look for a QLogic ISP 12160 at the same bus and slot 3. */ + do { + pdev = pci_get_device(PCI_VENDOR_ID_QLOGIC, + PCI_DEVICE_ID_QLOGIC_ISP12160, pdev); + if (pdev && + idd->idd_pdev->bus->number == pdev->bus->number && + 3 == PCI_SLOT(pdev->devfn)) + found = 1; + } while (pdev && !found); + if (NULL != pdev) { + pci_dev_put(pdev); + return IOC4_VARIANT_IO9; + } + + /* IO10: Look for a Vitesse VSC 7174 at the same bus and slot 3. 
*/ + pdev = NULL; + do { + pdev = pci_get_device(PCI_VENDOR_ID_VITESSE, + PCI_DEVICE_ID_VITESSE_VSC7174, pdev); + if (pdev && + idd->idd_pdev->bus->number == pdev->bus->number && + 3 == PCI_SLOT(pdev->devfn)) + found = 1; + } while (pdev && !found); + if (NULL != pdev) { + pci_dev_put(pdev); + return IOC4_VARIANT_IO10; + } + + /* PCI-RT: No SCSI/SATA controller will be present */ + return IOC4_VARIANT_PCI_RT; +} + +static void +ioc4_load_modules(struct work_struct *work) +{ + request_module("sgiioc4"); +} + +static DECLARE_WORK(ioc4_load_modules_work, ioc4_load_modules); + +/* Adds a new instance of an IOC4 card */ +static int +ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) +{ + struct ioc4_driver_data *idd; + struct ioc4_submodule *is; + uint32_t pcmd; + int ret; + + /* Enable IOC4 and take ownership of it */ + if ((ret = pci_enable_device(pdev))) { + printk(KERN_WARNING + "%s: Failed to enable IOC4 device for pci_dev %s.\n", + __func__, pci_name(pdev)); + goto out; + } + pci_set_master(pdev); + + /* Set up per-IOC4 data */ + idd = kmalloc(sizeof(struct ioc4_driver_data), GFP_KERNEL); + if (!idd) { + printk(KERN_WARNING + "%s: Failed to allocate IOC4 data for pci_dev %s.\n", + __func__, pci_name(pdev)); + ret = -ENODEV; + goto out_idd; + } + idd->idd_pdev = pdev; + idd->idd_pci_id = pci_id; + + /* Map IOC4 misc registers. These are shared between subdevices + * so the main IOC4 module manages them. + */ + idd->idd_bar0 = pci_resource_start(idd->idd_pdev, 0); + if (!idd->idd_bar0) { + printk(KERN_WARNING + "%s: Unable to find IOC4 misc resource " + "for pci_dev %s.\n", + __func__, pci_name(idd->idd_pdev)); + ret = -ENODEV; + goto out_pci; + } + if (!request_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs), + "ioc4_misc")) { + printk(KERN_WARNING + "%s: Unable to request IOC4 misc region " + "for pci_dev %s.\n", + __func__, pci_name(idd->idd_pdev)); + ret = -ENODEV; + goto out_pci; + } + idd->idd_misc_regs = ioremap(idd->idd_bar0, + sizeof(struct ioc4_misc_regs)); + if (!idd->idd_misc_regs) { + printk(KERN_WARNING + "%s: Unable to remap IOC4 misc region " + "for pci_dev %s.\n", + __func__, pci_name(idd->idd_pdev)); + ret = -ENODEV; + goto out_misc_region; + } + + /* Failsafe portion of per-IOC4 initialization */ + + /* Detect card variant */ + idd->idd_variant = ioc4_variant(idd); + printk(KERN_INFO "IOC4 %s: %s card detected.\n", pci_name(pdev), + idd->idd_variant == IOC4_VARIANT_IO9 ? "IO9" : + idd->idd_variant == IOC4_VARIANT_PCI_RT ? "PCI-RT" : + idd->idd_variant == IOC4_VARIANT_IO10 ? "IO10" : "unknown"); + + /* Initialize IOC4 */ + pci_read_config_dword(idd->idd_pdev, PCI_COMMAND, &pcmd); + pci_write_config_dword(idd->idd_pdev, PCI_COMMAND, + pcmd | PCI_COMMAND_PARITY | PCI_COMMAND_SERR); + + /* Determine PCI clock */ + ioc4_clock_calibrate(idd); + + /* Disable/clear all interrupts. Need to do this here lest + * one submodule request the shared IOC4 IRQ, but interrupt + * is generated by a different subdevice. + */ + /* Disable */ + writel(~0, &idd->idd_misc_regs->other_iec.raw); + writel(~0, &idd->idd_misc_regs->sio_iec); + /* Clear (i.e. 
acknowledge) */
+	writel(~0, &idd->idd_misc_regs->other_ir.raw);
+	writel(~0, &idd->idd_misc_regs->sio_ir);
+
+	/* Track PCI-device specific data */
+	idd->idd_serial_data = NULL;
+	pci_set_drvdata(idd->idd_pdev, idd);
+
+	mutex_lock(&ioc4_mutex);
+	list_add_tail(&idd->idd_list, &ioc4_devices);
+
+	/* Add this IOC4 to all submodules */
+	list_for_each_entry(is, &ioc4_submodules, is_list) {
+		if (is->is_probe && is->is_probe(idd)) {
+			printk(KERN_WARNING
+			       "%s: IOC4 submodule %s probe failed "
+			       "for pci_dev %s.\n",
+			       __func__, module_name(is->is_owner),
+			       pci_name(idd->idd_pdev));
+		}
+	}
+	mutex_unlock(&ioc4_mutex);
+
+	/* Request sgiioc4 IDE driver on boards that bring that functionality
+	 * off of IOC4.  The root filesystem may be hosted on a drive connected
+	 * to IOC4, so we need to make sure the sgiioc4 driver is loaded as it
+	 * won't be picked up by modprobes due to the ioc4 module owning the
+	 * PCI device.
+	 */
+	if (idd->idd_variant != IOC4_VARIANT_PCI_RT) {
+		/* Request the module from a work procedure as the modprobe
+		 * goes out to a userland helper and that will hang if done
+		 * directly from ioc4_probe().
+		 */
+		printk(KERN_INFO "IOC4 loading sgiioc4 submodule\n");
+		schedule_work(&ioc4_load_modules_work);
+	}
+
+	return 0;
+
+out_misc_region:
+	release_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs));
+out_pci:
+	kfree(idd);
+out_idd:
+	pci_disable_device(pdev);
+out:
+	return ret;
+}
+
+/* Removes a particular instance of an IOC4 card. */
+static void
+ioc4_remove(struct pci_dev *pdev)
+{
+	struct ioc4_submodule *is;
+	struct ioc4_driver_data *idd;
+
+	idd = pci_get_drvdata(pdev);
+
+	/* Remove this IOC4 from all submodules */
+	mutex_lock(&ioc4_mutex);
+	list_for_each_entry(is, &ioc4_submodules, is_list) {
+		if (is->is_remove && is->is_remove(idd)) {
+			printk(KERN_WARNING
+			       "%s: IOC4 submodule %s remove failed "
+			       "for pci_dev %s.\n",
+			       __func__, module_name(is->is_owner),
+			       pci_name(idd->idd_pdev));
+		}
+	}
+	mutex_unlock(&ioc4_mutex);
+
+	/* Release resources */
+	iounmap(idd->idd_misc_regs);
+	if (!idd->idd_bar0) {
+		printk(KERN_WARNING
+		       "%s: Unable to get IOC4 misc mapping for pci_dev %s. "
+		       "Device removal may be incomplete.\n",
+		       __func__, pci_name(idd->idd_pdev));
+	}
+	release_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs));
+
+	/* Disable IOC4 and relinquish */
+	pci_disable_device(pdev);
+
+	/* Remove and free driver data */
+	mutex_lock(&ioc4_mutex);
+	list_del(&idd->idd_list);
+	mutex_unlock(&ioc4_mutex);
+	kfree(idd);
+}
+
+static struct pci_device_id ioc4_id_table[] = {
+	{PCI_VENDOR_ID_SGI, PCI_DEVICE_ID_SGI_IOC4, PCI_ANY_ID,
+	 PCI_ANY_ID, 0x0b4000, 0xFFFFFF},
+	{0}
+};
+
+static struct pci_driver ioc4_driver = {
+	.name		= "IOC4",
+	.id_table	= ioc4_id_table,
+	.probe		= ioc4_probe,
+	.remove		= ioc4_remove,
+};
+
+MODULE_DEVICE_TABLE(pci, ioc4_id_table);
+
+/*********************
+ * Module management *
+ *********************/
+
+/* Module load */
+static int __init
+ioc4_init(void)
+{
+	return pci_register_driver(&ioc4_driver);
+}
+
+/* Module unload */
+static void __exit
+ioc4_exit(void)
+{
+	/* Ensure ioc4_load_modules() has completed before exiting */
+	flush_work(&ioc4_load_modules_work);
+	pci_unregister_driver(&ioc4_driver);
+}
+
+module_init(ioc4_init);
+module_exit(ioc4_exit);
+
+MODULE_AUTHOR("Brent Casavant - Silicon Graphics, Inc.
<bcasavan@sgi.com>"); +MODULE_DESCRIPTION("PCI driver master module for SGI IOC4 Base-IO Card"); +MODULE_LICENSE("GPL"); + +EXPORT_SYMBOL(ioc4_register_submodule); +EXPORT_SYMBOL(ioc4_unregister_submodule); diff --git a/kernel/drivers/misc/isl29003.c b/kernel/drivers/misc/isl29003.c new file mode 100644 index 000000000..12c30b486 --- /dev/null +++ b/kernel/drivers/misc/isl29003.c @@ -0,0 +1,481 @@ +/* + * isl29003.c - Linux kernel module for + * Intersil ISL29003 ambient light sensor + * + * See file:Documentation/misc-devices/isl29003 + * + * Copyright (c) 2009 Daniel Mack <daniel@caiaq.de> + * + * Based on code written by + * Rodolfo Giometti <giometti@linux.it> + * Eurotech S.p.A. <info@eurotech.it> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/i2c.h> +#include <linux/mutex.h> +#include <linux/delay.h> + +#define ISL29003_DRV_NAME "isl29003" +#define DRIVER_VERSION "1.0" + +#define ISL29003_REG_COMMAND 0x00 +#define ISL29003_ADC_ENABLED (1 << 7) +#define ISL29003_ADC_PD (1 << 6) +#define ISL29003_TIMING_INT (1 << 5) +#define ISL29003_MODE_SHIFT (2) +#define ISL29003_MODE_MASK (0x3 << ISL29003_MODE_SHIFT) +#define ISL29003_RES_SHIFT (0) +#define ISL29003_RES_MASK (0x3 << ISL29003_RES_SHIFT) + +#define ISL29003_REG_CONTROL 0x01 +#define ISL29003_INT_FLG (1 << 5) +#define ISL29003_RANGE_SHIFT (2) +#define ISL29003_RANGE_MASK (0x3 << ISL29003_RANGE_SHIFT) +#define ISL29003_INT_PERSISTS_SHIFT (0) +#define ISL29003_INT_PERSISTS_MASK (0xf << ISL29003_INT_PERSISTS_SHIFT) + +#define ISL29003_REG_IRQ_THRESH_HI 0x02 +#define ISL29003_REG_IRQ_THRESH_LO 0x03 +#define ISL29003_REG_LSB_SENSOR 0x04 +#define ISL29003_REG_MSB_SENSOR 0x05 +#define ISL29003_REG_LSB_TIMER 0x06 +#define ISL29003_REG_MSB_TIMER 0x07 + +#define ISL29003_NUM_CACHABLE_REGS 4 + +struct isl29003_data { + struct i2c_client *client; + struct mutex lock; + u8 reg_cache[ISL29003_NUM_CACHABLE_REGS]; + u8 power_state_before_suspend; +}; + +static int gain_range[] = { + 1000, 4000, 16000, 64000 +}; + +/* + * register access helpers + */ + +static int __isl29003_read_reg(struct i2c_client *client, + u32 reg, u8 mask, u8 shift) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + return (data->reg_cache[reg] & mask) >> shift; +} + +static int __isl29003_write_reg(struct i2c_client *client, + u32 reg, u8 mask, u8 shift, u8 val) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + int ret = 0; + u8 tmp; + + if (reg >= ISL29003_NUM_CACHABLE_REGS) + return -EINVAL; + + mutex_lock(&data->lock); + + tmp = data->reg_cache[reg]; + tmp &= ~mask; + tmp |= val << shift; + + ret = i2c_smbus_write_byte_data(client, reg, tmp); + if (!ret) + data->reg_cache[reg] = tmp; + + mutex_unlock(&data->lock); + return ret; +} + +/* + * internally used functions + */ + +/* range */ +static int 
isl29003_get_range(struct i2c_client *client)
+{
+	return __isl29003_read_reg(client, ISL29003_REG_CONTROL,
+		ISL29003_RANGE_MASK, ISL29003_RANGE_SHIFT);
+}
+
+static int isl29003_set_range(struct i2c_client *client, int range)
+{
+	return __isl29003_write_reg(client, ISL29003_REG_CONTROL,
+		ISL29003_RANGE_MASK, ISL29003_RANGE_SHIFT, range);
+}
+
+/* resolution */
+static int isl29003_get_resolution(struct i2c_client *client)
+{
+	return __isl29003_read_reg(client, ISL29003_REG_COMMAND,
+		ISL29003_RES_MASK, ISL29003_RES_SHIFT);
+}
+
+static int isl29003_set_resolution(struct i2c_client *client, int res)
+{
+	return __isl29003_write_reg(client, ISL29003_REG_COMMAND,
+		ISL29003_RES_MASK, ISL29003_RES_SHIFT, res);
+}
+
+/* mode */
+static int isl29003_get_mode(struct i2c_client *client)
+{
+	return __isl29003_read_reg(client, ISL29003_REG_COMMAND,
+		ISL29003_MODE_MASK, ISL29003_MODE_SHIFT);
+}
+
+static int isl29003_set_mode(struct i2c_client *client, int mode)
+{
+	return __isl29003_write_reg(client, ISL29003_REG_COMMAND,
+		ISL29003_MODE_MASK, ISL29003_MODE_SHIFT, mode);
+}
+
+/* power_state */
+static int isl29003_set_power_state(struct i2c_client *client, int state)
+{
+	return __isl29003_write_reg(client, ISL29003_REG_COMMAND,
+		ISL29003_ADC_ENABLED | ISL29003_ADC_PD, 0,
+		state ? ISL29003_ADC_ENABLED : ISL29003_ADC_PD);
+}
+
+static int isl29003_get_power_state(struct i2c_client *client)
+{
+	struct isl29003_data *data = i2c_get_clientdata(client);
+	u8 cmdreg = data->reg_cache[ISL29003_REG_COMMAND];
+	return ~cmdreg & ISL29003_ADC_PD;
+}
+
+static int isl29003_get_adc_value(struct i2c_client *client)
+{
+	struct isl29003_data *data = i2c_get_clientdata(client);
+	int lsb, msb, range, bitdepth;
+
+	mutex_lock(&data->lock);
+	lsb = i2c_smbus_read_byte_data(client, ISL29003_REG_LSB_SENSOR);
+
+	if (lsb < 0) {
+		mutex_unlock(&data->lock);
+		return lsb;
+	}
+
+	msb = i2c_smbus_read_byte_data(client, ISL29003_REG_MSB_SENSOR);
+	mutex_unlock(&data->lock);
+
+	if (msb < 0)
+		return msb;
+
+	range = isl29003_get_range(client);
+	bitdepth = (4 - isl29003_get_resolution(client)) * 4;
+	return (((msb << 8) | lsb) * gain_range[range]) >> bitdepth;
+}
+
+/*
+ * sysfs layer
+ */
+
+/* range */
+static ssize_t isl29003_show_range(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	return sprintf(buf, "%i\n", isl29003_get_range(client));
+}
+
+static ssize_t isl29003_store_range(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (val > 3)
+		return -EINVAL;
+
+	ret = isl29003_set_range(client, val);
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+
+static DEVICE_ATTR(range, S_IWUSR | S_IRUGO,
+		   isl29003_show_range, isl29003_store_range);
+
+
+/* resolution */
+static ssize_t isl29003_show_resolution(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	return sprintf(buf, "%d\n", isl29003_get_resolution(client));
+}
+
+static ssize_t isl29003_store_resolution(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (val > 3)
+		return -EINVAL;
+
+	ret = isl29003_set_resolution(client, val);
+	if (ret < 0)
+		return
ret; + + return count; +} + +static DEVICE_ATTR(resolution, S_IWUSR | S_IRUGO, + isl29003_show_resolution, isl29003_store_resolution); + +/* mode */ +static ssize_t isl29003_show_mode(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + return sprintf(buf, "%d\n", isl29003_get_mode(client)); +} + +static ssize_t isl29003_store_mode(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + unsigned long val; + int ret; + + ret = kstrtoul(buf, 10, &val); + if (ret) + return ret; + + if (val > 2) + return -EINVAL; + + ret = isl29003_set_mode(client, val); + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, + isl29003_show_mode, isl29003_store_mode); + + +/* power state */ +static ssize_t isl29003_show_power_state(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + return sprintf(buf, "%d\n", isl29003_get_power_state(client)); +} + +static ssize_t isl29003_store_power_state(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + unsigned long val; + int ret; + + ret = kstrtoul(buf, 10, &val); + if (ret) + return ret; + + if (val > 1) + return -EINVAL; + + ret = isl29003_set_power_state(client, val); + return ret ? ret : count; +} + +static DEVICE_ATTR(power_state, S_IWUSR | S_IRUGO, + isl29003_show_power_state, isl29003_store_power_state); + + +/* lux */ +static ssize_t isl29003_show_lux(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + + /* No LUX data if not operational */ + if (!isl29003_get_power_state(client)) + return -EBUSY; + + return sprintf(buf, "%d\n", isl29003_get_adc_value(client)); +} + +static DEVICE_ATTR(lux, S_IRUGO, isl29003_show_lux, NULL); + +static struct attribute *isl29003_attributes[] = { + &dev_attr_range.attr, + &dev_attr_resolution.attr, + &dev_attr_mode.attr, + &dev_attr_power_state.attr, + &dev_attr_lux.attr, + NULL +}; + +static const struct attribute_group isl29003_attr_group = { + .attrs = isl29003_attributes, +}; + +static int isl29003_init_client(struct i2c_client *client) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + int i; + + /* read all the registers once to fill the cache. 
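+	 * (the four cached registers are 0x00..0x03: command, control
+	 * and the two IRQ threshold registers)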
+ * if one of the reads fails, we consider the init failed */ + for (i = 0; i < ARRAY_SIZE(data->reg_cache); i++) { + int v = i2c_smbus_read_byte_data(client, i); + if (v < 0) + return -ENODEV; + + data->reg_cache[i] = v; + } + + /* set defaults */ + isl29003_set_range(client, 0); + isl29003_set_resolution(client, 0); + isl29003_set_mode(client, 0); + isl29003_set_power_state(client, 0); + + return 0; +} + +/* + * I2C layer + */ + +static int isl29003_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); + struct isl29003_data *data; + int err = 0; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE)) + return -EIO; + + data = kzalloc(sizeof(struct isl29003_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->client = client; + i2c_set_clientdata(client, data); + mutex_init(&data->lock); + + /* initialize the ISL29003 chip */ + err = isl29003_init_client(client); + if (err) + goto exit_kfree; + + /* register sysfs hooks */ + err = sysfs_create_group(&client->dev.kobj, &isl29003_attr_group); + if (err) + goto exit_kfree; + + dev_info(&client->dev, "driver version %s enabled\n", DRIVER_VERSION); + return 0; + +exit_kfree: + kfree(data); + return err; +} + +static int isl29003_remove(struct i2c_client *client) +{ + sysfs_remove_group(&client->dev.kobj, &isl29003_attr_group); + isl29003_set_power_state(client, 0); + kfree(i2c_get_clientdata(client)); + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int isl29003_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct isl29003_data *data = i2c_get_clientdata(client); + + data->power_state_before_suspend = isl29003_get_power_state(client); + return isl29003_set_power_state(client, 0); +} + +static int isl29003_resume(struct device *dev) +{ + int i; + struct i2c_client *client = to_i2c_client(dev); + struct isl29003_data *data = i2c_get_clientdata(client); + + /* restore registers from cache */ + for (i = 0; i < ARRAY_SIZE(data->reg_cache); i++) + if (i2c_smbus_write_byte_data(client, i, data->reg_cache[i])) + return -EIO; + + return isl29003_set_power_state(client, + data->power_state_before_suspend); +} + +static SIMPLE_DEV_PM_OPS(isl29003_pm_ops, isl29003_suspend, isl29003_resume); +#define ISL29003_PM_OPS (&isl29003_pm_ops) + +#else +#define ISL29003_PM_OPS NULL +#endif /* CONFIG_PM_SLEEP */ + +static const struct i2c_device_id isl29003_id[] = { + { "isl29003", 0 }, + {} +}; +MODULE_DEVICE_TABLE(i2c, isl29003_id); + +static struct i2c_driver isl29003_driver = { + .driver = { + .name = ISL29003_DRV_NAME, + .owner = THIS_MODULE, + .pm = ISL29003_PM_OPS, + }, + .probe = isl29003_probe, + .remove = isl29003_remove, + .id_table = isl29003_id, +}; + +module_i2c_driver(isl29003_driver); + +MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>"); +MODULE_DESCRIPTION("ISL29003 ambient light sensor driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRIVER_VERSION); diff --git a/kernel/drivers/misc/isl29020.c b/kernel/drivers/misc/isl29020.c new file mode 100644 index 000000000..4a9c50a43 --- /dev/null +++ b/kernel/drivers/misc/isl29020.c @@ -0,0 +1,238 @@ +/* + * isl29020.c - Intersil ALS Driver + * + * Copyright (C) 2008 Intel Corp + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * Data sheet at: http://www.intersil.com/data/fn/fn6505.pdf + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/i2c.h> +#include <linux/err.h> +#include <linux/delay.h> +#include <linux/sysfs.h> +#include <linux/pm_runtime.h> + +static DEFINE_MUTEX(mutex); + +static ssize_t als_sensing_range_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + int val; + + val = i2c_smbus_read_byte_data(client, 0x00); + + if (val < 0) + return val; + return sprintf(buf, "%d000\n", 1 << (2 * (val & 3))); + +} + +static ssize_t als_lux_input_data_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + int ret_val, val; + unsigned long int lux; + int temp; + + pm_runtime_get_sync(dev); + msleep(100); + + mutex_lock(&mutex); + temp = i2c_smbus_read_byte_data(client, 0x02); /* MSB data */ + if (temp < 0) { + pm_runtime_put_sync(dev); + mutex_unlock(&mutex); + return temp; + } + + ret_val = i2c_smbus_read_byte_data(client, 0x01); /* LSB data */ + mutex_unlock(&mutex); + + if (ret_val < 0) { + pm_runtime_put_sync(dev); + return ret_val; + } + + ret_val |= temp << 8; + val = i2c_smbus_read_byte_data(client, 0x00); + pm_runtime_put_sync(dev); + if (val < 0) + return val; + lux = ((((1 << (2 * (val & 3))))*1000) * ret_val) / 65536; + return sprintf(buf, "%ld\n", lux); +} + +static ssize_t als_sensing_range_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + int ret_val; + unsigned long val; + + ret_val = kstrtoul(buf, 10, &val); + if (ret_val) + return ret_val; + + if (val < 1 || val > 64000) + return -EINVAL; + + /* Pick the smallest sensor range that will meet our requirements */ + if (val <= 1000) + val = 1; + else if (val <= 4000) + val = 2; + else if (val <= 16000) + val = 3; + else + val = 4; + + ret_val = i2c_smbus_read_byte_data(client, 0x00); + if (ret_val < 0) + return ret_val; + + ret_val &= 0xFC; /*reset the bit before setting them */ + ret_val |= val - 1; + ret_val = i2c_smbus_write_byte_data(client, 0x00, ret_val); + + if (ret_val < 0) + return ret_val; + return count; +} + +static void als_set_power_state(struct i2c_client *client, int enable) +{ + int ret_val; + + ret_val = i2c_smbus_read_byte_data(client, 0x00); + if (ret_val < 0) + return; + + if (enable) + ret_val |= 0x80; + else + ret_val &= 0x7F; + + i2c_smbus_write_byte_data(client, 0x00, ret_val); +} + +static DEVICE_ATTR(lux0_sensor_range, S_IRUGO | S_IWUSR, + als_sensing_range_show, als_sensing_range_store); +static DEVICE_ATTR(lux0_input, S_IRUGO, als_lux_input_data_show, NULL); + +static struct attribute *mid_att_als[] = { + &dev_attr_lux0_sensor_range.attr, + &dev_attr_lux0_input.attr, + NULL +}; + +static struct attribute_group m_als_gr = { + .name = "isl29020", + .attrs = mid_att_als +}; + +static int als_set_default_config(struct i2c_client 
*client) +{ + int retval; + + retval = i2c_smbus_write_byte_data(client, 0x00, 0xc0); + if (retval < 0) { + dev_err(&client->dev, "default write failed."); + return retval; + } + return 0; +} + +static int isl29020_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int res; + + res = als_set_default_config(client); + if (res < 0) + return res; + + res = sysfs_create_group(&client->dev.kobj, &m_als_gr); + if (res) { + dev_err(&client->dev, "isl29020: device create file failed\n"); + return res; + } + dev_info(&client->dev, "%s isl29020: ALS chip found\n", client->name); + als_set_power_state(client, 0); + pm_runtime_enable(&client->dev); + return res; +} + +static int isl29020_remove(struct i2c_client *client) +{ + sysfs_remove_group(&client->dev.kobj, &m_als_gr); + return 0; +} + +static struct i2c_device_id isl29020_id[] = { + { "isl29020", 0 }, + { } +}; + +MODULE_DEVICE_TABLE(i2c, isl29020_id); + +#ifdef CONFIG_PM + +static int isl29020_runtime_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + als_set_power_state(client, 0); + return 0; +} + +static int isl29020_runtime_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + als_set_power_state(client, 1); + return 0; +} + +static const struct dev_pm_ops isl29020_pm_ops = { + .runtime_suspend = isl29020_runtime_suspend, + .runtime_resume = isl29020_runtime_resume, +}; + +#define ISL29020_PM_OPS (&isl29020_pm_ops) +#else /* CONFIG_PM */ +#define ISL29020_PM_OPS NULL +#endif /* CONFIG_PM */ + +static struct i2c_driver isl29020_driver = { + .driver = { + .name = "isl29020", + .pm = ISL29020_PM_OPS, + }, + .probe = isl29020_probe, + .remove = isl29020_remove, + .id_table = isl29020_id, +}; + +module_i2c_driver(isl29020_driver); + +MODULE_AUTHOR("Kalhan Trisal <kalhan.trisal@intel.com>"); +MODULE_DESCRIPTION("Intersil isl29020 ALS Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/misc/kgdbts.c b/kernel/drivers/misc/kgdbts.c new file mode 100644 index 000000000..36f5d5277 --- /dev/null +++ b/kernel/drivers/misc/kgdbts.c @@ -0,0 +1,1189 @@ +/* + * kgdbts is a test suite for kgdb for the sole purpose of validating + * that key pieces of the kgdb internals are working properly such as + * HW/SW breakpoints, single stepping, and NMI. + * + * Created by: Jason Wessel <jason.wessel@windriver.com> + * + * Copyright (c) 2008 Wind River Systems, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* Information about the kgdb test suite. + * ------------------------------------- + * + * The kgdb test suite is designed as a KGDB I/O module which + * simulates the communications that a debugger would have with kgdb. + * The tests are broken up in to a line by line and referenced here as + * a "get" which is kgdb requesting input and "put" which is kgdb + * sending a response. 
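+ *
+ * For example (illustrative only), the software breakpoint test
+ * pairs a get of "Z0,<addr>,<BREAK_INSTR_SIZE>" -- the gdb remote
+ * serial protocol command to plant a breakpoint -- with an expected
+ * put of "OK" from kgdb.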
+ *
+ * The kgdb suite can be invoked from the kernel command line
+ * arguments system or executed dynamically at run time.  The test
+ * suite uses the variable "kgdbts" to obtain the information about
+ * which tests to run and to configure the verbosity level.  The
+ * following are the various characters you can use with the kgdbts=
+ * line:
+ *
+ * When using the "kgdbts=" you only choose one of the following core
+ * test types:
+ * A = Run all the core tests silently
+ * V1 = Run all the core tests with minimal output
+ * V2 = Run all the core tests in debug mode
+ *
+ * You can also specify optional tests:
+ * N## = Go to sleep with interrupts off for ## seconds
+ *       to test the HW NMI watchdog
+ * F## = Break at do_fork for ## iterations
+ * S## = Break at sys_open for ## iterations
+ * I## = Run the single step test ## iterations
+ *
+ * NOTE: the do_fork and sys_open tests are mutually exclusive.
+ *
+ * To invoke the kgdb test suite from boot you use a kernel start
+ * argument as follows:
+ *   kgdbts=V1 kgdbwait
+ * Or if you wanted to perform the NMI test for 6 seconds and do_fork
+ * test for 100 forks, you could use:
+ *   kgdbts=V1N6F100 kgdbwait
+ *
+ * The test suite can also be invoked at run time with:
+ *   echo kgdbts=V1N6F100 > /sys/module/kgdbts/parameters/kgdbts
+ * Or as another example:
+ *   echo kgdbts=V2 > /sys/module/kgdbts/parameters/kgdbts
+ *
+ * When developing a new kgdb arch specific implementation or
+ * using these tests for the purpose of regression testing,
+ * several invocations are required.
+ *
+ * 1) Boot with the test suite enabled by using the kernel arguments
+ *    "kgdbts=V1F100 kgdbwait"
+ *    ## If the kgdb arch specific implementation has NMI, use
+ *    "kgdbts=V1N6F100"
+ *
+ * 2) After the system boot run the basic test.
+ *    echo kgdbts=V1 > /sys/module/kgdbts/parameters/kgdbts
+ *
+ * 3) Run the concurrency tests.  It is best to use n+1
+ *    while loops where n is the number of cpus you have
+ *    in your system.  The example below uses only two
+ *    loops.
+ *
+ *    ## This tests break points on sys_open
+ *    while [ 1 ] ; do find / > /dev/null 2>&1 ; done &
+ *    while [ 1 ] ; do find / > /dev/null 2>&1 ; done &
+ *    echo kgdbts=V1S10000 > /sys/module/kgdbts/parameters/kgdbts
+ *    fg # and hit control-c
+ *    fg # and hit control-c
+ *    ## This tests break points on do_fork
+ *    while [ 1 ] ; do date > /dev/null ; done &
+ *    while [ 1 ] ; do date > /dev/null ; done &
+ *    echo kgdbts=V1F1000 > /sys/module/kgdbts/parameters/kgdbts
+ *    fg # and hit control-c
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/kgdb.h>
+#include <linux/ctype.h>
+#include <linux/uaccess.h>
+#include <linux/syscalls.h>
+#include <linux/nmi.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <asm/sections.h>
+
+#define v1printk(a...) do {		\
+	if (verbose)			\
+		printk(KERN_INFO a);	\
+	} while (0)
+#define v2printk(a...) do {		\
+	if (verbose > 1)		\
+		printk(KERN_INFO a);	\
+	touch_nmi_watchdog();		\
+	} while (0)
+#define eprintk(a...) do {		\
do { \ + printk(KERN_ERR a); \ + WARN_ON(1); \ + } while (0) +#define MAX_CONFIG_LEN 40 + +static struct kgdb_io kgdbts_io_ops; +static char get_buf[BUFMAX]; +static int get_buf_cnt; +static char put_buf[BUFMAX]; +static int put_buf_cnt; +static char scratch_buf[BUFMAX]; +static int verbose; +static int repeat_test; +static int test_complete; +static int send_ack; +static int final_ack; +static int force_hwbrks; +static int hwbreaks_ok; +static int hw_break_val; +static int hw_break_val2; +static int cont_instead_of_sstep; +static unsigned long cont_thread_id; +static unsigned long sstep_thread_id; +#if defined(CONFIG_ARM) || defined(CONFIG_MIPS) || defined(CONFIG_SPARC) +static int arch_needs_sstep_emulation = 1; +#else +static int arch_needs_sstep_emulation; +#endif +static unsigned long cont_addr; +static unsigned long sstep_addr; +static int restart_from_top_after_write; +static int sstep_state; + +/* Storage for the registers, in GDB format. */ +static unsigned long kgdbts_gdb_regs[(NUMREGBYTES + + sizeof(unsigned long) - 1) / + sizeof(unsigned long)]; +static struct pt_regs kgdbts_regs; + +/* -1 = init not run yet, 0 = unconfigured, 1 = configured. */ +static int configured = -1; + +#ifdef CONFIG_KGDB_TESTS_BOOT_STRING +static char config[MAX_CONFIG_LEN] = CONFIG_KGDB_TESTS_BOOT_STRING; +#else +static char config[MAX_CONFIG_LEN]; +#endif +static struct kparam_string kps = { + .string = config, + .maxlen = MAX_CONFIG_LEN, +}; + +static void fill_get_buf(char *buf); + +struct test_struct { + char *get; + char *put; + void (*get_handler)(char *); + int (*put_handler)(char *, char *); +}; + +struct test_state { + char *name; + struct test_struct *tst; + int idx; + int (*run_test) (int, int); + int (*validate_put) (char *); +}; + +static struct test_state ts; + +static int kgdbts_unreg_thread(void *ptr) +{ + /* Wait until the tests are complete and then ungresiter the I/O + * driver. + */ + while (!final_ack) + msleep_interruptible(1500); + /* Pause for any other threads to exit after final ack. */ + msleep_interruptible(1000); + if (configured) + kgdb_unregister_io_module(&kgdbts_io_ops); + configured = 0; + + return 0; +} + +/* This is noinline such that it can be used for a single location to + * place a breakpoint + */ +static noinline void kgdbts_break_test(void) +{ + v2printk("kgdbts: breakpoint complete\n"); +} + +/* Lookup symbol info in the kernel */ +static unsigned long lookup_addr(char *arg) +{ + unsigned long addr = 0; + + if (!strcmp(arg, "kgdbts_break_test")) + addr = (unsigned long)kgdbts_break_test; + else if (!strcmp(arg, "sys_open")) + addr = (unsigned long)do_sys_open; + else if (!strcmp(arg, "do_fork")) + addr = (unsigned long)do_fork; + else if (!strcmp(arg, "hw_break_val")) + addr = (unsigned long)&hw_break_val; + addr = (unsigned long) dereference_function_descriptor((void *)addr); + return addr; +} + +static void break_helper(char *bp_type, char *arg, unsigned long vaddr) +{ + unsigned long addr; + + if (arg) + addr = lookup_addr(arg); + else + addr = vaddr; + + sprintf(scratch_buf, "%s,%lx,%i", bp_type, addr, + BREAK_INSTR_SIZE); + fill_get_buf(scratch_buf); +} + +static void sw_break(char *arg) +{ + break_helper(force_hwbrks ? "Z1" : "Z0", arg, 0); +} + +static void sw_rem_break(char *arg) +{ + break_helper(force_hwbrks ? 
"z1" : "z0", arg, 0); +} + +static void hw_break(char *arg) +{ + break_helper("Z1", arg, 0); +} + +static void hw_rem_break(char *arg) +{ + break_helper("z1", arg, 0); +} + +static void hw_write_break(char *arg) +{ + break_helper("Z2", arg, 0); +} + +static void hw_rem_write_break(char *arg) +{ + break_helper("z2", arg, 0); +} + +static void hw_access_break(char *arg) +{ + break_helper("Z4", arg, 0); +} + +static void hw_rem_access_break(char *arg) +{ + break_helper("z4", arg, 0); +} + +static void hw_break_val_access(void) +{ + hw_break_val2 = hw_break_val; +} + +static void hw_break_val_write(void) +{ + hw_break_val++; +} + +static int get_thread_id_continue(char *put_str, char *arg) +{ + char *ptr = &put_str[11]; + + if (put_str[1] != 'T' || put_str[2] != '0') + return 1; + kgdb_hex2long(&ptr, &cont_thread_id); + return 0; +} + +static int check_and_rewind_pc(char *put_str, char *arg) +{ + unsigned long addr = lookup_addr(arg); + unsigned long ip; + int offset = 0; + + kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs, + NUMREGBYTES); + gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs); + ip = instruction_pointer(&kgdbts_regs); + v2printk("Stopped at IP: %lx\n", ip); +#ifdef GDB_ADJUSTS_BREAK_OFFSET + /* On some arches, a breakpoint stop requires it to be decremented */ + if (addr + BREAK_INSTR_SIZE == ip) + offset = -BREAK_INSTR_SIZE; +#endif + + if (arch_needs_sstep_emulation && sstep_addr && + ip + offset == sstep_addr && + ((!strcmp(arg, "sys_open") || !strcmp(arg, "do_fork")))) { + /* This is special case for emulated single step */ + v2printk("Emul: rewind hit single step bp\n"); + restart_from_top_after_write = 1; + } else if (strcmp(arg, "silent") && ip + offset != addr) { + eprintk("kgdbts: BP mismatch %lx expected %lx\n", + ip + offset, addr); + return 1; + } + /* Readjust the instruction pointer if needed */ + ip += offset; + cont_addr = ip; +#ifdef GDB_ADJUSTS_BREAK_OFFSET + instruction_pointer_set(&kgdbts_regs, ip); +#endif + return 0; +} + +static int check_single_step(char *put_str, char *arg) +{ + unsigned long addr = lookup_addr(arg); + static int matched_id; + + /* + * From an arch indepent point of view the instruction pointer + * should be on a different instruction + */ + kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs, + NUMREGBYTES); + gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs); + v2printk("Singlestep stopped at IP: %lx\n", + instruction_pointer(&kgdbts_regs)); + + if (sstep_thread_id != cont_thread_id) { + /* + * Ensure we stopped in the same thread id as before, else the + * debugger should continue until the original thread that was + * single stepped is scheduled again, emulating gdb's behavior. 
+ */ + v2printk("ThrID does not match: %lx\n", cont_thread_id); + if (arch_needs_sstep_emulation) { + if (matched_id && + instruction_pointer(&kgdbts_regs) != addr) + goto continue_test; + matched_id++; + ts.idx -= 2; + sstep_state = 0; + return 0; + } + cont_instead_of_sstep = 1; + ts.idx -= 4; + return 0; + } +continue_test: + matched_id = 0; + if (instruction_pointer(&kgdbts_regs) == addr) { + eprintk("kgdbts: SingleStep failed at %lx\n", + instruction_pointer(&kgdbts_regs)); + return 1; + } + + return 0; +} + +static void write_regs(char *arg) +{ + memset(scratch_buf, 0, sizeof(scratch_buf)); + scratch_buf[0] = 'G'; + pt_regs_to_gdb_regs(kgdbts_gdb_regs, &kgdbts_regs); + kgdb_mem2hex((char *)kgdbts_gdb_regs, &scratch_buf[1], NUMREGBYTES); + fill_get_buf(scratch_buf); +} + +static void skip_back_repeat_test(char *arg) +{ + int go_back = simple_strtol(arg, NULL, 10); + + repeat_test--; + if (repeat_test <= 0) + ts.idx++; + else + ts.idx -= go_back; + fill_get_buf(ts.tst[ts.idx].get); +} + +static int got_break(char *put_str, char *arg) +{ + test_complete = 1; + if (!strncmp(put_str+1, arg, 2)) { + if (!strncmp(arg, "T0", 2)) + test_complete = 2; + return 0; + } + return 1; +} + +static void get_cont_catch(char *arg) +{ + /* Always send detach because the test is completed at this point */ + fill_get_buf("D"); +} + +static int put_cont_catch(char *put_str, char *arg) +{ + /* This is at the end of the test and we catch any and all input */ + v2printk("kgdbts: cleanup task: %lx\n", sstep_thread_id); + ts.idx--; + return 0; +} + +static int emul_reset(char *put_str, char *arg) +{ + if (strncmp(put_str, "$OK", 3)) + return 1; + if (restart_from_top_after_write) { + restart_from_top_after_write = 0; + ts.idx = -1; + } + return 0; +} + +static void emul_sstep_get(char *arg) +{ + if (!arch_needs_sstep_emulation) { + if (cont_instead_of_sstep) { + cont_instead_of_sstep = 0; + fill_get_buf("c"); + } else { + fill_get_buf(arg); + } + return; + } + switch (sstep_state) { + case 0: + v2printk("Emulate single step\n"); + /* Start by looking at the current PC */ + fill_get_buf("g"); + break; + case 1: + /* set breakpoint */ + break_helper("Z0", NULL, sstep_addr); + break; + case 2: + /* Continue */ + fill_get_buf("c"); + break; + case 3: + /* Clear breakpoint */ + break_helper("z0", NULL, sstep_addr); + break; + default: + eprintk("kgdbts: ERROR failed sstep get emulation\n"); + } + sstep_state++; +} + +static int emul_sstep_put(char *put_str, char *arg) +{ + if (!arch_needs_sstep_emulation) { + char *ptr = &put_str[11]; + if (put_str[1] != 'T' || put_str[2] != '0') + return 1; + kgdb_hex2long(&ptr, &sstep_thread_id); + return 0; + } + switch (sstep_state) { + case 1: + /* validate the "g" packet to get the IP */ + kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs, + NUMREGBYTES); + gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs); + v2printk("Stopped at IP: %lx\n", + instruction_pointer(&kgdbts_regs)); + /* Want to stop at IP + break instruction size by default */ + sstep_addr = cont_addr + BREAK_INSTR_SIZE; + break; + case 2: + if (strncmp(put_str, "$OK", 3)) { + eprintk("kgdbts: failed sstep break set\n"); + return 1; + } + break; + case 3: + if (strncmp(put_str, "$T0", 3)) { + eprintk("kgdbts: failed continue sstep\n"); + return 1; + } else { + char *ptr = &put_str[11]; + kgdb_hex2long(&ptr, &sstep_thread_id); + } + break; + case 4: + if (strncmp(put_str, "$OK", 3)) { + eprintk("kgdbts: failed sstep break unset\n"); + return 1; + } + /* Single step is complete so continue on! 
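+		 * A full emulated step thus costs four exchanges driven
+		 * by emul_sstep_get(): "g" to read the PC, "Z0" to plant
+		 * a temporary breakpoint at PC + BREAK_INSTR_SIZE, "c" to
+		 * run into it, and "z0" to remove it again.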
*/ + sstep_state = 0; + return 0; + default: + eprintk("kgdbts: ERROR failed sstep put emulation\n"); + } + + /* Continue on the same test line until emulation is complete */ + ts.idx--; + return 0; +} + +static int final_ack_set(char *put_str, char *arg) +{ + if (strncmp(put_str+1, arg, 2)) + return 1; + final_ack = 1; + return 0; +} +/* + * Test to plant a breakpoint and detach, which should clear out the + * breakpoint and restore the original instruction. + */ +static struct test_struct plant_and_detach_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */ + { "D", "OK" }, /* Detach */ + { "", "" }, +}; + +/* + * Simple test to write in a software breakpoint, check for the + * correct stop location and detach. + */ +static struct test_struct sw_breakpoint_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */ + { "c", "T0*", }, /* Continue */ + { "g", "kgdbts_break_test", NULL, check_and_rewind_pc }, + { "write", "OK", write_regs }, + { "kgdbts_break_test", "OK", sw_rem_break }, /*remove breakpoint */ + { "D", "OK" }, /* Detach */ + { "D", "OK", NULL, got_break }, /* On success we made it here */ + { "", "" }, +}; + +/* + * Test a known bad memory read location to test the fault handler and + * read bytes 1-8 at the bad address + */ +static struct test_struct bad_read_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "m0,1", "E*" }, /* read 1 byte at address 1 */ + { "m0,2", "E*" }, /* read 1 byte at address 2 */ + { "m0,3", "E*" }, /* read 1 byte at address 3 */ + { "m0,4", "E*" }, /* read 1 byte at address 4 */ + { "m0,5", "E*" }, /* read 1 byte at address 5 */ + { "m0,6", "E*" }, /* read 1 byte at address 6 */ + { "m0,7", "E*" }, /* read 1 byte at address 7 */ + { "m0,8", "E*" }, /* read 1 byte at address 8 */ + { "D", "OK" }, /* Detach which removes all breakpoints and continues */ + { "", "" }, +}; + +/* + * Test for hitting a breakpoint, remove it, single step, plant it + * again and detach. + */ +static struct test_struct singlestep_break_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */ + { "c", "T0*", NULL, get_thread_id_continue }, /* Continue */ + { "kgdbts_break_test", "OK", sw_rem_break }, /*remove breakpoint */ + { "g", "kgdbts_break_test", NULL, check_and_rewind_pc }, + { "write", "OK", write_regs }, /* Write registers */ + { "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */ + { "g", "kgdbts_break_test", NULL, check_single_step }, + { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */ + { "c", "T0*", }, /* Continue */ + { "g", "kgdbts_break_test", NULL, check_and_rewind_pc }, + { "write", "OK", write_regs }, /* Write registers */ + { "D", "OK" }, /* Remove all breakpoints and continues */ + { "", "" }, +}; + +/* + * Test for hitting a breakpoint at do_fork for what ever the number + * of iterations required by the variable repeat_test. 
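+ * For example, "kgdbts=V1F100" runs this table with repeat_test set
+ * to 100; the "7" entry below is consumed by skip_back_repeat_test(),
+ * which jumps back seven table entries until the requested number of
+ * breakpoint hits has been observed.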
+ */ +static struct test_struct do_fork_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "do_fork", "OK", sw_break, }, /* set sw breakpoint */ + { "c", "T0*", NULL, get_thread_id_continue }, /* Continue */ + { "do_fork", "OK", sw_rem_break }, /*remove breakpoint */ + { "g", "do_fork", NULL, check_and_rewind_pc }, /* check location */ + { "write", "OK", write_regs, emul_reset }, /* Write registers */ + { "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */ + { "g", "do_fork", NULL, check_single_step }, + { "do_fork", "OK", sw_break, }, /* set sw breakpoint */ + { "7", "T0*", skip_back_repeat_test }, /* Loop based on repeat_test */ + { "D", "OK", NULL, final_ack_set }, /* detach and unregister I/O */ + { "", "", get_cont_catch, put_cont_catch }, +}; + +/* Test for hitting a breakpoint at sys_open for what ever the number + * of iterations required by the variable repeat_test. + */ +static struct test_struct sys_open_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "sys_open", "OK", sw_break, }, /* set sw breakpoint */ + { "c", "T0*", NULL, get_thread_id_continue }, /* Continue */ + { "sys_open", "OK", sw_rem_break }, /*remove breakpoint */ + { "g", "sys_open", NULL, check_and_rewind_pc }, /* check location */ + { "write", "OK", write_regs, emul_reset }, /* Write registers */ + { "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */ + { "g", "sys_open", NULL, check_single_step }, + { "sys_open", "OK", sw_break, }, /* set sw breakpoint */ + { "7", "T0*", skip_back_repeat_test }, /* Loop based on repeat_test */ + { "D", "OK", NULL, final_ack_set }, /* detach and unregister I/O */ + { "", "", get_cont_catch, put_cont_catch }, +}; + +/* + * Test for hitting a simple hw breakpoint + */ +static struct test_struct hw_breakpoint_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "kgdbts_break_test", "OK", hw_break, }, /* set hw breakpoint */ + { "c", "T0*", }, /* Continue */ + { "g", "kgdbts_break_test", NULL, check_and_rewind_pc }, + { "write", "OK", write_regs }, + { "kgdbts_break_test", "OK", hw_rem_break }, /*remove breakpoint */ + { "D", "OK" }, /* Detach */ + { "D", "OK", NULL, got_break }, /* On success we made it here */ + { "", "" }, +}; + +/* + * Test for hitting a hw write breakpoint + */ +static struct test_struct hw_write_break_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "hw_break_val", "OK", hw_write_break, }, /* set hw breakpoint */ + { "c", "T0*", NULL, got_break }, /* Continue */ + { "g", "silent", NULL, check_and_rewind_pc }, + { "write", "OK", write_regs }, + { "hw_break_val", "OK", hw_rem_write_break }, /*remove breakpoint */ + { "D", "OK" }, /* Detach */ + { "D", "OK", NULL, got_break }, /* On success we made it here */ + { "", "" }, +}; + +/* + * Test for hitting a hw access breakpoint + */ +static struct test_struct hw_access_break_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "hw_break_val", "OK", hw_access_break, }, /* set hw breakpoint */ + { "c", "T0*", NULL, got_break }, /* Continue */ + { "g", "silent", NULL, check_and_rewind_pc }, + { "write", "OK", write_regs }, + { "hw_break_val", "OK", hw_rem_access_break }, /*remove breakpoint */ + { "D", "OK" }, /* Detach */ + { "D", "OK", NULL, got_break }, /* On success we made it here */ + { "", "" }, +}; + +/* + * Test for hitting a hw access breakpoint + */ +static struct test_struct nmi_sleep_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "c", "T0*", NULL, got_break }, /* Continue */ + { "D", "OK" }, /* Detach */ + { "D", "OK", NULL, got_break 
}, /* On success we made it here */ + { "", "" }, +}; + +static void fill_get_buf(char *buf) +{ + unsigned char checksum = 0; + int count = 0; + char ch; + + strcpy(get_buf, "$"); + strcat(get_buf, buf); + while ((ch = buf[count])) { + checksum += ch; + count++; + } + strcat(get_buf, "#"); + get_buf[count + 2] = hex_asc_hi(checksum); + get_buf[count + 3] = hex_asc_lo(checksum); + get_buf[count + 4] = '\0'; + v2printk("get%i: %s\n", ts.idx, get_buf); +} + +static int validate_simple_test(char *put_str) +{ + char *chk_str; + + if (ts.tst[ts.idx].put_handler) + return ts.tst[ts.idx].put_handler(put_str, + ts.tst[ts.idx].put); + + chk_str = ts.tst[ts.idx].put; + if (*put_str == '$') + put_str++; + + while (*chk_str != '\0' && *put_str != '\0') { + /* If someone does a * to match the rest of the string, allow + * it, or stop if the received string is complete. + */ + if (*put_str == '#' || *chk_str == '*') + return 0; + if (*put_str != *chk_str) + return 1; + + chk_str++; + put_str++; + } + if (*chk_str == '\0' && (*put_str == '\0' || *put_str == '#')) + return 0; + + return 1; +} + +static int run_simple_test(int is_get_char, int chr) +{ + int ret = 0; + if (is_get_char) { + /* Send an ACK on the get if a prior put completed and set the + * send ack variable + */ + if (send_ack) { + send_ack = 0; + return '+'; + } + /* On the first get char, fill the transmit buffer and then + * take from the get_string. + */ + if (get_buf_cnt == 0) { + if (ts.tst[ts.idx].get_handler) + ts.tst[ts.idx].get_handler(ts.tst[ts.idx].get); + else + fill_get_buf(ts.tst[ts.idx].get); + } + + if (get_buf[get_buf_cnt] == '\0') { + eprintk("kgdbts: ERROR GET: EOB on '%s' at %i\n", + ts.name, ts.idx); + get_buf_cnt = 0; + fill_get_buf("D"); + } + ret = get_buf[get_buf_cnt]; + get_buf_cnt++; + return ret; + } + + /* This callback is a put char which is when kgdb sends data to + * this I/O module. 
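+	 * Characters are accumulated in put_buf until the '#' that
+	 * starts the two-byte checksum is seen; the completed packet is
+	 * then matched against the expected "put" string (or handed to
+	 * the put_handler) of the current test table entry, and an ACK
+	 * is queued for the next get.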
+ */ + if (ts.tst[ts.idx].get[0] == '\0' && ts.tst[ts.idx].put[0] == '\0' && + !ts.tst[ts.idx].get_handler) { + eprintk("kgdbts: ERROR: beyond end of test on" + " '%s' line %i\n", ts.name, ts.idx); + return 0; + } + + if (put_buf_cnt >= BUFMAX) { + eprintk("kgdbts: ERROR: put buffer overflow on" + " '%s' line %i\n", ts.name, ts.idx); + put_buf_cnt = 0; + return 0; + } + /* Ignore everything until the first valid packet start '$' */ + if (put_buf_cnt == 0 && chr != '$') + return 0; + + put_buf[put_buf_cnt] = chr; + put_buf_cnt++; + + /* End of packet == #XX so look for the '#' */ + if (put_buf_cnt > 3 && put_buf[put_buf_cnt - 3] == '#') { + if (put_buf_cnt >= BUFMAX) { + eprintk("kgdbts: ERROR: put buffer overflow on" + " '%s' line %i\n", ts.name, ts.idx); + put_buf_cnt = 0; + return 0; + } + put_buf[put_buf_cnt] = '\0'; + v2printk("put%i: %s\n", ts.idx, put_buf); + /* Trigger check here */ + if (ts.validate_put && ts.validate_put(put_buf)) { + eprintk("kgdbts: ERROR PUT: end of test " + "buffer on '%s' line %i expected %s got %s\n", + ts.name, ts.idx, ts.tst[ts.idx].put, put_buf); + } + ts.idx++; + put_buf_cnt = 0; + get_buf_cnt = 0; + send_ack = 1; + } + return 0; +} + +static void init_simple_test(void) +{ + memset(&ts, 0, sizeof(ts)); + ts.run_test = run_simple_test; + ts.validate_put = validate_simple_test; +} + +static void run_plant_and_detach_test(int is_early) +{ + char before[BREAK_INSTR_SIZE]; + char after[BREAK_INSTR_SIZE]; + + probe_kernel_read(before, (char *)kgdbts_break_test, + BREAK_INSTR_SIZE); + init_simple_test(); + ts.tst = plant_and_detach_test; + ts.name = "plant_and_detach_test"; + /* Activate test with initial breakpoint */ + if (!is_early) + kgdb_breakpoint(); + probe_kernel_read(after, (char *)kgdbts_break_test, + BREAK_INSTR_SIZE); + if (memcmp(before, after, BREAK_INSTR_SIZE)) { + printk(KERN_CRIT "kgdbts: ERROR kgdb corrupted memory\n"); + panic("kgdb memory corruption"); + } + + /* complete the detach test */ + if (!is_early) + kgdbts_break_test(); +} + +static void run_breakpoint_test(int is_hw_breakpoint) +{ + test_complete = 0; + init_simple_test(); + if (is_hw_breakpoint) { + ts.tst = hw_breakpoint_test; + ts.name = "hw_breakpoint_test"; + } else { + ts.tst = sw_breakpoint_test; + ts.name = "sw_breakpoint_test"; + } + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); + /* run code with the break point in it */ + kgdbts_break_test(); + kgdb_breakpoint(); + + if (test_complete) + return; + + eprintk("kgdbts: ERROR %s test failed\n", ts.name); + if (is_hw_breakpoint) + hwbreaks_ok = 0; +} + +static void run_hw_break_test(int is_write_test) +{ + test_complete = 0; + init_simple_test(); + if (is_write_test) { + ts.tst = hw_write_break_test; + ts.name = "hw_write_break_test"; + } else { + ts.tst = hw_access_break_test; + ts.name = "hw_access_break_test"; + } + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); + hw_break_val_access(); + if (is_write_test) { + if (test_complete == 2) { + eprintk("kgdbts: ERROR %s broke on access\n", + ts.name); + hwbreaks_ok = 0; + } + hw_break_val_write(); + } + kgdb_breakpoint(); + + if (test_complete == 1) + return; + + eprintk("kgdbts: ERROR %s test failed\n", ts.name); + hwbreaks_ok = 0; +} + +static void run_nmi_sleep_test(int nmi_sleep) +{ + unsigned long flags; + + init_simple_test(); + ts.tst = nmi_sleep_test; + ts.name = "nmi_sleep_test"; + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); + local_irq_save(flags); + mdelay(nmi_sleep*1000); + touch_nmi_watchdog(); + 
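+	/*
+	 * Normal interrupts stayed disabled across the mdelay() above,
+	 * so only an NMI can reach the debugger here; got_break() marks
+	 * the resulting "T0" stop packet by setting test_complete to 2,
+	 * which is checked once interrupts are restored.
+	 */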
local_irq_restore(flags); + if (test_complete != 2) + eprintk("kgdbts: ERROR nmi_test did not hit nmi\n"); + kgdb_breakpoint(); + if (test_complete == 1) + return; + + eprintk("kgdbts: ERROR %s test failed\n", ts.name); +} + +static void run_bad_read_test(void) +{ + init_simple_test(); + ts.tst = bad_read_test; + ts.name = "bad_read_test"; + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); +} + +static void run_do_fork_test(void) +{ + init_simple_test(); + ts.tst = do_fork_test; + ts.name = "do_fork_test"; + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); +} + +static void run_sys_open_test(void) +{ + init_simple_test(); + ts.tst = sys_open_test; + ts.name = "sys_open_test"; + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); +} + +static void run_singlestep_break_test(void) +{ + init_simple_test(); + ts.tst = singlestep_break_test; + ts.name = "singlestep_breakpoint_test"; + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); + kgdbts_break_test(); + kgdbts_break_test(); +} + +static void kgdbts_run_tests(void) +{ + char *ptr; + int fork_test = 0; + int do_sys_open_test = 0; + int sstep_test = 1000; + int nmi_sleep = 0; + int i; + + ptr = strchr(config, 'F'); + if (ptr) + fork_test = simple_strtol(ptr + 1, NULL, 10); + ptr = strchr(config, 'S'); + if (ptr) + do_sys_open_test = simple_strtol(ptr + 1, NULL, 10); + ptr = strchr(config, 'N'); + if (ptr) + nmi_sleep = simple_strtol(ptr+1, NULL, 10); + ptr = strchr(config, 'I'); + if (ptr) + sstep_test = simple_strtol(ptr+1, NULL, 10); + + /* All HW break point tests */ + if (arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT) { + hwbreaks_ok = 1; + v1printk("kgdbts:RUN hw breakpoint test\n"); + run_breakpoint_test(1); + v1printk("kgdbts:RUN hw write breakpoint test\n"); + run_hw_break_test(1); + v1printk("kgdbts:RUN access write breakpoint test\n"); + run_hw_break_test(0); + } + + /* required internal KGDB tests */ + v1printk("kgdbts:RUN plant and detach test\n"); + run_plant_and_detach_test(0); + v1printk("kgdbts:RUN sw breakpoint test\n"); + run_breakpoint_test(0); + v1printk("kgdbts:RUN bad memory access test\n"); + run_bad_read_test(); + v1printk("kgdbts:RUN singlestep test %i iterations\n", sstep_test); + for (i = 0; i < sstep_test; i++) { + run_singlestep_break_test(); + if (i % 100 == 0) + v1printk("kgdbts:RUN singlestep [%i/%i]\n", + i, sstep_test); + } + + /* ===Optional tests=== */ + + if (nmi_sleep) { + v1printk("kgdbts:RUN NMI sleep %i seconds test\n", nmi_sleep); + run_nmi_sleep_test(nmi_sleep); + } + + /* If the do_fork test is run it will be the last test that is + * executed because a kernel thread will be spawned at the very + * end to unregister the debug hooks. + */ + if (fork_test) { + repeat_test = fork_test; + printk(KERN_INFO "kgdbts:RUN do_fork for %i breakpoints\n", + repeat_test); + kthread_run(kgdbts_unreg_thread, NULL, "kgdbts_unreg"); + run_do_fork_test(); + return; + } + + /* If the sys_open test is run it will be the last test that is + * executed because a kernel thread will be spawned at the very + * end to unregister the debug hooks. 
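+	 * The unregister itself is done by kgdbts_unreg_thread(), which
+	 * waits for final_ack_set() to fire on the closing "D" packet
+	 * before calling kgdb_unregister_io_module().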
+ */ + if (do_sys_open_test) { + repeat_test = do_sys_open_test; + printk(KERN_INFO "kgdbts:RUN sys_open for %i breakpoints\n", + repeat_test); + kthread_run(kgdbts_unreg_thread, NULL, "kgdbts_unreg"); + run_sys_open_test(); + return; + } + /* Shutdown and unregister */ + kgdb_unregister_io_module(&kgdbts_io_ops); + configured = 0; +} + +static int kgdbts_option_setup(char *opt) +{ + if (strlen(opt) >= MAX_CONFIG_LEN) { + printk(KERN_ERR "kgdbts: config string too long\n"); + return -ENOSPC; + } + strcpy(config, opt); + + verbose = 0; + if (strstr(config, "V1")) + verbose = 1; + if (strstr(config, "V2")) + verbose = 2; + + return 0; +} + +__setup("kgdbts=", kgdbts_option_setup); + +static int configure_kgdbts(void) +{ + int err = 0; + + if (!strlen(config) || isspace(config[0])) + goto noconfig; + err = kgdbts_option_setup(config); + if (err) + goto noconfig; + + final_ack = 0; + run_plant_and_detach_test(1); + + err = kgdb_register_io_module(&kgdbts_io_ops); + if (err) { + configured = 0; + return err; + } + configured = 1; + kgdbts_run_tests(); + + return err; + +noconfig: + config[0] = 0; + configured = 0; + + return err; +} + +static int __init init_kgdbts(void) +{ + /* Already configured? */ + if (configured == 1) + return 0; + + return configure_kgdbts(); +} + +static int kgdbts_get_char(void) +{ + int val = 0; + + if (ts.run_test) + val = ts.run_test(1, 0); + + return val; +} + +static void kgdbts_put_char(u8 chr) +{ + if (ts.run_test) + ts.run_test(0, chr); +} + +static int param_set_kgdbts_var(const char *kmessage, struct kernel_param *kp) +{ + int len = strlen(kmessage); + + if (len >= MAX_CONFIG_LEN) { + printk(KERN_ERR "kgdbts: config string too long\n"); + return -ENOSPC; + } + + /* Only copy in the string if the init function has not run yet */ + if (configured < 0) { + strcpy(config, kmessage); + return 0; + } + + if (configured == 1) { + printk(KERN_ERR "kgdbts: ERROR: Already configured and running.\n"); + return -EBUSY; + } + + strcpy(config, kmessage); + /* Chop out \n char as a result of echo */ + if (config[len - 1] == '\n') + config[len - 1] = '\0'; + + /* Go and configure with the new params. */ + return configure_kgdbts(); +} + +static void kgdbts_pre_exp_handler(void) +{ + /* Increment the module count when the debugger is active */ + if (!kgdb_connected) + try_module_get(THIS_MODULE); +} + +static void kgdbts_post_exp_handler(void) +{ + /* decrement the module count when the debugger detaches */ + if (!kgdb_connected) + module_put(THIS_MODULE); +} + +static struct kgdb_io kgdbts_io_ops = { + .name = "kgdbts", + .read_char = kgdbts_get_char, + .write_char = kgdbts_put_char, + .pre_exception = kgdbts_pre_exp_handler, + .post_exception = kgdbts_post_exp_handler, +}; + +module_init(init_kgdbts); +module_param_call(kgdbts, param_set_kgdbts_var, param_get_string, &kps, 0644); +MODULE_PARM_DESC(kgdbts, "<A|V1|V2>[F#|S#][N#]"); +MODULE_DESCRIPTION("KGDB Test Suite"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Wind River Systems, Inc."); + diff --git a/kernel/drivers/misc/lattice-ecp3-config.c b/kernel/drivers/misc/lattice-ecp3-config.c new file mode 100644 index 000000000..c544f1f50 --- /dev/null +++ b/kernel/drivers/misc/lattice-ecp3-config.c @@ -0,0 +1,250 @@ +/* + * Copyright (C) 2012 Stefan Roese <sr@denx.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */

+#include <linux/device.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/spi/spi.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <asm/unaligned.h>

+#define FIRMWARE_NAME	"lattice-ecp3.bit"

+/*
+ * The JTAG IDs of the supported FPGAs. The ID is 32 bits wide and
+ * bit-reversed, as noted in the manual.
+ */
+#define ID_ECP3_17	0xc2088080
+#define ID_ECP3_35	0xc2048080

+/* FPGA commands */
+#define FPGA_CMD_READ_ID	0x07	/* plus 24 bits */
+#define FPGA_CMD_READ_STATUS	0x09	/* plus 24 bits */
+#define FPGA_CMD_CLEAR		0x70
+#define FPGA_CMD_REFRESH	0x71
+#define FPGA_CMD_WRITE_EN	0x4a	/* plus 2 bits */
+#define FPGA_CMD_WRITE_DIS	0x4f	/* plus 8 bits */
+#define FPGA_CMD_WRITE_INC	0x41	/* plus 0 bits */

+/*
+ * The status register is 32 bits, bit-reversed; DONE is bit 17 per
+ * TN1222.pdf (LatticeECP3 Slave SPI Port User's Guide)
+ */
+#define FPGA_STATUS_DONE	0x00004000
+#define FPGA_STATUS_CLEARED	0x00010000

+#define FPGA_CLEAR_TIMEOUT	5000	/* max. 5000ms for FPGA clear */
+#define FPGA_CLEAR_MSLEEP	10
+#define FPGA_CLEAR_LOOP_COUNT	(FPGA_CLEAR_TIMEOUT / FPGA_CLEAR_MSLEEP)

+struct fpga_data {
+	struct completion fw_loaded;
+};

+struct ecp3_dev {
+	u32 jedec_id;
+	char *name;
+};

+static const struct ecp3_dev ecp3_dev[] = {
+	{
+		.jedec_id = ID_ECP3_17,
+		.name = "Lattice ECP3-17",
+	},
+	{
+		.jedec_id = ID_ECP3_35,
+		.name = "Lattice ECP3-35",
+	},
+};

+static void firmware_load(const struct firmware *fw, void *context)
+{
+	struct spi_device *spi = (struct spi_device *)context;
+	struct fpga_data *data = spi_get_drvdata(spi);
+	u8 *buffer;
+	int ret;
+	u8 txbuf[8];
+	u8 rxbuf[8];
+	int rx_len = 8;
+	int i;
+	u32 jedec_id;
+	u32 status;

+	if (fw == NULL) {
+		dev_err(&spi->dev, "Cannot load firmware, aborting\n");
+		return;
+	}

+	if (fw->size == 0) {
+		dev_err(&spi->dev, "Error: Firmware size is 0!\n");
+		return;
+	}

+	/* Fill dummy data (24 stuffing bits for commands) */
+	txbuf[1] = 0x00;
+	txbuf[2] = 0x00;
+	txbuf[3] = 0x00;

+	/* Trying to speak with the FPGA via SPI...
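+	 * Each command frame is the one-byte opcode followed by the 24
+	 * stuffing bits prepared above. For READ_ID the 32-bit JTAG ID
+	 * arrives in the second half of the 8-byte transfer, hence the
+	 * get_unaligned_be32(&rxbuf[4]) extraction below.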
*/ + txbuf[0] = FPGA_CMD_READ_ID; + ret = spi_write_then_read(spi, txbuf, 8, rxbuf, rx_len); + jedec_id = get_unaligned_be32(&rxbuf[4]); + dev_dbg(&spi->dev, "FPGA JTAG ID=%08x\n", jedec_id); + + for (i = 0; i < ARRAY_SIZE(ecp3_dev); i++) { + if (jedec_id == ecp3_dev[i].jedec_id) + break; + } + if (i == ARRAY_SIZE(ecp3_dev)) { + dev_err(&spi->dev, + "Error: No supported FPGA detected (JEDEC_ID=%08x)!\n", + jedec_id); + return; + } + + dev_info(&spi->dev, "FPGA %s detected\n", ecp3_dev[i].name); + + txbuf[0] = FPGA_CMD_READ_STATUS; + ret = spi_write_then_read(spi, txbuf, 8, rxbuf, rx_len); + status = get_unaligned_be32(&rxbuf[4]); + dev_dbg(&spi->dev, "FPGA Status=%08x\n", status); + + buffer = kzalloc(fw->size + 8, GFP_KERNEL); + if (!buffer) { + dev_err(&spi->dev, "Error: Can't allocate memory!\n"); + return; + } + + /* + * Insert WRITE_INC command into stream (one SPI frame) + */ + buffer[0] = FPGA_CMD_WRITE_INC; + buffer[1] = 0xff; + buffer[2] = 0xff; + buffer[3] = 0xff; + memcpy(buffer + 4, fw->data, fw->size); + + txbuf[0] = FPGA_CMD_REFRESH; + ret = spi_write(spi, txbuf, 4); + + txbuf[0] = FPGA_CMD_WRITE_EN; + ret = spi_write(spi, txbuf, 4); + + txbuf[0] = FPGA_CMD_CLEAR; + ret = spi_write(spi, txbuf, 4); + + /* + * Wait for FPGA memory to become cleared + */ + for (i = 0; i < FPGA_CLEAR_LOOP_COUNT; i++) { + txbuf[0] = FPGA_CMD_READ_STATUS; + ret = spi_write_then_read(spi, txbuf, 8, rxbuf, rx_len); + status = get_unaligned_be32(&rxbuf[4]); + if (status == FPGA_STATUS_CLEARED) + break; + + msleep(FPGA_CLEAR_MSLEEP); + } + + if (i == FPGA_CLEAR_LOOP_COUNT) { + dev_err(&spi->dev, + "Error: Timeout waiting for FPGA to clear (status=%08x)!\n", + status); + kfree(buffer); + return; + } + + dev_info(&spi->dev, "Configuring the FPGA...\n"); + ret = spi_write(spi, buffer, fw->size + 8); + + txbuf[0] = FPGA_CMD_WRITE_DIS; + ret = spi_write(spi, txbuf, 4); + + txbuf[0] = FPGA_CMD_READ_STATUS; + ret = spi_write_then_read(spi, txbuf, 8, rxbuf, rx_len); + status = get_unaligned_be32(&rxbuf[4]); + dev_dbg(&spi->dev, "FPGA Status=%08x\n", status); + + /* Check result */ + if (status & FPGA_STATUS_DONE) + dev_info(&spi->dev, "FPGA successfully configured!\n"); + else + dev_info(&spi->dev, "FPGA not configured (DONE not set)\n"); + + /* + * Don't forget to release the firmware again + */ + release_firmware(fw); + + kfree(buffer); + + complete(&data->fw_loaded); +} + +static int lattice_ecp3_probe(struct spi_device *spi) +{ + struct fpga_data *data; + int err; + + data = devm_kzalloc(&spi->dev, sizeof(*data), GFP_KERNEL); + if (!data) { + dev_err(&spi->dev, "Memory allocation for fpga_data failed\n"); + return -ENOMEM; + } + spi_set_drvdata(spi, data); + + init_completion(&data->fw_loaded); + err = request_firmware_nowait(THIS_MODULE, FW_ACTION_HOTPLUG, + FIRMWARE_NAME, &spi->dev, + GFP_KERNEL, spi, firmware_load); + if (err) { + dev_err(&spi->dev, "Firmware loading failed with %d!\n", err); + return err; + } + + dev_info(&spi->dev, "FPGA bitstream configuration driver registered\n"); + + return 0; +} + +static int lattice_ecp3_remove(struct spi_device *spi) +{ + struct fpga_data *data = spi_get_drvdata(spi); + + wait_for_completion(&data->fw_loaded); + + return 0; +} + +static const struct spi_device_id lattice_ecp3_id[] = { + { "ecp3-17", 0 }, + { "ecp3-35", 0 }, + { } +}; +MODULE_DEVICE_TABLE(spi, lattice_ecp3_id); + +static struct spi_driver lattice_ecp3_driver = { + .driver = { + .name = "lattice-ecp3", + .owner = THIS_MODULE, + }, + .probe = lattice_ecp3_probe, + .remove = lattice_ecp3_remove, + 
.id_table = lattice_ecp3_id, +}; + +module_spi_driver(lattice_ecp3_driver); + +MODULE_AUTHOR("Stefan Roese <sr@denx.de>"); +MODULE_DESCRIPTION("Lattice ECP3 FPGA configuration via SPI"); +MODULE_LICENSE("GPL"); +MODULE_FIRMWARE(FIRMWARE_NAME); diff --git a/kernel/drivers/misc/lis3lv02d/Kconfig b/kernel/drivers/misc/lis3lv02d/Kconfig new file mode 100644 index 000000000..8f474e6fc --- /dev/null +++ b/kernel/drivers/misc/lis3lv02d/Kconfig @@ -0,0 +1,37 @@ +# +# STMicroelectonics LIS3LV02D and similar accelerometers +# + +config SENSORS_LIS3_SPI + tristate "STMicroeletronics LIS3LV02Dx three-axis digital accelerometer (SPI)" + depends on !ACPI && SPI_MASTER && INPUT + select SENSORS_LIS3LV02D + default n + help + This driver provides support for the LIS3LV02Dx accelerometer connected + via SPI. The accelerometer data is readable via + /sys/devices/platform/lis3lv02d. + + This driver also provides an absolute input class device, allowing + the laptop to act as a pinball machine-esque joystick. + + This driver can also be built as modules. If so, the core module + will be called lis3lv02d and a specific module for the SPI transport + is called lis3lv02d_spi. + +config SENSORS_LIS3_I2C + tristate "STMicroeletronics LIS3LV02Dx three-axis digital accelerometer (I2C)" + depends on I2C && INPUT + select SENSORS_LIS3LV02D + default n + help + This driver provides support for the LIS3LV02Dx accelerometer connected + via I2C. The accelerometer data is readable via + /sys/devices/platform/lis3lv02d. + + This driver also provides an absolute input class device, allowing + the device to act as a pinball machine-esque joystick. + + This driver can also be built as modules. If so, the core module + will be called lis3lv02d and a specific module for the I2C transport + is called lis3lv02d_i2c. diff --git a/kernel/drivers/misc/lis3lv02d/Makefile b/kernel/drivers/misc/lis3lv02d/Makefile new file mode 100644 index 000000000..4bf58b16f --- /dev/null +++ b/kernel/drivers/misc/lis3lv02d/Makefile @@ -0,0 +1,7 @@ +# +# STMicroelectonics LIS3LV02D and similar accelerometers +# + +obj-$(CONFIG_SENSORS_LIS3LV02D) += lis3lv02d.o +obj-$(CONFIG_SENSORS_LIS3_SPI) += lis3lv02d_spi.o +obj-$(CONFIG_SENSORS_LIS3_I2C) += lis3lv02d_i2c.o diff --git a/kernel/drivers/misc/lis3lv02d/lis3lv02d.c b/kernel/drivers/misc/lis3lv02d/lis3lv02d.c new file mode 100644 index 000000000..4739689d2 --- /dev/null +++ b/kernel/drivers/misc/lis3lv02d/lis3lv02d.c @@ -0,0 +1,1262 @@ +/* + * lis3lv02d.c - ST LIS3LV02DL accelerometer driver + * + * Copyright (C) 2007-2008 Yan Burman + * Copyright (C) 2008 Eric Piel + * Copyright (C) 2008-2009 Pavel Machek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/dmi.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/platform_device.h> +#include <linux/interrupt.h> +#include <linux/input-polldev.h> +#include <linux/delay.h> +#include <linux/wait.h> +#include <linux/poll.h> +#include <linux/slab.h> +#include <linux/freezer.h> +#include <linux/uaccess.h> +#include <linux/miscdevice.h> +#include <linux/pm_runtime.h> +#include <linux/atomic.h> +#include <linux/of_device.h> +#include "lis3lv02d.h" + +#define DRIVER_NAME "lis3lv02d" + +/* joystick device poll interval in milliseconds */ +#define MDPS_POLL_INTERVAL 50 +#define MDPS_POLL_MIN 0 +#define MDPS_POLL_MAX 2000 + +#define LIS3_SYSFS_POWERDOWN_DELAY 5000 /* In milliseconds */ + +#define SELFTEST_OK 0 +#define SELFTEST_FAIL -1 +#define SELFTEST_IRQ -2 + +#define IRQ_LINE0 0 +#define IRQ_LINE1 1 + +/* + * The sensor can also generate interrupts (DRDY) but it's pretty pointless + * because they are generated even if the data do not change. So it's better + * to keep the interrupt for the free-fall event. The values are updated at + * 40Hz (at the lowest frequency), but as it can be pretty time consuming on + * some low processor, we poll the sensor only at 20Hz... enough for the + * joystick. + */ + +#define LIS3_PWRON_DELAY_WAI_12B (5000) +#define LIS3_PWRON_DELAY_WAI_8B (3000) + +/* + * LIS3LV02D spec says 1024 LSBs corresponds 1 G -> 1LSB is 1000/1024 mG + * LIS302D spec says: 18 mG / digit + * LIS3_ACCURACY is used to increase accuracy of the intermediate + * calculation results. + */ +#define LIS3_ACCURACY 1024 +/* Sensitivity values for -2G +2G scale */ +#define LIS3_SENSITIVITY_12B ((LIS3_ACCURACY * 1000) / 1024) +#define LIS3_SENSITIVITY_8B (18 * LIS3_ACCURACY) + +/* + * LIS331DLH spec says 1LSBs corresponds 4G/4096 -> 1LSB is 1000/1024 mG. + * Below macros defines sensitivity values for +/-2G. Dataout bits for + * +/-2G range is 12 bits so 4 bits adjustment must be done to get 12bit + * data from 16bit value. Currently this driver supports only 2G range. 
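+ * Example: a raw 16-bit sample of 0x4000 shifted right by
+ * SHIFT_ADJ_2G (4) gives 0x0400 = 1024 counts, which the +/-2G
+ * sensitivity below scales to exactly 1 g.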
+ */ +#define LIS3DLH_SENSITIVITY_2G ((LIS3_ACCURACY * 1000) / 1024) +#define SHIFT_ADJ_2G 4 + +#define LIS3_DEFAULT_FUZZ_12B 3 +#define LIS3_DEFAULT_FLAT_12B 3 +#define LIS3_DEFAULT_FUZZ_8B 1 +#define LIS3_DEFAULT_FLAT_8B 1 + +struct lis3lv02d lis3_dev = { + .misc_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lis3_dev.misc_wait), +}; +EXPORT_SYMBOL_GPL(lis3_dev); + +/* just like param_set_int() but does sanity-check so that it won't point + * over the axis array size + */ +static int param_set_axis(const char *val, const struct kernel_param *kp) +{ + int ret = param_set_int(val, kp); + if (!ret) { + int val = *(int *)kp->arg; + if (val < 0) + val = -val; + if (!val || val > 3) + return -EINVAL; + } + return ret; +} + +static struct kernel_param_ops param_ops_axis = { + .set = param_set_axis, + .get = param_get_int, +}; + +#define param_check_axis(name, p) param_check_int(name, p) + +module_param_array_named(axes, lis3_dev.ac.as_array, axis, NULL, 0644); +MODULE_PARM_DESC(axes, "Axis-mapping for x,y,z directions"); + +static s16 lis3lv02d_read_8(struct lis3lv02d *lis3, int reg) +{ + s8 lo; + if (lis3->read(lis3, reg, &lo) < 0) + return 0; + + return lo; +} + +static s16 lis3lv02d_read_12(struct lis3lv02d *lis3, int reg) +{ + u8 lo, hi; + + lis3->read(lis3, reg - 1, &lo); + lis3->read(lis3, reg, &hi); + /* In "12 bit right justified" mode, bit 6, bit 7, bit 8 = bit 5 */ + return (s16)((hi << 8) | lo); +} + +/* 12bits for 2G range, 13 bits for 4G range and 14 bits for 8G range */ +static s16 lis331dlh_read_data(struct lis3lv02d *lis3, int reg) +{ + u8 lo, hi; + int v; + + lis3->read(lis3, reg - 1, &lo); + lis3->read(lis3, reg, &hi); + v = (int) ((hi << 8) | lo); + + return (s16) v >> lis3->shift_adj; +} + +/** + * lis3lv02d_get_axis - For the given axis, give the value converted + * @axis: 1,2,3 - can also be negative + * @hw_values: raw values returned by the hardware + * + * Returns the converted value. 
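+ * For example, an axis value of -1 returns the first hardware value
+ * negated, which is how a chip mounted upside down is remapped
+ * without touching the readout path.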
+ */ +static inline int lis3lv02d_get_axis(s8 axis, int hw_values[3]) +{ + if (axis > 0) + return hw_values[axis - 1]; + else + return -hw_values[-axis - 1]; +} + +/** + * lis3lv02d_get_xyz - Get X, Y and Z axis values from the accelerometer + * @lis3: pointer to the device struct + * @x: where to store the X axis value + * @y: where to store the Y axis value + * @z: where to store the Z axis value + * + * Note that 40Hz input device can eat up about 10% CPU at 800MHZ + */ +static void lis3lv02d_get_xyz(struct lis3lv02d *lis3, int *x, int *y, int *z) +{ + int position[3]; + int i; + + if (lis3->blkread) { + if (lis3->whoami == WAI_12B) { + u16 data[3]; + lis3->blkread(lis3, OUTX_L, 6, (u8 *)data); + for (i = 0; i < 3; i++) + position[i] = (s16)le16_to_cpu(data[i]); + } else { + u8 data[5]; + /* Data: x, dummy, y, dummy, z */ + lis3->blkread(lis3, OUTX, 5, data); + for (i = 0; i < 3; i++) + position[i] = (s8)data[i * 2]; + } + } else { + position[0] = lis3->read_data(lis3, OUTX); + position[1] = lis3->read_data(lis3, OUTY); + position[2] = lis3->read_data(lis3, OUTZ); + } + + for (i = 0; i < 3; i++) + position[i] = (position[i] * lis3->scale) / LIS3_ACCURACY; + + *x = lis3lv02d_get_axis(lis3->ac.x, position); + *y = lis3lv02d_get_axis(lis3->ac.y, position); + *z = lis3lv02d_get_axis(lis3->ac.z, position); +} + +/* conversion btw sampling rate and the register values */ +static int lis3_12_rates[4] = {40, 160, 640, 2560}; +static int lis3_8_rates[2] = {100, 400}; +static int lis3_3dc_rates[16] = {0, 1, 10, 25, 50, 100, 200, 400, 1600, 5000}; +static int lis3_3dlh_rates[4] = {50, 100, 400, 1000}; + +/* ODR is Output Data Rate */ +static int lis3lv02d_get_odr(struct lis3lv02d *lis3) +{ + u8 ctrl; + int shift; + + lis3->read(lis3, CTRL_REG1, &ctrl); + ctrl &= lis3->odr_mask; + shift = ffs(lis3->odr_mask) - 1; + return lis3->odrs[(ctrl >> shift)]; +} + +static int lis3lv02d_get_pwron_wait(struct lis3lv02d *lis3) +{ + int div = lis3lv02d_get_odr(lis3); + + if (WARN_ONCE(div == 0, "device returned spurious data")) + return -ENXIO; + + /* LIS3 power on delay is quite long */ + msleep(lis3->pwron_delay / div); + return 0; +} + +static int lis3lv02d_set_odr(struct lis3lv02d *lis3, int rate) +{ + u8 ctrl; + int i, len, shift; + + if (!rate) + return -EINVAL; + + lis3->read(lis3, CTRL_REG1, &ctrl); + ctrl &= ~lis3->odr_mask; + len = 1 << hweight_long(lis3->odr_mask); /* # of possible values */ + shift = ffs(lis3->odr_mask) - 1; + + for (i = 0; i < len; i++) + if (lis3->odrs[i] == rate) { + lis3->write(lis3, CTRL_REG1, + ctrl | (i << shift)); + return 0; + } + return -EINVAL; +} + +static int lis3lv02d_selftest(struct lis3lv02d *lis3, s16 results[3]) +{ + u8 ctlreg, reg; + s16 x, y, z; + u8 selftest; + int ret; + u8 ctrl_reg_data; + unsigned char irq_cfg; + + mutex_lock(&lis3->mutex); + + irq_cfg = lis3->irq_cfg; + if (lis3->whoami == WAI_8B) { + lis3->data_ready_count[IRQ_LINE0] = 0; + lis3->data_ready_count[IRQ_LINE1] = 0; + + /* Change interrupt cfg to data ready for selftest */ + atomic_inc(&lis3->wake_thread); + lis3->irq_cfg = LIS3_IRQ1_DATA_READY | LIS3_IRQ2_DATA_READY; + lis3->read(lis3, CTRL_REG3, &ctrl_reg_data); + lis3->write(lis3, CTRL_REG3, (ctrl_reg_data & + ~(LIS3_IRQ1_MASK | LIS3_IRQ2_MASK)) | + (LIS3_IRQ1_DATA_READY | LIS3_IRQ2_DATA_READY)); + } + + if ((lis3->whoami == WAI_3DC) || (lis3->whoami == WAI_3DLH)) { + ctlreg = CTRL_REG4; + selftest = CTRL4_ST0; + } else { + ctlreg = CTRL_REG1; + if (lis3->whoami == WAI_12B) + selftest = CTRL1_ST; + else + selftest = CTRL1_STP; + } + + 
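+	/*
+	 * Self-test sequence: save the control register, set the
+	 * self-test bit, sample X/Y/Z, then restore the register and
+	 * sample again; each reported result is the difference between
+	 * the two readings on that axis.
+	 */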
+	lis3->read(lis3, ctlreg, &reg);
+	lis3->write(lis3, ctlreg, (reg | selftest));
+	ret = lis3lv02d_get_pwron_wait(lis3);
+	if (ret)
+		goto fail;

+	/* Read directly to avoid axis remap */
+	x = lis3->read_data(lis3, OUTX);
+	y = lis3->read_data(lis3, OUTY);
+	z = lis3->read_data(lis3, OUTZ);

+	/* back to normal settings */
+	lis3->write(lis3, ctlreg, reg);
+	ret = lis3lv02d_get_pwron_wait(lis3);
+	if (ret)
+		goto fail;

+	results[0] = x - lis3->read_data(lis3, OUTX);
+	results[1] = y - lis3->read_data(lis3, OUTY);
+	results[2] = z - lis3->read_data(lis3, OUTZ);

+	ret = 0;

+	if (lis3->whoami == WAI_8B) {
+		/* Restore original interrupt configuration */
+		atomic_dec(&lis3->wake_thread);
+		lis3->write(lis3, CTRL_REG3, ctrl_reg_data);
+		lis3->irq_cfg = irq_cfg;

+		if ((irq_cfg & LIS3_IRQ1_MASK) &&
+		    lis3->data_ready_count[IRQ_LINE0] < 2) {
+			ret = SELFTEST_IRQ;
+			goto fail;
+		}

+		if ((irq_cfg & LIS3_IRQ2_MASK) &&
+		    lis3->data_ready_count[IRQ_LINE1] < 2) {
+			ret = SELFTEST_IRQ;
+			goto fail;
+		}
+	}

+	if (lis3->pdata) {
+		int i;
+		for (i = 0; i < 3; i++) {
+			/* Check against selftest acceptance limits */
+			if ((results[i] < lis3->pdata->st_min_limits[i]) ||
+			    (results[i] > lis3->pdata->st_max_limits[i])) {
+				ret = SELFTEST_FAIL;
+				goto fail;
+			}
+		}
+	}

+	/* test passed */
+fail:
+	mutex_unlock(&lis3->mutex);
+	return ret;
+}

+/*
+ * The order of registers in the list affects the order of the restore
+ * process. It is a good idea to set the interrupt enable register last,
+ * after all other configuration has been restored.
+ */
+static u8 lis3_wai8_regs[] = { FF_WU_CFG_1, FF_WU_THS_1, FF_WU_DURATION_1,
+			       FF_WU_CFG_2, FF_WU_THS_2, FF_WU_DURATION_2,
+			       CLICK_CFG, CLICK_SRC, CLICK_THSY_X, CLICK_THSZ,
+			       CLICK_TIMELIMIT, CLICK_LATENCY, CLICK_WINDOW,
+			       CTRL_REG1, CTRL_REG2, CTRL_REG3};

+static u8 lis3_wai12_regs[] = {FF_WU_CFG, FF_WU_THS_L, FF_WU_THS_H,
+			       FF_WU_DURATION, DD_CFG, DD_THSI_L, DD_THSI_H,
+			       DD_THSE_L, DD_THSE_H,
+			       CTRL_REG1, CTRL_REG3, CTRL_REG2};

+static inline void lis3_context_save(struct lis3lv02d *lis3)
+{
+	int i;
+	for (i = 0; i < lis3->regs_size; i++)
+		lis3->read(lis3, lis3->regs[i], &lis3->reg_cache[i]);
+	lis3->regs_stored = true;
+}

+static inline void lis3_context_restore(struct lis3lv02d *lis3)
+{
+	int i;
+	if (lis3->regs_stored)
+		for (i = 0; i < lis3->regs_size; i++)
+			lis3->write(lis3, lis3->regs[i], lis3->reg_cache[i]);
+}

+void lis3lv02d_poweroff(struct lis3lv02d *lis3)
+{
+	if (lis3->reg_ctrl)
+		lis3_context_save(lis3);
+	/* disable X,Y,Z axis and power down */
+	lis3->write(lis3, CTRL_REG1, 0x00);
+	if (lis3->reg_ctrl)
+		lis3->reg_ctrl(lis3, LIS3_REG_OFF);
+}
+EXPORT_SYMBOL_GPL(lis3lv02d_poweroff);

+int lis3lv02d_poweron(struct lis3lv02d *lis3)
+{
+	int err;
+	u8 reg;

+	lis3->init(lis3);

+	/*
+	 * Common configuration
+	 * BDU: (12 bits sensors only) LSB and MSB values are not updated until
+	 * both have been read. So the value read will always be correct.
+	 * Set BOOT bit to refresh factory tuning values.
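+	 * Without BDU, a 12-bit sample could pair the low byte of one
+	 * conversion with the high byte of the next; with it set, the
+	 * two-register read in lis3lv02d_read_12() stays coherent.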
+	 */
+	if (lis3->pdata) {
+		lis3->read(lis3, CTRL_REG2, &reg);
+		if (lis3->whoami == WAI_12B)
+			reg |= CTRL2_BDU | CTRL2_BOOT;
+		else if (lis3->whoami == WAI_3DLH)
+			reg |= CTRL2_BOOT_3DLH;
+		else
+			reg |= CTRL2_BOOT_8B;
+		lis3->write(lis3, CTRL_REG2, reg);

+		if (lis3->whoami == WAI_3DLH) {
+			lis3->read(lis3, CTRL_REG4, &reg);
+			reg |= CTRL4_BDU;
+			lis3->write(lis3, CTRL_REG4, reg);
+		}
+	}

+	err = lis3lv02d_get_pwron_wait(lis3);
+	if (err)
+		return err;

+	if (lis3->reg_ctrl)
+		lis3_context_restore(lis3);

+	return 0;
+}
+EXPORT_SYMBOL_GPL(lis3lv02d_poweron);


+static void lis3lv02d_joystick_poll(struct input_polled_dev *pidev)
+{
+	struct lis3lv02d *lis3 = pidev->private;
+	int x, y, z;

+	mutex_lock(&lis3->mutex);
+	lis3lv02d_get_xyz(lis3, &x, &y, &z);
+	input_report_abs(pidev->input, ABS_X, x);
+	input_report_abs(pidev->input, ABS_Y, y);
+	input_report_abs(pidev->input, ABS_Z, z);
+	input_sync(pidev->input);
+	mutex_unlock(&lis3->mutex);
+}

+static void lis3lv02d_joystick_open(struct input_polled_dev *pidev)
+{
+	struct lis3lv02d *lis3 = pidev->private;

+	if (lis3->pm_dev)
+		pm_runtime_get_sync(lis3->pm_dev);

+	if (lis3->pdata && lis3->whoami == WAI_8B && lis3->idev)
+		atomic_set(&lis3->wake_thread, 1);
+	/*
+	 * Update coordinates for the case where poll interval is 0 and
+	 * the chip is running purely under interrupt control
+	 */
+	lis3lv02d_joystick_poll(pidev);
+}

+static void lis3lv02d_joystick_close(struct input_polled_dev *pidev)
+{
+	struct lis3lv02d *lis3 = pidev->private;

+	atomic_set(&lis3->wake_thread, 0);
+	if (lis3->pm_dev)
+		pm_runtime_put(lis3->pm_dev);
+}

+static irqreturn_t lis302dl_interrupt(int irq, void *data)
+{
+	struct lis3lv02d *lis3 = data;

+	if (!test_bit(0, &lis3->misc_opened))
+		goto out;

+	/*
+	 * Be careful: on some HP laptops the BIOS forces DD when on battery
+	 * and the lid is closed. This leads to interrupts as soon as a
+	 * little move is done.
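+	 * The hard handler therefore only bumps the event count and
+	 * wakes any /dev reader; click and data-ready decoding happens
+	 * in the threaded handlers, entered via IRQ_WAKE_THREAD only
+	 * while wake_thread is set.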
+ */ + atomic_inc(&lis3->count); + + wake_up_interruptible(&lis3->misc_wait); + kill_fasync(&lis3->async_queue, SIGIO, POLL_IN); +out: + if (atomic_read(&lis3->wake_thread)) + return IRQ_WAKE_THREAD; + return IRQ_HANDLED; +} + +static void lis302dl_interrupt_handle_click(struct lis3lv02d *lis3) +{ + struct input_dev *dev = lis3->idev->input; + u8 click_src; + + mutex_lock(&lis3->mutex); + lis3->read(lis3, CLICK_SRC, &click_src); + + if (click_src & CLICK_SINGLE_X) { + input_report_key(dev, lis3->mapped_btns[0], 1); + input_report_key(dev, lis3->mapped_btns[0], 0); + } + + if (click_src & CLICK_SINGLE_Y) { + input_report_key(dev, lis3->mapped_btns[1], 1); + input_report_key(dev, lis3->mapped_btns[1], 0); + } + + if (click_src & CLICK_SINGLE_Z) { + input_report_key(dev, lis3->mapped_btns[2], 1); + input_report_key(dev, lis3->mapped_btns[2], 0); + } + input_sync(dev); + mutex_unlock(&lis3->mutex); +} + +static inline void lis302dl_data_ready(struct lis3lv02d *lis3, int index) +{ + int dummy; + + /* Dummy read to ack interrupt */ + lis3lv02d_get_xyz(lis3, &dummy, &dummy, &dummy); + lis3->data_ready_count[index]++; +} + +static irqreturn_t lis302dl_interrupt_thread1_8b(int irq, void *data) +{ + struct lis3lv02d *lis3 = data; + u8 irq_cfg = lis3->irq_cfg & LIS3_IRQ1_MASK; + + if (irq_cfg == LIS3_IRQ1_CLICK) + lis302dl_interrupt_handle_click(lis3); + else if (unlikely(irq_cfg == LIS3_IRQ1_DATA_READY)) + lis302dl_data_ready(lis3, IRQ_LINE0); + else + lis3lv02d_joystick_poll(lis3->idev); + + return IRQ_HANDLED; +} + +static irqreturn_t lis302dl_interrupt_thread2_8b(int irq, void *data) +{ + struct lis3lv02d *lis3 = data; + u8 irq_cfg = lis3->irq_cfg & LIS3_IRQ2_MASK; + + if (irq_cfg == LIS3_IRQ2_CLICK) + lis302dl_interrupt_handle_click(lis3); + else if (unlikely(irq_cfg == LIS3_IRQ2_DATA_READY)) + lis302dl_data_ready(lis3, IRQ_LINE1); + else + lis3lv02d_joystick_poll(lis3->idev); + + return IRQ_HANDLED; +} + +static int lis3lv02d_misc_open(struct inode *inode, struct file *file) +{ + struct lis3lv02d *lis3 = container_of(file->private_data, + struct lis3lv02d, miscdev); + + if (test_and_set_bit(0, &lis3->misc_opened)) + return -EBUSY; /* already open */ + + if (lis3->pm_dev) + pm_runtime_get_sync(lis3->pm_dev); + + atomic_set(&lis3->count, 0); + return 0; +} + +static int lis3lv02d_misc_release(struct inode *inode, struct file *file) +{ + struct lis3lv02d *lis3 = container_of(file->private_data, + struct lis3lv02d, miscdev); + + clear_bit(0, &lis3->misc_opened); /* release the device */ + if (lis3->pm_dev) + pm_runtime_put(lis3->pm_dev); + return 0; +} + +static ssize_t lis3lv02d_misc_read(struct file *file, char __user *buf, + size_t count, loff_t *pos) +{ + struct lis3lv02d *lis3 = container_of(file->private_data, + struct lis3lv02d, miscdev); + + DECLARE_WAITQUEUE(wait, current); + u32 data; + unsigned char byte_data; + ssize_t retval = 1; + + if (count < 1) + return -EINVAL; + + add_wait_queue(&lis3->misc_wait, &wait); + while (true) { + set_current_state(TASK_INTERRUPTIBLE); + data = atomic_xchg(&lis3->count, 0); + if (data) + break; + + if (file->f_flags & O_NONBLOCK) { + retval = -EAGAIN; + goto out; + } + + if (signal_pending(current)) { + retval = -ERESTARTSYS; + goto out; + } + + schedule(); + } + + if (data < 255) + byte_data = data; + else + byte_data = 255; + + /* make sure we are not going into copy_to_user() with + * TASK_INTERRUPTIBLE state */ + set_current_state(TASK_RUNNING); + if (copy_to_user(buf, &byte_data, sizeof(byte_data))) + retval = -EFAULT; + +out: + 
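+	/*
+	 * Both the success and the error paths exit here: leave the
+	 * task RUNNING and take it off the wait queue before returning.
+	 */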
__set_current_state(TASK_RUNNING); + remove_wait_queue(&lis3->misc_wait, &wait); + + return retval; +} + +static unsigned int lis3lv02d_misc_poll(struct file *file, poll_table *wait) +{ + struct lis3lv02d *lis3 = container_of(file->private_data, + struct lis3lv02d, miscdev); + + poll_wait(file, &lis3->misc_wait, wait); + if (atomic_read(&lis3->count)) + return POLLIN | POLLRDNORM; + return 0; +} + +static int lis3lv02d_misc_fasync(int fd, struct file *file, int on) +{ + struct lis3lv02d *lis3 = container_of(file->private_data, + struct lis3lv02d, miscdev); + + return fasync_helper(fd, file, on, &lis3->async_queue); +} + +static const struct file_operations lis3lv02d_misc_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = lis3lv02d_misc_read, + .open = lis3lv02d_misc_open, + .release = lis3lv02d_misc_release, + .poll = lis3lv02d_misc_poll, + .fasync = lis3lv02d_misc_fasync, +}; + +int lis3lv02d_joystick_enable(struct lis3lv02d *lis3) +{ + struct input_dev *input_dev; + int err; + int max_val, fuzz, flat; + int btns[] = {BTN_X, BTN_Y, BTN_Z}; + + if (lis3->idev) + return -EINVAL; + + lis3->idev = input_allocate_polled_device(); + if (!lis3->idev) + return -ENOMEM; + + lis3->idev->poll = lis3lv02d_joystick_poll; + lis3->idev->open = lis3lv02d_joystick_open; + lis3->idev->close = lis3lv02d_joystick_close; + lis3->idev->poll_interval = MDPS_POLL_INTERVAL; + lis3->idev->poll_interval_min = MDPS_POLL_MIN; + lis3->idev->poll_interval_max = MDPS_POLL_MAX; + lis3->idev->private = lis3; + input_dev = lis3->idev->input; + + input_dev->name = "ST LIS3LV02DL Accelerometer"; + input_dev->phys = DRIVER_NAME "/input0"; + input_dev->id.bustype = BUS_HOST; + input_dev->id.vendor = 0; + input_dev->dev.parent = &lis3->pdev->dev; + + set_bit(EV_ABS, input_dev->evbit); + max_val = (lis3->mdps_max_val * lis3->scale) / LIS3_ACCURACY; + if (lis3->whoami == WAI_12B) { + fuzz = LIS3_DEFAULT_FUZZ_12B; + flat = LIS3_DEFAULT_FLAT_12B; + } else { + fuzz = LIS3_DEFAULT_FUZZ_8B; + flat = LIS3_DEFAULT_FLAT_8B; + } + fuzz = (fuzz * lis3->scale) / LIS3_ACCURACY; + flat = (flat * lis3->scale) / LIS3_ACCURACY; + + input_set_abs_params(input_dev, ABS_X, -max_val, max_val, fuzz, flat); + input_set_abs_params(input_dev, ABS_Y, -max_val, max_val, fuzz, flat); + input_set_abs_params(input_dev, ABS_Z, -max_val, max_val, fuzz, flat); + + lis3->mapped_btns[0] = lis3lv02d_get_axis(abs(lis3->ac.x), btns); + lis3->mapped_btns[1] = lis3lv02d_get_axis(abs(lis3->ac.y), btns); + lis3->mapped_btns[2] = lis3lv02d_get_axis(abs(lis3->ac.z), btns); + + err = input_register_polled_device(lis3->idev); + if (err) { + input_free_polled_device(lis3->idev); + lis3->idev = NULL; + } + + return err; +} +EXPORT_SYMBOL_GPL(lis3lv02d_joystick_enable); + +void lis3lv02d_joystick_disable(struct lis3lv02d *lis3) +{ + if (lis3->irq) + free_irq(lis3->irq, lis3); + if (lis3->pdata && lis3->pdata->irq2) + free_irq(lis3->pdata->irq2, lis3); + + if (!lis3->idev) + return; + + if (lis3->irq) + misc_deregister(&lis3->miscdev); + input_unregister_polled_device(lis3->idev); + input_free_polled_device(lis3->idev); + lis3->idev = NULL; +} +EXPORT_SYMBOL_GPL(lis3lv02d_joystick_disable); + +/* Sysfs stuff */ +static void lis3lv02d_sysfs_poweron(struct lis3lv02d *lis3) +{ + /* + * SYSFS functions are fast visitors so put-call + * immediately after the get-call. However, keep + * chip running for a while and schedule delayed + * suspend. This way periodic sysfs calls doesn't + * suffer from relatively long power up time. 
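+	 * In runtime PM terms: pm_runtime_get_sync() powers the chip up
+	 * now, pm_runtime_put_noidle() drops the reference without an
+	 * immediate suspend, and pm_schedule_suspend() powers it back
+	 * down LIS3_SYSFS_POWERDOWN_DELAY milliseconds later if nothing
+	 * else has claimed it.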
+ */ + + if (lis3->pm_dev) { + pm_runtime_get_sync(lis3->pm_dev); + pm_runtime_put_noidle(lis3->pm_dev); + pm_schedule_suspend(lis3->pm_dev, LIS3_SYSFS_POWERDOWN_DELAY); + } +} + +static ssize_t lis3lv02d_selftest_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct lis3lv02d *lis3 = dev_get_drvdata(dev); + s16 values[3]; + + static const char ok[] = "OK"; + static const char fail[] = "FAIL"; + static const char irq[] = "FAIL_IRQ"; + const char *res; + + lis3lv02d_sysfs_poweron(lis3); + switch (lis3lv02d_selftest(lis3, values)) { + case SELFTEST_FAIL: + res = fail; + break; + case SELFTEST_IRQ: + res = irq; + break; + case SELFTEST_OK: + default: + res = ok; + break; + } + return sprintf(buf, "%s %d %d %d\n", res, + values[0], values[1], values[2]); +} + +static ssize_t lis3lv02d_position_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct lis3lv02d *lis3 = dev_get_drvdata(dev); + int x, y, z; + + lis3lv02d_sysfs_poweron(lis3); + mutex_lock(&lis3->mutex); + lis3lv02d_get_xyz(lis3, &x, &y, &z); + mutex_unlock(&lis3->mutex); + return sprintf(buf, "(%d,%d,%d)\n", x, y, z); +} + +static ssize_t lis3lv02d_rate_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct lis3lv02d *lis3 = dev_get_drvdata(dev); + + lis3lv02d_sysfs_poweron(lis3); + return sprintf(buf, "%d\n", lis3lv02d_get_odr(lis3)); +} + +static ssize_t lis3lv02d_rate_set(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct lis3lv02d *lis3 = dev_get_drvdata(dev); + unsigned long rate; + int ret; + + ret = kstrtoul(buf, 0, &rate); + if (ret) + return ret; + + lis3lv02d_sysfs_poweron(lis3); + if (lis3lv02d_set_odr(lis3, rate)) + return -EINVAL; + + return count; +} + +static DEVICE_ATTR(selftest, S_IRUSR, lis3lv02d_selftest_show, NULL); +static DEVICE_ATTR(position, S_IRUGO, lis3lv02d_position_show, NULL); +static DEVICE_ATTR(rate, S_IRUGO | S_IWUSR, lis3lv02d_rate_show, + lis3lv02d_rate_set); + +static struct attribute *lis3lv02d_attributes[] = { + &dev_attr_selftest.attr, + &dev_attr_position.attr, + &dev_attr_rate.attr, + NULL +}; + +static struct attribute_group lis3lv02d_attribute_group = { + .attrs = lis3lv02d_attributes +}; + + +static int lis3lv02d_add_fs(struct lis3lv02d *lis3) +{ + lis3->pdev = platform_device_register_simple(DRIVER_NAME, -1, NULL, 0); + if (IS_ERR(lis3->pdev)) + return PTR_ERR(lis3->pdev); + + platform_set_drvdata(lis3->pdev, lis3); + return sysfs_create_group(&lis3->pdev->dev.kobj, &lis3lv02d_attribute_group); +} + +int lis3lv02d_remove_fs(struct lis3lv02d *lis3) +{ + sysfs_remove_group(&lis3->pdev->dev.kobj, &lis3lv02d_attribute_group); + platform_device_unregister(lis3->pdev); + if (lis3->pm_dev) { + /* Barrier after the sysfs remove */ + pm_runtime_barrier(lis3->pm_dev); + + /* SYSFS may have left chip running. 
Turn off if necessary */ + if (!pm_runtime_suspended(lis3->pm_dev)) + lis3lv02d_poweroff(lis3); + + pm_runtime_disable(lis3->pm_dev); + pm_runtime_set_suspended(lis3->pm_dev); + } + kfree(lis3->reg_cache); + return 0; +} +EXPORT_SYMBOL_GPL(lis3lv02d_remove_fs); + +static void lis3lv02d_8b_configure(struct lis3lv02d *lis3, + struct lis3lv02d_platform_data *p) +{ + int err; + int ctrl2 = p->hipass_ctrl; + + if (p->click_flags) { + lis3->write(lis3, CLICK_CFG, p->click_flags); + lis3->write(lis3, CLICK_TIMELIMIT, p->click_time_limit); + lis3->write(lis3, CLICK_LATENCY, p->click_latency); + lis3->write(lis3, CLICK_WINDOW, p->click_window); + lis3->write(lis3, CLICK_THSZ, p->click_thresh_z & 0xf); + lis3->write(lis3, CLICK_THSY_X, + (p->click_thresh_x & 0xf) | + (p->click_thresh_y << 4)); + + if (lis3->idev) { + struct input_dev *input_dev = lis3->idev->input; + input_set_capability(input_dev, EV_KEY, BTN_X); + input_set_capability(input_dev, EV_KEY, BTN_Y); + input_set_capability(input_dev, EV_KEY, BTN_Z); + } + } + + if (p->wakeup_flags) { + lis3->write(lis3, FF_WU_CFG_1, p->wakeup_flags); + lis3->write(lis3, FF_WU_THS_1, p->wakeup_thresh & 0x7f); + /* pdata value + 1 to keep this backward compatible*/ + lis3->write(lis3, FF_WU_DURATION_1, p->duration1 + 1); + ctrl2 ^= HP_FF_WU1; /* Xor to keep compatible with old pdata*/ + } + + if (p->wakeup_flags2) { + lis3->write(lis3, FF_WU_CFG_2, p->wakeup_flags2); + lis3->write(lis3, FF_WU_THS_2, p->wakeup_thresh2 & 0x7f); + /* pdata value + 1 to keep this backward compatible*/ + lis3->write(lis3, FF_WU_DURATION_2, p->duration2 + 1); + ctrl2 ^= HP_FF_WU2; /* Xor to keep compatible with old pdata*/ + } + /* Configure hipass filters */ + lis3->write(lis3, CTRL_REG2, ctrl2); + + if (p->irq2) { + err = request_threaded_irq(p->irq2, + NULL, + lis302dl_interrupt_thread2_8b, + IRQF_TRIGGER_RISING | IRQF_ONESHOT | + (p->irq_flags2 & IRQF_TRIGGER_MASK), + DRIVER_NAME, lis3); + if (err < 0) + pr_err("No second IRQ. 
Limited functionality\n"); + } +} + +#ifdef CONFIG_OF +int lis3lv02d_init_dt(struct lis3lv02d *lis3) +{ + struct lis3lv02d_platform_data *pdata; + struct device_node *np = lis3->of_node; + u32 val; + s32 sval; + + if (!lis3->of_node) + return 0; + + pdata = kzalloc(sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return -ENOMEM; + + if (of_get_property(np, "st,click-single-x", NULL)) + pdata->click_flags |= LIS3_CLICK_SINGLE_X; + if (of_get_property(np, "st,click-double-x", NULL)) + pdata->click_flags |= LIS3_CLICK_DOUBLE_X; + + if (of_get_property(np, "st,click-single-y", NULL)) + pdata->click_flags |= LIS3_CLICK_SINGLE_Y; + if (of_get_property(np, "st,click-double-y", NULL)) + pdata->click_flags |= LIS3_CLICK_DOUBLE_Y; + + if (of_get_property(np, "st,click-single-z", NULL)) + pdata->click_flags |= LIS3_CLICK_SINGLE_Z; + if (of_get_property(np, "st,click-double-z", NULL)) + pdata->click_flags |= LIS3_CLICK_DOUBLE_Z; + + if (!of_property_read_u32(np, "st,click-threshold-x", &val)) + pdata->click_thresh_x = val; + if (!of_property_read_u32(np, "st,click-threshold-y", &val)) + pdata->click_thresh_y = val; + if (!of_property_read_u32(np, "st,click-threshold-z", &val)) + pdata->click_thresh_z = val; + + if (!of_property_read_u32(np, "st,click-time-limit", &val)) + pdata->click_time_limit = val; + if (!of_property_read_u32(np, "st,click-latency", &val)) + pdata->click_latency = val; + if (!of_property_read_u32(np, "st,click-window", &val)) + pdata->click_window = val; + + if (of_get_property(np, "st,irq1-disable", NULL)) + pdata->irq_cfg |= LIS3_IRQ1_DISABLE; + if (of_get_property(np, "st,irq1-ff-wu-1", NULL)) + pdata->irq_cfg |= LIS3_IRQ1_FF_WU_1; + if (of_get_property(np, "st,irq1-ff-wu-2", NULL)) + pdata->irq_cfg |= LIS3_IRQ1_FF_WU_2; + if (of_get_property(np, "st,irq1-data-ready", NULL)) + pdata->irq_cfg |= LIS3_IRQ1_DATA_READY; + if (of_get_property(np, "st,irq1-click", NULL)) + pdata->irq_cfg |= LIS3_IRQ1_CLICK; + + if (of_get_property(np, "st,irq2-disable", NULL)) + pdata->irq_cfg |= LIS3_IRQ2_DISABLE; + if (of_get_property(np, "st,irq2-ff-wu-1", NULL)) + pdata->irq_cfg |= LIS3_IRQ2_FF_WU_1; + if (of_get_property(np, "st,irq2-ff-wu-2", NULL)) + pdata->irq_cfg |= LIS3_IRQ2_FF_WU_2; + if (of_get_property(np, "st,irq2-data-ready", NULL)) + pdata->irq_cfg |= LIS3_IRQ2_DATA_READY; + if (of_get_property(np, "st,irq2-click", NULL)) + pdata->irq_cfg |= LIS3_IRQ2_CLICK; + + if (of_get_property(np, "st,irq-open-drain", NULL)) + pdata->irq_cfg |= LIS3_IRQ_OPEN_DRAIN; + if (of_get_property(np, "st,irq-active-low", NULL)) + pdata->irq_cfg |= LIS3_IRQ_ACTIVE_LOW; + + if (!of_property_read_u32(np, "st,wu-duration-1", &val)) + pdata->duration1 = val; + if (!of_property_read_u32(np, "st,wu-duration-2", &val)) + pdata->duration2 = val; + + if (of_get_property(np, "st,wakeup-x-lo", NULL)) + pdata->wakeup_flags |= LIS3_WAKEUP_X_LO; + if (of_get_property(np, "st,wakeup-x-hi", NULL)) + pdata->wakeup_flags |= LIS3_WAKEUP_X_HI; + if (of_get_property(np, "st,wakeup-y-lo", NULL)) + pdata->wakeup_flags |= LIS3_WAKEUP_Y_LO; + if (of_get_property(np, "st,wakeup-y-hi", NULL)) + pdata->wakeup_flags |= LIS3_WAKEUP_Y_HI; + if (of_get_property(np, "st,wakeup-z-lo", NULL)) + pdata->wakeup_flags |= LIS3_WAKEUP_Z_LO; + if (of_get_property(np, "st,wakeup-z-hi", NULL)) + pdata->wakeup_flags |= LIS3_WAKEUP_Z_HI; + if (of_get_property(np, "st,wakeup-threshold", &val)) + pdata->wakeup_thresh = val; + + if (of_get_property(np, "st,wakeup2-x-lo", NULL)) + pdata->wakeup_flags2 |= LIS3_WAKEUP_X_LO; + if (of_get_property(np, 
"st,wakeup2-x-hi", NULL)) + pdata->wakeup_flags2 |= LIS3_WAKEUP_X_HI; + if (of_get_property(np, "st,wakeup2-y-lo", NULL)) + pdata->wakeup_flags2 |= LIS3_WAKEUP_Y_LO; + if (of_get_property(np, "st,wakeup2-y-hi", NULL)) + pdata->wakeup_flags2 |= LIS3_WAKEUP_Y_HI; + if (of_get_property(np, "st,wakeup2-z-lo", NULL)) + pdata->wakeup_flags2 |= LIS3_WAKEUP_Z_LO; + if (of_get_property(np, "st,wakeup2-z-hi", NULL)) + pdata->wakeup_flags2 |= LIS3_WAKEUP_Z_HI; + if (of_get_property(np, "st,wakeup2-threshold", &val)) + pdata->wakeup_thresh2 = val; + + if (!of_property_read_u32(np, "st,highpass-cutoff-hz", &val)) { + switch (val) { + case 1: + pdata->hipass_ctrl = LIS3_HIPASS_CUTFF_1HZ; + break; + case 2: + pdata->hipass_ctrl = LIS3_HIPASS_CUTFF_2HZ; + break; + case 4: + pdata->hipass_ctrl = LIS3_HIPASS_CUTFF_4HZ; + break; + case 8: + pdata->hipass_ctrl = LIS3_HIPASS_CUTFF_8HZ; + break; + } + } + + if (of_get_property(np, "st,hipass1-disable", NULL)) + pdata->hipass_ctrl |= LIS3_HIPASS1_DISABLE; + if (of_get_property(np, "st,hipass2-disable", NULL)) + pdata->hipass_ctrl |= LIS3_HIPASS2_DISABLE; + + if (of_property_read_s32(np, "st,axis-x", &sval) == 0) + pdata->axis_x = sval; + if (of_property_read_s32(np, "st,axis-y", &sval) == 0) + pdata->axis_y = sval; + if (of_property_read_s32(np, "st,axis-z", &sval) == 0) + pdata->axis_z = sval; + + if (of_get_property(np, "st,default-rate", NULL)) + pdata->default_rate = val; + + if (of_property_read_s32(np, "st,min-limit-x", &sval) == 0) + pdata->st_min_limits[0] = sval; + if (of_property_read_s32(np, "st,min-limit-y", &sval) == 0) + pdata->st_min_limits[1] = sval; + if (of_property_read_s32(np, "st,min-limit-z", &sval) == 0) + pdata->st_min_limits[2] = sval; + + if (of_property_read_s32(np, "st,max-limit-x", &sval) == 0) + pdata->st_max_limits[0] = sval; + if (of_property_read_s32(np, "st,max-limit-y", &sval) == 0) + pdata->st_max_limits[1] = sval; + if (of_property_read_s32(np, "st,max-limit-z", &sval) == 0) + pdata->st_max_limits[2] = sval; + + + lis3->pdata = pdata; + + return 0; +} + +#else +int lis3lv02d_init_dt(struct lis3lv02d *lis3) +{ + return 0; +} +#endif +EXPORT_SYMBOL_GPL(lis3lv02d_init_dt); + +/* + * Initialise the accelerometer and the various subsystems. + * Should be rather independent of the bus system. 
+ */ +int lis3lv02d_init_device(struct lis3lv02d *lis3) +{ + int err; + irq_handler_t thread_fn; + int irq_flags = 0; + + lis3->whoami = lis3lv02d_read_8(lis3, WHO_AM_I); + + switch (lis3->whoami) { + case WAI_12B: + pr_info("12 bits sensor found\n"); + lis3->read_data = lis3lv02d_read_12; + lis3->mdps_max_val = 2048; + lis3->pwron_delay = LIS3_PWRON_DELAY_WAI_12B; + lis3->odrs = lis3_12_rates; + lis3->odr_mask = CTRL1_DF0 | CTRL1_DF1; + lis3->scale = LIS3_SENSITIVITY_12B; + lis3->regs = lis3_wai12_regs; + lis3->regs_size = ARRAY_SIZE(lis3_wai12_regs); + break; + case WAI_8B: + pr_info("8 bits sensor found\n"); + lis3->read_data = lis3lv02d_read_8; + lis3->mdps_max_val = 128; + lis3->pwron_delay = LIS3_PWRON_DELAY_WAI_8B; + lis3->odrs = lis3_8_rates; + lis3->odr_mask = CTRL1_DR; + lis3->scale = LIS3_SENSITIVITY_8B; + lis3->regs = lis3_wai8_regs; + lis3->regs_size = ARRAY_SIZE(lis3_wai8_regs); + break; + case WAI_3DC: + pr_info("8 bits 3DC sensor found\n"); + lis3->read_data = lis3lv02d_read_8; + lis3->mdps_max_val = 128; + lis3->pwron_delay = LIS3_PWRON_DELAY_WAI_8B; + lis3->odrs = lis3_3dc_rates; + lis3->odr_mask = CTRL1_ODR0|CTRL1_ODR1|CTRL1_ODR2|CTRL1_ODR3; + lis3->scale = LIS3_SENSITIVITY_8B; + break; + case WAI_3DLH: + pr_info("16 bits lis331dlh sensor found\n"); + lis3->read_data = lis331dlh_read_data; + lis3->mdps_max_val = 2048; /* 12 bits for 2G */ + lis3->shift_adj = SHIFT_ADJ_2G; + lis3->pwron_delay = LIS3_PWRON_DELAY_WAI_8B; + lis3->odrs = lis3_3dlh_rates; + lis3->odr_mask = CTRL1_DR0 | CTRL1_DR1; + lis3->scale = LIS3DLH_SENSITIVITY_2G; + break; + default: + pr_err("unknown sensor type 0x%X\n", lis3->whoami); + return -EINVAL; + } + + lis3->reg_cache = kzalloc(max(sizeof(lis3_wai8_regs), + sizeof(lis3_wai12_regs)), GFP_KERNEL); + + if (lis3->reg_cache == NULL) { + printk(KERN_ERR DRIVER_NAME "out of memory\n"); + return -ENOMEM; + } + + mutex_init(&lis3->mutex); + atomic_set(&lis3->wake_thread, 0); + + lis3lv02d_add_fs(lis3); + err = lis3lv02d_poweron(lis3); + if (err) { + lis3lv02d_remove_fs(lis3); + return err; + } + + if (lis3->pm_dev) { + pm_runtime_set_active(lis3->pm_dev); + pm_runtime_enable(lis3->pm_dev); + } + + if (lis3lv02d_joystick_enable(lis3)) + pr_err("joystick initialization failed\n"); + + /* passing in platform specific data is purely optional and only + * used by the SPI transport layer at the moment */ + if (lis3->pdata) { + struct lis3lv02d_platform_data *p = lis3->pdata; + + if (lis3->whoami == WAI_8B) + lis3lv02d_8b_configure(lis3, p); + + irq_flags = p->irq_flags1 & IRQF_TRIGGER_MASK; + + lis3->irq_cfg = p->irq_cfg; + if (p->irq_cfg) + lis3->write(lis3, CTRL_REG3, p->irq_cfg); + + if (p->default_rate) + lis3lv02d_set_odr(lis3, p->default_rate); + } + + /* bail if we did not get an IRQ from the bus layer */ + if (!lis3->irq) { + pr_debug("No IRQ. Disabling /dev/freefall\n"); + goto out; + } + + /* + * The sensor can generate interrupts for free-fall and direction + * detection (distinguishable with FF_WU_SRC and DD_SRC) but to keep + * the things simple and _fast_ we activate it only for free-fall, so + * no need to read register (very slow with ACPI). For the same reason, + * we forbid shared interrupts. + * + * IRQF_TRIGGER_RISING seems pointless on HP laptops because the + * io-apic is not configurable (and generates a warning) but I keep it + * in case of support for other hardware. 
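+ *
+ * A hypothetical userspace consumer of the resulting misc device
+ * (illustration only; handle_free_fall() is an assumed handler):
+ *
+ *   int fd = open("/dev/freefall", O_RDONLY);
+ *   unsigned char count;
+ *   while (read(fd, &count, 1) == 1)
+ *           handle_free_fall(count);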
+ */ + if (lis3->pdata && lis3->whoami == WAI_8B) + thread_fn = lis302dl_interrupt_thread1_8b; + else + thread_fn = NULL; + + err = request_threaded_irq(lis3->irq, lis302dl_interrupt, + thread_fn, + IRQF_TRIGGER_RISING | IRQF_ONESHOT | + irq_flags, + DRIVER_NAME, lis3); + + if (err < 0) { + pr_err("Cannot get IRQ\n"); + goto out; + } + + lis3->miscdev.minor = MISC_DYNAMIC_MINOR; + lis3->miscdev.name = "freefall"; + lis3->miscdev.fops = &lis3lv02d_misc_fops; + + if (misc_register(&lis3->miscdev)) + pr_err("misc_register failed\n"); +out: + return 0; +} +EXPORT_SYMBOL_GPL(lis3lv02d_init_device); + +MODULE_DESCRIPTION("ST LIS3LV02Dx three-axis digital accelerometer driver"); +MODULE_AUTHOR("Yan Burman, Eric Piel, Pavel Machek"); +MODULE_LICENSE("GPL"); diff --git a/kernel/drivers/misc/lis3lv02d/lis3lv02d.h b/kernel/drivers/misc/lis3lv02d/lis3lv02d.h new file mode 100644 index 000000000..c439c827e --- /dev/null +++ b/kernel/drivers/misc/lis3lv02d/lis3lv02d.h @@ -0,0 +1,331 @@ +/* + * lis3lv02d.h - ST LIS3LV02DL accelerometer driver + * + * Copyright (C) 2007-2008 Yan Burman + * Copyright (C) 2008-2009 Eric Piel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/platform_device.h> +#include <linux/input-polldev.h> +#include <linux/regulator/consumer.h> +#include <linux/miscdevice.h> + +/* + * This driver tries to support the "digital" accelerometer chips from + * STMicroelectronics such as LIS3LV02DL, LIS302DL, LIS3L02DQ, LIS331DL, + * LIS331DLH, LIS35DE, or LIS202DL. They are very similar in terms of + * programming, with almost the same registers. In addition to differing + * on physical properties, they differ on the number of axes (2/3), + * precision (8/12 bits), and special features (freefall detection, + * click...). Unfortunately, not all the differences can be probed via + * a register. They can be connected either via I²C or SPI. 
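+ * The WHO_AM_I register (0x0F) is read at probe time to tell the
+ * supported variants apart; see enum lis3_who_am_i below.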
+ */ + +#include <linux/lis3lv02d.h> + +enum lis3_reg { + WHO_AM_I = 0x0F, + OFFSET_X = 0x16, + OFFSET_Y = 0x17, + OFFSET_Z = 0x18, + GAIN_X = 0x19, + GAIN_Y = 0x1A, + GAIN_Z = 0x1B, + CTRL_REG1 = 0x20, + CTRL_REG2 = 0x21, + CTRL_REG3 = 0x22, + CTRL_REG4 = 0x23, + HP_FILTER_RESET = 0x23, + STATUS_REG = 0x27, + OUTX_L = 0x28, + OUTX_H = 0x29, + OUTX = 0x29, + OUTY_L = 0x2A, + OUTY_H = 0x2B, + OUTY = 0x2B, + OUTZ_L = 0x2C, + OUTZ_H = 0x2D, + OUTZ = 0x2D, +}; + +enum lis302d_reg { + FF_WU_CFG_1 = 0x30, + FF_WU_SRC_1 = 0x31, + FF_WU_THS_1 = 0x32, + FF_WU_DURATION_1 = 0x33, + FF_WU_CFG_2 = 0x34, + FF_WU_SRC_2 = 0x35, + FF_WU_THS_2 = 0x36, + FF_WU_DURATION_2 = 0x37, + CLICK_CFG = 0x38, + CLICK_SRC = 0x39, + CLICK_THSY_X = 0x3B, + CLICK_THSZ = 0x3C, + CLICK_TIMELIMIT = 0x3D, + CLICK_LATENCY = 0x3E, + CLICK_WINDOW = 0x3F, +}; + +enum lis3lv02d_reg { + FF_WU_CFG = 0x30, + FF_WU_SRC = 0x31, + FF_WU_ACK = 0x32, + FF_WU_THS_L = 0x34, + FF_WU_THS_H = 0x35, + FF_WU_DURATION = 0x36, + DD_CFG = 0x38, + DD_SRC = 0x39, + DD_ACK = 0x3A, + DD_THSI_L = 0x3C, + DD_THSI_H = 0x3D, + DD_THSE_L = 0x3E, + DD_THSE_H = 0x3F, +}; + +enum lis3_who_am_i { + WAI_3DLH = 0x32, /* 16 bits: LIS331DLH */ + WAI_3DC = 0x33, /* 8 bits: LIS3DC, HP3DC */ + WAI_12B = 0x3A, /* 12 bits: LIS3LV02D[LQ]... */ + WAI_8B = 0x3B, /* 8 bits: LIS[23]02D[LQ]... */ + WAI_6B = 0x52, /* 6 bits: LIS331DLF - not supported */ +}; + +enum lis3_type { + LIS3LV02D, + LIS3DC, + HP3DC, + LIS2302D, + LIS331DLF, + LIS331DLH, +}; + +enum lis3lv02d_ctrl1_12b { + CTRL1_Xen = 0x01, + CTRL1_Yen = 0x02, + CTRL1_Zen = 0x04, + CTRL1_ST = 0x08, + CTRL1_DF0 = 0x10, + CTRL1_DF1 = 0x20, + CTRL1_PD0 = 0x40, + CTRL1_PD1 = 0x80, +}; + +/* Delta to ctrl1_12b version */ +enum lis3lv02d_ctrl1_8b { + CTRL1_STM = 0x08, + CTRL1_STP = 0x10, + CTRL1_FS = 0x20, + CTRL1_PD = 0x40, + CTRL1_DR = 0x80, +}; + +enum lis3lv02d_ctrl1_3dc { + CTRL1_ODR0 = 0x10, + CTRL1_ODR1 = 0x20, + CTRL1_ODR2 = 0x40, + CTRL1_ODR3 = 0x80, +}; + +enum lis331dlh_ctrl1 { + CTRL1_DR0 = 0x08, + CTRL1_DR1 = 0x10, + CTRL1_PM0 = 0x20, + CTRL1_PM1 = 0x40, + CTRL1_PM2 = 0x80, +}; + +enum lis331dlh_ctrl2 { + CTRL2_HPEN1 = 0x04, + CTRL2_HPEN2 = 0x08, + CTRL2_FDS_3DLH = 0x10, + CTRL2_BOOT_3DLH = 0x80, +}; + +enum lis331dlh_ctrl4 { + CTRL4_STSIGN = 0x08, + CTRL4_BLE = 0x40, + CTRL4_BDU = 0x80, +}; + +enum lis3lv02d_ctrl2 { + CTRL2_DAS = 0x01, + CTRL2_SIM = 0x02, + CTRL2_DRDY = 0x04, + CTRL2_IEN = 0x08, + CTRL2_BOOT = 0x10, + CTRL2_BLE = 0x20, + CTRL2_BDU = 0x40, /* Block Data Update */ + CTRL2_FS = 0x80, /* Full Scale selection */ +}; + +enum lis3lv02d_ctrl4_3dc { + CTRL4_SIM = 0x01, + CTRL4_ST0 = 0x02, + CTRL4_ST1 = 0x04, + CTRL4_FS0 = 0x10, + CTRL4_FS1 = 0x20, +}; + +enum lis302d_ctrl2 { + HP_FF_WU2 = 0x08, + HP_FF_WU1 = 0x04, + CTRL2_BOOT_8B = 0x40, +}; + +enum lis3lv02d_ctrl3 { + CTRL3_CFS0 = 0x01, + CTRL3_CFS1 = 0x02, + CTRL3_FDS = 0x10, + CTRL3_HPFF = 0x20, + CTRL3_HPDD = 0x40, + CTRL3_ECK = 0x80, +}; + +enum lis3lv02d_status_reg { + STATUS_XDA = 0x01, + STATUS_YDA = 0x02, + STATUS_ZDA = 0x04, + STATUS_XYZDA = 0x08, + STATUS_XOR = 0x10, + STATUS_YOR = 0x20, + STATUS_ZOR = 0x40, + STATUS_XYZOR = 0x80, +}; + +enum lis3lv02d_ff_wu_cfg { + FF_WU_CFG_XLIE = 0x01, + FF_WU_CFG_XHIE = 0x02, + FF_WU_CFG_YLIE = 0x04, + FF_WU_CFG_YHIE = 0x08, + FF_WU_CFG_ZLIE = 0x10, + FF_WU_CFG_ZHIE = 0x20, + FF_WU_CFG_LIR = 0x40, + FF_WU_CFG_AOI = 0x80, +}; + +enum lis3lv02d_ff_wu_src { + FF_WU_SRC_XL = 0x01, + FF_WU_SRC_XH = 0x02, + FF_WU_SRC_YL = 0x04, + FF_WU_SRC_YH = 0x08, + FF_WU_SRC_ZL = 0x10, + FF_WU_SRC_ZH = 0x20, + FF_WU_SRC_IA = 
0x40, +}; + +enum lis3lv02d_dd_cfg { + DD_CFG_XLIE = 0x01, + DD_CFG_XHIE = 0x02, + DD_CFG_YLIE = 0x04, + DD_CFG_YHIE = 0x08, + DD_CFG_ZLIE = 0x10, + DD_CFG_ZHIE = 0x20, + DD_CFG_LIR = 0x40, + DD_CFG_IEND = 0x80, +}; + +enum lis3lv02d_dd_src { + DD_SRC_XL = 0x01, + DD_SRC_XH = 0x02, + DD_SRC_YL = 0x04, + DD_SRC_YH = 0x08, + DD_SRC_ZL = 0x10, + DD_SRC_ZH = 0x20, + DD_SRC_IA = 0x40, +}; + +enum lis3lv02d_click_src_8b { + CLICK_SINGLE_X = 0x01, + CLICK_DOUBLE_X = 0x02, + CLICK_SINGLE_Y = 0x04, + CLICK_DOUBLE_Y = 0x08, + CLICK_SINGLE_Z = 0x10, + CLICK_DOUBLE_Z = 0x20, + CLICK_IA = 0x40, +}; + +enum lis3lv02d_reg_state { + LIS3_REG_OFF = 0x00, + LIS3_REG_ON = 0x01, +}; + +union axis_conversion { + struct { + int x, y, z; + }; + int as_array[3]; + +}; + +struct lis3lv02d { + void *bus_priv; /* used by the bus layer only */ + struct device *pm_dev; /* for pm_runtime purposes */ + int (*init) (struct lis3lv02d *lis3); + int (*write) (struct lis3lv02d *lis3, int reg, u8 val); + int (*read) (struct lis3lv02d *lis3, int reg, u8 *ret); + int (*blkread) (struct lis3lv02d *lis3, int reg, int len, u8 *ret); + int (*reg_ctrl) (struct lis3lv02d *lis3, bool state); + + int *odrs; /* Supported output data rates */ + u8 *regs; /* Regs to store / restore */ + int regs_size; + u8 *reg_cache; + bool regs_stored; + u8 odr_mask; /* ODR bit mask */ + u8 whoami; /* indicates measurement precision */ + s16 (*read_data) (struct lis3lv02d *lis3, int reg); + int mdps_max_val; + int pwron_delay; + int scale; /* + * relationship between 1 LBS and mG + * (1/1000th of earth gravity) + */ + + struct input_polled_dev *idev; /* input device */ + struct platform_device *pdev; /* platform device */ + struct regulator_bulk_data regulators[2]; + atomic_t count; /* interrupt count after last read */ + union axis_conversion ac; /* hw -> logical axis */ + int mapped_btns[3]; + + u32 irq; /* IRQ number */ + struct fasync_struct *async_queue; /* queue for the misc device */ + wait_queue_head_t misc_wait; /* Wait queue for the misc device */ + unsigned long misc_opened; /* bit0: whether the device is open */ + struct miscdevice miscdev; + + int data_ready_count[2]; + atomic_t wake_thread; + unsigned char irq_cfg; + unsigned int shift_adj; + + struct lis3lv02d_platform_data *pdata; /* for passing board config */ + struct mutex mutex; /* Serialize poll and selftest */ + +#ifdef CONFIG_OF + struct device_node *of_node; +#endif +}; + +int lis3lv02d_init_device(struct lis3lv02d *lis3); +int lis3lv02d_joystick_enable(struct lis3lv02d *lis3); +void lis3lv02d_joystick_disable(struct lis3lv02d *lis3); +void lis3lv02d_poweroff(struct lis3lv02d *lis3); +int lis3lv02d_poweron(struct lis3lv02d *lis3); +int lis3lv02d_remove_fs(struct lis3lv02d *lis3); +int lis3lv02d_init_dt(struct lis3lv02d *lis3); + +extern struct lis3lv02d lis3_dev; diff --git a/kernel/drivers/misc/lis3lv02d/lis3lv02d_i2c.c b/kernel/drivers/misc/lis3lv02d/lis3lv02d_i2c.c new file mode 100644 index 000000000..e3e7f1dc2 --- /dev/null +++ b/kernel/drivers/misc/lis3lv02d/lis3lv02d_i2c.c @@ -0,0 +1,290 @@ +/* + * drivers/hwmon/lis3lv02d_i2c.c + * + * Implements I2C interface for lis3lv02d (STMicroelectronics) accelerometer. + * Driver is based on corresponding SPI driver written by Daniel Mack + * (lis3lv02d_spi.c (C) 2009 Daniel Mack <daniel@caiaq.de> ). + * + * Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). 
+ * + * Contact: Samu Onkalo <samu.p.onkalo@nokia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/err.h> +#include <linux/i2c.h> +#include <linux/pm_runtime.h> +#include <linux/delay.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/of_device.h> + +#include "lis3lv02d.h" + +#define DRV_NAME "lis3lv02d_i2c" + +static const char reg_vdd[] = "Vdd"; +static const char reg_vdd_io[] = "Vdd_IO"; + +static int lis3_reg_ctrl(struct lis3lv02d *lis3, bool state) +{ + int ret; + if (state == LIS3_REG_OFF) { + ret = regulator_bulk_disable(ARRAY_SIZE(lis3->regulators), + lis3->regulators); + } else { + ret = regulator_bulk_enable(ARRAY_SIZE(lis3->regulators), + lis3->regulators); + /* Chip needs time to wakeup. Not mentioned in datasheet */ + usleep_range(10000, 20000); + } + return ret; +} + +static inline s32 lis3_i2c_write(struct lis3lv02d *lis3, int reg, u8 value) +{ + struct i2c_client *c = lis3->bus_priv; + return i2c_smbus_write_byte_data(c, reg, value); +} + +static inline s32 lis3_i2c_read(struct lis3lv02d *lis3, int reg, u8 *v) +{ + struct i2c_client *c = lis3->bus_priv; + *v = i2c_smbus_read_byte_data(c, reg); + return 0; +} + +static inline s32 lis3_i2c_blockread(struct lis3lv02d *lis3, int reg, int len, + u8 *v) +{ + struct i2c_client *c = lis3->bus_priv; + reg |= (1 << 7); /* 7th bit enables address auto incrementation */ + return i2c_smbus_read_i2c_block_data(c, reg, len, v); +} + +static int lis3_i2c_init(struct lis3lv02d *lis3) +{ + u8 reg; + int ret; + + lis3_reg_ctrl(lis3, LIS3_REG_ON); + + lis3->read(lis3, WHO_AM_I, ®); + if (reg != lis3->whoami) + printk(KERN_ERR "lis3: power on failure\n"); + + /* power up the device */ + ret = lis3->read(lis3, CTRL_REG1, ®); + if (ret < 0) + return ret; + + if (lis3->whoami == WAI_3DLH) + reg |= CTRL1_PM0 | CTRL1_Xen | CTRL1_Yen | CTRL1_Zen; + else + reg |= CTRL1_PD0 | CTRL1_Xen | CTRL1_Yen | CTRL1_Zen; + + return lis3->write(lis3, CTRL_REG1, reg); +} + +/* Default axis mapping but it can be overwritten by platform data */ +static union axis_conversion lis3lv02d_axis_map = + { .as_array = { LIS3_DEV_X, LIS3_DEV_Y, LIS3_DEV_Z } }; + +#ifdef CONFIG_OF +static const struct of_device_id lis3lv02d_i2c_dt_ids[] = { + { .compatible = "st,lis3lv02d" }, + {} +}; +MODULE_DEVICE_TABLE(of, lis3lv02d_i2c_dt_ids); +#endif + +static int lis3lv02d_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int ret = 0; + struct lis3lv02d_platform_data *pdata = client->dev.platform_data; + +#ifdef CONFIG_OF + if (of_match_device(lis3lv02d_i2c_dt_ids, &client->dev)) { + lis3_dev.of_node = client->dev.of_node; + ret = lis3lv02d_init_dt(&lis3_dev); + if (ret) + return ret; + pdata = lis3_dev.pdata; + } +#endif + + if (pdata) { + if ((pdata->driver_features & LIS3_USE_BLOCK_READ) && + (i2c_check_functionality(client->adapter, + 
I2C_FUNC_SMBUS_I2C_BLOCK))) + lis3_dev.blkread = lis3_i2c_blockread; + + if (pdata->axis_x) + lis3lv02d_axis_map.x = pdata->axis_x; + + if (pdata->axis_y) + lis3lv02d_axis_map.y = pdata->axis_y; + + if (pdata->axis_z) + lis3lv02d_axis_map.z = pdata->axis_z; + + if (pdata->setup_resources) + ret = pdata->setup_resources(); + + if (ret) + goto fail; + } + + lis3_dev.regulators[0].supply = reg_vdd; + lis3_dev.regulators[1].supply = reg_vdd_io; + ret = regulator_bulk_get(&client->dev, + ARRAY_SIZE(lis3_dev.regulators), + lis3_dev.regulators); + if (ret < 0) + goto fail; + + lis3_dev.pdata = pdata; + lis3_dev.bus_priv = client; + lis3_dev.init = lis3_i2c_init; + lis3_dev.read = lis3_i2c_read; + lis3_dev.write = lis3_i2c_write; + lis3_dev.irq = client->irq; + lis3_dev.ac = lis3lv02d_axis_map; + lis3_dev.pm_dev = &client->dev; + + i2c_set_clientdata(client, &lis3_dev); + + /* Provide power over the init call */ + lis3_reg_ctrl(&lis3_dev, LIS3_REG_ON); + + ret = lis3lv02d_init_device(&lis3_dev); + + lis3_reg_ctrl(&lis3_dev, LIS3_REG_OFF); + + if (ret) + goto fail2; + return 0; + +fail2: + regulator_bulk_free(ARRAY_SIZE(lis3_dev.regulators), + lis3_dev.regulators); +fail: + if (pdata && pdata->release_resources) + pdata->release_resources(); + return ret; +} + +static int lis3lv02d_i2c_remove(struct i2c_client *client) +{ + struct lis3lv02d *lis3 = i2c_get_clientdata(client); + struct lis3lv02d_platform_data *pdata = client->dev.platform_data; + + if (pdata && pdata->release_resources) + pdata->release_resources(); + + lis3lv02d_joystick_disable(lis3); + lis3lv02d_remove_fs(&lis3_dev); + + regulator_bulk_free(ARRAY_SIZE(lis3->regulators), + lis3_dev.regulators); + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int lis3lv02d_i2c_suspend(struct device *dev) +{ + struct i2c_client *client = container_of(dev, struct i2c_client, dev); + struct lis3lv02d *lis3 = i2c_get_clientdata(client); + + if (!lis3->pdata || !lis3->pdata->wakeup_flags) + lis3lv02d_poweroff(lis3); + return 0; +} + +static int lis3lv02d_i2c_resume(struct device *dev) +{ + struct i2c_client *client = container_of(dev, struct i2c_client, dev); + struct lis3lv02d *lis3 = i2c_get_clientdata(client); + + /* + * pm_runtime documentation says that devices should always + * be powered on at resume. Pm_runtime turns them off after system + * wide resume is complete. 
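+ *
+ * In short: power the chip on here unless it was kept powered as a
+ * wakeup source; even a wakeup-enabled chip is powered on when
+ * runtime PM had it suspended before the system slept.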
+ */
+	if (!lis3->pdata || !lis3->pdata->wakeup_flags ||
+	    pm_runtime_suspended(dev))
+		lis3lv02d_poweron(lis3);
+
+	return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+#ifdef CONFIG_PM
+static int lis3_i2c_runtime_suspend(struct device *dev)
+{
+	struct i2c_client *client = container_of(dev, struct i2c_client, dev);
+	struct lis3lv02d *lis3 = i2c_get_clientdata(client);
+
+	lis3lv02d_poweroff(lis3);
+	return 0;
+}
+
+static int lis3_i2c_runtime_resume(struct device *dev)
+{
+	struct i2c_client *client = container_of(dev, struct i2c_client, dev);
+	struct lis3lv02d *lis3 = i2c_get_clientdata(client);
+
+	lis3lv02d_poweron(lis3);
+	return 0;
+}
+#endif /* CONFIG_PM */
+
+static const struct i2c_device_id lis3lv02d_id[] = {
+	{"lis3lv02d", LIS3LV02D},
+	{"lis331dlh", LIS331DLH},
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, lis3lv02d_id);
+
+static const struct dev_pm_ops lis3_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(lis3lv02d_i2c_suspend,
+				lis3lv02d_i2c_resume)
+	SET_RUNTIME_PM_OPS(lis3_i2c_runtime_suspend,
+			   lis3_i2c_runtime_resume,
+			   NULL)
+};
+
+static struct i2c_driver lis3lv02d_i2c_driver = {
+	.driver	 = {
+		.name   = DRV_NAME,
+		.owner  = THIS_MODULE,
+		.pm     = &lis3_pm_ops,
+		.of_match_table = of_match_ptr(lis3lv02d_i2c_dt_ids),
+	},
+	.probe	= lis3lv02d_i2c_probe,
+	.remove	= lis3lv02d_i2c_remove,
+	.id_table = lis3lv02d_id,
+};
+
+module_i2c_driver(lis3lv02d_i2c_driver);
+
+MODULE_AUTHOR("Nokia Corporation");
+MODULE_DESCRIPTION("lis3lv02d I2C interface");
+MODULE_LICENSE("GPL");
diff --git a/kernel/drivers/misc/lis3lv02d/lis3lv02d_spi.c b/kernel/drivers/misc/lis3lv02d/lis3lv02d_spi.c
new file mode 100644
index 000000000..b2f6e1651
--- /dev/null
+++ b/kernel/drivers/misc/lis3lv02d/lis3lv02d_spi.c
@@ -0,0 +1,154 @@
+/*
+ * lis3lv02d_spi - SPI glue layer for lis3lv02d
+ *
+ * Copyright (c) 2009 Daniel Mack <daniel@caiaq.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/err.h> +#include <linux/input.h> +#include <linux/interrupt.h> +#include <linux/workqueue.h> +#include <linux/spi/spi.h> +#include <linux/pm.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/of_device.h> + +#include "lis3lv02d.h" + +#define DRV_NAME "lis3lv02d_spi" +#define LIS3_SPI_READ 0x80 + +static int lis3_spi_read(struct lis3lv02d *lis3, int reg, u8 *v) +{ + struct spi_device *spi = lis3->bus_priv; + int ret = spi_w8r8(spi, reg | LIS3_SPI_READ); + if (ret < 0) + return -EINVAL; + + *v = (u8) ret; + return 0; +} + +static int lis3_spi_write(struct lis3lv02d *lis3, int reg, u8 val) +{ + u8 tmp[2] = { reg, val }; + struct spi_device *spi = lis3->bus_priv; + return spi_write(spi, tmp, sizeof(tmp)); +} + +static int lis3_spi_init(struct lis3lv02d *lis3) +{ + u8 reg; + int ret; + + /* power up the device */ + ret = lis3->read(lis3, CTRL_REG1, ®); + if (ret < 0) + return ret; + + reg |= CTRL1_PD0 | CTRL1_Xen | CTRL1_Yen | CTRL1_Zen; + return lis3->write(lis3, CTRL_REG1, reg); +} + +static union axis_conversion lis3lv02d_axis_normal = + { .as_array = { 1, 2, 3 } }; + +#ifdef CONFIG_OF +static const struct of_device_id lis302dl_spi_dt_ids[] = { + { .compatible = "st,lis302dl-spi" }, + {} +}; +MODULE_DEVICE_TABLE(of, lis302dl_spi_dt_ids); +#endif + +static int lis302dl_spi_probe(struct spi_device *spi) +{ + int ret; + + spi->bits_per_word = 8; + spi->mode = SPI_MODE_0; + ret = spi_setup(spi); + if (ret < 0) + return ret; + + lis3_dev.bus_priv = spi; + lis3_dev.init = lis3_spi_init; + lis3_dev.read = lis3_spi_read; + lis3_dev.write = lis3_spi_write; + lis3_dev.irq = spi->irq; + lis3_dev.ac = lis3lv02d_axis_normal; + lis3_dev.pdata = spi->dev.platform_data; + +#ifdef CONFIG_OF + if (of_match_device(lis302dl_spi_dt_ids, &spi->dev)) { + lis3_dev.of_node = spi->dev.of_node; + ret = lis3lv02d_init_dt(&lis3_dev); + if (ret) + return ret; + } +#endif + spi_set_drvdata(spi, &lis3_dev); + + return lis3lv02d_init_device(&lis3_dev); +} + +static int lis302dl_spi_remove(struct spi_device *spi) +{ + struct lis3lv02d *lis3 = spi_get_drvdata(spi); + lis3lv02d_joystick_disable(lis3); + lis3lv02d_poweroff(lis3); + + return lis3lv02d_remove_fs(&lis3_dev); +} + +#ifdef CONFIG_PM_SLEEP +static int lis3lv02d_spi_suspend(struct device *dev) +{ + struct spi_device *spi = to_spi_device(dev); + struct lis3lv02d *lis3 = spi_get_drvdata(spi); + + if (!lis3->pdata || !lis3->pdata->wakeup_flags) + lis3lv02d_poweroff(&lis3_dev); + + return 0; +} + +static int lis3lv02d_spi_resume(struct device *dev) +{ + struct spi_device *spi = to_spi_device(dev); + struct lis3lv02d *lis3 = spi_get_drvdata(spi); + + if (!lis3->pdata || !lis3->pdata->wakeup_flags) + lis3lv02d_poweron(lis3); + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(lis3lv02d_spi_pm, lis3lv02d_spi_suspend, + lis3lv02d_spi_resume); + +static struct spi_driver lis302dl_spi_driver = { + .driver = { + .name = DRV_NAME, + .owner = THIS_MODULE, + .pm = &lis3lv02d_spi_pm, + .of_match_table = of_match_ptr(lis302dl_spi_dt_ids), + }, + .probe = lis302dl_spi_probe, + .remove = lis302dl_spi_remove, +}; + +module_spi_driver(lis302dl_spi_driver); + +MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>"); +MODULE_DESCRIPTION("lis3lv02d SPI glue layer"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("spi:" DRV_NAME); diff --git a/kernel/drivers/misc/lkdtm.c b/kernel/drivers/misc/lkdtm.c new file mode 100644 index 000000000..b5abe3412 --- /dev/null +++ b/kernel/drivers/misc/lkdtm.c @@ -0,0 
+1,873 @@
+/*
+ * Kprobe module for testing crash dumps
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2006
+ *
+ * Author: Ankita Garg <ankita@in.ibm.com>
+ *
+ * This module induces system failures at predefined crashpoints to
+ * evaluate the reliability of crash dumps obtained using different dumping
+ * solutions.
+ *
+ * It is adapted from the Linux Kernel Dump Test Tool by
+ * Fernando Luis Vazquez Cao <http://lkdtt.sourceforge.net>
+ *
+ * Debugfs support added by Simon Kagstrom <simon.kagstrom@netinsight.net>
+ *
+ * See Documentation/fault-injection/provoke-crashes.txt for instructions
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/buffer_head.h>
+#include <linux/kprobes.h>
+#include <linux/list.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/hrtimer.h>
+#include <linux/slab.h>
+#include <scsi/scsi_cmnd.h>
+#include <linux/debugfs.h>
+#include <linux/vmalloc.h>
+#include <linux/mman.h>
+#include <asm/cacheflush.h>
+
+#ifdef CONFIG_IDE
+#include <linux/ide.h>
+#endif
+
+/*
+ * Make sure our attempts to overrun the kernel stack don't trigger
+ * a compiler warning when CONFIG_FRAME_WARN is set. Then make sure we
+ * recurse past the end of THREAD_SIZE by default.
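+ *
+ * Worked example, assuming THREAD_SIZE is 8 KiB and CONFIG_FRAME_WARN
+ * is 1024: REC_STACK_SIZE is 512 bytes per frame, and REC_NUM_DEFAULT
+ * is (8192 / 512) * 2 = 32 recursions, i.e. roughly twice the stack.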
+ */ +#if defined(CONFIG_FRAME_WARN) && (CONFIG_FRAME_WARN > 0) +#define REC_STACK_SIZE (CONFIG_FRAME_WARN / 2) +#else +#define REC_STACK_SIZE (THREAD_SIZE / 8) +#endif +#define REC_NUM_DEFAULT ((THREAD_SIZE / REC_STACK_SIZE) * 2) + +#define DEFAULT_COUNT 10 +#define EXEC_SIZE 64 + +enum cname { + CN_INVALID, + CN_INT_HARDWARE_ENTRY, + CN_INT_HW_IRQ_EN, + CN_INT_TASKLET_ENTRY, + CN_FS_DEVRW, + CN_MEM_SWAPOUT, + CN_TIMERADD, + CN_SCSI_DISPATCH_CMD, + CN_IDE_CORE_CP, + CN_DIRECT, +}; + +enum ctype { + CT_NONE, + CT_PANIC, + CT_BUG, + CT_WARNING, + CT_EXCEPTION, + CT_LOOP, + CT_OVERFLOW, + CT_CORRUPT_STACK, + CT_UNALIGNED_LOAD_STORE_WRITE, + CT_OVERWRITE_ALLOCATION, + CT_WRITE_AFTER_FREE, + CT_SOFTLOCKUP, + CT_HARDLOCKUP, + CT_SPINLOCKUP, + CT_HUNG_TASK, + CT_EXEC_DATA, + CT_EXEC_STACK, + CT_EXEC_KMALLOC, + CT_EXEC_VMALLOC, + CT_EXEC_USERSPACE, + CT_ACCESS_USERSPACE, + CT_WRITE_RO, + CT_WRITE_KERN, +}; + +static char* cp_name[] = { + "INT_HARDWARE_ENTRY", + "INT_HW_IRQ_EN", + "INT_TASKLET_ENTRY", + "FS_DEVRW", + "MEM_SWAPOUT", + "TIMERADD", + "SCSI_DISPATCH_CMD", + "IDE_CORE_CP", + "DIRECT", +}; + +static char* cp_type[] = { + "PANIC", + "BUG", + "WARNING", + "EXCEPTION", + "LOOP", + "OVERFLOW", + "CORRUPT_STACK", + "UNALIGNED_LOAD_STORE_WRITE", + "OVERWRITE_ALLOCATION", + "WRITE_AFTER_FREE", + "SOFTLOCKUP", + "HARDLOCKUP", + "SPINLOCKUP", + "HUNG_TASK", + "EXEC_DATA", + "EXEC_STACK", + "EXEC_KMALLOC", + "EXEC_VMALLOC", + "EXEC_USERSPACE", + "ACCESS_USERSPACE", + "WRITE_RO", + "WRITE_KERN", +}; + +static struct jprobe lkdtm; + +static int lkdtm_parse_commandline(void); +static void lkdtm_handler(void); + +static char* cpoint_name; +static char* cpoint_type; +static int cpoint_count = DEFAULT_COUNT; +static int recur_count = REC_NUM_DEFAULT; + +static enum cname cpoint = CN_INVALID; +static enum ctype cptype = CT_NONE; +static int count = DEFAULT_COUNT; +static DEFINE_SPINLOCK(count_lock); +static DEFINE_SPINLOCK(lock_me_up); + +static u8 data_area[EXEC_SIZE]; + +static const unsigned long rodata = 0xAA55AA55; + +module_param(recur_count, int, 0644); +MODULE_PARM_DESC(recur_count, " Recursion level for the stack overflow test"); +module_param(cpoint_name, charp, 0444); +MODULE_PARM_DESC(cpoint_name, " Crash Point, where kernel is to be crashed"); +module_param(cpoint_type, charp, 0444); +MODULE_PARM_DESC(cpoint_type, " Crash Point Type, action to be taken on "\ + "hitting the crash point"); +module_param(cpoint_count, int, 0644); +MODULE_PARM_DESC(cpoint_count, " Crash Point Count, number of times the "\ + "crash point is to be hit to trigger action"); + +static unsigned int jp_do_irq(unsigned int irq) +{ + lkdtm_handler(); + jprobe_return(); + return 0; +} + +static irqreturn_t jp_handle_irq_event(unsigned int irq, + struct irqaction *action) +{ + lkdtm_handler(); + jprobe_return(); + return 0; +} + +static void jp_tasklet_action(struct softirq_action *a) +{ + lkdtm_handler(); + jprobe_return(); +} + +static void jp_ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) +{ + lkdtm_handler(); + jprobe_return(); +} + +struct scan_control; + +static unsigned long jp_shrink_inactive_list(unsigned long max_scan, + struct zone *zone, + struct scan_control *sc) +{ + lkdtm_handler(); + jprobe_return(); + return 0; +} + +static int jp_hrtimer_start(struct hrtimer *timer, ktime_t tim, + const enum hrtimer_mode mode) +{ + lkdtm_handler(); + jprobe_return(); + return 0; +} + +static int jp_scsi_dispatch_cmd(struct scsi_cmnd *cmd) +{ + lkdtm_handler(); + jprobe_return(); + return 0; +} + +#ifdef 
CONFIG_IDE
+static int jp_generic_ide_ioctl(ide_drive_t *drive, struct file *file,
+			struct block_device *bdev, unsigned int cmd,
+			unsigned long arg)
+{
+	lkdtm_handler();
+	jprobe_return();
+	return 0;
+}
+#endif
+
+/* Return the crash type or CT_NONE if the name is invalid */
+static enum ctype parse_cp_type(const char *what, size_t count)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(cp_type); i++) {
+		if (!strcmp(what, cp_type[i]))
+			return i + 1;
+	}
+
+	return CT_NONE;
+}
+
+static const char *cp_type_to_str(enum ctype type)
+{
+	if (type == CT_NONE || type < 0 || type > ARRAY_SIZE(cp_type))
+		return "None";
+
+	return cp_type[type - 1];
+}
+
+static const char *cp_name_to_str(enum cname name)
+{
+	if (name == CN_INVALID || name < 0 || name > ARRAY_SIZE(cp_name))
+		return "INVALID";
+
+	return cp_name[name - 1];
+}
+
+
+static int lkdtm_parse_commandline(void)
+{
+	int i;
+	unsigned long flags;
+
+	if (cpoint_count < 1 || recur_count < 1)
+		return -EINVAL;
+
+	spin_lock_irqsave(&count_lock, flags);
+	count = cpoint_count;
+	spin_unlock_irqrestore(&count_lock, flags);
+
+	/* No special parameters */
+	if (!cpoint_type && !cpoint_name)
+		return 0;
+
+	/* Neither or both of these need to be set */
+	if (!cpoint_type || !cpoint_name)
+		return -EINVAL;
+
+	cptype = parse_cp_type(cpoint_type, strlen(cpoint_type));
+	if (cptype == CT_NONE)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(cp_name); i++) {
+		if (!strcmp(cpoint_name, cp_name[i])) {
+			cpoint = i + 1;
+			return 0;
+		}
+	}
+
+	/* Could not find a valid crash point */
+	return -EINVAL;
+}
+
+static int recursive_loop(int remaining)
+{
+	char buf[REC_STACK_SIZE];
+
+	/* Make sure compiler does not optimize this away. */
+	memset(buf, (remaining & 0xff) | 0x1, REC_STACK_SIZE);
+	if (!remaining)
+		return 0;
+	else
+		return recursive_loop(remaining - 1);
+}
+
+static void do_nothing(void)
+{
+	return;
+}
+
+/* Must immediately follow do_nothing for size calculations to work out. */
+static void do_overwritten(void)
+{
+	pr_info("do_overwritten wasn't overwritten!\n");
+	return;
+}
+
+static noinline void corrupt_stack(void)
+{
+	/* Use default char array length that triggers stack protection. */
+	char data[8];
+
+	memset((void *)data, 0, 64);
+}
+
+static void execute_location(void *dst)
+{
+	void (*func)(void) = dst;
+
+	pr_info("attempting ok execution at %p\n", do_nothing);
+	do_nothing();
+
+	memcpy(dst, do_nothing, EXEC_SIZE);
+	flush_icache_range((unsigned long)dst, (unsigned long)dst + EXEC_SIZE);
+	pr_info("attempting bad execution at %p\n", func);
+	func();
+}
+
+static void execute_user_location(void *dst)
+{
+	/* Intentionally crossing kernel/user memory boundary.
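+	 * dst is a user-space address here, so the code is copied with
+	 * copy_to_user() and the subsequent call into it is expected to
+	 * trip protections such as SMEP where the hardware provides them.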
*/ + void (*func)(void) = dst; + + pr_info("attempting ok execution at %p\n", do_nothing); + do_nothing(); + + if (copy_to_user((void __user *)dst, do_nothing, EXEC_SIZE)) + return; + flush_icache_range((unsigned long)dst, (unsigned long)dst + EXEC_SIZE); + pr_info("attempting bad execution at %p\n", func); + func(); +} + +static void lkdtm_do_action(enum ctype which) +{ + switch (which) { + case CT_PANIC: + panic("dumptest"); + break; + case CT_BUG: + BUG(); + break; + case CT_WARNING: + WARN_ON(1); + break; + case CT_EXCEPTION: + *((int *) 0) = 0; + break; + case CT_LOOP: + for (;;) + ; + break; + case CT_OVERFLOW: + (void) recursive_loop(recur_count); + break; + case CT_CORRUPT_STACK: + corrupt_stack(); + break; + case CT_UNALIGNED_LOAD_STORE_WRITE: { + static u8 data[5] __attribute__((aligned(4))) = {1, 2, + 3, 4, 5}; + u32 *p; + u32 val = 0x12345678; + + p = (u32 *)(data + 1); + if (*p == 0) + val = 0x87654321; + *p = val; + break; + } + case CT_OVERWRITE_ALLOCATION: { + size_t len = 1020; + u32 *data = kmalloc(len, GFP_KERNEL); + + data[1024 / sizeof(u32)] = 0x12345678; + kfree(data); + break; + } + case CT_WRITE_AFTER_FREE: { + size_t len = 1024; + u32 *data = kmalloc(len, GFP_KERNEL); + + kfree(data); + schedule(); + memset(data, 0x78, len); + break; + } + case CT_SOFTLOCKUP: + preempt_disable(); + for (;;) + cpu_relax(); + break; + case CT_HARDLOCKUP: + local_irq_disable(); + for (;;) + cpu_relax(); + break; + case CT_SPINLOCKUP: + /* Must be called twice to trigger. */ + spin_lock(&lock_me_up); + /* Let sparse know we intended to exit holding the lock. */ + __release(&lock_me_up); + break; + case CT_HUNG_TASK: + set_current_state(TASK_UNINTERRUPTIBLE); + schedule(); + break; + case CT_EXEC_DATA: + execute_location(data_area); + break; + case CT_EXEC_STACK: { + u8 stack_area[EXEC_SIZE]; + execute_location(stack_area); + break; + } + case CT_EXEC_KMALLOC: { + u32 *kmalloc_area = kmalloc(EXEC_SIZE, GFP_KERNEL); + execute_location(kmalloc_area); + kfree(kmalloc_area); + break; + } + case CT_EXEC_VMALLOC: { + u32 *vmalloc_area = vmalloc(EXEC_SIZE); + execute_location(vmalloc_area); + vfree(vmalloc_area); + break; + } + case CT_EXEC_USERSPACE: { + unsigned long user_addr; + + user_addr = vm_mmap(NULL, 0, PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + if (user_addr >= TASK_SIZE) { + pr_warn("Failed to allocate user memory\n"); + return; + } + execute_user_location((void *)user_addr); + vm_munmap(user_addr, PAGE_SIZE); + break; + } + case CT_ACCESS_USERSPACE: { + unsigned long user_addr, tmp; + unsigned long *ptr; + + user_addr = vm_mmap(NULL, 0, PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + if (user_addr >= TASK_SIZE) { + pr_warn("Failed to allocate user memory\n"); + return; + } + + ptr = (unsigned long *)user_addr; + + pr_info("attempting bad read at %p\n", ptr); + tmp = *ptr; + tmp += 0xc0dec0de; + + pr_info("attempting bad write at %p\n", ptr); + *ptr = tmp; + + vm_munmap(user_addr, PAGE_SIZE); + + break; + } + case CT_WRITE_RO: { + unsigned long *ptr; + + ptr = (unsigned long *)&rodata; + + pr_info("attempting bad write at %p\n", ptr); + *ptr ^= 0xabcd1234; + + break; + } + case CT_WRITE_KERN: { + size_t size; + unsigned char *ptr; + + size = (unsigned long)do_overwritten - + (unsigned long)do_nothing; + ptr = (unsigned char *)do_overwritten; + + pr_info("attempting bad %zu byte write at %p\n", size, ptr); + memcpy(ptr, (unsigned char *)do_nothing, size); + flush_icache_range((unsigned long)ptr, + 
(unsigned long)(ptr + size)); + + do_overwritten(); + break; + } + case CT_NONE: + default: + break; + } + +} + +static void lkdtm_handler(void) +{ + unsigned long flags; + bool do_it = false; + + spin_lock_irqsave(&count_lock, flags); + count--; + pr_info("Crash point %s of type %s hit, trigger in %d rounds\n", + cp_name_to_str(cpoint), cp_type_to_str(cptype), count); + + if (count == 0) { + do_it = true; + count = cpoint_count; + } + spin_unlock_irqrestore(&count_lock, flags); + + if (do_it) + lkdtm_do_action(cptype); +} + +static int lkdtm_register_cpoint(enum cname which) +{ + int ret; + + cpoint = CN_INVALID; + if (lkdtm.entry != NULL) + unregister_jprobe(&lkdtm); + + switch (which) { + case CN_DIRECT: + lkdtm_do_action(cptype); + return 0; + case CN_INT_HARDWARE_ENTRY: + lkdtm.kp.symbol_name = "do_IRQ"; + lkdtm.entry = (kprobe_opcode_t*) jp_do_irq; + break; + case CN_INT_HW_IRQ_EN: + lkdtm.kp.symbol_name = "handle_IRQ_event"; + lkdtm.entry = (kprobe_opcode_t*) jp_handle_irq_event; + break; + case CN_INT_TASKLET_ENTRY: + lkdtm.kp.symbol_name = "tasklet_action"; + lkdtm.entry = (kprobe_opcode_t*) jp_tasklet_action; + break; + case CN_FS_DEVRW: + lkdtm.kp.symbol_name = "ll_rw_block"; + lkdtm.entry = (kprobe_opcode_t*) jp_ll_rw_block; + break; + case CN_MEM_SWAPOUT: + lkdtm.kp.symbol_name = "shrink_inactive_list"; + lkdtm.entry = (kprobe_opcode_t*) jp_shrink_inactive_list; + break; + case CN_TIMERADD: + lkdtm.kp.symbol_name = "hrtimer_start"; + lkdtm.entry = (kprobe_opcode_t*) jp_hrtimer_start; + break; + case CN_SCSI_DISPATCH_CMD: + lkdtm.kp.symbol_name = "scsi_dispatch_cmd"; + lkdtm.entry = (kprobe_opcode_t*) jp_scsi_dispatch_cmd; + break; + case CN_IDE_CORE_CP: +#ifdef CONFIG_IDE + lkdtm.kp.symbol_name = "generic_ide_ioctl"; + lkdtm.entry = (kprobe_opcode_t*) jp_generic_ide_ioctl; +#else + pr_info("Crash point not available\n"); + return -EINVAL; +#endif + break; + default: + pr_info("Invalid Crash Point\n"); + return -EINVAL; + } + + cpoint = which; + if ((ret = register_jprobe(&lkdtm)) < 0) { + pr_info("Couldn't register jprobe\n"); + cpoint = CN_INVALID; + } + + return ret; +} + +static ssize_t do_register_entry(enum cname which, struct file *f, + const char __user *user_buf, size_t count, loff_t *off) +{ + char *buf; + int err; + + if (count >= PAGE_SIZE) + return -EINVAL; + + buf = (char *)__get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + if (copy_from_user(buf, user_buf, count)) { + free_page((unsigned long) buf); + return -EFAULT; + } + /* NULL-terminate and remove enter */ + buf[count] = '\0'; + strim(buf); + + cptype = parse_cp_type(buf, count); + free_page((unsigned long) buf); + + if (cptype == CT_NONE) + return -EINVAL; + + err = lkdtm_register_cpoint(which); + if (err < 0) + return err; + + *off += count; + + return count; +} + +/* Generic read callback that just prints out the available crash types */ +static ssize_t lkdtm_debugfs_read(struct file *f, char __user *user_buf, + size_t count, loff_t *off) +{ + char *buf; + int i, n, out; + + buf = (char *)__get_free_page(GFP_KERNEL); + if (buf == NULL) + return -ENOMEM; + + n = snprintf(buf, PAGE_SIZE, "Available crash types:\n"); + for (i = 0; i < ARRAY_SIZE(cp_type); i++) + n += snprintf(buf + n, PAGE_SIZE - n, "%s\n", cp_type[i]); + buf[n] = '\0'; + + out = simple_read_from_buffer(user_buf, count, off, + buf, n); + free_page((unsigned long) buf); + + return out; +} + +static int lkdtm_debugfs_open(struct inode *inode, struct file *file) +{ + return 0; +} + + +static ssize_t int_hardware_entry(struct file *f, 
const char __user *buf, + size_t count, loff_t *off) +{ + return do_register_entry(CN_INT_HARDWARE_ENTRY, f, buf, count, off); +} + +static ssize_t int_hw_irq_en(struct file *f, const char __user *buf, + size_t count, loff_t *off) +{ + return do_register_entry(CN_INT_HW_IRQ_EN, f, buf, count, off); +} + +static ssize_t int_tasklet_entry(struct file *f, const char __user *buf, + size_t count, loff_t *off) +{ + return do_register_entry(CN_INT_TASKLET_ENTRY, f, buf, count, off); +} + +static ssize_t fs_devrw_entry(struct file *f, const char __user *buf, + size_t count, loff_t *off) +{ + return do_register_entry(CN_FS_DEVRW, f, buf, count, off); +} + +static ssize_t mem_swapout_entry(struct file *f, const char __user *buf, + size_t count, loff_t *off) +{ + return do_register_entry(CN_MEM_SWAPOUT, f, buf, count, off); +} + +static ssize_t timeradd_entry(struct file *f, const char __user *buf, + size_t count, loff_t *off) +{ + return do_register_entry(CN_TIMERADD, f, buf, count, off); +} + +static ssize_t scsi_dispatch_cmd_entry(struct file *f, + const char __user *buf, size_t count, loff_t *off) +{ + return do_register_entry(CN_SCSI_DISPATCH_CMD, f, buf, count, off); +} + +static ssize_t ide_core_cp_entry(struct file *f, const char __user *buf, + size_t count, loff_t *off) +{ + return do_register_entry(CN_IDE_CORE_CP, f, buf, count, off); +} + +/* Special entry to just crash directly. Available without KPROBEs */ +static ssize_t direct_entry(struct file *f, const char __user *user_buf, + size_t count, loff_t *off) +{ + enum ctype type; + char *buf; + + if (count >= PAGE_SIZE) + return -EINVAL; + if (count < 1) + return -EINVAL; + + buf = (char *)__get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + if (copy_from_user(buf, user_buf, count)) { + free_page((unsigned long) buf); + return -EFAULT; + } + /* NULL-terminate and remove enter */ + buf[count] = '\0'; + strim(buf); + + type = parse_cp_type(buf, count); + free_page((unsigned long) buf); + if (type == CT_NONE) + return -EINVAL; + + pr_info("Performing direct entry %s\n", cp_type_to_str(type)); + lkdtm_do_action(type); + *off += count; + + return count; +} + +struct crash_entry { + const char *name; + const struct file_operations fops; +}; + +static const struct crash_entry crash_entries[] = { + {"DIRECT", {.read = lkdtm_debugfs_read, + .llseek = generic_file_llseek, + .open = lkdtm_debugfs_open, + .write = direct_entry} }, + {"INT_HARDWARE_ENTRY", {.read = lkdtm_debugfs_read, + .llseek = generic_file_llseek, + .open = lkdtm_debugfs_open, + .write = int_hardware_entry} }, + {"INT_HW_IRQ_EN", {.read = lkdtm_debugfs_read, + .llseek = generic_file_llseek, + .open = lkdtm_debugfs_open, + .write = int_hw_irq_en} }, + {"INT_TASKLET_ENTRY", {.read = lkdtm_debugfs_read, + .llseek = generic_file_llseek, + .open = lkdtm_debugfs_open, + .write = int_tasklet_entry} }, + {"FS_DEVRW", {.read = lkdtm_debugfs_read, + .llseek = generic_file_llseek, + .open = lkdtm_debugfs_open, + .write = fs_devrw_entry} }, + {"MEM_SWAPOUT", {.read = lkdtm_debugfs_read, + .llseek = generic_file_llseek, + .open = lkdtm_debugfs_open, + .write = mem_swapout_entry} }, + {"TIMERADD", {.read = lkdtm_debugfs_read, + .llseek = generic_file_llseek, + .open = lkdtm_debugfs_open, + .write = timeradd_entry} }, + {"SCSI_DISPATCH_CMD", {.read = lkdtm_debugfs_read, + .llseek = generic_file_llseek, + .open = lkdtm_debugfs_open, + .write = scsi_dispatch_cmd_entry} }, + {"IDE_CORE_CP", {.read = lkdtm_debugfs_read, + .llseek = generic_file_llseek, + .open = lkdtm_debugfs_open, + .write = 
ide_core_cp_entry} }, +}; + +static struct dentry *lkdtm_debugfs_root; + +static int __init lkdtm_module_init(void) +{ + int ret = -EINVAL; + int n_debugfs_entries = 1; /* Assume only the direct entry */ + int i; + + /* Register debugfs interface */ + lkdtm_debugfs_root = debugfs_create_dir("provoke-crash", NULL); + if (!lkdtm_debugfs_root) { + pr_err("creating root dir failed\n"); + return -ENODEV; + } + +#ifdef CONFIG_KPROBES + n_debugfs_entries = ARRAY_SIZE(crash_entries); +#endif + + for (i = 0; i < n_debugfs_entries; i++) { + const struct crash_entry *cur = &crash_entries[i]; + struct dentry *de; + + de = debugfs_create_file(cur->name, 0644, lkdtm_debugfs_root, + NULL, &cur->fops); + if (de == NULL) { + pr_err("could not create %s\n", cur->name); + goto out_err; + } + } + + if (lkdtm_parse_commandline() == -EINVAL) { + pr_info("Invalid command\n"); + goto out_err; + } + + if (cpoint != CN_INVALID && cptype != CT_NONE) { + ret = lkdtm_register_cpoint(cpoint); + if (ret < 0) { + pr_info("Invalid crash point %d\n", cpoint); + goto out_err; + } + pr_info("Crash point %s of type %s registered\n", + cpoint_name, cpoint_type); + } else { + pr_info("No crash points registered, enable through debugfs\n"); + } + + return 0; + +out_err: + debugfs_remove_recursive(lkdtm_debugfs_root); + return ret; +} + +static void __exit lkdtm_module_exit(void) +{ + debugfs_remove_recursive(lkdtm_debugfs_root); + + unregister_jprobe(&lkdtm); + pr_info("Crash point unregistered\n"); +} + +module_init(lkdtm_module_init); +module_exit(lkdtm_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Kprobe module for testing crash dumps"); diff --git a/kernel/drivers/misc/mei/Kconfig b/kernel/drivers/misc/mei/Kconfig new file mode 100644 index 000000000..d23384dde --- /dev/null +++ b/kernel/drivers/misc/mei/Kconfig @@ -0,0 +1,45 @@ +config INTEL_MEI + tristate "Intel Management Engine Interface" + depends on X86 && PCI && WATCHDOG_CORE + help + The Intel Management Engine (Intel ME) provides Manageability, + Security and Media services for system containing Intel chipsets. + if selected /dev/mei misc device will be created. + + For more information see + <http://software.intel.com/en-us/manageability/> + +config INTEL_MEI_ME + tristate "ME Enabled Intel Chipsets" + select INTEL_MEI + depends on X86 && PCI && WATCHDOG_CORE + help + MEI support for ME Enabled Intel chipsets. + + Supported Chipsets are: + 7 Series Chipset Family + 6 Series Chipset Family + 5 Series Chipset Family + 4 Series Chipset Family + Mobile 4 Series Chipset Family + ICH9 + 82946GZ/GL + 82G35 Express + 82Q963/Q965 + 82P965/G965 + Mobile PM965/GM965 + Mobile GME965/GLE960 + 82Q35 Express + 82G33/G31/P35/P31 Express + 82Q33 Express + 82X38/X48 Express + +config INTEL_MEI_TXE + tristate "Intel Trusted Execution Environment with ME Interface" + select INTEL_MEI + depends on X86 && PCI && WATCHDOG_CORE + help + MEI Support for Trusted Execution Environment device on Intel SoCs + + Supported SoCs: + Intel Bay Trail diff --git a/kernel/drivers/misc/mei/Makefile b/kernel/drivers/misc/mei/Makefile new file mode 100644 index 000000000..518914a82 --- /dev/null +++ b/kernel/drivers/misc/mei/Makefile @@ -0,0 +1,26 @@ +# +# Makefile - Intel Management Engine Interface (Intel MEI) Linux driver +# Copyright (c) 2010-2014, Intel Corporation. 
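+#
+# mei.o below is a composite object: each "mei-objs += foo.o" line links
+# foo.o into mei.o, and the conditional mei-$(CONFIG_...) entries are
+# linked in only when the named option is enabled.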
+# +obj-$(CONFIG_INTEL_MEI) += mei.o +mei-objs := init.o +mei-objs += hbm.o +mei-objs += interrupt.o +mei-objs += client.o +mei-objs += main.o +mei-objs += amthif.o +mei-objs += wd.o +mei-objs += bus.o +mei-objs += nfc.o +mei-$(CONFIG_DEBUG_FS) += debugfs.o + +obj-$(CONFIG_INTEL_MEI_ME) += mei-me.o +mei-me-objs := pci-me.o +mei-me-objs += hw-me.o + +obj-$(CONFIG_INTEL_MEI_TXE) += mei-txe.o +mei-txe-objs := pci-txe.o +mei-txe-objs += hw-txe.o + +mei-$(CONFIG_EVENT_TRACING) += mei-trace.o +CFLAGS_mei-trace.o = -I$(src) diff --git a/kernel/drivers/misc/mei/amthif.c b/kernel/drivers/misc/mei/amthif.c new file mode 100644 index 000000000..d2cd53e3f --- /dev/null +++ b/kernel/drivers/misc/mei/amthif.c @@ -0,0 +1,606 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2012, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/fcntl.h> +#include <linux/ioctl.h> +#include <linux/cdev.h> +#include <linux/list.h> +#include <linux/delay.h> +#include <linux/sched.h> +#include <linux/uuid.h> +#include <linux/jiffies.h> +#include <linux/uaccess.h> +#include <linux/slab.h> + +#include <linux/mei.h> + +#include "mei_dev.h" +#include "hbm.h" +#include "client.h" + +const uuid_le mei_amthif_guid = UUID_LE(0x12f80028, 0xb4b7, 0x4b2d, + 0xac, 0xa8, 0x46, 0xe0, + 0xff, 0x65, 0x81, 0x4c); + +/** + * mei_amthif_reset_params - initializes mei device iamthif + * + * @dev: the device structure + */ +void mei_amthif_reset_params(struct mei_device *dev) +{ + /* reset iamthif parameters. */ + dev->iamthif_current_cb = NULL; + dev->iamthif_canceled = false; + dev->iamthif_state = MEI_IAMTHIF_IDLE; + dev->iamthif_timer = 0; + dev->iamthif_stall_timer = 0; + dev->iamthif_open_count = 0; +} + +/** + * mei_amthif_host_init - mei initialization amthif client. + * + * @dev: the device structure + * + * Return: 0 on success, <0 on failure. + */ +int mei_amthif_host_init(struct mei_device *dev) +{ + struct mei_cl *cl = &dev->iamthif_cl; + struct mei_me_client *me_cl; + int ret; + + dev->iamthif_state = MEI_IAMTHIF_IDLE; + + mei_cl_init(cl, dev); + + me_cl = mei_me_cl_by_uuid(dev, &mei_amthif_guid); + if (!me_cl) { + dev_info(dev->dev, "amthif: failed to find the client"); + return -ENOTTY; + } + + cl->me_client_id = me_cl->client_id; + cl->cl_uuid = me_cl->props.protocol_name; + + /* Assign iamthif_mtu to the value received from ME */ + + dev->iamthif_mtu = me_cl->props.max_msg_length; + dev_dbg(dev->dev, "IAMTHIF_MTU = %d\n", dev->iamthif_mtu); + + + ret = mei_cl_link(cl, MEI_IAMTHIF_HOST_CLIENT_ID); + if (ret < 0) { + dev_err(dev->dev, "amthif: failed cl_link %d\n", ret); + goto out; + } + + ret = mei_cl_connect(cl, NULL); + + dev->iamthif_state = MEI_IAMTHIF_IDLE; + +out: + mei_me_cl_put(me_cl); + return ret; +} + +/** + * mei_amthif_find_read_list_entry - finds a amthilist entry for current file + * + * @dev: the device structure + * @file: pointer to file object + * + * Return: returned a list entry on success, NULL on failure. 
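+ *
+ * The completed-read list is keyed by the file object, so each reader
+ * only ever sees callbacks belonging to its own open file.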
+ */ +struct mei_cl_cb *mei_amthif_find_read_list_entry(struct mei_device *dev, + struct file *file) +{ + struct mei_cl_cb *cb; + + list_for_each_entry(cb, &dev->amthif_rd_complete_list.list, list) + if (cb->file_object == file) + return cb; + return NULL; +} + + +/** + * mei_amthif_read - read data from AMTHIF client + * + * @dev: the device structure + * @file: pointer to file object + * @ubuf: pointer to user data in user space + * @length: data length to read + * @offset: data read offset + * + * Locking: called under "dev->device_lock" lock + * + * Return: + * returned data length on success, + * zero if no data to read, + * negative on failure. + */ +int mei_amthif_read(struct mei_device *dev, struct file *file, + char __user *ubuf, size_t length, loff_t *offset) +{ + struct mei_cl *cl = file->private_data; + struct mei_cl_cb *cb; + unsigned long timeout; + int rets; + int wait_ret; + + /* Only possible if we are in timeout */ + if (!cl) { + dev_err(dev->dev, "bad file ext.\n"); + return -ETIME; + } + + dev_dbg(dev->dev, "checking amthif data\n"); + cb = mei_amthif_find_read_list_entry(dev, file); + + /* Check for if we can block or not*/ + if (cb == NULL && file->f_flags & O_NONBLOCK) + return -EAGAIN; + + + dev_dbg(dev->dev, "waiting for amthif data\n"); + while (cb == NULL) { + /* unlock the Mutex */ + mutex_unlock(&dev->device_lock); + + wait_ret = wait_event_interruptible(dev->iamthif_cl.wait, + (cb = mei_amthif_find_read_list_entry(dev, file))); + + /* Locking again the Mutex */ + mutex_lock(&dev->device_lock); + + if (wait_ret) + return -ERESTARTSYS; + + dev_dbg(dev->dev, "woke up from sleep\n"); + } + + if (cb->status) { + rets = cb->status; + dev_dbg(dev->dev, "read operation failed %d\n", rets); + goto free; + } + + dev_dbg(dev->dev, "Got amthif data\n"); + dev->iamthif_timer = 0; + + timeout = cb->read_time + + mei_secs_to_jiffies(MEI_IAMTHIF_READ_TIMER); + dev_dbg(dev->dev, "amthif timeout = %lud\n", + timeout); + + if (time_after(jiffies, timeout)) { + dev_dbg(dev->dev, "amthif Time out\n"); + /* 15 sec for the message has expired */ + list_del_init(&cb->list); + rets = -ETIME; + goto free; + } + /* if the whole message will fit remove it from the list */ + if (cb->buf_idx >= *offset && length >= (cb->buf_idx - *offset)) + list_del_init(&cb->list); + else if (cb->buf_idx > 0 && cb->buf_idx <= *offset) { + /* end of the message has been reached */ + list_del_init(&cb->list); + rets = 0; + goto free; + } + /* else means that not full buffer will be read and do not + * remove message from deletion list + */ + + dev_dbg(dev->dev, "amthif cb->buf size - %d\n", + cb->buf.size); + dev_dbg(dev->dev, "amthif cb->buf_idx - %lu\n", cb->buf_idx); + + /* length is being truncated to PAGE_SIZE, however, + * the buf_idx may point beyond */ + length = min_t(size_t, length, (cb->buf_idx - *offset)); + + if (copy_to_user(ubuf, cb->buf.data + *offset, length)) { + dev_dbg(dev->dev, "failed to copy data to userland\n"); + rets = -EFAULT; + } else { + rets = length; + if ((*offset + length) < cb->buf_idx) { + *offset += length; + goto out; + } + } +free: + dev_dbg(dev->dev, "free amthif cb memory.\n"); + *offset = 0; + mei_io_cb_free(cb); +out: + return rets; +} + +/** + * mei_amthif_read_start - queue message for sending read credential + * + * @cl: host client + * @file: file pointer of message recipient + * + * Return: 0 on success, <0 on failure. 
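+ *
+ * The callback is queued on the control write list; the actual read
+ * request (a flow control message) is sent to the firmware from the
+ * interrupt thread once the host buffer becomes available.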
+ */ +static int mei_amthif_read_start(struct mei_cl *cl, struct file *file) +{ + struct mei_device *dev = cl->dev; + struct mei_cl_cb *cb; + size_t length = dev->iamthif_mtu; + int rets; + + cb = mei_io_cb_init(cl, MEI_FOP_READ, file); + if (!cb) { + rets = -ENOMEM; + goto err; + } + + rets = mei_io_cb_alloc_buf(cb, length); + if (rets) + goto err; + + list_add_tail(&cb->list, &dev->ctrl_wr_list.list); + + dev->iamthif_state = MEI_IAMTHIF_READING; + dev->iamthif_file_object = cb->file_object; + dev->iamthif_current_cb = cb; + + return 0; +err: + mei_io_cb_free(cb); + return rets; +} + +/** + * mei_amthif_send_cmd - send amthif command to the ME + * + * @cl: the host client + * @cb: mei call back struct + * + * Return: 0 on success, <0 on failure. + */ +static int mei_amthif_send_cmd(struct mei_cl *cl, struct mei_cl_cb *cb) +{ + struct mei_device *dev; + int ret; + + if (!cl->dev || !cb) + return -ENODEV; + + dev = cl->dev; + + dev->iamthif_state = MEI_IAMTHIF_WRITING; + dev->iamthif_current_cb = cb; + dev->iamthif_file_object = cb->file_object; + dev->iamthif_canceled = false; + + ret = mei_cl_write(cl, cb, false); + if (ret < 0) + return ret; + + if (cb->completed) + cb->status = mei_amthif_read_start(cl, cb->file_object); + + return 0; +} + +/** + * mei_amthif_run_next_cmd - send next amt command from queue + * + * @dev: the device structure + * + * Return: 0 on success, <0 on failure. + */ +int mei_amthif_run_next_cmd(struct mei_device *dev) +{ + struct mei_cl *cl = &dev->iamthif_cl; + struct mei_cl_cb *cb; + + dev->iamthif_canceled = false; + dev->iamthif_state = MEI_IAMTHIF_IDLE; + dev->iamthif_timer = 0; + dev->iamthif_file_object = NULL; + + dev_dbg(dev->dev, "complete amthif cmd_list cb.\n"); + + cb = list_first_entry_or_null(&dev->amthif_cmd_list.list, + typeof(*cb), list); + if (!cb) + return 0; + + list_del_init(&cb->list); + return mei_amthif_send_cmd(cl, cb); +} + +/** + * mei_amthif_write - write amthif data to amthif client + * + * @cl: host client + * @cb: mei call back struct + * + * Return: 0 on success, <0 on failure. + */ +int mei_amthif_write(struct mei_cl *cl, struct mei_cl_cb *cb) +{ + + struct mei_device *dev; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + if (WARN_ON(!cb)) + return -EINVAL; + + dev = cl->dev; + + list_add_tail(&cb->list, &dev->amthif_cmd_list.list); + return mei_amthif_run_next_cmd(dev); +} + +/** + * mei_amthif_poll - the amthif poll function + * + * @dev: the device structure + * @file: pointer to file structure + * @wait: pointer to poll_table structure + * + * Return: poll mask + * + * Locking: called under "dev->device_lock" lock + */ + +unsigned int mei_amthif_poll(struct mei_device *dev, + struct file *file, poll_table *wait) +{ + unsigned int mask = 0; + + poll_wait(file, &dev->iamthif_cl.wait, wait); + + if (dev->iamthif_state == MEI_IAMTHIF_READ_COMPLETE && + dev->iamthif_file_object == file) { + + mask |= POLLIN | POLLRDNORM; + mei_amthif_run_next_cmd(dev); + } + + return mask; +} + + + +/** + * mei_amthif_irq_write - write iamthif command in irq thread context. + * + * @cl: private data of the file object. + * @cb: callback block. + * @cmpl_list: complete list. + * + * Return: 0, OK; otherwise, error. 
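+ *
+ * On a completed write the matching read is started immediately, so
+ * the firmware's response is not lost.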
+ */ +int mei_amthif_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, + struct mei_cl_cb *cmpl_list) +{ + int ret; + + ret = mei_cl_irq_write(cl, cb, cmpl_list); + if (ret) + return ret; + + if (cb->completed) + cb->status = mei_amthif_read_start(cl, cb->file_object); + + return 0; +} + +/** + * mei_amthif_irq_read_msg - read routine after ISR to + * handle the read amthif message + * + * @cl: mei client + * @mei_hdr: header of amthif message + * @cmpl_list: completed callbacks list + * + * Return: -ENODEV if cb is NULL 0 otherwise; error message is in cb->status + */ +int mei_amthif_irq_read_msg(struct mei_cl *cl, + struct mei_msg_hdr *mei_hdr, + struct mei_cl_cb *cmpl_list) +{ + struct mei_device *dev; + int ret; + + dev = cl->dev; + + if (dev->iamthif_state != MEI_IAMTHIF_READING) + return 0; + + ret = mei_cl_irq_read_msg(cl, mei_hdr, cmpl_list); + if (ret) + return ret; + + if (!mei_hdr->msg_complete) + return 0; + + dev_dbg(dev->dev, "completed amthif read.\n "); + dev->iamthif_current_cb = NULL; + dev->iamthif_stall_timer = 0; + + return 0; +} + +/** + * mei_amthif_complete - complete amthif callback. + * + * @dev: the device structure. + * @cb: callback block. + */ +void mei_amthif_complete(struct mei_device *dev, struct mei_cl_cb *cb) +{ + + if (cb->fop_type == MEI_FOP_WRITE) { + if (!cb->status) { + dev->iamthif_stall_timer = MEI_IAMTHIF_STALL_TIMER; + mei_io_cb_free(cb); + return; + } + /* + * in case of error enqueue the write cb to complete read list + * so it can be propagated to the reader + */ + list_add_tail(&cb->list, &dev->amthif_rd_complete_list.list); + wake_up_interruptible(&dev->iamthif_cl.wait); + return; + } + + if (dev->iamthif_canceled != 1) { + dev->iamthif_state = MEI_IAMTHIF_READ_COMPLETE; + dev->iamthif_stall_timer = 0; + list_add_tail(&cb->list, &dev->amthif_rd_complete_list.list); + dev_dbg(dev->dev, "amthif read completed\n"); + dev->iamthif_timer = jiffies; + dev_dbg(dev->dev, "dev->iamthif_timer = %ld\n", + dev->iamthif_timer); + } else { + mei_amthif_run_next_cmd(dev); + } + + dev_dbg(dev->dev, "completing amthif call back.\n"); + wake_up_interruptible(&dev->iamthif_cl.wait); +} + +/** + * mei_clear_list - removes all callbacks associated with file + * from mei_cb_list + * + * @dev: device structure. 
+ * @file: file structure + * @mei_cb_list: callbacks list + * + * mei_clear_list is called to clear resources associated with file + * when application calls close function or Ctrl-C was pressed + * + * Return: true if callback removed from the list, false otherwise + */ +static bool mei_clear_list(struct mei_device *dev, + const struct file *file, struct list_head *mei_cb_list) +{ + struct mei_cl *cl = &dev->iamthif_cl; + struct mei_cl_cb *cb, *next; + bool removed = false; + + /* list all list member */ + list_for_each_entry_safe(cb, next, mei_cb_list, list) { + /* check if list member associated with a file */ + if (file == cb->file_object) { + /* check if cb equal to current iamthif cb */ + if (dev->iamthif_current_cb == cb) { + dev->iamthif_current_cb = NULL; + /* send flow control to iamthif client */ + mei_hbm_cl_flow_control_req(dev, cl); + } + /* free all allocated buffers */ + mei_io_cb_free(cb); + removed = true; + } + } + return removed; +} + +/** + * mei_clear_lists - removes all callbacks associated with file + * + * @dev: device structure + * @file: file structure + * + * mei_clear_lists is called to clear resources associated with file + * when application calls close function or Ctrl-C was pressed + * + * Return: true if callback removed from the list, false otherwise + */ +static bool mei_clear_lists(struct mei_device *dev, struct file *file) +{ + bool removed = false; + + /* remove callbacks associated with a file */ + mei_clear_list(dev, file, &dev->amthif_cmd_list.list); + if (mei_clear_list(dev, file, &dev->amthif_rd_complete_list.list)) + removed = true; + + mei_clear_list(dev, file, &dev->ctrl_rd_list.list); + + if (mei_clear_list(dev, file, &dev->ctrl_wr_list.list)) + removed = true; + + if (mei_clear_list(dev, file, &dev->write_waiting_list.list)) + removed = true; + + if (mei_clear_list(dev, file, &dev->write_list.list)) + removed = true; + + /* check if iamthif_current_cb not NULL */ + if (dev->iamthif_current_cb && !removed) { + /* check file and iamthif current cb association */ + if (dev->iamthif_current_cb->file_object == file) { + /* remove cb */ + mei_io_cb_free(dev->iamthif_current_cb); + dev->iamthif_current_cb = NULL; + removed = true; + } + } + return removed; +} + +/** +* mei_amthif_release - the release function +* +* @dev: device structure +* @file: pointer to file structure +* +* Return: 0 on success, <0 on error +*/ +int mei_amthif_release(struct mei_device *dev, struct file *file) +{ + if (dev->iamthif_open_count > 0) + dev->iamthif_open_count--; + + if (dev->iamthif_file_object == file && + dev->iamthif_state != MEI_IAMTHIF_IDLE) { + + dev_dbg(dev->dev, "amthif canceled iamthif state %d\n", + dev->iamthif_state); + dev->iamthif_canceled = true; + if (dev->iamthif_state == MEI_IAMTHIF_READ_COMPLETE) { + dev_dbg(dev->dev, "run next amthif iamthif cb\n"); + mei_amthif_run_next_cmd(dev); + } + } + + if (mei_clear_lists(dev, file)) + dev->iamthif_state = MEI_IAMTHIF_IDLE; + + return 0; +} diff --git a/kernel/drivers/misc/mei/bus.c b/kernel/drivers/misc/mei/bus.c new file mode 100644 index 000000000..4cf38c398 --- /dev/null +++ b/kernel/drivers/misc/mei/bus.c @@ -0,0 +1,537 @@ +/* + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2012-2013, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <linux/module.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/mutex.h> +#include <linux/interrupt.h> +#include <linux/mei_cl_bus.h> + +#include "mei_dev.h" +#include "client.h" + +#define to_mei_cl_driver(d) container_of(d, struct mei_cl_driver, driver) +#define to_mei_cl_device(d) container_of(d, struct mei_cl_device, dev) + +static int mei_cl_device_match(struct device *dev, struct device_driver *drv) +{ + struct mei_cl_device *device = to_mei_cl_device(dev); + struct mei_cl_driver *driver = to_mei_cl_driver(drv); + const struct mei_cl_device_id *id; + + if (!device) + return 0; + + if (!driver || !driver->id_table) + return 0; + + id = driver->id_table; + + while (id->name[0]) { + if (!strncmp(dev_name(dev), id->name, sizeof(id->name))) + return 1; + + id++; + } + + return 0; +} + +static int mei_cl_device_probe(struct device *dev) +{ + struct mei_cl_device *device = to_mei_cl_device(dev); + struct mei_cl_driver *driver; + struct mei_cl_device_id id; + + if (!device) + return 0; + + driver = to_mei_cl_driver(dev->driver); + if (!driver || !driver->probe) + return -ENODEV; + + dev_dbg(dev, "Device probe\n"); + + strlcpy(id.name, dev_name(dev), sizeof(id.name)); + + return driver->probe(device, &id); +} + +static int mei_cl_device_remove(struct device *dev) +{ + struct mei_cl_device *device = to_mei_cl_device(dev); + struct mei_cl_driver *driver; + + if (!device || !dev->driver) + return 0; + + if (device->event_cb) { + device->event_cb = NULL; + cancel_work_sync(&device->event_work); + } + + driver = to_mei_cl_driver(dev->driver); + if (!driver->remove) { + dev->driver = NULL; + + return 0; + } + + return driver->remove(device); +} + +static ssize_t modalias_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + int len; + + len = snprintf(buf, PAGE_SIZE, "mei:%s\n", dev_name(dev)); + + return (len >= PAGE_SIZE) ? 
(PAGE_SIZE - 1) : len; +} +static DEVICE_ATTR_RO(modalias); + +static struct attribute *mei_cl_dev_attrs[] = { + &dev_attr_modalias.attr, + NULL, +}; +ATTRIBUTE_GROUPS(mei_cl_dev); + +static int mei_cl_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + if (add_uevent_var(env, "MODALIAS=mei:%s", dev_name(dev))) + return -ENOMEM; + + return 0; +} + +static struct bus_type mei_cl_bus_type = { + .name = "mei", + .dev_groups = mei_cl_dev_groups, + .match = mei_cl_device_match, + .probe = mei_cl_device_probe, + .remove = mei_cl_device_remove, + .uevent = mei_cl_uevent, +}; + +static void mei_cl_dev_release(struct device *dev) +{ + kfree(to_mei_cl_device(dev)); +} + +static struct device_type mei_cl_device_type = { + .release = mei_cl_dev_release, +}; + +struct mei_cl *mei_cl_bus_find_cl_by_uuid(struct mei_device *dev, + uuid_le uuid) +{ + struct mei_cl *cl; + + list_for_each_entry(cl, &dev->device_list, device_link) { + if (!uuid_le_cmp(uuid, cl->cl_uuid)) + return cl; + } + + return NULL; +} +struct mei_cl_device *mei_cl_add_device(struct mei_device *dev, + uuid_le uuid, char *name, + struct mei_cl_ops *ops) +{ + struct mei_cl_device *device; + struct mei_cl *cl; + int status; + + cl = mei_cl_bus_find_cl_by_uuid(dev, uuid); + if (cl == NULL) + return NULL; + + device = kzalloc(sizeof(struct mei_cl_device), GFP_KERNEL); + if (!device) + return NULL; + + device->cl = cl; + device->ops = ops; + + device->dev.parent = dev->dev; + device->dev.bus = &mei_cl_bus_type; + device->dev.type = &mei_cl_device_type; + + dev_set_name(&device->dev, "%s", name); + + status = device_register(&device->dev); + if (status) { + dev_err(dev->dev, "Failed to register MEI device\n"); + kfree(device); + return NULL; + } + + cl->device = device; + + dev_dbg(&device->dev, "client %s registered\n", name); + + return device; +} +EXPORT_SYMBOL_GPL(mei_cl_add_device); + +void mei_cl_remove_device(struct mei_cl_device *device) +{ + device_unregister(&device->dev); +} +EXPORT_SYMBOL_GPL(mei_cl_remove_device); + +int __mei_cl_driver_register(struct mei_cl_driver *driver, struct module *owner) +{ + int err; + + driver->driver.name = driver->name; + driver->driver.owner = owner; + driver->driver.bus = &mei_cl_bus_type; + + err = driver_register(&driver->driver); + if (err) + return err; + + pr_debug("mei: driver [%s] registered\n", driver->driver.name); + + return 0; +} +EXPORT_SYMBOL_GPL(__mei_cl_driver_register); + +void mei_cl_driver_unregister(struct mei_cl_driver *driver) +{ + driver_unregister(&driver->driver); + + pr_debug("mei: driver [%s] unregistered\n", driver->driver.name); +} +EXPORT_SYMBOL_GPL(mei_cl_driver_unregister); + +static ssize_t ___mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length, + bool blocking) +{ + struct mei_device *dev; + struct mei_me_client *me_cl = NULL; + struct mei_cl_cb *cb = NULL; + ssize_t rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + mutex_lock(&dev->device_lock); + if (!mei_cl_is_connected(cl)) { + rets = -ENODEV; + goto out; + } + + /* Check if we have an ME client device */ + me_cl = mei_me_cl_by_uuid_id(dev, &cl->cl_uuid, cl->me_client_id); + if (!me_cl) { + rets = -ENOTTY; + goto out; + } + + if (length > me_cl->props.max_msg_length) { + rets = -EFBIG; + goto out; + } + + cb = mei_cl_alloc_cb(cl, length, MEI_FOP_WRITE, NULL); + if (!cb) { + rets = -ENOMEM; + goto out; + } + + memcpy(cb->buf.data, buf, length); + + rets = mei_cl_write(cl, cb, blocking); + +out: + mei_me_cl_put(me_cl); + mutex_unlock(&dev->device_lock); + if (rets < 0) + 
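+		/* mei_io_cb_free() copes with both NULL and a still-queued cb */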
mei_io_cb_free(cb); + + return rets; +} + +ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length) +{ + struct mei_device *dev; + struct mei_cl_cb *cb; + size_t r_length; + ssize_t rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + mutex_lock(&dev->device_lock); + + cb = mei_cl_read_cb(cl, NULL); + if (cb) + goto copy; + + rets = mei_cl_read_start(cl, length, NULL); + if (rets && rets != -EBUSY) + goto out; + + if (list_empty(&cl->rd_completed) && !waitqueue_active(&cl->rx_wait)) { + + mutex_unlock(&dev->device_lock); + + if (wait_event_interruptible(cl->rx_wait, + (!list_empty(&cl->rd_completed)) || + (!mei_cl_is_connected(cl)))) { + + if (signal_pending(current)) + return -EINTR; + return -ERESTARTSYS; + } + + mutex_lock(&dev->device_lock); + + if (!mei_cl_is_connected(cl)) { + rets = -EBUSY; + goto out; + } + } + + cb = mei_cl_read_cb(cl, NULL); + if (!cb) { + rets = 0; + goto out; + } + +copy: + if (cb->status) { + rets = cb->status; + goto free; + } + + r_length = min_t(size_t, length, cb->buf_idx); + memcpy(buf, cb->buf.data, r_length); + rets = r_length; + +free: + mei_io_cb_free(cb); +out: + mutex_unlock(&dev->device_lock); + + return rets; +} + +inline ssize_t __mei_cl_async_send(struct mei_cl *cl, u8 *buf, size_t length) +{ + return ___mei_cl_send(cl, buf, length, 0); +} + +inline ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length) +{ + return ___mei_cl_send(cl, buf, length, 1); +} + +ssize_t mei_cl_send(struct mei_cl_device *device, u8 *buf, size_t length) +{ + struct mei_cl *cl = device->cl; + + if (cl == NULL) + return -ENODEV; + + if (device->ops && device->ops->send) + return device->ops->send(device, buf, length); + + return __mei_cl_send(cl, buf, length); +} +EXPORT_SYMBOL_GPL(mei_cl_send); + +ssize_t mei_cl_recv(struct mei_cl_device *device, u8 *buf, size_t length) +{ + struct mei_cl *cl = device->cl; + + if (cl == NULL) + return -ENODEV; + + if (device->ops && device->ops->recv) + return device->ops->recv(device, buf, length); + + return __mei_cl_recv(cl, buf, length); +} +EXPORT_SYMBOL_GPL(mei_cl_recv); + +static void mei_bus_event_work(struct work_struct *work) +{ + struct mei_cl_device *device; + + device = container_of(work, struct mei_cl_device, event_work); + + if (device->event_cb) + device->event_cb(device, device->events, device->event_context); + + device->events = 0; + + /* Prepare for the next read */ + mei_cl_read_start(device->cl, 0, NULL); +} + +int mei_cl_register_event_cb(struct mei_cl_device *device, + mei_cl_event_cb_t event_cb, void *context) +{ + if (device->event_cb) + return -EALREADY; + + device->events = 0; + device->event_cb = event_cb; + device->event_context = context; + INIT_WORK(&device->event_work, mei_bus_event_work); + + mei_cl_read_start(device->cl, 0, NULL); + + return 0; +} +EXPORT_SYMBOL_GPL(mei_cl_register_event_cb); + +void *mei_cl_get_drvdata(const struct mei_cl_device *device) +{ + return dev_get_drvdata(&device->dev); +} +EXPORT_SYMBOL_GPL(mei_cl_get_drvdata); + +void mei_cl_set_drvdata(struct mei_cl_device *device, void *data) +{ + dev_set_drvdata(&device->dev, data); +} +EXPORT_SYMBOL_GPL(mei_cl_set_drvdata); + +int mei_cl_enable_device(struct mei_cl_device *device) +{ + int err; + struct mei_device *dev; + struct mei_cl *cl = device->cl; + + if (cl == NULL) + return -ENODEV; + + dev = cl->dev; + + mutex_lock(&dev->device_lock); + + err = mei_cl_connect(cl, NULL); + if (err < 0) { + mutex_unlock(&dev->device_lock); + dev_err(dev->dev, "Could not connect to the ME client"); + + 
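+		/* the device lock was dropped above; just propagate the error */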
return err; + } + + mutex_unlock(&dev->device_lock); + + if (device->event_cb) + mei_cl_read_start(device->cl, 0, NULL); + + if (!device->ops || !device->ops->enable) + return 0; + + return device->ops->enable(device); +} +EXPORT_SYMBOL_GPL(mei_cl_enable_device); + +int mei_cl_disable_device(struct mei_cl_device *device) +{ + int err; + struct mei_device *dev; + struct mei_cl *cl = device->cl; + + if (cl == NULL) + return -ENODEV; + + dev = cl->dev; + + if (device->ops && device->ops->disable) + device->ops->disable(device); + + device->event_cb = NULL; + + mutex_lock(&dev->device_lock); + + if (!mei_cl_is_connected(cl)) { + dev_err(dev->dev, "Already disconnected"); + err = 0; + goto out; + } + + cl->state = MEI_FILE_DISCONNECTING; + + err = mei_cl_disconnect(cl); + if (err < 0) { + dev_err(dev->dev, "Could not disconnect from the ME client"); + goto out; + } + + /* Flush queues and remove any pending read */ + mei_cl_flush_queues(cl, NULL); + +out: + mutex_unlock(&dev->device_lock); + return err; + +} +EXPORT_SYMBOL_GPL(mei_cl_disable_device); + +void mei_cl_bus_rx_event(struct mei_cl *cl) +{ + struct mei_cl_device *device = cl->device; + + if (!device || !device->event_cb) + return; + + set_bit(MEI_CL_EVENT_RX, &device->events); + + schedule_work(&device->event_work); +} + +void mei_cl_bus_remove_devices(struct mei_device *dev) +{ + struct mei_cl *cl, *next; + + mutex_lock(&dev->device_lock); + list_for_each_entry_safe(cl, next, &dev->device_list, device_link) { + if (cl->device) + mei_cl_remove_device(cl->device); + + list_del(&cl->device_link); + mei_cl_unlink(cl); + kfree(cl); + } + mutex_unlock(&dev->device_lock); +} + +int __init mei_cl_bus_init(void) +{ + return bus_register(&mei_cl_bus_type); +} + +void __exit mei_cl_bus_exit(void) +{ + bus_unregister(&mei_cl_bus_type); +} diff --git a/kernel/drivers/misc/mei/client.c b/kernel/drivers/misc/mei/client.c new file mode 100644 index 000000000..b2b9f4382 --- /dev/null +++ b/kernel/drivers/misc/mei/client.c @@ -0,0 +1,1360 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2012, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/delay.h> +#include <linux/slab.h> +#include <linux/pm_runtime.h> + +#include <linux/mei.h> + +#include "mei_dev.h" +#include "hbm.h" +#include "client.h" + +/** + * mei_me_cl_init - initialize me client + * + * @me_cl: me client + */ +void mei_me_cl_init(struct mei_me_client *me_cl) +{ + INIT_LIST_HEAD(&me_cl->list); + kref_init(&me_cl->refcnt); +} + +/** + * mei_me_cl_get - increases me client refcount + * + * @me_cl: me client + * + * Locking: called under "dev->device_lock" lock + * + * Return: me client or NULL + */ +struct mei_me_client *mei_me_cl_get(struct mei_me_client *me_cl) +{ + if (me_cl && kref_get_unless_zero(&me_cl->refcnt)) + return me_cl; + + return NULL; +} + +/** + * mei_me_cl_release - free me client + * + * Locking: called under "dev->device_lock" lock + * + * @ref: me_client refcount + */ +static void mei_me_cl_release(struct kref *ref) +{ + struct mei_me_client *me_cl = + container_of(ref, struct mei_me_client, refcnt); + + kfree(me_cl); +} + +/** + * mei_me_cl_put - decrease me client refcount and free client if necessary + * + * Locking: called under "dev->device_lock" lock + * + * @me_cl: me client + */ +void mei_me_cl_put(struct mei_me_client *me_cl) +{ + if (me_cl) + kref_put(&me_cl->refcnt, mei_me_cl_release); +} + +/** + * __mei_me_cl_del - delete me client form the list and decrease + * reference counter + * + * @dev: mei device + * @me_cl: me client + * + * Locking: dev->me_clients_rwsem + */ +static void __mei_me_cl_del(struct mei_device *dev, struct mei_me_client *me_cl) +{ + if (!me_cl) + return; + + list_del(&me_cl->list); + mei_me_cl_put(me_cl); +} + +/** + * mei_me_cl_add - add me client to the list + * + * @dev: mei device + * @me_cl: me client + */ +void mei_me_cl_add(struct mei_device *dev, struct mei_me_client *me_cl) +{ + down_write(&dev->me_clients_rwsem); + list_add(&me_cl->list, &dev->me_clients); + up_write(&dev->me_clients_rwsem); +} + +/** + * __mei_me_cl_by_uuid - locate me client by uuid + * increases ref count + * + * @dev: mei device + * @uuid: me client uuid + * + * Return: me client or NULL if not found + * + * Locking: dev->me_clients_rwsem + */ +static struct mei_me_client *__mei_me_cl_by_uuid(struct mei_device *dev, + const uuid_le *uuid) +{ + struct mei_me_client *me_cl; + const uuid_le *pn; + + WARN_ON(!rwsem_is_locked(&dev->me_clients_rwsem)); + + list_for_each_entry(me_cl, &dev->me_clients, list) { + pn = &me_cl->props.protocol_name; + if (uuid_le_cmp(*uuid, *pn) == 0) + return mei_me_cl_get(me_cl); + } + + return NULL; +} + +/** + * mei_me_cl_by_uuid - locate me client by uuid + * increases ref count + * + * @dev: mei device + * @uuid: me client uuid + * + * Return: me client or NULL if not found + * + * Locking: dev->me_clients_rwsem + */ +struct mei_me_client *mei_me_cl_by_uuid(struct mei_device *dev, + const uuid_le *uuid) +{ + struct mei_me_client *me_cl; + + down_read(&dev->me_clients_rwsem); + me_cl = __mei_me_cl_by_uuid(dev, uuid); + up_read(&dev->me_clients_rwsem); + + return me_cl; +} + +/** + * mei_me_cl_by_id - locate me client by client id + * increases ref count + * + * @dev: the device structure + * @client_id: me client id + * + * Return: me client or NULL if not found + * + * Locking: dev->me_clients_rwsem + */ +struct mei_me_client *mei_me_cl_by_id(struct mei_device *dev, u8 client_id) +{ + + struct mei_me_client *__me_cl, *me_cl = NULL; + + down_read(&dev->me_clients_rwsem); + list_for_each_entry(__me_cl, &dev->me_clients, list) { + 
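+		/* client ids are expected to be unique; take the first match */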
if (__me_cl->client_id == client_id) { + me_cl = mei_me_cl_get(__me_cl); + break; + } + } + up_read(&dev->me_clients_rwsem); + + return me_cl; +} + +/** + * __mei_me_cl_by_uuid_id - locate me client by client id and uuid + * increases ref count + * + * @dev: the device structure + * @uuid: me client uuid + * @client_id: me client id + * + * Return: me client or null if not found + * + * Locking: dev->me_clients_rwsem + */ +static struct mei_me_client *__mei_me_cl_by_uuid_id(struct mei_device *dev, + const uuid_le *uuid, u8 client_id) +{ + struct mei_me_client *me_cl; + const uuid_le *pn; + + WARN_ON(!rwsem_is_locked(&dev->me_clients_rwsem)); + + list_for_each_entry(me_cl, &dev->me_clients, list) { + pn = &me_cl->props.protocol_name; + if (uuid_le_cmp(*uuid, *pn) == 0 && + me_cl->client_id == client_id) + return mei_me_cl_get(me_cl); + } + + return NULL; +} + + +/** + * mei_me_cl_by_uuid_id - locate me client by client id and uuid + * increases ref count + * + * @dev: the device structure + * @uuid: me client uuid + * @client_id: me client id + * + * Return: me client or null if not found + */ +struct mei_me_client *mei_me_cl_by_uuid_id(struct mei_device *dev, + const uuid_le *uuid, u8 client_id) +{ + struct mei_me_client *me_cl; + + down_read(&dev->me_clients_rwsem); + me_cl = __mei_me_cl_by_uuid_id(dev, uuid, client_id); + up_read(&dev->me_clients_rwsem); + + return me_cl; +} + +/** + * mei_me_cl_rm_by_uuid - remove all me clients matching uuid + * + * @dev: the device structure + * @uuid: me client uuid + * + * Locking: called under "dev->device_lock" lock + */ +void mei_me_cl_rm_by_uuid(struct mei_device *dev, const uuid_le *uuid) +{ + struct mei_me_client *me_cl; + + dev_dbg(dev->dev, "remove %pUl\n", uuid); + + down_write(&dev->me_clients_rwsem); + me_cl = __mei_me_cl_by_uuid(dev, uuid); + __mei_me_cl_del(dev, me_cl); + up_write(&dev->me_clients_rwsem); +} + +/** + * mei_me_cl_rm_by_uuid_id - remove all me clients matching client id + * + * @dev: the device structure + * @uuid: me client uuid + * @id: me client id + * + * Locking: called under "dev->device_lock" lock + */ +void mei_me_cl_rm_by_uuid_id(struct mei_device *dev, const uuid_le *uuid, u8 id) +{ + struct mei_me_client *me_cl; + + dev_dbg(dev->dev, "remove %pUl %d\n", uuid, id); + + down_write(&dev->me_clients_rwsem); + me_cl = __mei_me_cl_by_uuid_id(dev, uuid, id); + __mei_me_cl_del(dev, me_cl); + up_write(&dev->me_clients_rwsem); +} + +/** + * mei_me_cl_rm_all - remove all me clients + * + * @dev: the device structure + * + * Locking: called under "dev->device_lock" lock + */ +void mei_me_cl_rm_all(struct mei_device *dev) +{ + struct mei_me_client *me_cl, *next; + + down_write(&dev->me_clients_rwsem); + list_for_each_entry_safe(me_cl, next, &dev->me_clients, list) + __mei_me_cl_del(dev, me_cl); + up_write(&dev->me_clients_rwsem); +} + +/** + * mei_cl_cmp_id - tells if the clients are the same + * + * @cl1: host client 1 + * @cl2: host client 2 + * + * Return: true - if the clients has same host and me ids + * false - otherwise + */ +static inline bool mei_cl_cmp_id(const struct mei_cl *cl1, + const struct mei_cl *cl2) +{ + return cl1 && cl2 && + (cl1->host_client_id == cl2->host_client_id) && + (cl1->me_client_id == cl2->me_client_id); +} + +/** + * mei_io_cb_free - free mei_cb_private related memory + * + * @cb: mei callback struct + */ +void mei_io_cb_free(struct mei_cl_cb *cb) +{ + if (cb == NULL) + return; + + list_del(&cb->list); + kfree(cb->buf.data); + kfree(cb); +} + +/** + * mei_io_cb_init - allocate and initialize 
io callback + * + * @cl: mei client + * @type: operation type + * @fp: pointer to file structure + * + * Return: mei_cl_cb pointer or NULL; + */ +struct mei_cl_cb *mei_io_cb_init(struct mei_cl *cl, enum mei_cb_file_ops type, + struct file *fp) +{ + struct mei_cl_cb *cb; + + cb = kzalloc(sizeof(struct mei_cl_cb), GFP_KERNEL); + if (!cb) + return NULL; + + INIT_LIST_HEAD(&cb->list); + cb->file_object = fp; + cb->cl = cl; + cb->buf_idx = 0; + cb->fop_type = type; + return cb; +} + +/** + * __mei_io_list_flush - removes and frees cbs belonging to cl. + * + * @list: an instance of our list structure + * @cl: host client, can be NULL for flushing the whole list + * @free: whether to free the cbs + */ +static void __mei_io_list_flush(struct mei_cl_cb *list, + struct mei_cl *cl, bool free) +{ + struct mei_cl_cb *cb, *next; + + /* enable removing everything if no cl is specified */ + list_for_each_entry_safe(cb, next, &list->list, list) { + if (!cl || mei_cl_cmp_id(cl, cb->cl)) { + list_del_init(&cb->list); + if (free) + mei_io_cb_free(cb); + } + } +} + +/** + * mei_io_list_flush - removes list entry belonging to cl. + * + * @list: An instance of our list structure + * @cl: host client + */ +void mei_io_list_flush(struct mei_cl_cb *list, struct mei_cl *cl) +{ + __mei_io_list_flush(list, cl, false); +} + +/** + * mei_io_list_free - removes cb belonging to cl and free them + * + * @list: An instance of our list structure + * @cl: host client + */ +static inline void mei_io_list_free(struct mei_cl_cb *list, struct mei_cl *cl) +{ + __mei_io_list_flush(list, cl, true); +} + +/** + * mei_io_cb_alloc_buf - allocate callback buffer + * + * @cb: io callback structure + * @length: size of the buffer + * + * Return: 0 on success + * -EINVAL if cb is NULL + * -ENOMEM if allocation failed + */ +int mei_io_cb_alloc_buf(struct mei_cl_cb *cb, size_t length) +{ + if (!cb) + return -EINVAL; + + if (length == 0) + return 0; + + cb->buf.data = kmalloc(length, GFP_KERNEL); + if (!cb->buf.data) + return -ENOMEM; + cb->buf.size = length; + return 0; +} + +/** + * mei_cl_alloc_cb - a convenient wrapper for allocating read cb + * + * @cl: host client + * @length: size of the buffer + * @type: operation type + * @fp: associated file pointer (might be NULL) + * + * Return: cb on success and NULL on failure + */ +struct mei_cl_cb *mei_cl_alloc_cb(struct mei_cl *cl, size_t length, + enum mei_cb_file_ops type, struct file *fp) +{ + struct mei_cl_cb *cb; + + cb = mei_io_cb_init(cl, type, fp); + if (!cb) + return NULL; + + if (mei_io_cb_alloc_buf(cb, length)) { + mei_io_cb_free(cb); + return NULL; + } + + return cb; +} + +/** + * mei_cl_read_cb - find this cl's callback in the read list + * for a specific file + * + * @cl: host client + * @fp: file pointer (matching cb file object), may be NULL + * + * Return: cb on success, NULL if cb is not found + */ +struct mei_cl_cb *mei_cl_read_cb(const struct mei_cl *cl, const struct file *fp) +{ + struct mei_cl_cb *cb; + + list_for_each_entry(cb, &cl->rd_completed, list) + if (!fp || fp == cb->file_object) + return cb; + + return NULL; +} + +/** + * mei_cl_read_cb_flush - free client's read pending and completed cbs + * for a specific file + * + * @cl: host client + * @fp: file pointer (matching cb file object), may be NULL + */ +void mei_cl_read_cb_flush(const struct mei_cl *cl, const struct file *fp) +{ + struct mei_cl_cb *cb, *next; + + list_for_each_entry_safe(cb, next, &cl->rd_completed, list) + if (!fp || fp == cb->file_object) + mei_io_cb_free(cb); + + + 
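+	/* also drop reads that were started but have not yet completed */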
list_for_each_entry_safe(cb, next, &cl->rd_pending, list) + if (!fp || fp == cb->file_object) + mei_io_cb_free(cb); +} + +/** + * mei_cl_flush_queues - flushes queue lists belonging to cl. + * + * @cl: host client + * @fp: file pointer (matching cb file object), may be NULL + * + * Return: 0 on success, -EINVAL if cl or cl->dev is NULL. + */ +int mei_cl_flush_queues(struct mei_cl *cl, const struct file *fp) +{ + struct mei_device *dev; + + if (WARN_ON(!cl || !cl->dev)) + return -EINVAL; + + dev = cl->dev; + + cl_dbg(dev, cl, "remove list entry belonging to cl\n"); + mei_io_list_free(&cl->dev->write_list, cl); + mei_io_list_free(&cl->dev->write_waiting_list, cl); + mei_io_list_flush(&cl->dev->ctrl_wr_list, cl); + mei_io_list_flush(&cl->dev->ctrl_rd_list, cl); + mei_io_list_flush(&cl->dev->amthif_cmd_list, cl); + mei_io_list_flush(&cl->dev->amthif_rd_complete_list, cl); + + mei_cl_read_cb_flush(cl, fp); + + return 0; +} + + +/** + * mei_cl_init - initializes cl. + * + * @cl: host client to be initialized + * @dev: mei device + */ +void mei_cl_init(struct mei_cl *cl, struct mei_device *dev) +{ + memset(cl, 0, sizeof(struct mei_cl)); + init_waitqueue_head(&cl->wait); + init_waitqueue_head(&cl->rx_wait); + init_waitqueue_head(&cl->tx_wait); + INIT_LIST_HEAD(&cl->rd_completed); + INIT_LIST_HEAD(&cl->rd_pending); + INIT_LIST_HEAD(&cl->link); + INIT_LIST_HEAD(&cl->device_link); + cl->writing_state = MEI_IDLE; + cl->dev = dev; +} + +/** + * mei_cl_allocate - allocates cl structure and sets it up. + * + * @dev: mei device + * Return: The allocated file or NULL on failure + */ +struct mei_cl *mei_cl_allocate(struct mei_device *dev) +{ + struct mei_cl *cl; + + cl = kmalloc(sizeof(struct mei_cl), GFP_KERNEL); + if (!cl) + return NULL; + + mei_cl_init(cl, dev); + + return cl; +} + +/** + * mei_cl_link - allocate host id in the host map + * + * @cl: host client + * @id: fixed host id or MEI_HOST_CLIENT_ID_ANY (-1) for generic one + * + * Return: 0 on success + * -EINVAL on incorrect values + * -EMFILE if open count exceeded. 
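+ *	(-EMFILE is also returned when no free host id is available)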
+ */ +int mei_cl_link(struct mei_cl *cl, int id) +{ + struct mei_device *dev; + long open_handle_count; + + if (WARN_ON(!cl || !cl->dev)) + return -EINVAL; + + dev = cl->dev; + + /* If Id is not assigned get one*/ + if (id == MEI_HOST_CLIENT_ID_ANY) + id = find_first_zero_bit(dev->host_clients_map, + MEI_CLIENTS_MAX); + + if (id >= MEI_CLIENTS_MAX) { + dev_err(dev->dev, "id exceeded %d", MEI_CLIENTS_MAX); + return -EMFILE; + } + + open_handle_count = dev->open_handle_count + dev->iamthif_open_count; + if (open_handle_count >= MEI_MAX_OPEN_HANDLE_COUNT) { + dev_err(dev->dev, "open_handle_count exceeded %d", + MEI_MAX_OPEN_HANDLE_COUNT); + return -EMFILE; + } + + dev->open_handle_count++; + + cl->host_client_id = id; + list_add_tail(&cl->link, &dev->file_list); + + set_bit(id, dev->host_clients_map); + + cl->state = MEI_FILE_INITIALIZING; + + cl_dbg(dev, cl, "link cl\n"); + return 0; +} + +/** + * mei_cl_unlink - remove me_cl from the list + * + * @cl: host client + * + * Return: always 0 + */ +int mei_cl_unlink(struct mei_cl *cl) +{ + struct mei_device *dev; + + /* don't shout on error exit path */ + if (!cl) + return 0; + + /* wd and amthif might not be initialized */ + if (!cl->dev) + return 0; + + dev = cl->dev; + + cl_dbg(dev, cl, "unlink client"); + + if (dev->open_handle_count > 0) + dev->open_handle_count--; + + /* never clear the 0 bit */ + if (cl->host_client_id) + clear_bit(cl->host_client_id, dev->host_clients_map); + + list_del_init(&cl->link); + + cl->state = MEI_FILE_INITIALIZING; + + return 0; +} + + +void mei_host_client_init(struct work_struct *work) +{ + struct mei_device *dev = + container_of(work, struct mei_device, init_work); + struct mei_me_client *me_cl; + + mutex_lock(&dev->device_lock); + + + me_cl = mei_me_cl_by_uuid(dev, &mei_amthif_guid); + if (me_cl) + mei_amthif_host_init(dev); + mei_me_cl_put(me_cl); + + me_cl = mei_me_cl_by_uuid(dev, &mei_wd_guid); + if (me_cl) + mei_wd_host_init(dev); + mei_me_cl_put(me_cl); + + me_cl = mei_me_cl_by_uuid(dev, &mei_nfc_guid); + if (me_cl) + mei_nfc_host_init(dev); + mei_me_cl_put(me_cl); + + + dev->dev_state = MEI_DEV_ENABLED; + dev->reset_count = 0; + mutex_unlock(&dev->device_lock); + + pm_runtime_mark_last_busy(dev->dev); + dev_dbg(dev->dev, "rpm: autosuspend\n"); + pm_runtime_autosuspend(dev->dev); +} + +/** + * mei_hbuf_acquire - try to acquire host buffer + * + * @dev: the device structure + * Return: true if host buffer was acquired + */ +bool mei_hbuf_acquire(struct mei_device *dev) +{ + if (mei_pg_state(dev) == MEI_PG_ON || + mei_pg_in_transition(dev)) { + dev_dbg(dev->dev, "device is in pg\n"); + return false; + } + + if (!dev->hbuf_is_ready) { + dev_dbg(dev->dev, "hbuf is not ready\n"); + return false; + } + + dev->hbuf_is_ready = false; + + return true; +} + +/** + * mei_cl_disconnect - disconnect host client from the me one + * + * @cl: host client + * + * Locking: called under "dev->device_lock" lock + * + * Return: 0 on success, <0 on failure. + */ +int mei_cl_disconnect(struct mei_cl *cl) +{ + struct mei_device *dev; + struct mei_cl_cb *cb; + int rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + cl_dbg(dev, cl, "disconnecting"); + + if (cl->state != MEI_FILE_DISCONNECTING) + return 0; + + rets = pm_runtime_get(dev->dev); + if (rets < 0 && rets != -EINPROGRESS) { + pm_runtime_put_noidle(dev->dev); + cl_err(dev, cl, "rpm: get failed %d\n", rets); + return rets; + } + + cb = mei_io_cb_init(cl, MEI_FOP_DISCONNECT, NULL); + rets = cb ? 
0 : -ENOMEM; + if (rets) + goto free; + + if (mei_hbuf_acquire(dev)) { + if (mei_hbm_cl_disconnect_req(dev, cl)) { + rets = -ENODEV; + cl_err(dev, cl, "failed to disconnect.\n"); + goto free; + } + cl->timer_count = MEI_CONNECT_TIMEOUT; + mdelay(10); /* Wait for hardware disconnection ready */ + list_add_tail(&cb->list, &dev->ctrl_rd_list.list); + } else { + cl_dbg(dev, cl, "add disconnect cb to control write list\n"); + list_add_tail(&cb->list, &dev->ctrl_wr_list.list); + + } + mutex_unlock(&dev->device_lock); + + wait_event_timeout(cl->wait, + MEI_FILE_DISCONNECTED == cl->state, + mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT)); + + mutex_lock(&dev->device_lock); + + if (MEI_FILE_DISCONNECTED == cl->state) { + rets = 0; + cl_dbg(dev, cl, "successfully disconnected from FW client.\n"); + } else { + cl_dbg(dev, cl, "timeout on disconnect from FW client.\n"); + rets = -ETIME; + } + + mei_io_list_flush(&dev->ctrl_rd_list, cl); + mei_io_list_flush(&dev->ctrl_wr_list, cl); +free: + cl_dbg(dev, cl, "rpm: autosuspend\n"); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + + mei_io_cb_free(cb); + return rets; +} + + +/** + * mei_cl_is_other_connecting - checks if other + * client with the same me client id is connecting + * + * @cl: private data of the file object + * + * Return: true if other client is connected, false - otherwise. + */ +bool mei_cl_is_other_connecting(struct mei_cl *cl) +{ + struct mei_device *dev; + struct mei_cl *ocl; /* the other client */ + + if (WARN_ON(!cl || !cl->dev)) + return false; + + dev = cl->dev; + + list_for_each_entry(ocl, &dev->file_list, link) { + if (ocl->state == MEI_FILE_CONNECTING && + ocl != cl && + cl->me_client_id == ocl->me_client_id) + return true; + + } + + return false; +} + +/** + * mei_cl_connect - connect host client to the me one + * + * @cl: host client + * @file: pointer to file structure + * + * Locking: called under "dev->device_lock" lock + * + * Return: 0 on success, <0 on failure. + */ +int mei_cl_connect(struct mei_cl *cl, struct file *file) +{ + struct mei_device *dev; + struct mei_cl_cb *cb; + int rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + rets = pm_runtime_get(dev->dev); + if (rets < 0 && rets != -EINPROGRESS) { + pm_runtime_put_noidle(dev->dev); + cl_err(dev, cl, "rpm: get failed %d\n", rets); + return rets; + } + + cb = mei_io_cb_init(cl, MEI_FOP_CONNECT, file); + rets = cb ? 
0 : -ENOMEM; + if (rets) + goto out; + + /* run hbuf acquire last so we don't have to undo */ + if (!mei_cl_is_other_connecting(cl) && mei_hbuf_acquire(dev)) { + cl->state = MEI_FILE_CONNECTING; + if (mei_hbm_cl_connect_req(dev, cl)) { + rets = -ENODEV; + goto out; + } + cl->timer_count = MEI_CONNECT_TIMEOUT; + list_add_tail(&cb->list, &dev->ctrl_rd_list.list); + } else { + cl->state = MEI_FILE_INITIALIZING; + list_add_tail(&cb->list, &dev->ctrl_wr_list.list); + } + + mutex_unlock(&dev->device_lock); + wait_event_timeout(cl->wait, + (cl->state == MEI_FILE_CONNECTED || + cl->state == MEI_FILE_DISCONNECTED), + mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT)); + mutex_lock(&dev->device_lock); + + if (!mei_cl_is_connected(cl)) { + cl->state = MEI_FILE_DISCONNECTED; + /* something went really wrong */ + if (!cl->status) + cl->status = -EFAULT; + + mei_io_list_flush(&dev->ctrl_rd_list, cl); + mei_io_list_flush(&dev->ctrl_wr_list, cl); + } + + rets = cl->status; + +out: + cl_dbg(dev, cl, "rpm: autosuspend\n"); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + + mei_io_cb_free(cb); + return rets; +} + +/** + * mei_cl_alloc_linked - allocate and link host client + * + * @dev: the device structure + * @id: fixed host id or MEI_HOST_CLIENT_ID_ANY (-1) for generic one + * + * Return: cl on success ERR_PTR on failure + */ +struct mei_cl *mei_cl_alloc_linked(struct mei_device *dev, int id) +{ + struct mei_cl *cl; + int ret; + + cl = mei_cl_allocate(dev); + if (!cl) { + ret = -ENOMEM; + goto err; + } + + ret = mei_cl_link(cl, id); + if (ret) + goto err; + + return cl; +err: + kfree(cl); + return ERR_PTR(ret); +} + + + +/** + * mei_cl_flow_ctrl_creds - checks flow_control credits for cl. + * + * @cl: private data of the file object + * + * Return: 1 if mei_flow_ctrl_creds >0, 0 - otherwise. + * -ENOENT if mei_cl is not present + * -EINVAL if single_recv_buf == 0 + */ +int mei_cl_flow_ctrl_creds(struct mei_cl *cl) +{ + struct mei_device *dev; + struct mei_me_client *me_cl; + int rets = 0; + + if (WARN_ON(!cl || !cl->dev)) + return -EINVAL; + + dev = cl->dev; + + if (cl->mei_flow_ctrl_creds > 0) + return 1; + + me_cl = mei_me_cl_by_uuid_id(dev, &cl->cl_uuid, cl->me_client_id); + if (!me_cl) { + cl_err(dev, cl, "no such me client %d\n", cl->me_client_id); + return -ENOENT; + } + + if (me_cl->mei_flow_ctrl_creds > 0) { + rets = 1; + if (WARN_ON(me_cl->props.single_recv_buf == 0)) + rets = -EINVAL; + } + mei_me_cl_put(me_cl); + return rets; +} + +/** + * mei_cl_flow_ctrl_reduce - reduces flow_control. + * + * @cl: private data of the file object + * + * Return: + * 0 on success + * -ENOENT when me client is not found + * -EINVAL when ctrl credits are <= 0 + */ +int mei_cl_flow_ctrl_reduce(struct mei_cl *cl) +{ + struct mei_device *dev; + struct mei_me_client *me_cl; + int rets; + + if (WARN_ON(!cl || !cl->dev)) + return -EINVAL; + + dev = cl->dev; + + me_cl = mei_me_cl_by_uuid_id(dev, &cl->cl_uuid, cl->me_client_id); + if (!me_cl) { + cl_err(dev, cl, "no such me client %d\n", cl->me_client_id); + return -ENOENT; + } + + if (me_cl->props.single_recv_buf) { + if (WARN_ON(me_cl->mei_flow_ctrl_creds <= 0)) { + rets = -EINVAL; + goto out; + } + me_cl->mei_flow_ctrl_creds--; + } else { + if (WARN_ON(cl->mei_flow_ctrl_creds <= 0)) { + rets = -EINVAL; + goto out; + } + cl->mei_flow_ctrl_creds--; + } + rets = 0; +out: + mei_me_cl_put(me_cl); + return rets; +} + +/** + * mei_cl_read_start - the start read client message function. 
+ * + * @cl: host client + * @length: number of bytes to read + * @fp: pointer to file structure + * + * Return: 0 on success, <0 on failure. + */ +int mei_cl_read_start(struct mei_cl *cl, size_t length, struct file *fp) +{ + struct mei_device *dev; + struct mei_cl_cb *cb; + struct mei_me_client *me_cl; + int rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + if (!mei_cl_is_connected(cl)) + return -ENODEV; + + /* HW currently supports only one pending read */ + if (!list_empty(&cl->rd_pending)) + return -EBUSY; + + me_cl = mei_me_cl_by_uuid_id(dev, &cl->cl_uuid, cl->me_client_id); + if (!me_cl) { + cl_err(dev, cl, "no such me client %d\n", cl->me_client_id); + return -ENOTTY; + } + /* always allocate at least client max message */ + length = max_t(size_t, length, me_cl->props.max_msg_length); + mei_me_cl_put(me_cl); + + rets = pm_runtime_get(dev->dev); + if (rets < 0 && rets != -EINPROGRESS) { + pm_runtime_put_noidle(dev->dev); + cl_err(dev, cl, "rpm: get failed %d\n", rets); + return rets; + } + + cb = mei_cl_alloc_cb(cl, length, MEI_FOP_READ, fp); + rets = cb ? 0 : -ENOMEM; + if (rets) + goto out; + + if (mei_hbuf_acquire(dev)) { + rets = mei_hbm_cl_flow_control_req(dev, cl); + if (rets < 0) + goto out; + + list_add_tail(&cb->list, &cl->rd_pending); + } else { + list_add_tail(&cb->list, &dev->ctrl_wr_list.list); + } + +out: + cl_dbg(dev, cl, "rpm: autosuspend\n"); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + + if (rets) + mei_io_cb_free(cb); + + return rets; +} + +/** + * mei_cl_irq_write - write a message to device + * from the interrupt thread context + * + * @cl: client + * @cb: callback block. + * @cmpl_list: complete list. + * + * Return: 0, OK; otherwise error. + */ +int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, + struct mei_cl_cb *cmpl_list) +{ + struct mei_device *dev; + struct mei_msg_data *buf; + struct mei_msg_hdr mei_hdr; + size_t len; + u32 msg_slots; + int slots; + int rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + buf = &cb->buf; + + rets = mei_cl_flow_ctrl_creds(cl); + if (rets < 0) + return rets; + + if (rets == 0) { + cl_dbg(dev, cl, "No flow control credentials: not sending.\n"); + return 0; + } + + slots = mei_hbuf_empty_slots(dev); + len = buf->size - cb->buf_idx; + msg_slots = mei_data2slots(len); + + mei_hdr.host_addr = cl->host_client_id; + mei_hdr.me_addr = cl->me_client_id; + mei_hdr.reserved = 0; + mei_hdr.internal = cb->internal; + + if (slots >= msg_slots) { + mei_hdr.length = len; + mei_hdr.msg_complete = 1; + /* Split the message only if we can write the whole host buffer */ + } else if (slots == dev->hbuf_depth) { + msg_slots = slots; + len = (slots * sizeof(u32)) - sizeof(struct mei_msg_hdr); + mei_hdr.length = len; + mei_hdr.msg_complete = 0; + } else { + /* wait for next time the host buffer is empty */ + return 0; + } + + cl_dbg(dev, cl, "buf: size = %d idx = %lu\n", + cb->buf.size, cb->buf_idx); + + rets = mei_write_message(dev, &mei_hdr, buf->data + cb->buf_idx); + if (rets) { + cl->status = rets; + list_move_tail(&cb->list, &cmpl_list->list); + return rets; + } + + cl->status = 0; + cl->writing_state = MEI_WRITING; + cb->buf_idx += mei_hdr.length; + cb->completed = mei_hdr.msg_complete == 1; + + if (mei_hdr.msg_complete) { + if (mei_cl_flow_ctrl_reduce(cl)) + return -EIO; + list_move_tail(&cb->list, &dev->write_waiting_list.list); + } + + return 0; +} + +/** + * mei_cl_write - submit a write cb to mei device + * assumes device_lock is 
locked + * + * @cl: host client + * @cb: write callback with filled data + * @blocking: block until completed + * + * Return: number of bytes sent on success, <0 on failure. + */ +int mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, bool blocking) +{ + struct mei_device *dev; + struct mei_msg_data *buf; + struct mei_msg_hdr mei_hdr; + int rets; + + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + if (WARN_ON(!cb)) + return -EINVAL; + + dev = cl->dev; + + + buf = &cb->buf; + + cl_dbg(dev, cl, "size=%d\n", buf->size); + + rets = pm_runtime_get(dev->dev); + if (rets < 0 && rets != -EINPROGRESS) { + pm_runtime_put_noidle(dev->dev); + cl_err(dev, cl, "rpm: get failed %d\n", rets); + return rets; + } + + cb->buf_idx = 0; + cl->writing_state = MEI_IDLE; + + mei_hdr.host_addr = cl->host_client_id; + mei_hdr.me_addr = cl->me_client_id; + mei_hdr.reserved = 0; + mei_hdr.msg_complete = 0; + mei_hdr.internal = cb->internal; + + rets = mei_cl_flow_ctrl_creds(cl); + if (rets < 0) + goto err; + + if (rets == 0) { + cl_dbg(dev, cl, "No flow control credentials: not sending.\n"); + rets = buf->size; + goto out; + } + if (!mei_hbuf_acquire(dev)) { + cl_dbg(dev, cl, "Cannot acquire the host buffer: not sending.\n"); + rets = buf->size; + goto out; + } + + /* Check for a maximum length */ + if (buf->size > mei_hbuf_max_len(dev)) { + mei_hdr.length = mei_hbuf_max_len(dev); + mei_hdr.msg_complete = 0; + } else { + mei_hdr.length = buf->size; + mei_hdr.msg_complete = 1; + } + + rets = mei_write_message(dev, &mei_hdr, buf->data); + if (rets) + goto err; + + cl->writing_state = MEI_WRITING; + cb->buf_idx = mei_hdr.length; + cb->completed = mei_hdr.msg_complete == 1; + +out: + if (mei_hdr.msg_complete) { + rets = mei_cl_flow_ctrl_reduce(cl); + if (rets < 0) + goto err; + + list_add_tail(&cb->list, &dev->write_waiting_list.list); + } else { + list_add_tail(&cb->list, &dev->write_list.list); + } + + + if (blocking && cl->writing_state != MEI_WRITE_COMPLETE) { + + mutex_unlock(&dev->device_lock); + rets = wait_event_interruptible(cl->tx_wait, + cl->writing_state == MEI_WRITE_COMPLETE); + mutex_lock(&dev->device_lock); + /* wait_event_interruptible returns -ERESTARTSYS */ + if (rets) { + if (signal_pending(current)) + rets = -EINTR; + goto err; + } + } + + rets = buf->size; +err: + cl_dbg(dev, cl, "rpm: autosuspend\n"); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + + return rets; +} + + +/** + * mei_cl_complete - processes completed operation for a client + * + * @cl: private data of the file object. + * @cb: callback block. 
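+ *
+ * Write completions free the cb and wake the writer; read completions
+ * are queued on cl->rd_completed and handed to a waiting reader or to
+ * the client bus rx handler.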
+ */ +void mei_cl_complete(struct mei_cl *cl, struct mei_cl_cb *cb) +{ + if (cb->fop_type == MEI_FOP_WRITE) { + mei_io_cb_free(cb); + cb = NULL; + cl->writing_state = MEI_WRITE_COMPLETE; + if (waitqueue_active(&cl->tx_wait)) + wake_up_interruptible(&cl->tx_wait); + + } else if (cb->fop_type == MEI_FOP_READ) { + list_add_tail(&cb->list, &cl->rd_completed); + if (waitqueue_active(&cl->rx_wait)) + wake_up_interruptible_all(&cl->rx_wait); + else + mei_cl_bus_rx_event(cl); + + } +} + + +/** + * mei_cl_all_disconnect - disconnect forcefully all connected clients + * + * @dev: mei device + */ + +void mei_cl_all_disconnect(struct mei_device *dev) +{ + struct mei_cl *cl; + + list_for_each_entry(cl, &dev->file_list, link) { + cl->state = MEI_FILE_DISCONNECTED; + cl->mei_flow_ctrl_creds = 0; + cl->timer_count = 0; + } +} + + +/** + * mei_cl_all_wakeup - wake up all readers and writers they can be interrupted + * + * @dev: mei device + */ +void mei_cl_all_wakeup(struct mei_device *dev) +{ + struct mei_cl *cl; + + list_for_each_entry(cl, &dev->file_list, link) { + if (waitqueue_active(&cl->rx_wait)) { + cl_dbg(dev, cl, "Waking up reading client!\n"); + wake_up_interruptible(&cl->rx_wait); + } + if (waitqueue_active(&cl->tx_wait)) { + cl_dbg(dev, cl, "Waking up writing client!\n"); + wake_up_interruptible(&cl->tx_wait); + } + } +} + +/** + * mei_cl_all_write_clear - clear all pending writes + * + * @dev: mei device + */ +void mei_cl_all_write_clear(struct mei_device *dev) +{ + mei_io_list_free(&dev->write_list, NULL); + mei_io_list_free(&dev->write_waiting_list, NULL); +} + + diff --git a/kernel/drivers/misc/mei/client.h b/kernel/drivers/misc/mei/client.h new file mode 100644 index 000000000..0a39e5d45 --- /dev/null +++ b/kernel/drivers/misc/mei/client.h @@ -0,0 +1,135 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2012, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ *
+ */
+
+#ifndef _MEI_CLIENT_H_
+#define _MEI_CLIENT_H_
+
+#include <linux/types.h>
+#include <linux/watchdog.h>
+#include <linux/poll.h>
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+
+/*
+ * reference counting base functions
+ */
+void mei_me_cl_init(struct mei_me_client *me_cl);
+void mei_me_cl_put(struct mei_me_client *me_cl);
+struct mei_me_client *mei_me_cl_get(struct mei_me_client *me_cl);
+
+void mei_me_cl_add(struct mei_device *dev, struct mei_me_client *me_cl);
+void mei_me_cl_del(struct mei_device *dev, struct mei_me_client *me_cl);
+
+struct mei_me_client *mei_me_cl_by_uuid(struct mei_device *dev,
+					const uuid_le *uuid);
+struct mei_me_client *mei_me_cl_by_id(struct mei_device *dev, u8 client_id);
+struct mei_me_client *mei_me_cl_by_uuid_id(struct mei_device *dev,
+					   const uuid_le *uuid, u8 client_id);
+void mei_me_cl_rm_by_uuid(struct mei_device *dev, const uuid_le *uuid);
+void mei_me_cl_rm_by_uuid_id(struct mei_device *dev,
+			     const uuid_le *uuid, u8 id);
+void mei_me_cl_rm_all(struct mei_device *dev);
+
+/*
+ * MEI IO Functions
+ */
+struct mei_cl_cb *mei_io_cb_init(struct mei_cl *cl, enum mei_cb_file_ops type,
+				 struct file *fp);
+void mei_io_cb_free(struct mei_cl_cb *priv_cb);
+int mei_io_cb_alloc_buf(struct mei_cl_cb *cb, size_t length);
+
+
+/**
+ * mei_io_list_init - Sets up a queue list.
+ *
+ * @list: an instance of the cl callback structure
+ */
+static inline void mei_io_list_init(struct mei_cl_cb *list)
+{
+	INIT_LIST_HEAD(&list->list);
+}
+void mei_io_list_flush(struct mei_cl_cb *list, struct mei_cl *cl);
+
+/*
+ * MEI Host Client Functions
+ */
+
+struct mei_cl *mei_cl_allocate(struct mei_device *dev);
+void mei_cl_init(struct mei_cl *cl, struct mei_device *dev);
+
+
+int mei_cl_link(struct mei_cl *cl, int id);
+int mei_cl_unlink(struct mei_cl *cl);
+
+struct mei_cl *mei_cl_alloc_linked(struct mei_device *dev, int id);
+
+struct mei_cl_cb *mei_cl_read_cb(const struct mei_cl *cl,
+				 const struct file *fp);
+void mei_cl_read_cb_flush(const struct mei_cl *cl, const struct file *fp);
+struct mei_cl_cb *mei_cl_alloc_cb(struct mei_cl *cl, size_t length,
+				  enum mei_cb_file_ops type, struct file *fp);
+int mei_cl_flush_queues(struct mei_cl *cl, const struct file *fp);
+
+int mei_cl_flow_ctrl_creds(struct mei_cl *cl);
+
+int mei_cl_flow_ctrl_reduce(struct mei_cl *cl);
+/*
+ * MEI input/output function prototypes
+ */
+
+/**
+ * mei_cl_is_connected - check if the host client is connected
+ *
+ * @cl: host client
+ *
+ * Return: true if the host client is connected
+ */
+static inline bool mei_cl_is_connected(struct mei_cl *cl)
+{
+	return cl->state == MEI_FILE_CONNECTED;
+}
+
+bool mei_cl_is_other_connecting(struct mei_cl *cl);
+int mei_cl_disconnect(struct mei_cl *cl);
+int mei_cl_connect(struct mei_cl *cl, struct file *file);
+int mei_cl_read_start(struct mei_cl *cl, size_t length, struct file *fp);
+int mei_cl_irq_read_msg(struct mei_cl *cl, struct mei_msg_hdr *hdr,
+			struct mei_cl_cb *cmpl_list);
+int mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, bool blocking);
+int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb,
+		     struct mei_cl_cb *cmpl_list);
+
+void mei_cl_complete(struct mei_cl *cl, struct mei_cl_cb *cb);
+
+void mei_host_client_init(struct work_struct *work);
+
+
+
+void mei_cl_all_disconnect(struct mei_device *dev);
+void mei_cl_all_wakeup(struct mei_device *dev);
+void mei_cl_all_write_clear(struct mei_device *dev);
+
+#define MEI_CL_FMT "cl:host=%02d me=%02d "
+#define MEI_CL_PRM(cl) (cl)->host_client_id, (cl)->me_client_id
+
+#define cl_dbg(dev, cl, format,
arg...) \ + dev_dbg((dev)->dev, MEI_CL_FMT format, MEI_CL_PRM(cl), ##arg) + +#define cl_err(dev, cl, format, arg...) \ + dev_err((dev)->dev, MEI_CL_FMT format, MEI_CL_PRM(cl), ##arg) + +#endif /* _MEI_CLIENT_H_ */ diff --git a/kernel/drivers/misc/mei/debugfs.c b/kernel/drivers/misc/mei/debugfs.c new file mode 100644 index 000000000..d9cd7e6ee --- /dev/null +++ b/kernel/drivers/misc/mei/debugfs.c @@ -0,0 +1,218 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2012-2013, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ +#include <linux/slab.h> +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/debugfs.h> + +#include <linux/mei.h> + +#include "mei_dev.h" +#include "client.h" +#include "hw.h" + +static ssize_t mei_dbgfs_read_meclients(struct file *fp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct mei_device *dev = fp->private_data; + struct mei_me_client *me_cl; + size_t bufsz = 1; + char *buf; + int i = 0; + int pos = 0; + int ret; + +#define HDR \ +" |id|fix| UUID |con|msg len|sb|refc|\n" + + down_read(&dev->me_clients_rwsem); + list_for_each_entry(me_cl, &dev->me_clients, list) + bufsz++; + + bufsz *= sizeof(HDR) + 1; + buf = kzalloc(bufsz, GFP_KERNEL); + if (!buf) { + up_read(&dev->me_clients_rwsem); + return -ENOMEM; + } + + pos += scnprintf(buf + pos, bufsz - pos, HDR); + + /* if the driver is not enabled the list won't be consistent */ + if (dev->dev_state != MEI_DEV_ENABLED) + goto out; + + list_for_each_entry(me_cl, &dev->me_clients, list) { + + if (mei_me_cl_get(me_cl)) { + pos += scnprintf(buf + pos, bufsz - pos, + "%2d|%2d|%3d|%pUl|%3d|%7d|%2d|%4d|\n", + i++, me_cl->client_id, + me_cl->props.fixed_address, + &me_cl->props.protocol_name, + me_cl->props.max_number_of_connections, + me_cl->props.max_msg_length, + me_cl->props.single_recv_buf, + atomic_read(&me_cl->refcnt.refcount)); + + mei_me_cl_put(me_cl); + } + } + +out: + up_read(&dev->me_clients_rwsem); + ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, pos); + kfree(buf); + return ret; +} + +static const struct file_operations mei_dbgfs_fops_meclients = { + .open = simple_open, + .read = mei_dbgfs_read_meclients, + .llseek = generic_file_llseek, +}; + +static ssize_t mei_dbgfs_read_active(struct file *fp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct mei_device *dev = fp->private_data; + struct mei_cl *cl; + const size_t bufsz = 1024; + char *buf; + int i = 0; + int pos = 0; + int ret; + + if (!dev) + return -ENODEV; + + buf = kzalloc(bufsz, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + pos += scnprintf(buf + pos, bufsz - pos, + " |me|host|state|rd|wr|\n"); + + mutex_lock(&dev->device_lock); + + /* if the driver is not enabled the list won't be consistent */ + if (dev->dev_state != MEI_DEV_ENABLED) + goto out; + + list_for_each_entry(cl, &dev->file_list, link) { + + pos += scnprintf(buf + pos, bufsz - pos, + "%2d|%2d|%4d|%5d|%2d|%2d|\n", + i, cl->me_client_id, cl->host_client_id, cl->state, + !list_empty(&cl->rd_completed), cl->writing_state); + i++; + } +out: + mutex_unlock(&dev->device_lock); + ret = 
simple_read_from_buffer(ubuf, cnt, ppos, buf, pos); + kfree(buf); + return ret; +} + +static const struct file_operations mei_dbgfs_fops_active = { + .open = simple_open, + .read = mei_dbgfs_read_active, + .llseek = generic_file_llseek, +}; + +static ssize_t mei_dbgfs_read_devstate(struct file *fp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct mei_device *dev = fp->private_data; + const size_t bufsz = 1024; + char *buf = kzalloc(bufsz, GFP_KERNEL); + int pos = 0; + int ret; + + if (!buf) + return -ENOMEM; + + pos += scnprintf(buf + pos, bufsz - pos, "dev: %s\n", + mei_dev_state_str(dev->dev_state)); + pos += scnprintf(buf + pos, bufsz - pos, "hbm: %s\n", + mei_hbm_state_str(dev->hbm_state)); + pos += scnprintf(buf + pos, bufsz - pos, "pg: %s, %s\n", + mei_pg_is_enabled(dev) ? "ENABLED" : "DISABLED", + mei_pg_state_str(mei_pg_state(dev))); + ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, pos); + kfree(buf); + return ret; +} +static const struct file_operations mei_dbgfs_fops_devstate = { + .open = simple_open, + .read = mei_dbgfs_read_devstate, + .llseek = generic_file_llseek, +}; + +/** + * mei_dbgfs_deregister - Remove the debugfs files and directories + * + * @dev: the mei device structure + */ +void mei_dbgfs_deregister(struct mei_device *dev) +{ + if (!dev->dbgfs_dir) + return; + debugfs_remove_recursive(dev->dbgfs_dir); + dev->dbgfs_dir = NULL; +} + +/** + * mei_dbgfs_register - Add the debugfs files + * + * @dev: the mei device structure + * @name: the mei device name + * + * Return: 0 on success, <0 on failure. + */ +int mei_dbgfs_register(struct mei_device *dev, const char *name) +{ + struct dentry *dir, *f; + + dir = debugfs_create_dir(name, NULL); + if (!dir) + return -ENOMEM; + + f = debugfs_create_file("meclients", S_IRUSR, dir, + dev, &mei_dbgfs_fops_meclients); + if (!f) { + dev_err(dev->dev, "meclients: registration failed\n"); + goto err; + } + f = debugfs_create_file("active", S_IRUSR, dir, + dev, &mei_dbgfs_fops_active); + if (!f) { + dev_err(dev->dev, "meclients: registration failed\n"); + goto err; + } + f = debugfs_create_file("devstate", S_IRUSR, dir, + dev, &mei_dbgfs_fops_devstate); + if (!f) { + dev_err(dev->dev, "devstate: registration failed\n"); + goto err; + } + dev->dbgfs_dir = dir; + return 0; +err: + mei_dbgfs_deregister(dev); + return -ENODEV; +} + diff --git a/kernel/drivers/misc/mei/hbm.c b/kernel/drivers/misc/mei/hbm.c new file mode 100644 index 000000000..58da92565 --- /dev/null +++ b/kernel/drivers/misc/mei/hbm.c @@ -0,0 +1,947 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2012, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+#include "hbm.h"
+#include "client.h"
+
+static const char *mei_hbm_status_str(enum mei_hbm_status status)
+{
+#define MEI_HBM_STATUS(status) case MEI_HBMS_##status: return #status
+	switch (status) {
+	MEI_HBM_STATUS(SUCCESS);
+	MEI_HBM_STATUS(CLIENT_NOT_FOUND);
+	MEI_HBM_STATUS(ALREADY_EXISTS);
+	MEI_HBM_STATUS(REJECTED);
+	MEI_HBM_STATUS(INVALID_PARAMETER);
+	MEI_HBM_STATUS(NOT_ALLOWED);
+	MEI_HBM_STATUS(ALREADY_STARTED);
+	MEI_HBM_STATUS(NOT_STARTED);
+	default: return "unknown";
+	}
+#undef MEI_HBM_STATUS
+}
+
+static const char *mei_cl_conn_status_str(enum mei_cl_connect_status status)
+{
+#define MEI_CL_CS(status) case MEI_CL_CONN_##status: return #status
+	switch (status) {
+	MEI_CL_CS(SUCCESS);
+	MEI_CL_CS(NOT_FOUND);
+	MEI_CL_CS(ALREADY_STARTED);
+	MEI_CL_CS(OUT_OF_RESOURCES);
+	MEI_CL_CS(MESSAGE_SMALL);
+	default: return "unknown";
+	}
+#undef MEI_CL_CS
+}
+
+const char *mei_hbm_state_str(enum mei_hbm_state state)
+{
+#define MEI_HBM_STATE(state) case MEI_HBM_##state: return #state
+	switch (state) {
+	MEI_HBM_STATE(IDLE);
+	MEI_HBM_STATE(STARTING);
+	MEI_HBM_STATE(STARTED);
+	MEI_HBM_STATE(ENUM_CLIENTS);
+	MEI_HBM_STATE(CLIENT_PROPERTIES);
+	MEI_HBM_STATE(STOPPED);
+	default:
+		return "unknown";
+	}
+#undef MEI_HBM_STATE
+}
+
+/**
+ * mei_cl_conn_status_to_errno - convert client connect response
+ * status to error code
+ *
+ * @status: client connect response status
+ *
+ * Return: corresponding error code
+ */
+static int mei_cl_conn_status_to_errno(enum mei_cl_connect_status status)
+{
+	switch (status) {
+	case MEI_CL_CONN_SUCCESS:          return 0;
+	case MEI_CL_CONN_NOT_FOUND:        return -ENOTTY;
+	case MEI_CL_CONN_ALREADY_STARTED:  return -EBUSY;
+	case MEI_CL_CONN_OUT_OF_RESOURCES: return -EBUSY;
+	case MEI_CL_CONN_MESSAGE_SMALL:    return -EINVAL;
+	default:                           return -EINVAL;
+	}
+}
+
+/**
+ * mei_hbm_idle - set hbm to idle state
+ *
+ * @dev: the device structure
+ */
+void mei_hbm_idle(struct mei_device *dev)
+{
+	dev->init_clients_timer = 0;
+	dev->hbm_state = MEI_HBM_IDLE;
+}
+
+/**
+ * mei_hbm_reset - reset hbm counters and bookkeeping data structures
+ *
+ * @dev: the device structure
+ */
+void mei_hbm_reset(struct mei_device *dev)
+{
+	dev->me_client_index = 0;
+
+	mei_me_cl_rm_all(dev);
+
+	mei_hbm_idle(dev);
+}
+
+/**
+ * mei_hbm_hdr - construct hbm header
+ *
+ * @hdr: hbm header
+ * @length: payload length
+ */
+
+static inline void mei_hbm_hdr(struct mei_msg_hdr *hdr, size_t length)
+{
+	hdr->host_addr = 0;
+	hdr->me_addr = 0;
+	hdr->length = length;
+	hdr->msg_complete = 1;
+	hdr->reserved = 0;
+}
+
+/**
+ * mei_hbm_cl_hdr - construct client hbm header
+ *
+ * @cl: client
+ * @hbm_cmd: host bus message command
+ * @buf: buffer for cl header
+ * @len: buffer length
+ */
+static inline
+void mei_hbm_cl_hdr(struct mei_cl *cl, u8 hbm_cmd, void *buf, size_t len)
+{
+	struct mei_hbm_cl_cmd *cmd = buf;
+
+	memset(cmd, 0, len);
+
+	cmd->hbm_cmd = hbm_cmd;
+	cmd->host_addr = cl->host_client_id;
+	cmd->me_addr = cl->me_client_id;
+}
+
+/**
+ * mei_hbm_cl_write - write simple hbm client message
+ *
+ * @dev: the device structure
+ * @cl: client
+ * @hbm_cmd: host bus message command
+ * @len: buffer length
+ *
+ * Return: 0 on success, <0 on failure.
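+ *
+ * For a concrete picture, a flow-control request ends up on the wire as
+ * the following sequence (an illustrative expansion of this helper, using
+ * only names defined in this file, not additional code):
+ *
+ *	mei_hbm_hdr(&dev->wr_msg.hdr, sizeof(struct hbm_flow_control));
+ *	mei_hbm_cl_hdr(cl, MEI_FLOW_CONTROL_CMD, dev->wr_msg.data,
+ *		       sizeof(struct hbm_flow_control));
+ *	mei_write_message(dev, &dev->wr_msg.hdr, dev->wr_msg.data);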
+ */ +static inline +int mei_hbm_cl_write(struct mei_device *dev, + struct mei_cl *cl, u8 hbm_cmd, size_t len) +{ + struct mei_msg_hdr *mei_hdr = &dev->wr_msg.hdr; + + mei_hbm_hdr(mei_hdr, len); + mei_hbm_cl_hdr(cl, hbm_cmd, dev->wr_msg.data, len); + + return mei_write_message(dev, mei_hdr, dev->wr_msg.data); +} + +/** + * mei_hbm_cl_addr_equal - check if the client's and + * the message address match + * + * @cl: client + * @cmd: hbm client message + * + * Return: true if addresses are the same + */ +static inline +bool mei_hbm_cl_addr_equal(struct mei_cl *cl, struct mei_hbm_cl_cmd *cmd) +{ + return cl->host_client_id == cmd->host_addr && + cl->me_client_id == cmd->me_addr; +} + +/** + * mei_hbm_cl_find_by_cmd - find recipient client + * + * @dev: the device structure + * @buf: a buffer with hbm cl command + * + * Return: the recipient client or NULL if not found + */ +static inline +struct mei_cl *mei_hbm_cl_find_by_cmd(struct mei_device *dev, void *buf) +{ + struct mei_hbm_cl_cmd *cmd = (struct mei_hbm_cl_cmd *)buf; + struct mei_cl *cl; + + list_for_each_entry(cl, &dev->file_list, link) + if (mei_hbm_cl_addr_equal(cl, cmd)) + return cl; + return NULL; +} + + +/** + * mei_hbm_start_wait - wait for start response message. + * + * @dev: the device structure + * + * Return: 0 on success and < 0 on failure + */ +int mei_hbm_start_wait(struct mei_device *dev) +{ + int ret; + + if (dev->hbm_state > MEI_HBM_STARTING) + return 0; + + mutex_unlock(&dev->device_lock); + ret = wait_event_timeout(dev->wait_hbm_start, + dev->hbm_state != MEI_HBM_STARTING, + mei_secs_to_jiffies(MEI_HBM_TIMEOUT)); + mutex_lock(&dev->device_lock); + + if (ret == 0 && (dev->hbm_state <= MEI_HBM_STARTING)) { + dev->hbm_state = MEI_HBM_IDLE; + dev_err(dev->dev, "waiting for mei start failed\n"); + return -ETIME; + } + return 0; +} + +/** + * mei_hbm_start_req - sends start request message. + * + * @dev: the device structure + * + * Return: 0 on success and < 0 on failure + */ +int mei_hbm_start_req(struct mei_device *dev) +{ + struct mei_msg_hdr *mei_hdr = &dev->wr_msg.hdr; + struct hbm_host_version_request *start_req; + const size_t len = sizeof(struct hbm_host_version_request); + int ret; + + mei_hbm_reset(dev); + + mei_hbm_hdr(mei_hdr, len); + + /* host start message */ + start_req = (struct hbm_host_version_request *)dev->wr_msg.data; + memset(start_req, 0, len); + start_req->hbm_cmd = HOST_START_REQ_CMD; + start_req->host_version.major_version = HBM_MAJOR_VERSION; + start_req->host_version.minor_version = HBM_MINOR_VERSION; + + dev->hbm_state = MEI_HBM_IDLE; + ret = mei_write_message(dev, mei_hdr, dev->wr_msg.data); + if (ret) { + dev_err(dev->dev, "version message write failed: ret = %d\n", + ret); + return ret; + } + + dev->hbm_state = MEI_HBM_STARTING; + dev->init_clients_timer = MEI_CLIENTS_INIT_TIMEOUT; + return 0; +} + +/* + * mei_hbm_enum_clients_req - sends enumeration client request message. 
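+ * (The firmware answers with HOST_ENUM_RES_CMD carrying a bitmap of valid
+ * ME client addresses; mei_hbm_dispatch() below copies that bitmap into
+ * dev->me_clients_map and then walks it with mei_hbm_prop_req().)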
+ * + * @dev: the device structure + * + * Return: 0 on success and < 0 on failure + */ +static int mei_hbm_enum_clients_req(struct mei_device *dev) +{ + struct mei_msg_hdr *mei_hdr = &dev->wr_msg.hdr; + struct hbm_host_enum_request *enum_req; + const size_t len = sizeof(struct hbm_host_enum_request); + int ret; + + /* enumerate clients */ + mei_hbm_hdr(mei_hdr, len); + + enum_req = (struct hbm_host_enum_request *)dev->wr_msg.data; + memset(enum_req, 0, len); + enum_req->hbm_cmd = HOST_ENUM_REQ_CMD; + + ret = mei_write_message(dev, mei_hdr, dev->wr_msg.data); + if (ret) { + dev_err(dev->dev, "enumeration request write failed: ret = %d.\n", + ret); + return ret; + } + dev->hbm_state = MEI_HBM_ENUM_CLIENTS; + dev->init_clients_timer = MEI_CLIENTS_INIT_TIMEOUT; + return 0; +} + +/* + * mei_hbm_me_cl_add - add new me client to the list + * + * @dev: the device structure + * @res: hbm property response + * + * Return: 0 on success and -ENOMEM on allocation failure + */ + +static int mei_hbm_me_cl_add(struct mei_device *dev, + struct hbm_props_response *res) +{ + struct mei_me_client *me_cl; + const uuid_le *uuid = &res->client_properties.protocol_name; + + mei_me_cl_rm_by_uuid(dev, uuid); + + me_cl = kzalloc(sizeof(struct mei_me_client), GFP_KERNEL); + if (!me_cl) + return -ENOMEM; + + mei_me_cl_init(me_cl); + + me_cl->props = res->client_properties; + me_cl->client_id = res->me_addr; + me_cl->mei_flow_ctrl_creds = 0; + + mei_me_cl_add(dev, me_cl); + + return 0; +} + +/** + * mei_hbm_prop_req - request property for a single client + * + * @dev: the device structure + * + * Return: 0 on success and < 0 on failure + */ + +static int mei_hbm_prop_req(struct mei_device *dev) +{ + + struct mei_msg_hdr *mei_hdr = &dev->wr_msg.hdr; + struct hbm_props_request *prop_req; + const size_t len = sizeof(struct hbm_props_request); + unsigned long next_client_index; + int ret; + + next_client_index = find_next_bit(dev->me_clients_map, MEI_CLIENTS_MAX, + dev->me_client_index); + + /* We got all client properties */ + if (next_client_index == MEI_CLIENTS_MAX) { + dev->hbm_state = MEI_HBM_STARTED; + schedule_work(&dev->init_work); + + return 0; + } + + mei_hbm_hdr(mei_hdr, len); + prop_req = (struct hbm_props_request *)dev->wr_msg.data; + + memset(prop_req, 0, sizeof(struct hbm_props_request)); + + prop_req->hbm_cmd = HOST_CLIENT_PROPERTIES_REQ_CMD; + prop_req->me_addr = next_client_index; + + ret = mei_write_message(dev, mei_hdr, dev->wr_msg.data); + if (ret) { + dev_err(dev->dev, "properties request write failed: ret = %d\n", + ret); + return ret; + } + + dev->init_clients_timer = MEI_CLIENTS_INIT_TIMEOUT; + dev->me_client_index = next_client_index; + + return 0; +} + +/* + * mei_hbm_pg - sends pg command + * + * @dev: the device structure + * @pg_cmd: the pg command code + * + * Return: -EIO on write failure + * -EOPNOTSUPP if the operation is not supported by the protocol + */ +int mei_hbm_pg(struct mei_device *dev, u8 pg_cmd) +{ + struct mei_msg_hdr *mei_hdr = &dev->wr_msg.hdr; + struct hbm_power_gate *req; + const size_t len = sizeof(struct hbm_power_gate); + int ret; + + if (!dev->hbm_f_pg_supported) + return -EOPNOTSUPP; + + mei_hbm_hdr(mei_hdr, len); + + req = (struct hbm_power_gate *)dev->wr_msg.data; + memset(req, 0, len); + req->hbm_cmd = pg_cmd; + + ret = mei_write_message(dev, mei_hdr, dev->wr_msg.data); + if (ret) + dev_err(dev->dev, "power gate command write failed.\n"); + return ret; +} +EXPORT_SYMBOL_GPL(mei_hbm_pg); + +/** + * mei_hbm_stop_req - send stop request message + * + * @dev: mei 
device + * + * Return: -EIO on write failure + */ +static int mei_hbm_stop_req(struct mei_device *dev) +{ + struct mei_msg_hdr *mei_hdr = &dev->wr_msg.hdr; + struct hbm_host_stop_request *req = + (struct hbm_host_stop_request *)dev->wr_msg.data; + const size_t len = sizeof(struct hbm_host_stop_request); + + mei_hbm_hdr(mei_hdr, len); + + memset(req, 0, len); + req->hbm_cmd = HOST_STOP_REQ_CMD; + req->reason = DRIVER_STOP_REQUEST; + + return mei_write_message(dev, mei_hdr, dev->wr_msg.data); +} + +/** + * mei_hbm_cl_flow_control_req - sends flow control request. + * + * @dev: the device structure + * @cl: client info + * + * Return: -EIO on write failure + */ +int mei_hbm_cl_flow_control_req(struct mei_device *dev, struct mei_cl *cl) +{ + const size_t len = sizeof(struct hbm_flow_control); + + cl_dbg(dev, cl, "sending flow control\n"); + return mei_hbm_cl_write(dev, cl, MEI_FLOW_CONTROL_CMD, len); +} + +/** + * mei_hbm_add_single_flow_creds - adds single buffer credentials. + * + * @dev: the device structure + * @flow: flow control. + * + * Return: 0 on success, < 0 otherwise + */ +static int mei_hbm_add_single_flow_creds(struct mei_device *dev, + struct hbm_flow_control *flow) +{ + struct mei_me_client *me_cl; + int rets; + + me_cl = mei_me_cl_by_id(dev, flow->me_addr); + if (!me_cl) { + dev_err(dev->dev, "no such me client %d\n", + flow->me_addr); + return -ENOENT; + } + + if (WARN_ON(me_cl->props.single_recv_buf == 0)) { + rets = -EINVAL; + goto out; + } + + me_cl->mei_flow_ctrl_creds++; + dev_dbg(dev->dev, "recv flow ctrl msg ME %d (single) creds = %d.\n", + flow->me_addr, me_cl->mei_flow_ctrl_creds); + + rets = 0; +out: + mei_me_cl_put(me_cl); + return rets; +} + +/** + * mei_hbm_cl_flow_control_res - flow control response from me + * + * @dev: the device structure + * @flow_control: flow control response bus message + */ +static void mei_hbm_cl_flow_control_res(struct mei_device *dev, + struct hbm_flow_control *flow_control) +{ + struct mei_cl *cl; + + if (!flow_control->host_addr) { + /* single receive buffer */ + mei_hbm_add_single_flow_creds(dev, flow_control); + return; + } + + cl = mei_hbm_cl_find_by_cmd(dev, flow_control); + if (cl) { + cl->mei_flow_ctrl_creds++; + cl_dbg(dev, cl, "flow control creds = %d.\n", + cl->mei_flow_ctrl_creds); + } +} + + +/** + * mei_hbm_cl_disconnect_req - sends disconnect message to fw. 
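+ * (One half of a request/response round trip: the firmware answers with
+ * CLIENT_DISCONNECT_RES_CMD, which mei_hbm_dispatch() routes through
+ * mei_hbm_cl_res() to update cl->state and wake the waiter on cl->wait.)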
+ *
+ * @dev: the device structure
+ * @cl: a client to disconnect from
+ *
+ * Return: -EIO on write failure
+ */
+int mei_hbm_cl_disconnect_req(struct mei_device *dev, struct mei_cl *cl)
+{
+	const size_t len = sizeof(struct hbm_client_connect_request);
+
+	return mei_hbm_cl_write(dev, cl, CLIENT_DISCONNECT_REQ_CMD, len);
+}
+
+/**
+ * mei_hbm_cl_disconnect_rsp - sends disconnect response to the FW
+ *
+ * @dev: the device structure
+ * @cl: a client to disconnect from
+ *
+ * Return: -EIO on write failure
+ */
+int mei_hbm_cl_disconnect_rsp(struct mei_device *dev, struct mei_cl *cl)
+{
+	const size_t len = sizeof(struct hbm_client_connect_response);
+
+	return mei_hbm_cl_write(dev, cl, CLIENT_DISCONNECT_RES_CMD, len);
+}
+
+/**
+ * mei_hbm_cl_disconnect_res - update the client state according
+ * to the disconnect response
+ *
+ * @dev: the device structure
+ * @cl: mei host client
+ * @cmd: disconnect client response host bus message
+ */
+static void mei_hbm_cl_disconnect_res(struct mei_device *dev, struct mei_cl *cl,
+				      struct mei_hbm_cl_cmd *cmd)
+{
+	struct hbm_client_connect_response *rs =
+		(struct hbm_client_connect_response *)cmd;
+
+	cl_dbg(dev, cl, "hbm: disconnect response status=%d\n", rs->status);
+
+	if (rs->status == MEI_CL_DISCONN_SUCCESS)
+		cl->state = MEI_FILE_DISCONNECTED;
+	cl->status = 0;
+}
+
+/**
+ * mei_hbm_cl_connect_req - send connection request to specific me client
+ *
+ * @dev: the device structure
+ * @cl: a client to connect to
+ *
+ * Return: -EIO on write failure
+ */
+int mei_hbm_cl_connect_req(struct mei_device *dev, struct mei_cl *cl)
+{
+	const size_t len = sizeof(struct hbm_client_connect_request);
+
+	return mei_hbm_cl_write(dev, cl, CLIENT_CONNECT_REQ_CMD, len);
+}
+
+/**
+ * mei_hbm_cl_connect_res - update the client state according
+ * to the connection response
+ *
+ * @dev: the device structure
+ * @cl: mei host client
+ * @cmd: connect client response host bus message
+ */
+static void mei_hbm_cl_connect_res(struct mei_device *dev, struct mei_cl *cl,
+				   struct mei_hbm_cl_cmd *cmd)
+{
+	struct hbm_client_connect_response *rs =
+		(struct hbm_client_connect_response *)cmd;
+
+	cl_dbg(dev, cl, "hbm: connect response status=%s\n",
+			mei_cl_conn_status_str(rs->status));
+
+	if (rs->status == MEI_CL_CONN_SUCCESS)
+		cl->state = MEI_FILE_CONNECTED;
+	else
+		cl->state = MEI_FILE_DISCONNECTED;
+	cl->status = mei_cl_conn_status_to_errno(rs->status);
+}
+
+/**
+ * mei_hbm_cl_res - process hbm response received on behalf
+ * of a client
+ *
+ * @dev: the device structure
+ * @rs: hbm client message
+ * @fop_type: file operation type
+ */
+static void mei_hbm_cl_res(struct mei_device *dev,
+			   struct mei_hbm_cl_cmd *rs,
+			   enum mei_cb_file_ops fop_type)
+{
+	struct mei_cl *cl;
+	struct mei_cl_cb *cb, *next;
+
+	cl = NULL;
+	list_for_each_entry_safe(cb, next, &dev->ctrl_rd_list.list, list) {
+
+		cl = cb->cl;
+
+		if (cb->fop_type != fop_type)
+			continue;
+
+		if (mei_hbm_cl_addr_equal(cl, rs)) {
+			list_del_init(&cb->list);
+			break;
+		}
+	}
+
+	if (!cl)
+		return;
+
+	switch (fop_type) {
+	case MEI_FOP_CONNECT:
+		mei_hbm_cl_connect_res(dev, cl, rs);
+		break;
+	case MEI_FOP_DISCONNECT:
+		mei_hbm_cl_disconnect_res(dev, cl, rs);
+		break;
+	default:
+		return;
+	}
+
+	cl->timer_count = 0;
+	wake_up(&cl->wait);
+}
+
+
+/**
+ * mei_hbm_fw_disconnect_req - disconnect request initiated by ME firmware
+ *  host sends disconnect response
+ *
+ * @dev: the device structure.
+ * @disconnect_req: disconnect request bus message from the me + * + * Return: -ENOMEM on allocation failure + */ +static int mei_hbm_fw_disconnect_req(struct mei_device *dev, + struct hbm_client_connect_request *disconnect_req) +{ + struct mei_cl *cl; + struct mei_cl_cb *cb; + + cl = mei_hbm_cl_find_by_cmd(dev, disconnect_req); + if (cl) { + cl_dbg(dev, cl, "disconnect request received\n"); + cl->state = MEI_FILE_DISCONNECTED; + cl->timer_count = 0; + + cb = mei_io_cb_init(cl, MEI_FOP_DISCONNECT_RSP, NULL); + if (!cb) + return -ENOMEM; + cl_dbg(dev, cl, "add disconnect response as first\n"); + list_add(&cb->list, &dev->ctrl_wr_list.list); + } + return 0; +} + +/** + * mei_hbm_config_features - check what hbm features and commands + * are supported by the fw + * + * @dev: the device structure + */ +static void mei_hbm_config_features(struct mei_device *dev) +{ + /* Power Gating Isolation Support */ + dev->hbm_f_pg_supported = 0; + if (dev->version.major_version > HBM_MAJOR_VERSION_PGI) + dev->hbm_f_pg_supported = 1; + + if (dev->version.major_version == HBM_MAJOR_VERSION_PGI && + dev->version.minor_version >= HBM_MINOR_VERSION_PGI) + dev->hbm_f_pg_supported = 1; +} + +/** + * mei_hbm_version_is_supported - checks whether the driver can + * support the hbm version of the device + * + * @dev: the device structure + * Return: true if driver can support hbm version of the device + */ +bool mei_hbm_version_is_supported(struct mei_device *dev) +{ + return (dev->version.major_version < HBM_MAJOR_VERSION) || + (dev->version.major_version == HBM_MAJOR_VERSION && + dev->version.minor_version <= HBM_MINOR_VERSION); +} + +/** + * mei_hbm_dispatch - bottom half read routine after ISR to + * handle the read bus message cmd processing. + * + * @dev: the device structure + * @hdr: header of bus message + * + * Return: 0 on success and < 0 on failure + */ +int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr) +{ + struct mei_bus_message *mei_msg; + struct hbm_host_version_response *version_res; + struct hbm_props_response *props_res; + struct hbm_host_enum_response *enum_res; + + struct mei_hbm_cl_cmd *cl_cmd; + struct hbm_client_connect_request *disconnect_req; + struct hbm_flow_control *flow_control; + + /* read the message to our buffer */ + BUG_ON(hdr->length >= sizeof(dev->rd_msg_buf)); + mei_read_slots(dev, dev->rd_msg_buf, hdr->length); + mei_msg = (struct mei_bus_message *)dev->rd_msg_buf; + cl_cmd = (struct mei_hbm_cl_cmd *)mei_msg; + + /* ignore spurious message and prevent reset nesting + * hbm is put to idle during system reset + */ + if (dev->hbm_state == MEI_HBM_IDLE) { + dev_dbg(dev->dev, "hbm: state is idle ignore spurious messages\n"); + return 0; + } + + switch (mei_msg->hbm_cmd) { + case HOST_START_RES_CMD: + dev_dbg(dev->dev, "hbm: start: response message received.\n"); + + dev->init_clients_timer = 0; + + version_res = (struct hbm_host_version_response *)mei_msg; + + dev_dbg(dev->dev, "HBM VERSION: DRIVER=%02d:%02d DEVICE=%02d:%02d\n", + HBM_MAJOR_VERSION, HBM_MINOR_VERSION, + version_res->me_max_version.major_version, + version_res->me_max_version.minor_version); + + if (version_res->host_version_supported) { + dev->version.major_version = HBM_MAJOR_VERSION; + dev->version.minor_version = HBM_MINOR_VERSION; + } else { + dev->version.major_version = + version_res->me_max_version.major_version; + dev->version.minor_version = + version_res->me_max_version.minor_version; + } + + if (!mei_hbm_version_is_supported(dev)) { + dev_warn(dev->dev, "hbm: start: version mismatch - 
stopping the driver.\n"); + + dev->hbm_state = MEI_HBM_STOPPED; + if (mei_hbm_stop_req(dev)) { + dev_err(dev->dev, "hbm: start: failed to send stop request\n"); + return -EIO; + } + break; + } + + mei_hbm_config_features(dev); + + if (dev->dev_state != MEI_DEV_INIT_CLIENTS || + dev->hbm_state != MEI_HBM_STARTING) { + dev_err(dev->dev, "hbm: start: state mismatch, [%d, %d]\n", + dev->dev_state, dev->hbm_state); + return -EPROTO; + } + + if (mei_hbm_enum_clients_req(dev)) { + dev_err(dev->dev, "hbm: start: failed to send enumeration request\n"); + return -EIO; + } + + wake_up(&dev->wait_hbm_start); + break; + + case CLIENT_CONNECT_RES_CMD: + dev_dbg(dev->dev, "hbm: client connect response: message received.\n"); + mei_hbm_cl_res(dev, cl_cmd, MEI_FOP_CONNECT); + break; + + case CLIENT_DISCONNECT_RES_CMD: + dev_dbg(dev->dev, "hbm: client disconnect response: message received.\n"); + mei_hbm_cl_res(dev, cl_cmd, MEI_FOP_DISCONNECT); + break; + + case MEI_FLOW_CONTROL_CMD: + dev_dbg(dev->dev, "hbm: client flow control response: message received.\n"); + + flow_control = (struct hbm_flow_control *) mei_msg; + mei_hbm_cl_flow_control_res(dev, flow_control); + break; + + case MEI_PG_ISOLATION_ENTRY_RES_CMD: + dev_dbg(dev->dev, "power gate isolation entry response received\n"); + dev->pg_event = MEI_PG_EVENT_RECEIVED; + if (waitqueue_active(&dev->wait_pg)) + wake_up(&dev->wait_pg); + break; + + case MEI_PG_ISOLATION_EXIT_REQ_CMD: + dev_dbg(dev->dev, "power gate isolation exit request received\n"); + dev->pg_event = MEI_PG_EVENT_RECEIVED; + if (waitqueue_active(&dev->wait_pg)) + wake_up(&dev->wait_pg); + else + /* + * If the driver is not waiting on this then + * this is HW initiated exit from PG. + * Start runtime pm resume sequence to exit from PG. + */ + pm_request_resume(dev->dev); + break; + + case HOST_CLIENT_PROPERTIES_RES_CMD: + dev_dbg(dev->dev, "hbm: properties response: message received.\n"); + + dev->init_clients_timer = 0; + + if (dev->dev_state != MEI_DEV_INIT_CLIENTS || + dev->hbm_state != MEI_HBM_CLIENT_PROPERTIES) { + dev_err(dev->dev, "hbm: properties response: state mismatch, [%d, %d]\n", + dev->dev_state, dev->hbm_state); + return -EPROTO; + } + + props_res = (struct hbm_props_response *)mei_msg; + + if (props_res->status) { + dev_err(dev->dev, "hbm: properties response: wrong status = %d %s\n", + props_res->status, + mei_hbm_status_str(props_res->status)); + return -EPROTO; + } + + mei_hbm_me_cl_add(dev, props_res); + + dev->me_client_index++; + + /* request property for the next client */ + if (mei_hbm_prop_req(dev)) + return -EIO; + + break; + + case HOST_ENUM_RES_CMD: + dev_dbg(dev->dev, "hbm: enumeration response: message received\n"); + + dev->init_clients_timer = 0; + + enum_res = (struct hbm_host_enum_response *) mei_msg; + BUILD_BUG_ON(sizeof(dev->me_clients_map) + < sizeof(enum_res->valid_addresses)); + memcpy(dev->me_clients_map, enum_res->valid_addresses, + sizeof(enum_res->valid_addresses)); + + if (dev->dev_state != MEI_DEV_INIT_CLIENTS || + dev->hbm_state != MEI_HBM_ENUM_CLIENTS) { + dev_err(dev->dev, "hbm: enumeration response: state mismatch, [%d, %d]\n", + dev->dev_state, dev->hbm_state); + return -EPROTO; + } + + dev->hbm_state = MEI_HBM_CLIENT_PROPERTIES; + + /* first property request */ + if (mei_hbm_prop_req(dev)) + return -EIO; + + break; + + case HOST_STOP_RES_CMD: + dev_dbg(dev->dev, "hbm: stop response: message received\n"); + + dev->init_clients_timer = 0; + + if (dev->hbm_state != MEI_HBM_STOPPED) { + dev_err(dev->dev, "hbm: stop response: state mismatch, 
[%d, %d]\n", + dev->dev_state, dev->hbm_state); + return -EPROTO; + } + + dev->dev_state = MEI_DEV_POWER_DOWN; + dev_info(dev->dev, "hbm: stop response: resetting.\n"); + /* force the reset */ + return -EPROTO; + break; + + case CLIENT_DISCONNECT_REQ_CMD: + dev_dbg(dev->dev, "hbm: disconnect request: message received\n"); + + disconnect_req = (struct hbm_client_connect_request *)mei_msg; + mei_hbm_fw_disconnect_req(dev, disconnect_req); + break; + + case ME_STOP_REQ_CMD: + dev_dbg(dev->dev, "hbm: stop request: message received\n"); + dev->hbm_state = MEI_HBM_STOPPED; + if (mei_hbm_stop_req(dev)) { + dev_err(dev->dev, "hbm: stop request: failed to send stop request\n"); + return -EIO; + } + break; + default: + BUG(); + break; + + } + return 0; +} + diff --git a/kernel/drivers/misc/mei/hbm.h b/kernel/drivers/misc/mei/hbm.h new file mode 100644 index 000000000..2544db7d1 --- /dev/null +++ b/kernel/drivers/misc/mei/hbm.h @@ -0,0 +1,59 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2012, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef _MEI_HBM_H_ +#define _MEI_HBM_H_ + +struct mei_device; +struct mei_msg_hdr; +struct mei_cl; + +/** + * enum mei_hbm_state - host bus message protocol state + * + * @MEI_HBM_IDLE : protocol not started + * @MEI_HBM_STARTING : start request message was sent + * @MEI_HBM_ENUM_CLIENTS : enumeration request was sent + * @MEI_HBM_CLIENT_PROPERTIES : acquiring clients properties + * @MEI_HBM_STARTED : enumeration was completed + * @MEI_HBM_STOPPED : stopping exchange + */ +enum mei_hbm_state { + MEI_HBM_IDLE = 0, + MEI_HBM_STARTING, + MEI_HBM_ENUM_CLIENTS, + MEI_HBM_CLIENT_PROPERTIES, + MEI_HBM_STARTED, + MEI_HBM_STOPPED, +}; + +const char *mei_hbm_state_str(enum mei_hbm_state state); + +int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr); + +void mei_hbm_idle(struct mei_device *dev); +void mei_hbm_reset(struct mei_device *dev); +int mei_hbm_start_req(struct mei_device *dev); +int mei_hbm_start_wait(struct mei_device *dev); +int mei_hbm_cl_flow_control_req(struct mei_device *dev, struct mei_cl *cl); +int mei_hbm_cl_disconnect_req(struct mei_device *dev, struct mei_cl *cl); +int mei_hbm_cl_disconnect_rsp(struct mei_device *dev, struct mei_cl *cl); +int mei_hbm_cl_connect_req(struct mei_device *dev, struct mei_cl *cl); +bool mei_hbm_version_is_supported(struct mei_device *dev); +int mei_hbm_pg(struct mei_device *dev, u8 pg_cmd); + +#endif /* _MEI_HBM_H_ */ + diff --git a/kernel/drivers/misc/mei/hw-me-regs.h b/kernel/drivers/misc/mei/hw-me-regs.h new file mode 100644 index 000000000..9eb7ed70a --- /dev/null +++ b/kernel/drivers/misc/mei/hw-me-regs.h @@ -0,0 +1,190 @@ +/****************************************************************************** + * Intel Management Engine Interface (Intel MEI) Linux driver + * Intel MEI Interface Header + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2003 - 2012 Intel Corporation. 
All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, + * USA + * + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation. + * linux-mei@linux.intel.com + * http://www.intel.com + * + * BSD LICENSE + * + * Copyright(c) 2003 - 2012 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ *****************************************************************************/
+#ifndef _MEI_HW_MEI_REGS_H_
+#define _MEI_HW_MEI_REGS_H_
+
+/*
+ * MEI device IDs
+ */
+#define MEI_DEV_ID_82946GZ    0x2974  /* 82946GZ/GL */
+#define MEI_DEV_ID_82G35      0x2984  /* 82G35 Express */
+#define MEI_DEV_ID_82Q965     0x2994  /* 82Q963/Q965 */
+#define MEI_DEV_ID_82G965     0x29A4  /* 82P965/G965 */
+
+#define MEI_DEV_ID_82GM965    0x2A04  /* Mobile PM965/GM965 */
+#define MEI_DEV_ID_82GME965   0x2A14  /* Mobile GME965/GLE960 */
+
+#define MEI_DEV_ID_ICH9_82Q35 0x29B4  /* 82Q35 Express */
+#define MEI_DEV_ID_ICH9_82G33 0x29C4  /* 82G33/G31/P35/P31 Express */
+#define MEI_DEV_ID_ICH9_82Q33 0x29D4  /* 82Q33 Express */
+#define MEI_DEV_ID_ICH9_82X38 0x29E4  /* 82X38/X48 Express */
+#define MEI_DEV_ID_ICH9_3200  0x29F4  /* 3200/3210 Server */
+
+#define MEI_DEV_ID_ICH9_6     0x28B4  /* Bearlake */
+#define MEI_DEV_ID_ICH9_7     0x28C4  /* Bearlake */
+#define MEI_DEV_ID_ICH9_8     0x28D4  /* Bearlake */
+#define MEI_DEV_ID_ICH9_9     0x28E4  /* Bearlake */
+#define MEI_DEV_ID_ICH9_10    0x28F4  /* Bearlake */
+
+#define MEI_DEV_ID_ICH9M_1    0x2A44  /* Cantiga */
+#define MEI_DEV_ID_ICH9M_2    0x2A54  /* Cantiga */
+#define MEI_DEV_ID_ICH9M_3    0x2A64  /* Cantiga */
+#define MEI_DEV_ID_ICH9M_4    0x2A74  /* Cantiga */
+
+#define MEI_DEV_ID_ICH10_1    0x2E04  /* Eaglelake */
+#define MEI_DEV_ID_ICH10_2    0x2E14  /* Eaglelake */
+#define MEI_DEV_ID_ICH10_3    0x2E24  /* Eaglelake */
+#define MEI_DEV_ID_ICH10_4    0x2E34  /* Eaglelake */
+
+#define MEI_DEV_ID_IBXPK_1    0x3B64  /* Calpella */
+#define MEI_DEV_ID_IBXPK_2    0x3B65  /* Calpella */
+
+#define MEI_DEV_ID_CPT_1      0x1C3A  /* Cougar Point */
+#define MEI_DEV_ID_PBG_1      0x1D3A  /* C600/X79 Patsburg */
+
+#define MEI_DEV_ID_PPT_1      0x1E3A  /* Panther Point */
+#define MEI_DEV_ID_PPT_2      0x1CBA  /* Panther Point */
+#define MEI_DEV_ID_PPT_3      0x1DBA  /* Panther Point */
+
+#define MEI_DEV_ID_LPT_H      0x8C3A  /* Lynx Point H */
+#define MEI_DEV_ID_LPT_W      0x8D3A  /* Lynx Point - Wellsburg */
+#define MEI_DEV_ID_LPT_LP     0x9C3A  /* Lynx Point LP */
+#define MEI_DEV_ID_LPT_HR     0x8CBA  /* Lynx Point H Refresh */
+
+#define MEI_DEV_ID_WPT_LP     0x9CBA  /* Wildcat Point LP */
+#define MEI_DEV_ID_WPT_LP_2   0x9CBB  /* Wildcat Point LP 2 */
+
+/*
+ * MEI HW Section
+ */
+
+/* Host Firmware Status Registers in PCI Config Space */
+#define PCI_CFG_HFS_1         0x40
+#define PCI_CFG_HFS_2         0x48
+#define PCI_CFG_HFS_3         0x60
+#define PCI_CFG_HFS_4         0x64
+#define PCI_CFG_HFS_5         0x68
+#define PCI_CFG_HFS_6         0x6C
+
+/* MEI registers */
+/* H_CB_WW - Host Circular Buffer (CB) Write Window register */
+#define H_CB_WW    0
+/* H_CSR - Host Control Status register */
+#define H_CSR      4
+/* ME_CB_RW - ME Circular Buffer Read Window register (read only) */
+#define ME_CB_RW   8
+/* ME_CSR_HA - ME Control Status Host Access register (read only) */
+#define ME_CSR_HA  0xC
+/* H_HPG_CSR - PGI register */
+#define H_HPG_CSR  0x10
+
+
+/* register bits of H_CSR (Host Control Status register) */
+/* Host Circular Buffer Depth - maximum number of 32-bit entries in CB */
+#define H_CBD             0xFF000000
+/* Host Circular Buffer Write Pointer */
+#define H_CBWP            0x00FF0000
+/* Host Circular Buffer Read Pointer */
+#define H_CBRP            0x0000FF00
+/* Host Reset */
+#define H_RST             0x00000010
+/* Host Ready */
+#define H_RDY             0x00000008
+/* Host Interrupt Generate */
+#define H_IG              0x00000004
+/* Host Interrupt Status */
+#define H_IS              0x00000002
+/* Host Interrupt Enable */
+#define H_IE              0x00000001
+
+
+/* register bits of ME_CSR_HA (ME Control Status Host Access register) */
+/* ME CB (Circular Buffer) Depth HRA (Host Read Access) - host read only
+access to ME_CBD */
+#define ME_CBD_HRA        0xFF000000
+/* ME CB Write Pointer HRA - host read only access to ME_CBWP */
+#define ME_CBWP_HRA       0x00FF0000
+/* ME CB Read Pointer HRA - host read only access to ME_CBRP */
+#define ME_CBRP_HRA       0x0000FF00
+/* ME Power Gate Isolation Capability HRA - host read only access */
+#define ME_PGIC_HRA       0x00000040
+/* ME Reset HRA - host read only access to ME_RST */
+#define ME_RST_HRA        0x00000010
+/* ME Ready HRA - host read only access to ME_RDY */
+#define ME_RDY_HRA        0x00000008
+/* ME Interrupt Generate HRA - host read only access to ME_IG */
+#define ME_IG_HRA         0x00000004
+/* ME Interrupt Status HRA - host read only access to ME_IS */
+#define ME_IS_HRA         0x00000002
+/* ME Interrupt Enable HRA - host read only access to ME_IE */
+#define ME_IE_HRA         0x00000001
+
+
+/* register bits - H_HPG_CSR */
+#define H_HPG_CSR_PGIHEXR 0x00000001
+#define H_HPG_CSR_PGI     0x00000002
+
+#endif /* _MEI_HW_MEI_REGS_H_ */
diff --git a/kernel/drivers/misc/mei/hw-me.c b/kernel/drivers/misc/mei/hw-me.c
new file mode 100644
index 000000000..43d7101ff
--- /dev/null
+++ b/kernel/drivers/misc/mei/hw-me.c
@@ -0,0 +1,998 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include <linux/kthread.h>
+#include <linux/interrupt.h>
+
+#include "mei_dev.h"
+#include "hbm.h"
+
+#include "hw-me.h"
+#include "hw-me-regs.h"
+
+#include "mei-trace.h"
+
+/**
+ * mei_me_reg_read - Reads 32bit data from the mei device
+ *
+ * @hw: the me hardware structure
+ * @offset: offset from which to read the data
+ *
+ * Return: register value (u32)
+ */
+static inline u32 mei_me_reg_read(const struct mei_me_hw *hw,
+			       unsigned long offset)
+{
+	return ioread32(hw->mem_addr + offset);
+}
+
+
+/**
+ * mei_me_reg_write - Writes 32bit data to the mei device
+ *
+ * @hw: the me hardware structure
+ * @offset: offset to which to write the data
+ * @value: register value to write (u32)
+ */
+static inline void mei_me_reg_write(const struct mei_me_hw *hw,
+				 unsigned long offset, u32 value)
+{
+	iowrite32(value, hw->mem_addr + offset);
+}
+
+/**
+ * mei_me_mecbrw_read - Reads 32bit data from ME circular buffer
+ *  read window register
+ *
+ * @dev: the device structure
+ *
+ * Return: ME_CB_RW register value (u32)
+ */
+static inline u32 mei_me_mecbrw_read(const struct mei_device *dev)
+{
+	return mei_me_reg_read(to_me_hw(dev), ME_CB_RW);
+}
+
+/**
+ * mei_me_hcbww_write - write 32bit data to the host circular buffer
+ *
+ * @dev: the device structure
+ * @data: 32bit data to be written to the host circular buffer
+ */
+static inline void mei_me_hcbww_write(struct mei_device *dev, u32 data)
+{
+	mei_me_reg_write(to_me_hw(dev), H_CB_WW, data);
+}
+
+/**
+ * mei_me_mecsr_read - Reads 32bit data from the ME CSR
+ *
+ * @dev: the device structure
+ *
+ * Return: ME_CSR_HA register value (u32)
+ */
+static inline u32 mei_me_mecsr_read(const struct mei_device *dev)
+{
+	u32 reg;
+
+	reg = mei_me_reg_read(to_me_hw(dev), ME_CSR_HA);
+	
trace_mei_reg_read(dev->dev, "ME_CSR_HA", ME_CSR_HA, reg); + + return reg; +} + +/** + * mei_hcsr_read - Reads 32bit data from the host CSR + * + * @dev: the device structure + * + * Return: H_CSR register value (u32) + */ +static inline u32 mei_hcsr_read(const struct mei_device *dev) +{ + u32 reg; + + reg = mei_me_reg_read(to_me_hw(dev), H_CSR); + trace_mei_reg_read(dev->dev, "H_CSR", H_CSR, reg); + + return reg; +} + +/** + * mei_hcsr_write - writes H_CSR register to the mei device + * + * @dev: the device structure + * @reg: new register value + */ +static inline void mei_hcsr_write(struct mei_device *dev, u32 reg) +{ + trace_mei_reg_write(dev->dev, "H_CSR", H_CSR, reg); + mei_me_reg_write(to_me_hw(dev), H_CSR, reg); +} + +/** + * mei_hcsr_set - writes H_CSR register to the mei device, + * and ignores the H_IS bit for it is write-one-to-zero. + * + * @dev: the device structure + * @reg: new register value + */ +static inline void mei_hcsr_set(struct mei_device *dev, u32 reg) +{ + reg &= ~H_IS; + mei_hcsr_write(dev, reg); +} + +/** + * mei_me_fw_status - read fw status register from pci config space + * + * @dev: mei device + * @fw_status: fw status register values + * + * Return: 0 on success, error otherwise + */ +static int mei_me_fw_status(struct mei_device *dev, + struct mei_fw_status *fw_status) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + struct mei_me_hw *hw = to_me_hw(dev); + const struct mei_fw_status *fw_src = &hw->cfg->fw_status; + int ret; + int i; + + if (!fw_status) + return -EINVAL; + + fw_status->count = fw_src->count; + for (i = 0; i < fw_src->count && i < MEI_FW_STATUS_MAX; i++) { + ret = pci_read_config_dword(pdev, + fw_src->status[i], &fw_status->status[i]); + if (ret) + return ret; + } + + return 0; +} + +/** + * mei_me_hw_config - configure hw dependent settings + * + * @dev: mei device + */ +static void mei_me_hw_config(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + u32 hcsr = mei_hcsr_read(dev); + /* Doesn't change in runtime */ + dev->hbuf_depth = (hcsr & H_CBD) >> 24; + + hw->pg_state = MEI_PG_OFF; +} + +/** + * mei_me_pg_state - translate internal pg state + * to the mei power gating state + * + * @dev: mei device + * + * Return: MEI_PG_OFF if aliveness is on and MEI_PG_ON otherwise + */ +static inline enum mei_pg_state mei_me_pg_state(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + return hw->pg_state; +} + +/** + * mei_me_intr_clear - clear and stop interrupts + * + * @dev: the device structure + */ +static void mei_me_intr_clear(struct mei_device *dev) +{ + u32 hcsr = mei_hcsr_read(dev); + + if ((hcsr & H_IS) == H_IS) + mei_hcsr_write(dev, hcsr); +} +/** + * mei_me_intr_enable - enables mei device interrupts + * + * @dev: the device structure + */ +static void mei_me_intr_enable(struct mei_device *dev) +{ + u32 hcsr = mei_hcsr_read(dev); + + hcsr |= H_IE; + mei_hcsr_set(dev, hcsr); +} + +/** + * mei_me_intr_disable - disables mei device interrupts + * + * @dev: the device structure + */ +static void mei_me_intr_disable(struct mei_device *dev) +{ + u32 hcsr = mei_hcsr_read(dev); + + hcsr &= ~H_IE; + mei_hcsr_set(dev, hcsr); +} + +/** + * mei_me_hw_reset_release - release device from the reset + * + * @dev: the device structure + */ +static void mei_me_hw_reset_release(struct mei_device *dev) +{ + u32 hcsr = mei_hcsr_read(dev); + + hcsr |= H_IG; + hcsr &= ~H_RST; + mei_hcsr_set(dev, hcsr); + + /* complete this write before we set host ready on another CPU */ + mmiowb(); +} +/** + * mei_me_hw_reset - resets 
fw via mei csr register. + * + * @dev: the device structure + * @intr_enable: if interrupt should be enabled after reset. + * + * Return: always 0 + */ +static int mei_me_hw_reset(struct mei_device *dev, bool intr_enable) +{ + u32 hcsr = mei_hcsr_read(dev); + + /* H_RST may be found lit before reset is started, + * for example if preceding reset flow hasn't completed. + * In that case asserting H_RST will be ignored, therefore + * we need to clean H_RST bit to start a successful reset sequence. + */ + if ((hcsr & H_RST) == H_RST) { + dev_warn(dev->dev, "H_RST is set = 0x%08X", hcsr); + hcsr &= ~H_RST; + mei_hcsr_set(dev, hcsr); + hcsr = mei_hcsr_read(dev); + } + + hcsr |= H_RST | H_IG | H_IS; + + if (intr_enable) + hcsr |= H_IE; + else + hcsr &= ~H_IE; + + dev->recvd_hw_ready = false; + mei_hcsr_write(dev, hcsr); + + /* + * Host reads the H_CSR once to ensure that the + * posted write to H_CSR completes. + */ + hcsr = mei_hcsr_read(dev); + + if ((hcsr & H_RST) == 0) + dev_warn(dev->dev, "H_RST is not set = 0x%08X", hcsr); + + if ((hcsr & H_RDY) == H_RDY) + dev_warn(dev->dev, "H_RDY is not cleared 0x%08X", hcsr); + + if (intr_enable == false) + mei_me_hw_reset_release(dev); + + return 0; +} + +/** + * mei_me_host_set_ready - enable device + * + * @dev: mei device + */ +static void mei_me_host_set_ready(struct mei_device *dev) +{ + u32 hcsr = mei_hcsr_read(dev); + + hcsr |= H_IE | H_IG | H_RDY; + mei_hcsr_set(dev, hcsr); +} + +/** + * mei_me_host_is_ready - check whether the host has turned ready + * + * @dev: mei device + * Return: bool + */ +static bool mei_me_host_is_ready(struct mei_device *dev) +{ + u32 hcsr = mei_hcsr_read(dev); + + return (hcsr & H_RDY) == H_RDY; +} + +/** + * mei_me_hw_is_ready - check whether the me(hw) has turned ready + * + * @dev: mei device + * Return: bool + */ +static bool mei_me_hw_is_ready(struct mei_device *dev) +{ + u32 mecsr = mei_me_mecsr_read(dev); + + return (mecsr & ME_RDY_HRA) == ME_RDY_HRA; +} + +/** + * mei_me_hw_ready_wait - wait until the me(hw) has turned ready + * or timeout is reached + * + * @dev: mei device + * Return: 0 on success, error otherwise + */ +static int mei_me_hw_ready_wait(struct mei_device *dev) +{ + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_hw_ready, + dev->recvd_hw_ready, + mei_secs_to_jiffies(MEI_HW_READY_TIMEOUT)); + mutex_lock(&dev->device_lock); + if (!dev->recvd_hw_ready) { + dev_err(dev->dev, "wait hw ready failed\n"); + return -ETIME; + } + + mei_me_hw_reset_release(dev); + dev->recvd_hw_ready = false; + return 0; +} + +/** + * mei_me_hw_start - hw start routine + * + * @dev: mei device + * Return: 0 on success, error otherwise + */ +static int mei_me_hw_start(struct mei_device *dev) +{ + int ret = mei_me_hw_ready_wait(dev); + + if (ret) + return ret; + dev_dbg(dev->dev, "hw is ready\n"); + + mei_me_host_set_ready(dev); + return ret; +} + + +/** + * mei_hbuf_filled_slots - gets number of device filled buffer slots + * + * @dev: the device structure + * + * Return: number of filled slots + */ +static unsigned char mei_hbuf_filled_slots(struct mei_device *dev) +{ + u32 hcsr; + char read_ptr, write_ptr; + + hcsr = mei_hcsr_read(dev); + + read_ptr = (char) ((hcsr & H_CBRP) >> 8); + write_ptr = (char) ((hcsr & H_CBWP) >> 16); + + return (unsigned char) (write_ptr - read_ptr); +} + +/** + * mei_me_hbuf_is_empty - checks if host buffer is empty. + * + * @dev: the device structure + * + * Return: true if empty, false - otherwise. 
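+ *
+ * The underlying slot accounting in mei_hbuf_filled_slots() relies on
+ * 8-bit wraparound of the circular-buffer pointers. With illustrative
+ * values (not read from hardware), read_ptr == (char)0xFE and a write
+ * pointer that has already wrapped to (char)0x01:
+ *
+ *	(unsigned char)(write_ptr - read_ptr) == 3
+ *
+ * i.e. the three slots 0xFE, 0xFF and 0x00 are still in flight.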
+ */
+static bool mei_me_hbuf_is_empty(struct mei_device *dev)
+{
+	return mei_hbuf_filled_slots(dev) == 0;
+}
+
+/**
+ * mei_me_hbuf_empty_slots - counts write empty slots.
+ *
+ * @dev: the device structure
+ *
+ * Return: -EOVERFLOW if overflow, otherwise empty slots count
+ */
+static int mei_me_hbuf_empty_slots(struct mei_device *dev)
+{
+	unsigned char filled_slots, empty_slots;
+
+	filled_slots = mei_hbuf_filled_slots(dev);
+	empty_slots = dev->hbuf_depth - filled_slots;
+
+	/* check for overflow */
+	if (filled_slots > dev->hbuf_depth)
+		return -EOVERFLOW;
+
+	return empty_slots;
+}
+
+/**
+ * mei_me_hbuf_max_len - returns size of hw buffer.
+ *
+ * @dev: the device structure
+ *
+ * Return: size of hw buffer in bytes
+ */
+static size_t mei_me_hbuf_max_len(const struct mei_device *dev)
+{
+	return dev->hbuf_depth * sizeof(u32) - sizeof(struct mei_msg_hdr);
+}
+
+
+/**
+ * mei_me_write_message - writes a message to mei device.
+ *
+ * @dev: the device structure
+ * @header: mei HECI header of message
+ * @buf: message payload will be written
+ *
+ * Return: -EIO if write has failed
+ */
+static int mei_me_write_message(struct mei_device *dev,
+				struct mei_msg_hdr *header,
+				unsigned char *buf)
+{
+	unsigned long rem;
+	unsigned long length = header->length;
+	u32 *reg_buf = (u32 *)buf;
+	u32 hcsr;
+	u32 dw_cnt;
+	int i;
+	int empty_slots;
+
+	dev_dbg(dev->dev, MEI_HDR_FMT, MEI_HDR_PRM(header));
+
+	empty_slots = mei_hbuf_empty_slots(dev);
+	dev_dbg(dev->dev, "empty slots = %hu.\n", empty_slots);
+
+	dw_cnt = mei_data2slots(length);
+	if (empty_slots < 0 || dw_cnt > empty_slots)
+		return -EMSGSIZE;
+
+	mei_me_hcbww_write(dev, *((u32 *) header));
+
+	for (i = 0; i < length / 4; i++)
+		mei_me_hcbww_write(dev, reg_buf[i]);
+
+	rem = length & 0x3;
+	if (rem > 0) {
+		u32 reg = 0;
+
+		memcpy(&reg, &buf[length - rem], rem);
+		mei_me_hcbww_write(dev, reg);
+	}
+
+	hcsr = mei_hcsr_read(dev) | H_IG;
+	mei_hcsr_set(dev, hcsr);
+	if (!mei_me_hw_is_ready(dev))
+		return -EIO;
+
+	return 0;
+}
+
+/**
+ * mei_me_count_full_read_slots - counts read full slots.
+ *
+ * @dev: the device structure
+ *
+ * Return: -EOVERFLOW if overflow, otherwise filled slots count
+ */
+static int mei_me_count_full_read_slots(struct mei_device *dev)
+{
+	u32 me_csr;
+	char read_ptr, write_ptr;
+	unsigned char buffer_depth, filled_slots;
+
+	me_csr = mei_me_mecsr_read(dev);
+	buffer_depth = (unsigned char)((me_csr & ME_CBD_HRA) >> 24);
+	read_ptr = (char) ((me_csr & ME_CBRP_HRA) >> 8);
+	write_ptr = (char) ((me_csr & ME_CBWP_HRA) >> 16);
+	filled_slots = (unsigned char) (write_ptr - read_ptr);
+
+	/* check for overflow */
+	if (filled_slots > buffer_depth)
+		return -EOVERFLOW;
+
+	dev_dbg(dev->dev, "filled_slots =%08x\n", filled_slots);
+	return (int)filled_slots;
+}
+
+/**
+ * mei_me_read_slots - reads a message from mei device.
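+ * (the mirror image of mei_me_write_message() above: whole 32-bit slots
+ * are drained first, and a trailing 1-3 bytes are copied out of one
+ * final dword read from the ME circular buffer)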
+ *
+ * @dev: the device structure
+ * @buffer: message buffer will be written
+ * @buffer_length: message size will be read
+ *
+ * Return: always 0
+ */
+static int mei_me_read_slots(struct mei_device *dev, unsigned char *buffer,
+			     unsigned long buffer_length)
+{
+	u32 *reg_buf = (u32 *)buffer;
+	u32 hcsr;
+
+	for (; buffer_length >= sizeof(u32); buffer_length -= sizeof(u32))
+		*reg_buf++ = mei_me_mecbrw_read(dev);
+
+	if (buffer_length > 0) {
+		u32 reg = mei_me_mecbrw_read(dev);
+
+		memcpy(reg_buf, &reg, buffer_length);
+	}
+
+	hcsr = mei_hcsr_read(dev) | H_IG;
+	mei_hcsr_set(dev, hcsr);
+	return 0;
+}
+
+/**
+ * mei_me_pg_set - write pg enter register
+ *
+ * @dev: the device structure
+ */
+static void mei_me_pg_set(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+	u32 reg;
+
+	reg = mei_me_reg_read(hw, H_HPG_CSR);
+	trace_mei_reg_read(dev->dev, "H_HPG_CSR", H_HPG_CSR, reg);
+
+	reg |= H_HPG_CSR_PGI;
+
+	trace_mei_reg_write(dev->dev, "H_HPG_CSR", H_HPG_CSR, reg);
+	mei_me_reg_write(hw, H_HPG_CSR, reg);
+}
+
+/**
+ * mei_me_pg_unset - write pg exit register
+ *
+ * @dev: the device structure
+ */
+static void mei_me_pg_unset(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+	u32 reg;
+
+	reg = mei_me_reg_read(hw, H_HPG_CSR);
+	trace_mei_reg_read(dev->dev, "H_HPG_CSR", H_HPG_CSR, reg);
+
+	WARN(!(reg & H_HPG_CSR_PGI), "PGI is not set\n");
+
+	reg |= H_HPG_CSR_PGIHEXR;
+
+	trace_mei_reg_write(dev->dev, "H_HPG_CSR", H_HPG_CSR, reg);
+	mei_me_reg_write(hw, H_HPG_CSR, reg);
+}
+
+/**
+ * mei_me_pg_enter_sync - perform pg entry procedure
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success, an error code otherwise
+ */
+int mei_me_pg_enter_sync(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+	unsigned long timeout = mei_secs_to_jiffies(MEI_PGI_TIMEOUT);
+	int ret;
+
+	dev->pg_event = MEI_PG_EVENT_WAIT;
+
+	ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_ENTRY_REQ_CMD);
+	if (ret)
+		return ret;
+
+	mutex_unlock(&dev->device_lock);
+	wait_event_timeout(dev->wait_pg,
+		dev->pg_event == MEI_PG_EVENT_RECEIVED, timeout);
+	mutex_lock(&dev->device_lock);
+
+	if (dev->pg_event == MEI_PG_EVENT_RECEIVED) {
+		mei_me_pg_set(dev);
+		ret = 0;
+	} else {
+		ret = -ETIME;
+	}
+
+	dev->pg_event = MEI_PG_EVENT_IDLE;
+	hw->pg_state = MEI_PG_ON;
+
+	return ret;
+}
+
+/**
+ * mei_me_pg_exit_sync - perform pg exit procedure
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success, an error code otherwise
+ */
+int mei_me_pg_exit_sync(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+	unsigned long timeout = mei_secs_to_jiffies(MEI_PGI_TIMEOUT);
+	int ret;
+
+	if (dev->pg_event == MEI_PG_EVENT_RECEIVED)
+		goto reply;
+
+	dev->pg_event = MEI_PG_EVENT_WAIT;
+
+	mei_me_pg_unset(dev);
+
+	mutex_unlock(&dev->device_lock);
+	wait_event_timeout(dev->wait_pg,
+		dev->pg_event == MEI_PG_EVENT_RECEIVED, timeout);
+	mutex_lock(&dev->device_lock);
+
+reply:
+	if (dev->pg_event != MEI_PG_EVENT_RECEIVED) {
+		ret = -ETIME;
+		goto out;
+	}
+
+	dev->pg_event = MEI_PG_EVENT_INTR_WAIT;
+	ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_EXIT_RES_CMD);
+	if (ret)
+		return ret;
+
+	mutex_unlock(&dev->device_lock);
+	wait_event_timeout(dev->wait_pg,
+		dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED, timeout);
+	mutex_lock(&dev->device_lock);
+
+	if (dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED)
+		ret = 0;
+	else
+		ret = -ETIME;
+
+out:
+	dev->pg_event = MEI_PG_EVENT_IDLE;
+	hw->pg_state = MEI_PG_OFF;
+
+	return ret;
+}
+
+/**
+ * mei_me_pg_in_transition - is device now in pg transition
+ *
+ * @dev: the device structure
+ *
+ * Return: true if in pg transition, false otherwise
+ */
+static bool mei_me_pg_in_transition(struct mei_device *dev)
+{
+	return dev->pg_event >= MEI_PG_EVENT_WAIT &&
+	       dev->pg_event <= MEI_PG_EVENT_INTR_WAIT;
+}
+
+/**
+ * mei_me_pg_is_enabled - detect if PG is supported by HW
+ *
+ * @dev: the device structure
+ *
+ * Return: true if pg is supported, false otherwise
+ */
+static bool mei_me_pg_is_enabled(struct mei_device *dev)
+{
+	u32 reg = mei_me_mecsr_read(dev);
+
+	if ((reg & ME_PGIC_HRA) == 0)
+		goto notsupported;
+
+	if (!dev->hbm_f_pg_supported)
+		goto notsupported;
+
+	return true;
+
+notsupported:
+	dev_dbg(dev->dev, "pg: not supported: HGP = %d hbm version %d.%d ?= %d.%d\n",
+		!!(reg & ME_PGIC_HRA),
+		dev->version.major_version,
+		dev->version.minor_version,
+		HBM_MAJOR_VERSION_PGI,
+		HBM_MINOR_VERSION_PGI);
+
+	return false;
+}
+
+/**
+ * mei_me_pg_intr - perform pg processing in interrupt thread handler
+ *
+ * @dev: the device structure
+ */
+static void mei_me_pg_intr(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+
+	if (dev->pg_event != MEI_PG_EVENT_INTR_WAIT)
+		return;
+
+	dev->pg_event = MEI_PG_EVENT_INTR_RECEIVED;
+	hw->pg_state = MEI_PG_OFF;
+	if (waitqueue_active(&dev->wait_pg))
+		wake_up(&dev->wait_pg);
+}
+
+/**
+ * mei_me_irq_quick_handler - The ISR of the MEI device
+ *
+ * @irq: The irq number
+ * @dev_id: pointer to the device structure
+ *
+ * Return: irqreturn_t
+ */
+
+irqreturn_t mei_me_irq_quick_handler(int irq, void *dev_id)
+{
+	struct mei_device *dev = (struct mei_device *) dev_id;
+	u32 hcsr = mei_hcsr_read(dev);
+
+	if ((hcsr & H_IS) != H_IS)
+		return IRQ_NONE;
+
+	/* clear H_IS bit in H_CSR */
+	mei_hcsr_write(dev, hcsr);
+
+	return IRQ_WAKE_THREAD;
+}
+
+/**
+ * mei_me_irq_thread_handler - function called after ISR to handle the
+ * interrupt processing.
+ *
+ * @irq: The irq number
+ * @dev_id: pointer to the device structure
+ *
+ * Return: irqreturn_t
+ *
+ */
+irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id)
+{
+	struct mei_device *dev = (struct mei_device *) dev_id;
+	struct mei_cl_cb complete_list;
+	s32 slots;
+	int rets = 0;
+
+	dev_dbg(dev->dev, "function called after ISR to handle the interrupt processing.\n");
+	/* initialize our complete list */
+	mutex_lock(&dev->device_lock);
+	mei_io_list_init(&complete_list);
+
+	/* Ack the interrupt here.
+	 * In case of MSI we don't go through the quick handler */
+	if (pci_dev_msi_enabled(to_pci_dev(dev->dev)))
+		mei_clear_interrupts(dev);
+
+	/* check if ME wants a reset */
+	if (!mei_hw_is_ready(dev) && dev->dev_state != MEI_DEV_RESETTING) {
+		dev_warn(dev->dev, "FW not ready: resetting.\n");
+		schedule_work(&dev->reset_work);
+		goto end;
+	}
+
+	mei_me_pg_intr(dev);
+
+	/* check if we need to start the dev */
+	if (!mei_host_is_ready(dev)) {
+		if (mei_hw_is_ready(dev)) {
+			dev_dbg(dev->dev, "we need to start the dev.\n");
+			dev->recvd_hw_ready = true;
+			wake_up(&dev->wait_hw_ready);
+		} else {
+			dev_dbg(dev->dev, "Spurious Interrupt\n");
+		}
+		goto end;
+	}
+	/* check slots available for reading */
+	slots = mei_count_full_read_slots(dev);
+	while (slots > 0) {
+		dev_dbg(dev->dev, "slots to read = %08x\n", slots);
+		rets = mei_irq_read_handler(dev, &complete_list, &slots);
+		/* There is a race between ME write and interrupt delivery:
+		 * Not all data is always available immediately after the
+		 * interrupt, so try to read again on the next interrupt.
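+		 * An -ENODATA return from mei_irq_read_handler() reports
+		 * exactly this incomplete-data case, so it is swallowed
+		 * below instead of being treated as a hard error.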
+ */ + if (rets == -ENODATA) + break; + + if (rets && dev->dev_state != MEI_DEV_RESETTING) { + dev_err(dev->dev, "mei_irq_read_handler ret = %d.\n", + rets); + schedule_work(&dev->reset_work); + goto end; + } + } + + dev->hbuf_is_ready = mei_hbuf_is_ready(dev); + + /* + * During PG handshake only allowed write is the replay to the + * PG exit message, so block calling write function + * if the pg event is in PG handshake + */ + if (dev->pg_event != MEI_PG_EVENT_WAIT && + dev->pg_event != MEI_PG_EVENT_RECEIVED) { + rets = mei_irq_write_handler(dev, &complete_list); + dev->hbuf_is_ready = mei_hbuf_is_ready(dev); + } + + mei_irq_compl_handler(dev, &complete_list); + +end: + dev_dbg(dev->dev, "interrupt thread end ret = %d\n", rets); + mutex_unlock(&dev->device_lock); + return IRQ_HANDLED; +} + +static const struct mei_hw_ops mei_me_hw_ops = { + + .fw_status = mei_me_fw_status, + .pg_state = mei_me_pg_state, + + .host_is_ready = mei_me_host_is_ready, + + .hw_is_ready = mei_me_hw_is_ready, + .hw_reset = mei_me_hw_reset, + .hw_config = mei_me_hw_config, + .hw_start = mei_me_hw_start, + + .pg_in_transition = mei_me_pg_in_transition, + .pg_is_enabled = mei_me_pg_is_enabled, + + .intr_clear = mei_me_intr_clear, + .intr_enable = mei_me_intr_enable, + .intr_disable = mei_me_intr_disable, + + .hbuf_free_slots = mei_me_hbuf_empty_slots, + .hbuf_is_ready = mei_me_hbuf_is_empty, + .hbuf_max_len = mei_me_hbuf_max_len, + + .write = mei_me_write_message, + + .rdbuf_full_slots = mei_me_count_full_read_slots, + .read_hdr = mei_me_mecbrw_read, + .read = mei_me_read_slots +}; + +static bool mei_me_fw_type_nm(struct pci_dev *pdev) +{ + u32 reg; + + pci_read_config_dword(pdev, PCI_CFG_HFS_2, ®); + /* make sure that bit 9 (NM) is up and bit 10 (DM) is down */ + return (reg & 0x600) == 0x200; +} + +#define MEI_CFG_FW_NM \ + .quirk_probe = mei_me_fw_type_nm + +static bool mei_me_fw_type_sps(struct pci_dev *pdev) +{ + u32 reg; + /* Read ME FW Status check for SPS Firmware */ + pci_read_config_dword(pdev, PCI_CFG_HFS_1, ®); + /* if bits [19:16] = 15, running SPS Firmware */ + return (reg & 0xf0000) == 0xf0000; +} + +#define MEI_CFG_FW_SPS \ + .quirk_probe = mei_me_fw_type_sps + + +#define MEI_CFG_LEGACY_HFS \ + .fw_status.count = 0 + +#define MEI_CFG_ICH_HFS \ + .fw_status.count = 1, \ + .fw_status.status[0] = PCI_CFG_HFS_1 + +#define MEI_CFG_PCH_HFS \ + .fw_status.count = 2, \ + .fw_status.status[0] = PCI_CFG_HFS_1, \ + .fw_status.status[1] = PCI_CFG_HFS_2 + +#define MEI_CFG_PCH8_HFS \ + .fw_status.count = 6, \ + .fw_status.status[0] = PCI_CFG_HFS_1, \ + .fw_status.status[1] = PCI_CFG_HFS_2, \ + .fw_status.status[2] = PCI_CFG_HFS_3, \ + .fw_status.status[3] = PCI_CFG_HFS_4, \ + .fw_status.status[4] = PCI_CFG_HFS_5, \ + .fw_status.status[5] = PCI_CFG_HFS_6 + +/* ICH Legacy devices */ +const struct mei_cfg mei_me_legacy_cfg = { + MEI_CFG_LEGACY_HFS, +}; + +/* ICH devices */ +const struct mei_cfg mei_me_ich_cfg = { + MEI_CFG_ICH_HFS, +}; + +/* PCH devices */ +const struct mei_cfg mei_me_pch_cfg = { + MEI_CFG_PCH_HFS, +}; + + +/* PCH Cougar Point and Patsburg with quirk for Node Manager exclusion */ +const struct mei_cfg mei_me_pch_cpt_pbg_cfg = { + MEI_CFG_PCH_HFS, + MEI_CFG_FW_NM, +}; + +/* PCH8 Lynx Point and newer devices */ +const struct mei_cfg mei_me_pch8_cfg = { + MEI_CFG_PCH8_HFS, +}; + +/* PCH8 Lynx Point with quirk for SPS Firmware exclusion */ +const struct mei_cfg mei_me_pch8_sps_cfg = { + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_SPS, +}; + +/** + * mei_me_dev_init - allocates and initializes the mei device structure 
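+ *
+ * The ME-specific struct mei_me_hw is carved out of the trailing
+ * storage of a single kzalloc() (see to_me_hw() in hw-me.h); @cfg
+ * selects the per-generation fw status registers and probe quirks
+ * defined above.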
+ * + * @pdev: The pci device structure + * @cfg: per device generation config + * + * Return: The mei_device_device pointer on success, NULL on failure. + */ +struct mei_device *mei_me_dev_init(struct pci_dev *pdev, + const struct mei_cfg *cfg) +{ + struct mei_device *dev; + struct mei_me_hw *hw; + + dev = kzalloc(sizeof(struct mei_device) + + sizeof(struct mei_me_hw), GFP_KERNEL); + if (!dev) + return NULL; + hw = to_me_hw(dev); + + mei_device_init(dev, &pdev->dev, &mei_me_hw_ops); + hw->cfg = cfg; + return dev; +} + diff --git a/kernel/drivers/misc/mei/hw-me.h b/kernel/drivers/misc/mei/hw-me.h new file mode 100644 index 000000000..6022d52af --- /dev/null +++ b/kernel/drivers/misc/mei/hw-me.h @@ -0,0 +1,80 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2012, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + + + +#ifndef _MEI_INTERFACE_H_ +#define _MEI_INTERFACE_H_ + +#include <linux/irqreturn.h> +#include <linux/pci.h> +#include <linux/mei.h> + +#include "mei_dev.h" +#include "client.h" + +/* + * mei_cfg - mei device configuration + * + * @fw_status: FW status + * @quirk_probe: device exclusion quirk + */ +struct mei_cfg { + const struct mei_fw_status fw_status; + bool (*quirk_probe)(struct pci_dev *pdev); +}; + + +#define MEI_PCI_DEVICE(dev, cfg) \ + .vendor = PCI_VENDOR_ID_INTEL, .device = (dev), \ + .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, \ + .driver_data = (kernel_ulong_t)&(cfg) + + +#define MEI_ME_RPM_TIMEOUT 500 /* ms */ + +/** + * struct mei_me_hw - me hw specific data + * + * @cfg: per device generation config and ops + * @mem_addr: io memory address + * @pg_state: power gating state + */ +struct mei_me_hw { + const struct mei_cfg *cfg; + void __iomem *mem_addr; + enum mei_pg_state pg_state; +}; + +#define to_me_hw(dev) (struct mei_me_hw *)((dev)->hw) + +extern const struct mei_cfg mei_me_legacy_cfg; +extern const struct mei_cfg mei_me_ich_cfg; +extern const struct mei_cfg mei_me_pch_cfg; +extern const struct mei_cfg mei_me_pch_cpt_pbg_cfg; +extern const struct mei_cfg mei_me_pch8_cfg; +extern const struct mei_cfg mei_me_pch8_sps_cfg; + +struct mei_device *mei_me_dev_init(struct pci_dev *pdev, + const struct mei_cfg *cfg); + +int mei_me_pg_enter_sync(struct mei_device *dev); +int mei_me_pg_exit_sync(struct mei_device *dev); + +irqreturn_t mei_me_irq_quick_handler(int irq, void *dev_id); +irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id); + +#endif /* _MEI_INTERFACE_H_ */ diff --git a/kernel/drivers/misc/mei/hw-txe-regs.h b/kernel/drivers/misc/mei/hw-txe-regs.h new file mode 100644 index 000000000..f19229c4e --- /dev/null +++ b/kernel/drivers/misc/mei/hw-txe-regs.h @@ -0,0 +1,294 @@ +/****************************************************************************** + * Intel Management Engine Interface (Intel MEI) Linux driver + * Intel MEI Interface Header + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2013 - 2014 Intel Corporation. 
All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution + * in the file called COPYING + * + * Contact Information: + * Intel Corporation. + * linux-mei@linux.intel.com + * http://www.intel.com + * + * BSD LICENSE + * + * Copyright(c) 2013 - 2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *****************************************************************************/ +#ifndef _MEI_HW_TXE_REGS_H_ +#define _MEI_HW_TXE_REGS_H_ + +#include "hw.h" + +#define SEC_ALIVENESS_TIMER_TIMEOUT (5 * MSEC_PER_SEC) +#define SEC_ALIVENESS_WAIT_TIMEOUT (1 * MSEC_PER_SEC) +#define SEC_RESET_WAIT_TIMEOUT (1 * MSEC_PER_SEC) +#define SEC_READY_WAIT_TIMEOUT (5 * MSEC_PER_SEC) +#define START_MESSAGE_RESPONSE_WAIT_TIMEOUT (5 * MSEC_PER_SEC) +#define RESET_CANCEL_WAIT_TIMEOUT (1 * MSEC_PER_SEC) + +enum { + SEC_BAR, + BRIDGE_BAR, + + NUM_OF_MEM_BARS +}; + +/* SeC FW Status Register + * + * FW uses this register in order to report its status to host. + * This register resides in PCI-E config space. 
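+ *
+ * A consumer would typically decode the sub-fields with the masks
+ * below; an illustrative sketch (shift counts follow from the masks):
+ *
+ *	u32 op_mode = (sts0 & PCI_CFG_TXE_FW_STS0_OP_MODE_MSK) >> 16;
+ *	u32 err_code = (sts0 & PCI_CFG_TXE_FW_STS0_ERR_CODE_MSK) >> 12;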
+ */ +#define PCI_CFG_TXE_FW_STS0 0x40 +# define PCI_CFG_TXE_FW_STS0_WRK_ST_MSK 0x0000000F +# define PCI_CFG_TXE_FW_STS0_OP_ST_MSK 0x000001C0 +# define PCI_CFG_TXE_FW_STS0_FW_INIT_CMPLT 0x00000200 +# define PCI_CFG_TXE_FW_STS0_ERR_CODE_MSK 0x0000F000 +# define PCI_CFG_TXE_FW_STS0_OP_MODE_MSK 0x000F0000 +# define PCI_CFG_TXE_FW_STS0_RST_CNT_MSK 0x00F00000 +#define PCI_CFG_TXE_FW_STS1 0x48 + +#define IPC_BASE_ADDR 0x80400 /* SeC IPC Base Address */ + +/* IPC Input Doorbell Register */ +#define SEC_IPC_INPUT_DOORBELL_REG (0x0000 + IPC_BASE_ADDR) + +/* IPC Input Status Register + * This register indicates whether or not processing of + * the most recent command has been completed by the SEC + * New commands and payloads should not be written by the Host + * until this indicates that the previous command has been processed. + */ +#define SEC_IPC_INPUT_STATUS_REG (0x0008 + IPC_BASE_ADDR) +# define SEC_IPC_INPUT_STATUS_RDY BIT(0) + +/* IPC Host Interrupt Status Register */ +#define SEC_IPC_HOST_INT_STATUS_REG (0x0010 + IPC_BASE_ADDR) +#define SEC_IPC_HOST_INT_STATUS_OUT_DB BIT(0) +#define SEC_IPC_HOST_INT_STATUS_IN_RDY BIT(1) +#define SEC_IPC_HOST_INT_STATUS_HDCP_M0_RCVD BIT(5) +#define SEC_IPC_HOST_INT_STATUS_ILL_MEM_ACCESS BIT(17) +#define SEC_IPC_HOST_INT_STATUS_AES_HKEY_ERR BIT(18) +#define SEC_IPC_HOST_INT_STATUS_DES_HKEY_ERR BIT(19) +#define SEC_IPC_HOST_INT_STATUS_TMRMTB_OVERFLOW BIT(21) + +/* Convenient mask for pending interrupts */ +#define SEC_IPC_HOST_INT_STATUS_PENDING \ + (SEC_IPC_HOST_INT_STATUS_OUT_DB| \ + SEC_IPC_HOST_INT_STATUS_IN_RDY) + +/* IPC Host Interrupt Mask Register */ +#define SEC_IPC_HOST_INT_MASK_REG (0x0014 + IPC_BASE_ADDR) + +# define SEC_IPC_HOST_INT_MASK_OUT_DB BIT(0) /* Output Doorbell Int Mask */ +# define SEC_IPC_HOST_INT_MASK_IN_RDY BIT(1) /* Input Ready Int Mask */ + +/* IPC Input Payload RAM */ +#define SEC_IPC_INPUT_PAYLOAD_REG (0x0100 + IPC_BASE_ADDR) +/* IPC Shared Payload RAM */ +#define IPC_SHARED_PAYLOAD_REG (0x0200 + IPC_BASE_ADDR) + +/* SeC Address Translation Table Entry 2 - Ctrl + * + * This register resides also in SeC's PCI-E Memory space. + */ +#define SATT2_CTRL_REG 0x1040 +# define SATT2_CTRL_VALID_MSK BIT(0) +# define SATT2_CTRL_BR_BASE_ADDR_REG_SHIFT 8 +# define SATT2_CTRL_BRIDGE_HOST_EN_MSK BIT(12) + +/* SATT Table Entry 2 SAP Base Address Register */ +#define SATT2_SAP_BA_REG 0x1044 +/* SATT Table Entry 2 SAP Size Register. */ +#define SATT2_SAP_SIZE_REG 0x1048 + /* SATT Table Entry 2 SAP Bridge Address - LSB Register */ +#define SATT2_BRG_BA_LSB_REG 0x104C + +/* Host High-level Interrupt Status Register */ +#define HHISR_REG 0x2020 +/* Host High-level Interrupt Enable Register + * + * Resides in PCI memory space. This is the top hierarchy for + * interrupts from SeC to host, aggregating both interrupts that + * arrive through HICR registers as well as interrupts + * that arrive via IPC. + */ +#define HHIER_REG 0x2024 +#define IPC_HHIER_SEC BIT(0) +#define IPC_HHIER_BRIDGE BIT(1) +#define IPC_HHIER_MSK (IPC_HHIER_SEC | IPC_HHIER_BRIDGE) + +/* Host High-level Interrupt Mask Register. + * + * Resides in PCI memory space. + * This is the top hierarchy for masking interrupts from SeC to host. + */ +#define HHIMR_REG 0x2028 +#define IPC_HHIMR_SEC BIT(0) +#define IPC_HHIMR_BRIDGE BIT(1) + +/* Host High-level IRQ Status Register */ +#define HHIRQSR_REG 0x202C + +/* Host Interrupt Cause Register 0 - SeC IPC Readiness + * + * This register is both an ICR to Host from PCI Memory Space + * and it is also exposed in the SeC memory space. 
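+ * For instance, the link is fully up only when both sides have
+ * announced themselves, i.e. when (readiness &
+ * HICR_SEC_IPC_READINESS_SYS_RDY) == HICR_SEC_IPC_READINESS_SYS_RDY,
+ * with HOST_RDY and SEC_RDY (defined below) both set.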
+ * This register is used by SeC's IPC driver in order + * to synchronize with host about IPC interface state. + */ +#define HICR_SEC_IPC_READINESS_REG 0x2040 +#define HICR_SEC_IPC_READINESS_HOST_RDY BIT(0) +#define HICR_SEC_IPC_READINESS_SEC_RDY BIT(1) +#define HICR_SEC_IPC_READINESS_SYS_RDY \ + (HICR_SEC_IPC_READINESS_HOST_RDY | \ + HICR_SEC_IPC_READINESS_SEC_RDY) +#define HICR_SEC_IPC_READINESS_RDY_CLR BIT(2) + +/* Host Interrupt Cause Register 1 - Aliveness Response */ +/* This register is both an ICR to Host from PCI Memory Space + * and it is also exposed in the SeC memory space. + * The register may be used by SeC to ACK a host request for aliveness. + */ +#define HICR_HOST_ALIVENESS_RESP_REG 0x2044 +#define HICR_HOST_ALIVENESS_RESP_ACK BIT(0) + +/* Host Interrupt Cause Register 2 - SeC IPC Output Doorbell */ +#define HICR_SEC_IPC_OUTPUT_DOORBELL_REG 0x2048 + +/* Host Interrupt Status Register. + * + * Resides in PCI memory space. + * This is the main register involved in generating interrupts + * from SeC to host via HICRs. + * The interrupt generation rules are as follows: + * An interrupt will be generated whenever for any i, + * there is a transition from a state where at least one of + * the following conditions did not hold, to a state where + * ALL the following conditions hold: + * A) HISR.INT[i]_STS == 1. + * B) HIER.INT[i]_EN == 1. + */ +#define HISR_REG 0x2060 +#define HISR_INT_0_STS BIT(0) +#define HISR_INT_1_STS BIT(1) +#define HISR_INT_2_STS BIT(2) +#define HISR_INT_3_STS BIT(3) +#define HISR_INT_4_STS BIT(4) +#define HISR_INT_5_STS BIT(5) +#define HISR_INT_6_STS BIT(6) +#define HISR_INT_7_STS BIT(7) +#define HISR_INT_STS_MSK \ + (HISR_INT_0_STS | HISR_INT_1_STS | HISR_INT_2_STS) + +/* Host Interrupt Enable Register. Resides in PCI memory space. */ +#define HIER_REG 0x2064 +#define HIER_INT_0_EN BIT(0) +#define HIER_INT_1_EN BIT(1) +#define HIER_INT_2_EN BIT(2) +#define HIER_INT_3_EN BIT(3) +#define HIER_INT_4_EN BIT(4) +#define HIER_INT_5_EN BIT(5) +#define HIER_INT_6_EN BIT(6) +#define HIER_INT_7_EN BIT(7) + +#define HIER_INT_EN_MSK \ + (HIER_INT_0_EN | HIER_INT_1_EN | HIER_INT_2_EN) + + +/* SEC Memory Space IPC output payload. + * + * This register is part of the output payload which SEC provides to host. + */ +#define BRIDGE_IPC_OUTPUT_PAYLOAD_REG 0x20C0 + +/* SeC Interrupt Cause Register - Host Aliveness Request + * This register is both an ICR to SeC and it is also exposed + * in the host-visible PCI memory space. + * The register is used by host to request SeC aliveness. + */ +#define SICR_HOST_ALIVENESS_REQ_REG 0x214C +#define SICR_HOST_ALIVENESS_REQ_REQUESTED BIT(0) + + +/* SeC Interrupt Cause Register - Host IPC Readiness + * + * This register is both an ICR to SeC and it is also exposed + * in the host-visible PCI memory space. + * This register is used by the host's SeC driver uses in order + * to synchronize with SeC about IPC interface state. + */ +#define SICR_HOST_IPC_READINESS_REQ_REG 0x2150 + + +#define SICR_HOST_IPC_READINESS_HOST_RDY BIT(0) +#define SICR_HOST_IPC_READINESS_SEC_RDY BIT(1) +#define SICR_HOST_IPC_READINESS_SYS_RDY \ + (SICR_HOST_IPC_READINESS_HOST_RDY | \ + SICR_HOST_IPC_READINESS_SEC_RDY) +#define SICR_HOST_IPC_READINESS_RDY_CLR BIT(2) + +/* SeC Interrupt Cause Register - SeC IPC Output Status + * + * This register indicates whether or not processing of the most recent + * command has been completed by the Host. 
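+ * (This is the SeC-facing mirror of SEC_IPC_INPUT_STATUS_REG above;
+ * the host driver re-arms it by writing SEC_IPC_OUTPUT_STATUS_RDY,
+ * see mei_txe_output_ready_set() in hw-txe.c.)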
+ * New commands and payloads should not be written by SeC until this + * register indicates that the previous command has been processed. + */ +#define SICR_SEC_IPC_OUTPUT_STATUS_REG 0x2154 +# define SEC_IPC_OUTPUT_STATUS_RDY BIT(0) + + + +/* MEI IPC Message payload size 64 bytes */ +#define PAYLOAD_SIZE 64 + +/* MAX size for SATT range 32MB */ +#define SATT_RANGE_MAX (32 << 20) + + +#endif /* _MEI_HW_TXE_REGS_H_ */ + diff --git a/kernel/drivers/misc/mei/hw-txe.c b/kernel/drivers/misc/mei/hw-txe.c new file mode 100644 index 000000000..bae680c64 --- /dev/null +++ b/kernel/drivers/misc/mei/hw-txe.c @@ -0,0 +1,1242 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <linux/pci.h> +#include <linux/jiffies.h> +#include <linux/ktime.h> +#include <linux/delay.h> +#include <linux/kthread.h> +#include <linux/irqreturn.h> + +#include <linux/mei.h> + +#include "mei_dev.h" +#include "hw-txe.h" +#include "client.h" +#include "hbm.h" + +/** + * mei_txe_reg_read - Reads 32bit data from the txe device + * + * @base_addr: registers base address + * @offset: register offset + * + * Return: register value + */ +static inline u32 mei_txe_reg_read(void __iomem *base_addr, + unsigned long offset) +{ + return ioread32(base_addr + offset); +} + +/** + * mei_txe_reg_write - Writes 32bit data to the txe device + * + * @base_addr: registers base address + * @offset: register offset + * @value: the value to write + */ +static inline void mei_txe_reg_write(void __iomem *base_addr, + unsigned long offset, u32 value) +{ + iowrite32(value, base_addr + offset); +} + +/** + * mei_txe_sec_reg_read_silent - Reads 32bit data from the SeC BAR + * + * @hw: the txe hardware structure + * @offset: register offset + * + * Doesn't check for aliveness while Reads 32bit data from the SeC BAR + * + * Return: register value + */ +static inline u32 mei_txe_sec_reg_read_silent(struct mei_txe_hw *hw, + unsigned long offset) +{ + return mei_txe_reg_read(hw->mem_addr[SEC_BAR], offset); +} + +/** + * mei_txe_sec_reg_read - Reads 32bit data from the SeC BAR + * + * @hw: the txe hardware structure + * @offset: register offset + * + * Reads 32bit data from the SeC BAR and shout loud if aliveness is not set + * + * Return: register value + */ +static inline u32 mei_txe_sec_reg_read(struct mei_txe_hw *hw, + unsigned long offset) +{ + WARN(!hw->aliveness, "sec read: aliveness not asserted\n"); + return mei_txe_sec_reg_read_silent(hw, offset); +} +/** + * mei_txe_sec_reg_write_silent - Writes 32bit data to the SeC BAR + * doesn't check for aliveness + * + * @hw: the txe hardware structure + * @offset: register offset + * @value: value to write + * + * Doesn't check for aliveness while writes 32bit data from to the SeC BAR + */ +static inline void mei_txe_sec_reg_write_silent(struct mei_txe_hw *hw, + unsigned long offset, u32 value) +{ + mei_txe_reg_write(hw->mem_addr[SEC_BAR], offset, value); +} + +/** + * mei_txe_sec_reg_write - Writes 32bit data to the SeC BAR + * + * @hw: the txe hardware structure + * 
@offset: register offset + * @value: value to write + * + * Writes 32bit data from the SeC BAR and shout loud if aliveness is not set + */ +static inline void mei_txe_sec_reg_write(struct mei_txe_hw *hw, + unsigned long offset, u32 value) +{ + WARN(!hw->aliveness, "sec write: aliveness not asserted\n"); + mei_txe_sec_reg_write_silent(hw, offset, value); +} +/** + * mei_txe_br_reg_read - Reads 32bit data from the Bridge BAR + * + * @hw: the txe hardware structure + * @offset: offset from which to read the data + * + * Return: the byte read. + */ +static inline u32 mei_txe_br_reg_read(struct mei_txe_hw *hw, + unsigned long offset) +{ + return mei_txe_reg_read(hw->mem_addr[BRIDGE_BAR], offset); +} + +/** + * mei_txe_br_reg_write - Writes 32bit data to the Bridge BAR + * + * @hw: the txe hardware structure + * @offset: offset from which to write the data + * @value: the byte to write + */ +static inline void mei_txe_br_reg_write(struct mei_txe_hw *hw, + unsigned long offset, u32 value) +{ + mei_txe_reg_write(hw->mem_addr[BRIDGE_BAR], offset, value); +} + +/** + * mei_txe_aliveness_set - request for aliveness change + * + * @dev: the device structure + * @req: requested aliveness value + * + * Request for aliveness change and returns true if the change is + * really needed and false if aliveness is already + * in the requested state + * + * Locking: called under "dev->device_lock" lock + * + * Return: true if request was send + */ +static bool mei_txe_aliveness_set(struct mei_device *dev, u32 req) +{ + + struct mei_txe_hw *hw = to_txe_hw(dev); + bool do_req = hw->aliveness != req; + + dev_dbg(dev->dev, "Aliveness current=%d request=%d\n", + hw->aliveness, req); + if (do_req) { + dev->pg_event = MEI_PG_EVENT_WAIT; + mei_txe_br_reg_write(hw, SICR_HOST_ALIVENESS_REQ_REG, req); + } + return do_req; +} + + +/** + * mei_txe_aliveness_req_get - get aliveness requested register value + * + * @dev: the device structure + * + * Extract HICR_HOST_ALIVENESS_RESP_ACK bit from + * from HICR_HOST_ALIVENESS_REQ register value + * + * Return: SICR_HOST_ALIVENESS_REQ_REQUESTED bit value + */ +static u32 mei_txe_aliveness_req_get(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + u32 reg; + + reg = mei_txe_br_reg_read(hw, SICR_HOST_ALIVENESS_REQ_REG); + return reg & SICR_HOST_ALIVENESS_REQ_REQUESTED; +} + +/** + * mei_txe_aliveness_get - get aliveness response register value + * + * @dev: the device structure + * + * Return: HICR_HOST_ALIVENESS_RESP_ACK bit from HICR_HOST_ALIVENESS_RESP + * register + */ +static u32 mei_txe_aliveness_get(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + u32 reg; + + reg = mei_txe_br_reg_read(hw, HICR_HOST_ALIVENESS_RESP_REG); + return reg & HICR_HOST_ALIVENESS_RESP_ACK; +} + +/** + * mei_txe_aliveness_poll - waits for aliveness to settle + * + * @dev: the device structure + * @expected: expected aliveness value + * + * Polls for HICR_HOST_ALIVENESS_RESP.ALIVENESS_RESP to be set + * + * Return: 0 if the expected value was received, -ETIME otherwise + */ +static int mei_txe_aliveness_poll(struct mei_device *dev, u32 expected) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + ktime_t stop, start; + + start = ktime_get(); + stop = ktime_add(start, ms_to_ktime(SEC_ALIVENESS_WAIT_TIMEOUT)); + do { + hw->aliveness = mei_txe_aliveness_get(dev); + if (hw->aliveness == expected) { + dev->pg_event = MEI_PG_EVENT_IDLE; + dev_dbg(dev->dev, "aliveness settled after %lld usecs\n", + ktime_to_us(ktime_sub(ktime_get(), start))); + return 0; + } + 
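+		/*
+		 * Back off between polls: at 20-50 us per iteration this
+		 * re-reads the response register on the order of 10^4
+		 * times before the 1 s SEC_ALIVENESS_WAIT_TIMEOUT window
+		 * computed into 'stop' above elapses.
+		 */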
usleep_range(20, 50); + } while (ktime_before(ktime_get(), stop)); + + dev->pg_event = MEI_PG_EVENT_IDLE; + dev_err(dev->dev, "aliveness timed out\n"); + return -ETIME; +} + +/** + * mei_txe_aliveness_wait - waits for aliveness to settle + * + * @dev: the device structure + * @expected: expected aliveness value + * + * Waits for HICR_HOST_ALIVENESS_RESP.ALIVENESS_RESP to be set + * + * Return: 0 on success and < 0 otherwise + */ +static int mei_txe_aliveness_wait(struct mei_device *dev, u32 expected) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + const unsigned long timeout = + msecs_to_jiffies(SEC_ALIVENESS_WAIT_TIMEOUT); + long err; + int ret; + + hw->aliveness = mei_txe_aliveness_get(dev); + if (hw->aliveness == expected) + return 0; + + mutex_unlock(&dev->device_lock); + err = wait_event_timeout(hw->wait_aliveness_resp, + dev->pg_event == MEI_PG_EVENT_RECEIVED, timeout); + mutex_lock(&dev->device_lock); + + hw->aliveness = mei_txe_aliveness_get(dev); + ret = hw->aliveness == expected ? 0 : -ETIME; + + if (ret) + dev_warn(dev->dev, "aliveness timed out = %ld aliveness = %d event = %d\n", + err, hw->aliveness, dev->pg_event); + else + dev_dbg(dev->dev, "aliveness settled after = %d msec aliveness = %d event = %d\n", + jiffies_to_msecs(timeout - err), + hw->aliveness, dev->pg_event); + + dev->pg_event = MEI_PG_EVENT_IDLE; + return ret; +} + +/** + * mei_txe_aliveness_set_sync - sets an wait for aliveness to complete + * + * @dev: the device structure + * @req: requested aliveness value + * + * Return: 0 on success and < 0 otherwise + */ +int mei_txe_aliveness_set_sync(struct mei_device *dev, u32 req) +{ + if (mei_txe_aliveness_set(dev, req)) + return mei_txe_aliveness_wait(dev, req); + return 0; +} + +/** + * mei_txe_pg_in_transition - is device now in pg transition + * + * @dev: the device structure + * + * Return: true if in pg transition, false otherwise + */ +static bool mei_txe_pg_in_transition(struct mei_device *dev) +{ + return dev->pg_event == MEI_PG_EVENT_WAIT; +} + +/** + * mei_txe_pg_is_enabled - detect if PG is supported by HW + * + * @dev: the device structure + * + * Return: true is pg supported, false otherwise + */ +static bool mei_txe_pg_is_enabled(struct mei_device *dev) +{ + return true; +} + +/** + * mei_txe_pg_state - translate aliveness register value + * to the mei power gating state + * + * @dev: the device structure + * + * Return: MEI_PG_OFF if aliveness is on and MEI_PG_ON otherwise + */ +static inline enum mei_pg_state mei_txe_pg_state(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + return hw->aliveness ? MEI_PG_OFF : MEI_PG_ON; +} + +/** + * mei_txe_input_ready_interrupt_enable - sets the Input Ready Interrupt + * + * @dev: the device structure + */ +static void mei_txe_input_ready_interrupt_enable(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + u32 hintmsk; + /* Enable the SEC_IPC_HOST_INT_MASK_IN_RDY interrupt */ + hintmsk = mei_txe_sec_reg_read(hw, SEC_IPC_HOST_INT_MASK_REG); + hintmsk |= SEC_IPC_HOST_INT_MASK_IN_RDY; + mei_txe_sec_reg_write(hw, SEC_IPC_HOST_INT_MASK_REG, hintmsk); +} + +/** + * mei_txe_input_doorbell_set - sets bit 0 in + * SEC_IPC_INPUT_DOORBELL.IPC_INPUT_DOORBELL. 
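+ *
+ * Ringing the doorbell hands the message already staged in the input
+ * payload RAM over to SeC; the cached TXE_INTR_IN_READY cause is
+ * cleared first so a stale "input ready" indication is not mistaken
+ * for an acknowledgment of this new message.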
+ * + * @hw: the txe hardware structure + */ +static void mei_txe_input_doorbell_set(struct mei_txe_hw *hw) +{ + /* Clear the interrupt cause */ + clear_bit(TXE_INTR_IN_READY_BIT, &hw->intr_cause); + mei_txe_sec_reg_write(hw, SEC_IPC_INPUT_DOORBELL_REG, 1); +} + +/** + * mei_txe_output_ready_set - Sets the SICR_SEC_IPC_OUTPUT_STATUS bit to 1 + * + * @hw: the txe hardware structure + */ +static void mei_txe_output_ready_set(struct mei_txe_hw *hw) +{ + mei_txe_br_reg_write(hw, + SICR_SEC_IPC_OUTPUT_STATUS_REG, + SEC_IPC_OUTPUT_STATUS_RDY); +} + +/** + * mei_txe_is_input_ready - check if TXE is ready for receiving data + * + * @dev: the device structure + * + * Return: true if INPUT STATUS READY bit is set + */ +static bool mei_txe_is_input_ready(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + u32 status; + + status = mei_txe_sec_reg_read(hw, SEC_IPC_INPUT_STATUS_REG); + return !!(SEC_IPC_INPUT_STATUS_RDY & status); +} + +/** + * mei_txe_intr_clear - clear all interrupts + * + * @dev: the device structure + */ +static inline void mei_txe_intr_clear(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + mei_txe_sec_reg_write_silent(hw, SEC_IPC_HOST_INT_STATUS_REG, + SEC_IPC_HOST_INT_STATUS_PENDING); + mei_txe_br_reg_write(hw, HISR_REG, HISR_INT_STS_MSK); + mei_txe_br_reg_write(hw, HHISR_REG, IPC_HHIER_MSK); +} + +/** + * mei_txe_intr_disable - disable all interrupts + * + * @dev: the device structure + */ +static void mei_txe_intr_disable(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + mei_txe_br_reg_write(hw, HHIER_REG, 0); + mei_txe_br_reg_write(hw, HIER_REG, 0); +} +/** + * mei_txe_intr_enable - enable all interrupts + * + * @dev: the device structure + */ +static void mei_txe_intr_enable(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + mei_txe_br_reg_write(hw, HHIER_REG, IPC_HHIER_MSK); + mei_txe_br_reg_write(hw, HIER_REG, HIER_INT_EN_MSK); +} + +/** + * mei_txe_pending_interrupts - check if there are pending interrupts + * only Aliveness, Input ready, and output doorbell are of relevance + * + * @dev: the device structure + * + * Checks if there are pending interrupts + * only Aliveness, Readiness, Input ready, and Output doorbell are relevant + * + * Return: true if there are pending interrupts + */ +static bool mei_txe_pending_interrupts(struct mei_device *dev) +{ + + struct mei_txe_hw *hw = to_txe_hw(dev); + bool ret = (hw->intr_cause & (TXE_INTR_READINESS | + TXE_INTR_ALIVENESS | + TXE_INTR_IN_READY | + TXE_INTR_OUT_DB)); + + if (ret) { + dev_dbg(dev->dev, + "Pending Interrupts InReady=%01d Readiness=%01d, Aliveness=%01d, OutDoor=%01d\n", + !!(hw->intr_cause & TXE_INTR_IN_READY), + !!(hw->intr_cause & TXE_INTR_READINESS), + !!(hw->intr_cause & TXE_INTR_ALIVENESS), + !!(hw->intr_cause & TXE_INTR_OUT_DB)); + } + return ret; +} + +/** + * mei_txe_input_payload_write - write a dword to the host buffer + * at offset idx + * + * @dev: the device structure + * @idx: index in the host buffer + * @value: value + */ +static void mei_txe_input_payload_write(struct mei_device *dev, + unsigned long idx, u32 value) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + mei_txe_sec_reg_write(hw, SEC_IPC_INPUT_PAYLOAD_REG + + (idx * sizeof(u32)), value); +} + +/** + * mei_txe_out_data_read - read dword from the device buffer + * at offset idx + * + * @dev: the device structure + * @idx: index in the device buffer + * + * Return: register value at index + */ +static u32 mei_txe_out_data_read(const struct mei_device 
*dev, + unsigned long idx) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + return mei_txe_br_reg_read(hw, + BRIDGE_IPC_OUTPUT_PAYLOAD_REG + (idx * sizeof(u32))); +} + +/* Readiness */ + +/** + * mei_txe_readiness_set_host_rdy - set host readiness bit + * + * @dev: the device structure + */ +static void mei_txe_readiness_set_host_rdy(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + mei_txe_br_reg_write(hw, + SICR_HOST_IPC_READINESS_REQ_REG, + SICR_HOST_IPC_READINESS_HOST_RDY); +} + +/** + * mei_txe_readiness_clear - clear host readiness bit + * + * @dev: the device structure + */ +static void mei_txe_readiness_clear(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + mei_txe_br_reg_write(hw, SICR_HOST_IPC_READINESS_REQ_REG, + SICR_HOST_IPC_READINESS_RDY_CLR); +} +/** + * mei_txe_readiness_get - Reads and returns + * the HICR_SEC_IPC_READINESS register value + * + * @dev: the device structure + * + * Return: the HICR_SEC_IPC_READINESS register value + */ +static u32 mei_txe_readiness_get(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + return mei_txe_br_reg_read(hw, HICR_SEC_IPC_READINESS_REG); +} + + +/** + * mei_txe_readiness_is_sec_rdy - check readiness + * for HICR_SEC_IPC_READINESS_SEC_RDY + * + * @readiness: cached readiness state + * + * Return: true if readiness bit is set + */ +static inline bool mei_txe_readiness_is_sec_rdy(u32 readiness) +{ + return !!(readiness & HICR_SEC_IPC_READINESS_SEC_RDY); +} + +/** + * mei_txe_hw_is_ready - check if the hw is ready + * + * @dev: the device structure + * + * Return: true if sec is ready + */ +static bool mei_txe_hw_is_ready(struct mei_device *dev) +{ + u32 readiness = mei_txe_readiness_get(dev); + + return mei_txe_readiness_is_sec_rdy(readiness); +} + +/** + * mei_txe_host_is_ready - check if the host is ready + * + * @dev: the device structure + * + * Return: true if host is ready + */ +static inline bool mei_txe_host_is_ready(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + u32 reg = mei_txe_br_reg_read(hw, HICR_SEC_IPC_READINESS_REG); + + return !!(reg & HICR_SEC_IPC_READINESS_HOST_RDY); +} + +/** + * mei_txe_readiness_wait - wait till readiness settles + * + * @dev: the device structure + * + * Return: 0 on success and -ETIME on timeout + */ +static int mei_txe_readiness_wait(struct mei_device *dev) +{ + if (mei_txe_hw_is_ready(dev)) + return 0; + + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_hw_ready, dev->recvd_hw_ready, + msecs_to_jiffies(SEC_RESET_WAIT_TIMEOUT)); + mutex_lock(&dev->device_lock); + if (!dev->recvd_hw_ready) { + dev_err(dev->dev, "wait for readiness failed\n"); + return -ETIME; + } + + dev->recvd_hw_ready = false; + return 0; +} + +static const struct mei_fw_status mei_txe_fw_sts = { + .count = 2, + .status[0] = PCI_CFG_TXE_FW_STS0, + .status[1] = PCI_CFG_TXE_FW_STS1 +}; + +/** + * mei_txe_fw_status - read fw status register from pci config space + * + * @dev: mei device + * @fw_status: fw status register values + * + * Return: 0 on success, error otherwise + */ +static int mei_txe_fw_status(struct mei_device *dev, + struct mei_fw_status *fw_status) +{ + const struct mei_fw_status *fw_src = &mei_txe_fw_sts; + struct pci_dev *pdev = to_pci_dev(dev->dev); + int ret; + int i; + + if (!fw_status) + return -EINVAL; + + fw_status->count = fw_src->count; + for (i = 0; i < fw_src->count && i < MEI_FW_STATUS_MAX; i++) { + ret = pci_read_config_dword(pdev, + fw_src->status[i], &fw_status->status[i]); + if (ret) + 
return ret; + } + + return 0; +} + +/** + * mei_txe_hw_config - configure hardware at the start of the devices + * + * @dev: the device structure + * + * Configure hardware at the start of the device should be done only + * once at the device probe time + */ +static void mei_txe_hw_config(struct mei_device *dev) +{ + + struct mei_txe_hw *hw = to_txe_hw(dev); + + /* Doesn't change in runtime */ + dev->hbuf_depth = PAYLOAD_SIZE / 4; + + hw->aliveness = mei_txe_aliveness_get(dev); + hw->readiness = mei_txe_readiness_get(dev); + + dev_dbg(dev->dev, "aliveness_resp = 0x%08x, readiness = 0x%08x.\n", + hw->aliveness, hw->readiness); +} + + +/** + * mei_txe_write - writes a message to device. + * + * @dev: the device structure + * @header: header of message + * @buf: message buffer will be written + * + * Return: 0 if success, <0 - otherwise. + */ + +static int mei_txe_write(struct mei_device *dev, + struct mei_msg_hdr *header, unsigned char *buf) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + unsigned long rem; + unsigned long length; + int slots = dev->hbuf_depth; + u32 *reg_buf = (u32 *)buf; + u32 dw_cnt; + int i; + + if (WARN_ON(!header || !buf)) + return -EINVAL; + + length = header->length; + + dev_dbg(dev->dev, MEI_HDR_FMT, MEI_HDR_PRM(header)); + + dw_cnt = mei_data2slots(length); + if (dw_cnt > slots) + return -EMSGSIZE; + + if (WARN(!hw->aliveness, "txe write: aliveness not asserted\n")) + return -EAGAIN; + + /* Enable Input Ready Interrupt. */ + mei_txe_input_ready_interrupt_enable(dev); + + if (!mei_txe_is_input_ready(dev)) { + char fw_sts_str[MEI_FW_STATUS_STR_SZ]; + + mei_fw_status_str(dev, fw_sts_str, MEI_FW_STATUS_STR_SZ); + dev_err(dev->dev, "Input is not ready %s\n", fw_sts_str); + return -EAGAIN; + } + + mei_txe_input_payload_write(dev, 0, *((u32 *)header)); + + for (i = 0; i < length / 4; i++) + mei_txe_input_payload_write(dev, i + 1, reg_buf[i]); + + rem = length & 0x3; + if (rem > 0) { + u32 reg = 0; + + memcpy(®, &buf[length - rem], rem); + mei_txe_input_payload_write(dev, i + 1, reg); + } + + /* after each write the whole buffer is consumed */ + hw->slots = 0; + + /* Set Input-Doorbell */ + mei_txe_input_doorbell_set(hw); + + return 0; +} + +/** + * mei_txe_hbuf_max_len - mimics the me hbuf circular buffer + * + * @dev: the device structure + * + * Return: the PAYLOAD_SIZE - 4 + */ +static size_t mei_txe_hbuf_max_len(const struct mei_device *dev) +{ + return PAYLOAD_SIZE - sizeof(struct mei_msg_hdr); +} + +/** + * mei_txe_hbuf_empty_slots - mimics the me hbuf circular buffer + * + * @dev: the device structure + * + * Return: always hbuf_depth + */ +static int mei_txe_hbuf_empty_slots(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + return hw->slots; +} + +/** + * mei_txe_count_full_read_slots - mimics the me device circular buffer + * + * @dev: the device structure + * + * Return: always buffer size in dwords count + */ +static int mei_txe_count_full_read_slots(struct mei_device *dev) +{ + /* read buffers has static size */ + return PAYLOAD_SIZE / 4; +} + +/** + * mei_txe_read_hdr - read message header which is always in 4 first bytes + * + * @dev: the device structure + * + * Return: mei message header + */ + +static u32 mei_txe_read_hdr(const struct mei_device *dev) +{ + return mei_txe_out_data_read(dev, 0); +} +/** + * mei_txe_read - reads a message from the txe device. 
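+ *
+ * The first dword of the output payload RAM is the message header and
+ * is skipped here (mei_txe_read_hdr() above fetches it separately):
+ * payload dwords are copied starting at index 1, a trailing partial
+ * dword is byte-copied, and the output-ready bit is re-armed at the
+ * end.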
+ * + * @dev: the device structure + * @buf: message buffer will be written + * @len: message size will be read + * + * Return: -EINVAL on error wrong argument and 0 on success + */ +static int mei_txe_read(struct mei_device *dev, + unsigned char *buf, unsigned long len) +{ + + struct mei_txe_hw *hw = to_txe_hw(dev); + u32 *reg_buf, reg; + u32 rem; + u32 i; + + if (WARN_ON(!buf || !len)) + return -EINVAL; + + reg_buf = (u32 *)buf; + rem = len & 0x3; + + dev_dbg(dev->dev, "buffer-length = %lu buf[0]0x%08X\n", + len, mei_txe_out_data_read(dev, 0)); + + for (i = 0; i < len / 4; i++) { + /* skip header: index starts from 1 */ + reg = mei_txe_out_data_read(dev, i + 1); + dev_dbg(dev->dev, "buf[%d] = 0x%08X\n", i, reg); + *reg_buf++ = reg; + } + + if (rem) { + reg = mei_txe_out_data_read(dev, i + 1); + memcpy(reg_buf, ®, rem); + } + + mei_txe_output_ready_set(hw); + return 0; +} + +/** + * mei_txe_hw_reset - resets host and fw. + * + * @dev: the device structure + * @intr_enable: if interrupt should be enabled after reset. + * + * Return: 0 on success and < 0 in case of error + */ +static int mei_txe_hw_reset(struct mei_device *dev, bool intr_enable) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + u32 aliveness_req; + /* + * read input doorbell to ensure consistency between Bridge and SeC + * return value might be garbage return + */ + (void)mei_txe_sec_reg_read_silent(hw, SEC_IPC_INPUT_DOORBELL_REG); + + aliveness_req = mei_txe_aliveness_req_get(dev); + hw->aliveness = mei_txe_aliveness_get(dev); + + /* Disable interrupts in this stage we will poll */ + mei_txe_intr_disable(dev); + + /* + * If Aliveness Request and Aliveness Response are not equal then + * wait for them to be equal + * Since we might have interrupts disabled - poll for it + */ + if (aliveness_req != hw->aliveness) + if (mei_txe_aliveness_poll(dev, aliveness_req) < 0) { + dev_err(dev->dev, "wait for aliveness settle failed ... bailing out\n"); + return -EIO; + } + + /* + * If Aliveness Request and Aliveness Response are set then clear them + */ + if (aliveness_req) { + mei_txe_aliveness_set(dev, 0); + if (mei_txe_aliveness_poll(dev, 0) < 0) { + dev_err(dev->dev, "wait for aliveness failed ... bailing out\n"); + return -EIO; + } + } + + /* + * Set readiness RDY_CLR bit + */ + mei_txe_readiness_clear(dev); + + return 0; +} + +/** + * mei_txe_hw_start - start the hardware after reset + * + * @dev: the device structure + * + * Return: 0 on success an error code otherwise + */ +static int mei_txe_hw_start(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + int ret; + + u32 hisr; + + /* bring back interrupts */ + mei_txe_intr_enable(dev); + + ret = mei_txe_readiness_wait(dev); + if (ret < 0) { + dev_err(dev->dev, "waiting for readiness failed\n"); + return ret; + } + + /* + * If HISR.INT2_STS interrupt status bit is set then clear it. + */ + hisr = mei_txe_br_reg_read(hw, HISR_REG); + if (hisr & HISR_INT_2_STS) + mei_txe_br_reg_write(hw, HISR_REG, HISR_INT_2_STS); + + /* Clear the interrupt cause of OutputDoorbell */ + clear_bit(TXE_INTR_OUT_DB_BIT, &hw->intr_cause); + + ret = mei_txe_aliveness_set_sync(dev, 1); + if (ret < 0) { + dev_err(dev->dev, "wait for aliveness failed ... 
bailing out\n"); + return ret; + } + + /* enable input ready interrupts: + * SEC_IPC_HOST_INT_MASK.IPC_INPUT_READY_INT_MASK + */ + mei_txe_input_ready_interrupt_enable(dev); + + + /* Set the SICR_SEC_IPC_OUTPUT_STATUS.IPC_OUTPUT_READY bit */ + mei_txe_output_ready_set(hw); + + /* Set bit SICR_HOST_IPC_READINESS.HOST_RDY + */ + mei_txe_readiness_set_host_rdy(dev); + + return 0; +} + +/** + * mei_txe_check_and_ack_intrs - translate multi BAR interrupt into + * single bit mask and acknowledge the interrupts + * + * @dev: the device structure + * @do_ack: acknowledge interrupts + * + * Return: true if found interrupts to process. + */ +static bool mei_txe_check_and_ack_intrs(struct mei_device *dev, bool do_ack) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + u32 hisr; + u32 hhisr; + u32 ipc_isr; + u32 aliveness; + bool generated; + + /* read interrupt registers */ + hhisr = mei_txe_br_reg_read(hw, HHISR_REG); + generated = (hhisr & IPC_HHIER_MSK); + if (!generated) + goto out; + + hisr = mei_txe_br_reg_read(hw, HISR_REG); + + aliveness = mei_txe_aliveness_get(dev); + if (hhisr & IPC_HHIER_SEC && aliveness) + ipc_isr = mei_txe_sec_reg_read_silent(hw, + SEC_IPC_HOST_INT_STATUS_REG); + else + ipc_isr = 0; + + generated = generated || + (hisr & HISR_INT_STS_MSK) || + (ipc_isr & SEC_IPC_HOST_INT_STATUS_PENDING); + + if (generated && do_ack) { + /* Save the interrupt causes */ + hw->intr_cause |= hisr & HISR_INT_STS_MSK; + if (ipc_isr & SEC_IPC_HOST_INT_STATUS_IN_RDY) + hw->intr_cause |= TXE_INTR_IN_READY; + + + mei_txe_intr_disable(dev); + /* Clear the interrupts in hierarchy: + * IPC and Bridge, than the High Level */ + mei_txe_sec_reg_write_silent(hw, + SEC_IPC_HOST_INT_STATUS_REG, ipc_isr); + mei_txe_br_reg_write(hw, HISR_REG, hisr); + mei_txe_br_reg_write(hw, HHISR_REG, hhisr); + } + +out: + return generated; +} + +/** + * mei_txe_irq_quick_handler - The ISR of the MEI device + * + * @irq: The irq number + * @dev_id: pointer to the device structure + * + * Return: IRQ_WAKE_THREAD if interrupt is designed for the device + * IRQ_NONE otherwise + */ +irqreturn_t mei_txe_irq_quick_handler(int irq, void *dev_id) +{ + struct mei_device *dev = dev_id; + + if (mei_txe_check_and_ack_intrs(dev, true)) + return IRQ_WAKE_THREAD; + return IRQ_NONE; +} + + +/** + * mei_txe_irq_thread_handler - txe interrupt thread + * + * @irq: The irq number + * @dev_id: pointer to the device structure + * + * Return: IRQ_HANDLED + */ +irqreturn_t mei_txe_irq_thread_handler(int irq, void *dev_id) +{ + struct mei_device *dev = (struct mei_device *) dev_id; + struct mei_txe_hw *hw = to_txe_hw(dev); + struct mei_cl_cb complete_list; + s32 slots; + int rets = 0; + + dev_dbg(dev->dev, "irq thread: Interrupt Registers HHISR|HISR|SEC=%02X|%04X|%02X\n", + mei_txe_br_reg_read(hw, HHISR_REG), + mei_txe_br_reg_read(hw, HISR_REG), + mei_txe_sec_reg_read_silent(hw, SEC_IPC_HOST_INT_STATUS_REG)); + + + /* initialize our complete list */ + mutex_lock(&dev->device_lock); + mei_io_list_init(&complete_list); + + if (pci_dev_msi_enabled(to_pci_dev(dev->dev))) + mei_txe_check_and_ack_intrs(dev, true); + + /* show irq events */ + mei_txe_pending_interrupts(dev); + + hw->aliveness = mei_txe_aliveness_get(dev); + hw->readiness = mei_txe_readiness_get(dev); + + /* Readiness: + * Detection of TXE driver going through reset + * or TXE driver resetting the HECI interface. 
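+	 * If SeC reports ready, recvd_hw_ready is latched and the waiter
+	 * in mei_txe_readiness_wait() is woken; otherwise, unless a reset
+	 * is already in flight, a full driver reset is scheduled.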
+ */ + if (test_and_clear_bit(TXE_INTR_READINESS_BIT, &hw->intr_cause)) { + dev_dbg(dev->dev, "Readiness Interrupt was received...\n"); + + /* Check if SeC is going through reset */ + if (mei_txe_readiness_is_sec_rdy(hw->readiness)) { + dev_dbg(dev->dev, "we need to start the dev.\n"); + dev->recvd_hw_ready = true; + } else { + dev->recvd_hw_ready = false; + if (dev->dev_state != MEI_DEV_RESETTING) { + + dev_warn(dev->dev, "FW not ready: resetting.\n"); + schedule_work(&dev->reset_work); + goto end; + + } + } + wake_up(&dev->wait_hw_ready); + } + + /************************************************************/ + /* Check interrupt cause: + * Aliveness: Detection of SeC acknowledge of host request that + * it remain alive or host cancellation of that request. + */ + + if (test_and_clear_bit(TXE_INTR_ALIVENESS_BIT, &hw->intr_cause)) { + /* Clear the interrupt cause */ + dev_dbg(dev->dev, + "Aliveness Interrupt: Status: %d\n", hw->aliveness); + dev->pg_event = MEI_PG_EVENT_RECEIVED; + if (waitqueue_active(&hw->wait_aliveness_resp)) + wake_up(&hw->wait_aliveness_resp); + } + + + /* Output Doorbell: + * Detection of SeC having sent output to host + */ + slots = mei_count_full_read_slots(dev); + if (test_and_clear_bit(TXE_INTR_OUT_DB_BIT, &hw->intr_cause)) { + /* Read from TXE */ + rets = mei_irq_read_handler(dev, &complete_list, &slots); + if (rets && dev->dev_state != MEI_DEV_RESETTING) { + dev_err(dev->dev, + "mei_irq_read_handler ret = %d.\n", rets); + + schedule_work(&dev->reset_work); + goto end; + } + } + /* Input Ready: Detection if host can write to SeC */ + if (test_and_clear_bit(TXE_INTR_IN_READY_BIT, &hw->intr_cause)) { + dev->hbuf_is_ready = true; + hw->slots = dev->hbuf_depth; + } + + if (hw->aliveness && dev->hbuf_is_ready) { + /* get the real register value */ + dev->hbuf_is_ready = mei_hbuf_is_ready(dev); + rets = mei_irq_write_handler(dev, &complete_list); + if (rets && rets != -EMSGSIZE) + dev_err(dev->dev, "mei_irq_write_handler ret = %d.\n", + rets); + dev->hbuf_is_ready = mei_hbuf_is_ready(dev); + } + + mei_irq_compl_handler(dev, &complete_list); + +end: + dev_dbg(dev->dev, "interrupt thread end ret = %d\n", rets); + + mutex_unlock(&dev->device_lock); + + mei_enable_interrupts(dev); + return IRQ_HANDLED; +} + +static const struct mei_hw_ops mei_txe_hw_ops = { + + .host_is_ready = mei_txe_host_is_ready, + + .fw_status = mei_txe_fw_status, + .pg_state = mei_txe_pg_state, + + .hw_is_ready = mei_txe_hw_is_ready, + .hw_reset = mei_txe_hw_reset, + .hw_config = mei_txe_hw_config, + .hw_start = mei_txe_hw_start, + + .pg_in_transition = mei_txe_pg_in_transition, + .pg_is_enabled = mei_txe_pg_is_enabled, + + .intr_clear = mei_txe_intr_clear, + .intr_enable = mei_txe_intr_enable, + .intr_disable = mei_txe_intr_disable, + + .hbuf_free_slots = mei_txe_hbuf_empty_slots, + .hbuf_is_ready = mei_txe_is_input_ready, + .hbuf_max_len = mei_txe_hbuf_max_len, + + .write = mei_txe_write, + + .rdbuf_full_slots = mei_txe_count_full_read_slots, + .read_hdr = mei_txe_read_hdr, + + .read = mei_txe_read, + +}; + +/** + * mei_txe_dev_init - allocates and initializes txe hardware specific structure + * + * @pdev: pci device + * + * Return: struct mei_device * on success or NULL + */ +struct mei_device *mei_txe_dev_init(struct pci_dev *pdev) +{ + struct mei_device *dev; + struct mei_txe_hw *hw; + + dev = kzalloc(sizeof(struct mei_device) + + sizeof(struct mei_txe_hw), GFP_KERNEL); + if (!dev) + return NULL; + + mei_device_init(dev, &pdev->dev, &mei_txe_hw_ops); + + hw = to_txe_hw(dev); + + 
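+	/*
+	 * The txe hw area lives in the trailing storage of the single
+	 * kzalloc() above (see to_txe_hw()); only the aliveness wait
+	 * queue needs explicit initialization here.
+	 */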
+	init_waitqueue_head(&hw->wait_aliveness_resp);
+
+	return dev;
+}
+
+/**
+ * mei_txe_setup_satt2 - SATT2 configuration for DMA support.
+ *
+ * @dev: the device structure
+ * @addr: physical address start of the range
+ * @range: physical range size
+ *
+ * Return: 0 on success, an error code otherwise
+ */
+int mei_txe_setup_satt2(struct mei_device *dev, phys_addr_t addr, u32 range)
+{
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+
+	u32 lo32 = lower_32_bits(addr);
+	u32 hi32 = upper_32_bits(addr);
+	u32 ctrl;
+
+	/* SATT is limited to 36 bits */
+	if (hi32 & ~0xF)
+		return -EINVAL;
+
+	/* SATT has to be 16-byte aligned */
+	if (lo32 & 0xF)
+		return -EINVAL;
+
+	/* SATT range has to be 4-byte aligned */
+	if (range & 0x4)
+		return -EINVAL;
+
+	/* SATT is limited to a 32 MB range */
+	if (range > SATT_RANGE_MAX)
+		return -EINVAL;
+
+	ctrl = SATT2_CTRL_VALID_MSK;
+	ctrl |= hi32 << SATT2_CTRL_BR_BASE_ADDR_REG_SHIFT;
+
+	mei_txe_br_reg_write(hw, SATT2_SAP_SIZE_REG, range);
+	mei_txe_br_reg_write(hw, SATT2_BRG_BA_LSB_REG, lo32);
+	mei_txe_br_reg_write(hw, SATT2_CTRL_REG, ctrl);
+	dev_dbg(dev->dev, "SATT2: SAP_SIZE_OFFSET=0x%08X, BRG_BA_LSB_OFFSET=0x%08X, CTRL_OFFSET=0x%08X\n",
+		range, lo32, ctrl);
+
+	return 0;
+}
diff --git a/kernel/drivers/misc/mei/hw-txe.h b/kernel/drivers/misc/mei/hw-txe.h
new file mode 100644
index 000000000..ce3ed0b88
--- /dev/null
+++ b/kernel/drivers/misc/mei/hw-txe.h
@@ -0,0 +1,75 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ * + */ + +#ifndef _MEI_HW_TXE_H_ +#define _MEI_HW_TXE_H_ + +#include <linux/irqreturn.h> + +#include "hw.h" +#include "hw-txe-regs.h" + +#define MEI_TXI_RPM_TIMEOUT 500 /* ms */ + +/* Flatten Hierarchy interrupt cause */ +#define TXE_INTR_READINESS_BIT 0 /* HISR_INT_0_STS */ +#define TXE_INTR_READINESS HISR_INT_0_STS +#define TXE_INTR_ALIVENESS_BIT 1 /* HISR_INT_1_STS */ +#define TXE_INTR_ALIVENESS HISR_INT_1_STS +#define TXE_INTR_OUT_DB_BIT 2 /* HISR_INT_2_STS */ +#define TXE_INTR_OUT_DB HISR_INT_2_STS +#define TXE_INTR_IN_READY_BIT 8 /* beyond HISR */ +#define TXE_INTR_IN_READY BIT(8) + +/** + * struct mei_txe_hw - txe hardware specifics + * + * @mem_addr: SeC and BRIDGE bars + * @aliveness: aliveness (power gating) state of the hardware + * @readiness: readiness state of the hardware + * @slots: number of empty slots + * @wait_aliveness_resp: aliveness wait queue + * @intr_cause: translated interrupt cause + */ +struct mei_txe_hw { + void __iomem *mem_addr[NUM_OF_MEM_BARS]; + u32 aliveness; + u32 readiness; + u32 slots; + + wait_queue_head_t wait_aliveness_resp; + + unsigned long intr_cause; +}; + +#define to_txe_hw(dev) (struct mei_txe_hw *)((dev)->hw) + +static inline struct mei_device *hw_txe_to_mei(struct mei_txe_hw *hw) +{ + return container_of((void *)hw, struct mei_device, hw); +} + +struct mei_device *mei_txe_dev_init(struct pci_dev *pdev); + +irqreturn_t mei_txe_irq_quick_handler(int irq, void *dev_id); +irqreturn_t mei_txe_irq_thread_handler(int irq, void *dev_id); + +int mei_txe_aliveness_set_sync(struct mei_device *dev, u32 req); + +int mei_txe_setup_satt2(struct mei_device *dev, phys_addr_t addr, u32 range); + + +#endif /* _MEI_HW_TXE_H_ */ diff --git a/kernel/drivers/misc/mei/hw.h b/kernel/drivers/misc/mei/hw.h new file mode 100644 index 000000000..16fef6dc4 --- /dev/null +++ b/kernel/drivers/misc/mei/hw.h @@ -0,0 +1,302 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2012, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#ifndef _MEI_HW_TYPES_H_ +#define _MEI_HW_TYPES_H_ + +#include <linux/uuid.h> + +/* + * Timeouts in Seconds + */ +#define MEI_HW_READY_TIMEOUT 2 /* Timeout on ready message */ +#define MEI_CONNECT_TIMEOUT 3 /* HPS: at least 2 seconds */ + +#define MEI_CL_CONNECT_TIMEOUT 15 /* HPS: Client Connect Timeout */ +#define MEI_CLIENTS_INIT_TIMEOUT 15 /* HPS: Clients Enumeration Timeout */ + +#define MEI_IAMTHIF_STALL_TIMER 12 /* HPS */ +#define MEI_IAMTHIF_READ_TIMER 10 /* HPS */ + +#define MEI_PGI_TIMEOUT 1 /* PG Isolation time response 1 sec */ +#define MEI_HBM_TIMEOUT 1 /* 1 second */ + +/* + * MEI Version + */ +#define HBM_MINOR_VERSION 1 +#define HBM_MAJOR_VERSION 1 + +/* + * MEI version with PGI support + */ +#define HBM_MINOR_VERSION_PGI 1 +#define HBM_MAJOR_VERSION_PGI 1 + +/* Host bus message command opcode */ +#define MEI_HBM_CMD_OP_MSK 0x7f +/* Host bus message command RESPONSE */ +#define MEI_HBM_CMD_RES_MSK 0x80 + +/* + * MEI Bus Message Command IDs + */ +#define HOST_START_REQ_CMD 0x01 +#define HOST_START_RES_CMD 0x81 + +#define HOST_STOP_REQ_CMD 0x02 +#define HOST_STOP_RES_CMD 0x82 + +#define ME_STOP_REQ_CMD 0x03 + +#define HOST_ENUM_REQ_CMD 0x04 +#define HOST_ENUM_RES_CMD 0x84 + +#define HOST_CLIENT_PROPERTIES_REQ_CMD 0x05 +#define HOST_CLIENT_PROPERTIES_RES_CMD 0x85 + +#define CLIENT_CONNECT_REQ_CMD 0x06 +#define CLIENT_CONNECT_RES_CMD 0x86 + +#define CLIENT_DISCONNECT_REQ_CMD 0x07 +#define CLIENT_DISCONNECT_RES_CMD 0x87 + +#define MEI_FLOW_CONTROL_CMD 0x08 + +#define MEI_PG_ISOLATION_ENTRY_REQ_CMD 0x0a +#define MEI_PG_ISOLATION_ENTRY_RES_CMD 0x8a +#define MEI_PG_ISOLATION_EXIT_REQ_CMD 0x0b +#define MEI_PG_ISOLATION_EXIT_RES_CMD 0x8b + +/* + * MEI Stop Reason + * used by hbm_host_stop_request.reason + */ +enum mei_stop_reason_types { + DRIVER_STOP_REQUEST = 0x00, + DEVICE_D1_ENTRY = 0x01, + DEVICE_D2_ENTRY = 0x02, + DEVICE_D3_ENTRY = 0x03, + SYSTEM_S1_ENTRY = 0x04, + SYSTEM_S2_ENTRY = 0x05, + SYSTEM_S3_ENTRY = 0x06, + SYSTEM_S4_ENTRY = 0x07, + SYSTEM_S5_ENTRY = 0x08 +}; + + +/** + * enum mei_hbm_status - mei host bus messages return values + * + * @MEI_HBMS_SUCCESS : status success + * @MEI_HBMS_CLIENT_NOT_FOUND : client not found + * @MEI_HBMS_ALREADY_EXISTS : connection already established + * @MEI_HBMS_REJECTED : connection is rejected + * @MEI_HBMS_INVALID_PARAMETER : invalid parameter + * @MEI_HBMS_NOT_ALLOWED : operation not allowed + * @MEI_HBMS_ALREADY_STARTED : system is already started + * @MEI_HBMS_NOT_STARTED : system not started + * + * @MEI_HBMS_MAX : sentinel + */ +enum mei_hbm_status { + MEI_HBMS_SUCCESS = 0, + MEI_HBMS_CLIENT_NOT_FOUND = 1, + MEI_HBMS_ALREADY_EXISTS = 2, + MEI_HBMS_REJECTED = 3, + MEI_HBMS_INVALID_PARAMETER = 4, + MEI_HBMS_NOT_ALLOWED = 5, + MEI_HBMS_ALREADY_STARTED = 6, + MEI_HBMS_NOT_STARTED = 7, + + MEI_HBMS_MAX +}; + + +/* + * Client Connect Status + * used by hbm_client_connect_response.status + */ +enum mei_cl_connect_status { + MEI_CL_CONN_SUCCESS = MEI_HBMS_SUCCESS, + MEI_CL_CONN_NOT_FOUND = MEI_HBMS_CLIENT_NOT_FOUND, + MEI_CL_CONN_ALREADY_STARTED = MEI_HBMS_ALREADY_EXISTS, + MEI_CL_CONN_OUT_OF_RESOURCES = MEI_HBMS_REJECTED, + MEI_CL_CONN_MESSAGE_SMALL = MEI_HBMS_INVALID_PARAMETER, +}; + +/* + * Client Disconnect Status + */ +enum mei_cl_disconnect_status { + MEI_CL_DISCONN_SUCCESS = MEI_HBMS_SUCCESS +}; + +/* + * MEI BUS Interface Section + */ +struct mei_msg_hdr { + u32 me_addr:8; + u32 host_addr:8; + u32 length:9; + u32 reserved:5; + u32 internal:1; + u32 msg_complete:1; +} __packed; + + +struct mei_bus_message { + u8 
hbm_cmd; + u8 data[0]; +} __packed; + +/** + * struct hbm_cl_cmd - client specific host bus command + * CONNECT, DISCONNECT, and FlOW CONTROL + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @host_addr: address of the client in the driver + * @data: generic data + */ +struct mei_hbm_cl_cmd { + u8 hbm_cmd; + u8 me_addr; + u8 host_addr; + u8 data; +}; + +struct hbm_version { + u8 minor_version; + u8 major_version; +} __packed; + +struct hbm_host_version_request { + u8 hbm_cmd; + u8 reserved; + struct hbm_version host_version; +} __packed; + +struct hbm_host_version_response { + u8 hbm_cmd; + u8 host_version_supported; + struct hbm_version me_max_version; +} __packed; + +struct hbm_host_stop_request { + u8 hbm_cmd; + u8 reason; + u8 reserved[2]; +} __packed; + +struct hbm_host_stop_response { + u8 hbm_cmd; + u8 reserved[3]; +} __packed; + +struct hbm_me_stop_request { + u8 hbm_cmd; + u8 reason; + u8 reserved[2]; +} __packed; + +struct hbm_host_enum_request { + u8 hbm_cmd; + u8 reserved[3]; +} __packed; + +struct hbm_host_enum_response { + u8 hbm_cmd; + u8 reserved[3]; + u8 valid_addresses[32]; +} __packed; + +struct mei_client_properties { + uuid_le protocol_name; + u8 protocol_version; + u8 max_number_of_connections; + u8 fixed_address; + u8 single_recv_buf; + u32 max_msg_length; +} __packed; + +struct hbm_props_request { + u8 hbm_cmd; + u8 me_addr; + u8 reserved[2]; +} __packed; + +struct hbm_props_response { + u8 hbm_cmd; + u8 me_addr; + u8 status; + u8 reserved[1]; + struct mei_client_properties client_properties; +} __packed; + +/** + * struct hbm_power_gate - power gate request/response + * + * @hbm_cmd: bus message command header + * @reserved: reserved + */ +struct hbm_power_gate { + u8 hbm_cmd; + u8 reserved[3]; +} __packed; + +/** + * struct hbm_client_connect_request - connect/disconnect request + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @host_addr: address of the client in the driver + * @reserved: reserved + */ +struct hbm_client_connect_request { + u8 hbm_cmd; + u8 me_addr; + u8 host_addr; + u8 reserved; +} __packed; + +/** + * struct hbm_client_connect_response - connect/disconnect response + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @host_addr: address of the client in the driver + * @status: status of the request + */ +struct hbm_client_connect_response { + u8 hbm_cmd; + u8 me_addr; + u8 host_addr; + u8 status; +} __packed; + + +#define MEI_FC_MESSAGE_RESERVED_LENGTH 5 + +struct hbm_flow_control { + u8 hbm_cmd; + u8 me_addr; + u8 host_addr; + u8 reserved[MEI_FC_MESSAGE_RESERVED_LENGTH]; +} __packed; + + +#endif diff --git a/kernel/drivers/misc/mei/init.c b/kernel/drivers/misc/mei/init.c new file mode 100644 index 000000000..97353cf8d --- /dev/null +++ b/kernel/drivers/misc/mei/init.c @@ -0,0 +1,428 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2012, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/delay.h>
+
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+#include "hbm.h"
+#include "client.h"
+
+const char *mei_dev_state_str(int state)
+{
+#define MEI_DEV_STATE(state) case MEI_DEV_##state: return #state
+	switch (state) {
+	MEI_DEV_STATE(INITIALIZING);
+	MEI_DEV_STATE(INIT_CLIENTS);
+	MEI_DEV_STATE(ENABLED);
+	MEI_DEV_STATE(RESETTING);
+	MEI_DEV_STATE(DISABLED);
+	MEI_DEV_STATE(POWER_DOWN);
+	MEI_DEV_STATE(POWER_UP);
+	default:
+		return "unknown";
+	}
+#undef MEI_DEV_STATE
+}
+
+const char *mei_pg_state_str(enum mei_pg_state state)
+{
+#define MEI_PG_STATE(state) case MEI_PG_##state: return #state
+	switch (state) {
+	MEI_PG_STATE(OFF);
+	MEI_PG_STATE(ON);
+	default:
+		return "unknown";
+	}
+#undef MEI_PG_STATE
+}
+
+/**
+ * mei_fw_status2str - convert fw status registers to printable string
+ *
+ * @fw_status: firmware status
+ * @buf: string buffer of at least MEI_FW_STATUS_STR_SZ bytes
+ * @len: buffer length, must be >= MEI_FW_STATUS_STR_SZ
+ *
+ * Return: number of bytes written or -EINVAL if the buffer is too small
+ */
+ssize_t mei_fw_status2str(struct mei_fw_status *fw_status,
+			  char *buf, size_t len)
+{
+	ssize_t cnt = 0;
+	int i;
+
+	buf[0] = '\0';
+
+	if (len < MEI_FW_STATUS_STR_SZ)
+		return -EINVAL;
+
+	for (i = 0; i < fw_status->count; i++)
+		cnt += scnprintf(buf + cnt, len - cnt, "%08X ",
+				 fw_status->status[i]);
+
+	/* drop last space */
+	buf[cnt] = '\0';
+	return cnt;
+}
+EXPORT_SYMBOL_GPL(mei_fw_status2str);
+
+/**
+ * mei_cancel_work - Cancel mei background jobs
+ *
+ * @dev: the device structure
+ */
+void mei_cancel_work(struct mei_device *dev)
+{
+	cancel_work_sync(&dev->init_work);
+	cancel_work_sync(&dev->reset_work);
+
+	cancel_delayed_work(&dev->timer_work);
+}
+EXPORT_SYMBOL_GPL(mei_cancel_work);
+
+/**
+ * mei_reset - resets host and fw.
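+ *
+ * Called with dev->device_lock held. Drives the state machine through
+ * MEI_DEV_RESETTING and, on the success path, back to MEI_DEV_INIT_CLIENTS
+ * via mei_hw_reset(), mei_hw_start() and mei_hbm_start_req().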
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success or < 0 if the reset hasn't succeeded
+ */
+int mei_reset(struct mei_device *dev)
+{
+	enum mei_dev_state state = dev->dev_state;
+	bool interrupts_enabled;
+	int ret;
+
+	if (state != MEI_DEV_INITIALIZING &&
+	    state != MEI_DEV_DISABLED &&
+	    state != MEI_DEV_POWER_DOWN &&
+	    state != MEI_DEV_POWER_UP) {
+		char fw_sts_str[MEI_FW_STATUS_STR_SZ];
+
+		mei_fw_status_str(dev, fw_sts_str, MEI_FW_STATUS_STR_SZ);
+		dev_warn(dev->dev, "unexpected reset: dev_state = %s fw status = %s\n",
+			 mei_dev_state_str(state), fw_sts_str);
+	}
+
+	/* we're already in reset: cancel the init clients timer.
+	 * If the reset was called due to an hbm protocol error,
+	 * this needs to happen before hw start
+	 * so the hbm watchdog won't kick in.
+	 */
+	mei_hbm_idle(dev);
+
+	/* enter reset flow */
+	interrupts_enabled = state != MEI_DEV_POWER_DOWN;
+	dev->dev_state = MEI_DEV_RESETTING;
+
+	dev->reset_count++;
+	if (dev->reset_count > MEI_MAX_CONSEC_RESET) {
+		dev_err(dev->dev, "reset: reached maximal consecutive resets: disabling the device\n");
+		dev->dev_state = MEI_DEV_DISABLED;
+		return -ENODEV;
+	}
+
+	ret = mei_hw_reset(dev, interrupts_enabled);
+	/* fall through and remove the sw state even if hw reset has failed */
+
+	/* no need to clean up software state in case of power up */
+	if (state != MEI_DEV_INITIALIZING &&
+	    state != MEI_DEV_POWER_UP) {
+
+		/* remove all waiting requests */
+		mei_cl_all_write_clear(dev);
+
+		mei_cl_all_disconnect(dev);
+
+		/* wake up all readers and writers so they can be interrupted */
+		mei_cl_all_wakeup(dev);
+
+		/* remove entry if already in list */
+		dev_dbg(dev->dev, "remove iamthif and wd from the file list.\n");
+		mei_cl_unlink(&dev->wd_cl);
+		mei_cl_unlink(&dev->iamthif_cl);
+		mei_amthif_reset_params(dev);
+	}
+
+	mei_hbm_reset(dev);
+
+	dev->rd_msg_hdr = 0;
+	dev->wd_pending = false;
+
+	if (ret) {
+		dev_err(dev->dev, "hw_reset failed ret = %d\n", ret);
+		return ret;
+	}
+
+	if (state == MEI_DEV_POWER_DOWN) {
+		dev_dbg(dev->dev, "powering down: end of reset\n");
+		dev->dev_state = MEI_DEV_DISABLED;
+		return 0;
+	}
+
+	ret = mei_hw_start(dev);
+	if (ret) {
+		dev_err(dev->dev, "hw_start failed ret = %d\n", ret);
+		return ret;
+	}
+
+	dev_dbg(dev->dev, "link is established start sending messages.\n");
+
+	dev->dev_state = MEI_DEV_INIT_CLIENTS;
+	ret = mei_hbm_start_req(dev);
+	if (ret) {
+		dev_err(dev->dev, "hbm_start failed ret = %d\n", ret);
+		dev->dev_state = MEI_DEV_RESETTING;
+		return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mei_reset);
+
+/**
+ * mei_start - initializes host and fw to start work.
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success, <0 on failure.
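+ *
+ * A minimal caller sketch (a hypothetical transport probe path; my_hw_ops
+ * and parent are assumptions, not defined in this file):
+ *
+ *	mei_device_init(dev, parent, &my_hw_ops);
+ *	if (mei_start(dev))
+ *		goto err;
+ *	mei_register(dev, parent);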
+ */
+int mei_start(struct mei_device *dev)
+{
+	int ret;
+
+	mutex_lock(&dev->device_lock);
+
+	/* acknowledge interrupt and stop interrupts */
+	mei_clear_interrupts(dev);
+
+	mei_hw_config(dev);
+
+	dev_dbg(dev->dev, "resetting and starting the mei device.\n");
+
+	dev->reset_count = 0;
+	do {
+		dev->dev_state = MEI_DEV_INITIALIZING;
+		ret = mei_reset(dev);
+
+		if (ret == -ENODEV || dev->dev_state == MEI_DEV_DISABLED) {
+			dev_err(dev->dev, "reset failed ret = %d", ret);
+			goto err;
+		}
+	} while (ret);
+
+	/* we cannot start the device without the hbm start message completed */
+	if (dev->dev_state == MEI_DEV_DISABLED) {
+		dev_err(dev->dev, "reset failed");
+		goto err;
+	}
+
+	if (mei_hbm_start_wait(dev)) {
+		dev_err(dev->dev, "HBM has not started");
+		goto err;
+	}
+
+	if (!mei_host_is_ready(dev)) {
+		dev_err(dev->dev, "host is not ready.\n");
+		goto err;
+	}
+
+	if (!mei_hw_is_ready(dev)) {
+		dev_err(dev->dev, "ME is not ready.\n");
+		goto err;
+	}
+
+	if (!mei_hbm_version_is_supported(dev)) {
+		dev_dbg(dev->dev, "MEI start failed.\n");
+		goto err;
+	}
+
+	dev_dbg(dev->dev, "link layer has been established.\n");
+
+	mutex_unlock(&dev->device_lock);
+	return 0;
+err:
+	dev_err(dev->dev, "link layer initialization failed.\n");
+	dev->dev_state = MEI_DEV_DISABLED;
+	mutex_unlock(&dev->device_lock);
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(mei_start);
+
+/**
+ * mei_restart - restart device after suspend
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success or -ENODEV if the restart hasn't succeeded
+ */
+int mei_restart(struct mei_device *dev)
+{
+	int err;
+
+	mutex_lock(&dev->device_lock);
+
+	mei_clear_interrupts(dev);
+
+	dev->dev_state = MEI_DEV_POWER_UP;
+	dev->reset_count = 0;
+
+	err = mei_reset(dev);
+
+	mutex_unlock(&dev->device_lock);
+
+	if (err == -ENODEV || dev->dev_state == MEI_DEV_DISABLED) {
+		dev_err(dev->dev, "device disabled = %d\n", err);
+		return -ENODEV;
+	}
+
+	/* try to start again */
+	if (err)
+		schedule_work(&dev->reset_work);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mei_restart);
+
+static void mei_reset_work(struct work_struct *work)
+{
+	struct mei_device *dev =
+		container_of(work, struct mei_device, reset_work);
+	int ret;
+
+	mutex_lock(&dev->device_lock);
+
+	ret = mei_reset(dev);
+
+	mutex_unlock(&dev->device_lock);
+
+	if (dev->dev_state == MEI_DEV_DISABLED) {
+		dev_err(dev->dev, "device disabled = %d\n", ret);
+		return;
+	}
+
+	/* retry reset in case of failure */
+	if (ret)
+		schedule_work(&dev->reset_work);
+}
+
+void mei_stop(struct mei_device *dev)
+{
+	dev_dbg(dev->dev, "stopping the device.\n");
+
+	mei_cancel_work(dev);
+
+	mei_nfc_host_exit(dev);
+
+	mei_cl_bus_remove_devices(dev);
+
+	mutex_lock(&dev->device_lock);
+
+	mei_wd_stop(dev);
+
+	dev->dev_state = MEI_DEV_POWER_DOWN;
+	mei_reset(dev);
+	/* move device to disabled state unconditionally */
+	dev->dev_state = MEI_DEV_DISABLED;
+
+	mutex_unlock(&dev->device_lock);
+
+	mei_watchdog_unregister(dev);
+}
+EXPORT_SYMBOL_GPL(mei_stop);
+
+/**
+ * mei_write_is_idle - check if the write queues are idle
+ *
+ * @dev: the device structure
+ *
+ * Return: true if there is no pending write
+ */
+bool mei_write_is_idle(struct mei_device *dev)
+{
+	bool idle = (dev->dev_state == MEI_DEV_ENABLED &&
+		     list_empty(&dev->ctrl_wr_list.list) &&
+		     list_empty(&dev->write_list.list));
+
+	dev_dbg(dev->dev, "write pg: is idle[%d] state=%s ctrl=%d write=%d\n",
+		idle,
+		mei_dev_state_str(dev->dev_state),
+		list_empty(&dev->ctrl_wr_list.list),
+		list_empty(&dev->write_list.list));
+
+	return idle;
+}
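+
+/*
+ * Usage note (a sketch, not a caller in this file): power gating code is
+ * expected to consult mei_write_is_idle() before letting the device enter
+ * a low power state, e.g.
+ *
+ *	if (mei_write_is_idle(dev))
+ *		request_pg(dev);
+ *
+ * where request_pg() stands in for the hw-specific PG entry request.
+ */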
+EXPORT_SYMBOL_GPL(mei_write_is_idle); + +/** + * mei_device_init -- initialize mei_device structure + * + * @dev: the mei device + * @device: the device structure + * @hw_ops: hw operations + */ +void mei_device_init(struct mei_device *dev, + struct device *device, + const struct mei_hw_ops *hw_ops) +{ + /* setup our list array */ + INIT_LIST_HEAD(&dev->file_list); + INIT_LIST_HEAD(&dev->device_list); + INIT_LIST_HEAD(&dev->me_clients); + mutex_init(&dev->device_lock); + init_rwsem(&dev->me_clients_rwsem); + init_waitqueue_head(&dev->wait_hw_ready); + init_waitqueue_head(&dev->wait_pg); + init_waitqueue_head(&dev->wait_hbm_start); + init_waitqueue_head(&dev->wait_stop_wd); + dev->dev_state = MEI_DEV_INITIALIZING; + dev->reset_count = 0; + + mei_io_list_init(&dev->write_list); + mei_io_list_init(&dev->write_waiting_list); + mei_io_list_init(&dev->ctrl_wr_list); + mei_io_list_init(&dev->ctrl_rd_list); + + INIT_DELAYED_WORK(&dev->timer_work, mei_timer); + INIT_WORK(&dev->init_work, mei_host_client_init); + INIT_WORK(&dev->reset_work, mei_reset_work); + + INIT_LIST_HEAD(&dev->wd_cl.link); + INIT_LIST_HEAD(&dev->iamthif_cl.link); + mei_io_list_init(&dev->amthif_cmd_list); + mei_io_list_init(&dev->amthif_rd_complete_list); + + bitmap_zero(dev->host_clients_map, MEI_CLIENTS_MAX); + dev->open_handle_count = 0; + + /* + * Reserving the first client ID + * 0: Reserved for MEI Bus Message communications + */ + bitmap_set(dev->host_clients_map, 0, 1); + + dev->pg_event = MEI_PG_EVENT_IDLE; + dev->ops = hw_ops; + dev->dev = device; +} +EXPORT_SYMBOL_GPL(mei_device_init); + diff --git a/kernel/drivers/misc/mei/interrupt.c b/kernel/drivers/misc/mei/interrupt.c new file mode 100644 index 000000000..3f84d2edc --- /dev/null +++ b/kernel/drivers/misc/mei/interrupt.c @@ -0,0 +1,610 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2012, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ *
+ */
+
+
+#include <linux/export.h>
+#include <linux/kthread.h>
+#include <linux/interrupt.h>
+#include <linux/fs.h>
+#include <linux/jiffies.h>
+#include <linux/slab.h>
+
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+#include "hbm.h"
+#include "client.h"
+
+
+/**
+ * mei_irq_compl_handler - dispatch complete handlers
+ *	for the completed callbacks
+ *
+ * @dev: mei device
+ * @compl_list: list of completed cbs
+ */
+void mei_irq_compl_handler(struct mei_device *dev, struct mei_cl_cb *compl_list)
+{
+	struct mei_cl_cb *cb, *next;
+	struct mei_cl *cl;
+
+	list_for_each_entry_safe(cb, next, &compl_list->list, list) {
+		cl = cb->cl;
+		list_del_init(&cb->list);
+
+		dev_dbg(dev->dev, "completing call back.\n");
+		if (cl == &dev->iamthif_cl)
+			mei_amthif_complete(dev, cb);
+		else
+			mei_cl_complete(cl, cb);
+	}
+}
+EXPORT_SYMBOL_GPL(mei_irq_compl_handler);
+
+/**
+ * mei_cl_hbm_equal - check if hbm is addressed to the client
+ *
+ * @cl: host client
+ * @mei_hdr: header of mei client message
+ *
+ * Return: true if matches, false otherwise
+ */
+static inline int mei_cl_hbm_equal(struct mei_cl *cl,
+				   struct mei_msg_hdr *mei_hdr)
+{
+	return cl->host_client_id == mei_hdr->host_addr &&
+	       cl->me_client_id == mei_hdr->me_addr;
+}
+
+/**
+ * mei_irq_discard_msg - discard received message
+ *
+ * @dev: mei device
+ * @hdr: message header
+ */
+static inline
+void mei_irq_discard_msg(struct mei_device *dev, struct mei_msg_hdr *hdr)
+{
+	/*
+	 * no need to check for size as it is guaranteed
+	 * that length fits into rd_msg_buf
+	 */
+	mei_read_slots(dev, dev->rd_msg_buf, hdr->length);
+	dev_dbg(dev->dev, "discarding message " MEI_HDR_FMT "\n",
+		MEI_HDR_PRM(hdr));
+}
+
+/**
+ * mei_cl_irq_read_msg - process client message
+ *
+ * @cl: reading client
+ * @mei_hdr: header of mei client message
+ * @complete_list: completion list
+ *
+ * Return: always 0
+ */
+int mei_cl_irq_read_msg(struct mei_cl *cl,
+			struct mei_msg_hdr *mei_hdr,
+			struct mei_cl_cb *complete_list)
+{
+	struct mei_device *dev = cl->dev;
+	struct mei_cl_cb *cb;
+	unsigned char *buffer = NULL;
+
+	cb = list_first_entry_or_null(&cl->rd_pending, struct mei_cl_cb, list);
+	if (!cb) {
+		cl_err(dev, cl, "pending read cb not found\n");
+		goto out;
+	}
+
+	if (!mei_cl_is_connected(cl)) {
+		cl_dbg(dev, cl, "not connected\n");
+		cb->status = -ENODEV;
+		goto out;
+	}
+
+	if (cb->buf.size == 0 || cb->buf.data == NULL) {
+		cl_err(dev, cl, "response buffer is not allocated.\n");
+		list_move_tail(&cb->list, &complete_list->list);
+		cb->status = -ENOMEM;
+		goto out;
+	}
+
+	if (cb->buf.size < mei_hdr->length + cb->buf_idx) {
+		cl_dbg(dev, cl, "message overflow. size %d len %d idx %ld\n",
+		       cb->buf.size, mei_hdr->length, cb->buf_idx);
+		buffer = krealloc(cb->buf.data, mei_hdr->length + cb->buf_idx,
+				  GFP_KERNEL);
+
+		if (!buffer) {
+			cb->status = -ENOMEM;
+			list_move_tail(&cb->list, &complete_list->list);
+			goto out;
+		}
+		cb->buf.data = buffer;
+		cb->buf.size = mei_hdr->length + cb->buf_idx;
+	}
+
+	buffer = cb->buf.data + cb->buf_idx;
+	mei_read_slots(dev, buffer, mei_hdr->length);
+
+	cb->buf_idx += mei_hdr->length;
+
+	if (mei_hdr->msg_complete) {
+		cb->read_time = jiffies;
+		cl_dbg(dev, cl, "completed read length = %lu\n", cb->buf_idx);
+		list_move_tail(&cb->list, &complete_list->list);
+	}
+
+out:
+	if (!buffer)
+		mei_irq_discard_msg(dev, mei_hdr);
+
+	return 0;
+}
+
+/**
+ * mei_cl_irq_disconnect_rsp - send disconnection response message
+ *
+ * @cl: client
+ * @cb: callback block.
+ * @cmpl_list: complete list.
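+ *
+ * Like the other mei_cl_irq_* helpers below, this first checks host buffer
+ * slot availability and bails out with -EMSGSIZE when the message does not
+ * fit, so the callback stays queued and can be retried on a later interrupt.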
+ * + * Return: 0, OK; otherwise, error. + */ +static int mei_cl_irq_disconnect_rsp(struct mei_cl *cl, struct mei_cl_cb *cb, + struct mei_cl_cb *cmpl_list) +{ + struct mei_device *dev = cl->dev; + u32 msg_slots; + int slots; + int ret; + + slots = mei_hbuf_empty_slots(dev); + msg_slots = mei_data2slots(sizeof(struct hbm_client_connect_response)); + + if (slots < msg_slots) + return -EMSGSIZE; + + ret = mei_hbm_cl_disconnect_rsp(dev, cl); + + cl->state = MEI_FILE_DISCONNECTED; + cl->status = 0; + mei_io_cb_free(cb); + + return ret; +} + + + +/** + * mei_cl_irq_disconnect - processes close related operation from + * interrupt thread context - send disconnect request + * + * @cl: client + * @cb: callback block. + * @cmpl_list: complete list. + * + * Return: 0, OK; otherwise, error. + */ +static int mei_cl_irq_disconnect(struct mei_cl *cl, struct mei_cl_cb *cb, + struct mei_cl_cb *cmpl_list) +{ + struct mei_device *dev = cl->dev; + u32 msg_slots; + int slots; + + msg_slots = mei_data2slots(sizeof(struct hbm_client_connect_request)); + slots = mei_hbuf_empty_slots(dev); + + if (slots < msg_slots) + return -EMSGSIZE; + + if (mei_hbm_cl_disconnect_req(dev, cl)) { + cl->status = 0; + cb->buf_idx = 0; + list_move_tail(&cb->list, &cmpl_list->list); + return -EIO; + } + + cl->state = MEI_FILE_DISCONNECTING; + cl->status = 0; + cb->buf_idx = 0; + list_move_tail(&cb->list, &dev->ctrl_rd_list.list); + cl->timer_count = MEI_CONNECT_TIMEOUT; + + return 0; +} + + +/** + * mei_cl_irq_read - processes client read related operation from the + * interrupt thread context - request for flow control credits + * + * @cl: client + * @cb: callback block. + * @cmpl_list: complete list. + * + * Return: 0, OK; otherwise, error. + */ +static int mei_cl_irq_read(struct mei_cl *cl, struct mei_cl_cb *cb, + struct mei_cl_cb *cmpl_list) +{ + struct mei_device *dev = cl->dev; + u32 msg_slots; + int slots; + int ret; + + msg_slots = mei_data2slots(sizeof(struct hbm_flow_control)); + slots = mei_hbuf_empty_slots(dev); + + if (slots < msg_slots) + return -EMSGSIZE; + + ret = mei_hbm_cl_flow_control_req(dev, cl); + if (ret) { + cl->status = ret; + cb->buf_idx = 0; + list_move_tail(&cb->list, &cmpl_list->list); + return ret; + } + + list_move_tail(&cb->list, &cl->rd_pending); + + return 0; +} + + +/** + * mei_cl_irq_connect - send connect request in irq_thread context + * + * @cl: client + * @cb: callback block. + * @cmpl_list: complete list. + * + * Return: 0, OK; otherwise, error. + */ +static int mei_cl_irq_connect(struct mei_cl *cl, struct mei_cl_cb *cb, + struct mei_cl_cb *cmpl_list) +{ + struct mei_device *dev = cl->dev; + u32 msg_slots; + int slots; + int ret; + + msg_slots = mei_data2slots(sizeof(struct hbm_client_connect_request)); + slots = mei_hbuf_empty_slots(dev); + + if (mei_cl_is_other_connecting(cl)) + return 0; + + if (slots < msg_slots) + return -EMSGSIZE; + + cl->state = MEI_FILE_CONNECTING; + + ret = mei_hbm_cl_connect_req(dev, cl); + if (ret) { + cl->status = ret; + cb->buf_idx = 0; + list_del_init(&cb->list); + return ret; + } + + list_move_tail(&cb->list, &dev->ctrl_rd_list.list); + cl->timer_count = MEI_CONNECT_TIMEOUT; + return 0; +} + + +/** + * mei_irq_read_handler - bottom half read routine after ISR to + * handle the read processing. + * + * @dev: the device structure + * @cmpl_list: An instance of our list structure + * @slots: slots to read. + * + * Return: 0 on success, <0 on failure. 
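+ *
+ * Slot accounting, worked example (see mei_data2slots() in mei_dev.h):
+ * a message with a 9 byte payload occupies
+ * DIV_ROUND_UP(sizeof(struct mei_msg_hdr) + 9, 4) = DIV_ROUND_UP(13, 4) = 4
+ * slots, i.e. one dword for the header and three dwords for the data.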
+ */ +int mei_irq_read_handler(struct mei_device *dev, + struct mei_cl_cb *cmpl_list, s32 *slots) +{ + struct mei_msg_hdr *mei_hdr; + struct mei_cl *cl; + int ret; + + if (!dev->rd_msg_hdr) { + dev->rd_msg_hdr = mei_read_hdr(dev); + (*slots)--; + dev_dbg(dev->dev, "slots =%08x.\n", *slots); + } + mei_hdr = (struct mei_msg_hdr *) &dev->rd_msg_hdr; + dev_dbg(dev->dev, MEI_HDR_FMT, MEI_HDR_PRM(mei_hdr)); + + if (mei_hdr->reserved || !dev->rd_msg_hdr) { + dev_err(dev->dev, "corrupted message header 0x%08X\n", + dev->rd_msg_hdr); + ret = -EBADMSG; + goto end; + } + + if (mei_slots2data(*slots) < mei_hdr->length) { + dev_err(dev->dev, "less data available than length=%08x.\n", + *slots); + /* we can't read the message */ + ret = -ENODATA; + goto end; + } + + /* HBM message */ + if (mei_hdr->host_addr == 0 && mei_hdr->me_addr == 0) { + ret = mei_hbm_dispatch(dev, mei_hdr); + if (ret) { + dev_dbg(dev->dev, "mei_hbm_dispatch failed ret = %d\n", + ret); + goto end; + } + goto reset_slots; + } + + /* find recipient cl */ + list_for_each_entry(cl, &dev->file_list, link) { + if (mei_cl_hbm_equal(cl, mei_hdr)) { + cl_dbg(dev, cl, "got a message\n"); + break; + } + } + + /* if no recipient cl was found we assume corrupted header */ + if (&cl->link == &dev->file_list) { + dev_err(dev->dev, "no destination client found 0x%08X\n", + dev->rd_msg_hdr); + ret = -EBADMSG; + goto end; + } + + if (cl == &dev->iamthif_cl) { + ret = mei_amthif_irq_read_msg(cl, mei_hdr, cmpl_list); + } else { + ret = mei_cl_irq_read_msg(cl, mei_hdr, cmpl_list); + } + + +reset_slots: + /* reset the number of slots and header */ + *slots = mei_count_full_read_slots(dev); + dev->rd_msg_hdr = 0; + + if (*slots == -EOVERFLOW) { + /* overflow - reset */ + dev_err(dev->dev, "resetting due to slots overflow.\n"); + /* set the event since message has been read */ + ret = -ERANGE; + goto end; + } +end: + return ret; +} +EXPORT_SYMBOL_GPL(mei_irq_read_handler); + + +/** + * mei_irq_write_handler - dispatch write requests + * after irq received + * + * @dev: the device structure + * @cmpl_list: An instance of our list structure + * + * Return: 0 on success, <0 on failure. 
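+ *
+ * Queues are drained in a fixed order: write-waiting completions first,
+ * then the watchdog, then the control write list (connect, disconnect,
+ * flow control) and finally the data write list.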
+ */ +int mei_irq_write_handler(struct mei_device *dev, struct mei_cl_cb *cmpl_list) +{ + + struct mei_cl *cl; + struct mei_cl_cb *cb, *next; + struct mei_cl_cb *list; + s32 slots; + int ret; + + + if (!mei_hbuf_acquire(dev)) + return 0; + + slots = mei_hbuf_empty_slots(dev); + if (slots <= 0) + return -EMSGSIZE; + + /* complete all waiting for write CB */ + dev_dbg(dev->dev, "complete all waiting for write cb.\n"); + + list = &dev->write_waiting_list; + list_for_each_entry_safe(cb, next, &list->list, list) { + cl = cb->cl; + + cl->status = 0; + cl_dbg(dev, cl, "MEI WRITE COMPLETE\n"); + cl->writing_state = MEI_WRITE_COMPLETE; + list_move_tail(&cb->list, &cmpl_list->list); + } + + if (dev->wd_state == MEI_WD_STOPPING) { + dev->wd_state = MEI_WD_IDLE; + wake_up(&dev->wait_stop_wd); + } + + if (mei_cl_is_connected(&dev->wd_cl)) { + if (dev->wd_pending && + mei_cl_flow_ctrl_creds(&dev->wd_cl) > 0) { + ret = mei_wd_send(dev); + if (ret) + return ret; + dev->wd_pending = false; + } + } + + /* complete control write list CB */ + dev_dbg(dev->dev, "complete control write list cb.\n"); + list_for_each_entry_safe(cb, next, &dev->ctrl_wr_list.list, list) { + cl = cb->cl; + switch (cb->fop_type) { + case MEI_FOP_DISCONNECT: + /* send disconnect message */ + ret = mei_cl_irq_disconnect(cl, cb, cmpl_list); + if (ret) + return ret; + + break; + case MEI_FOP_READ: + /* send flow control message */ + ret = mei_cl_irq_read(cl, cb, cmpl_list); + if (ret) + return ret; + + break; + case MEI_FOP_CONNECT: + /* connect message */ + ret = mei_cl_irq_connect(cl, cb, cmpl_list); + if (ret) + return ret; + + break; + case MEI_FOP_DISCONNECT_RSP: + /* send disconnect resp */ + ret = mei_cl_irq_disconnect_rsp(cl, cb, cmpl_list); + if (ret) + return ret; + break; + default: + BUG(); + } + + } + /* complete write list CB */ + dev_dbg(dev->dev, "complete write list cb.\n"); + list_for_each_entry_safe(cb, next, &dev->write_list.list, list) { + cl = cb->cl; + if (cl == &dev->iamthif_cl) + ret = mei_amthif_irq_write(cl, cb, cmpl_list); + else + ret = mei_cl_irq_write(cl, cb, cmpl_list); + if (ret) + return ret; + } + return 0; +} +EXPORT_SYMBOL_GPL(mei_irq_write_handler); + + + +/** + * mei_timer - timer function. 
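+ *
+ * Periodic housekeeping: counts down the HBM init, connect/disconnect and
+ * amthif stall timers and calls mei_reset() when one of them expires;
+ * re-arms itself every 2 * HZ as long as the device is not disabled.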
+ *
+ * @work: pointer to the work_struct structure
+ *
+ */
+void mei_timer(struct work_struct *work)
+{
+	unsigned long timeout;
+	struct mei_cl *cl;
+
+	struct mei_device *dev = container_of(work,
+					struct mei_device, timer_work.work);
+
+
+	mutex_lock(&dev->device_lock);
+
+	/* Catch interrupt stalls during HBM init handshake */
+	if (dev->dev_state == MEI_DEV_INIT_CLIENTS &&
+	    dev->hbm_state != MEI_HBM_IDLE) {
+
+		if (dev->init_clients_timer) {
+			if (--dev->init_clients_timer == 0) {
+				dev_err(dev->dev, "timer: init clients timeout hbm_state = %d.\n",
+					dev->hbm_state);
+				mei_reset(dev);
+				goto out;
+			}
+		}
+	}
+
+	if (dev->dev_state != MEI_DEV_ENABLED)
+		goto out;
+
+	/*** connect/disconnect timeouts ***/
+	list_for_each_entry(cl, &dev->file_list, link) {
+		if (cl->timer_count) {
+			if (--cl->timer_count == 0) {
+				dev_err(dev->dev, "timer: connect/disconnect timeout.\n");
+				mei_reset(dev);
+				goto out;
+			}
+		}
+	}
+
+	if (!mei_cl_is_connected(&dev->iamthif_cl))
+		goto out;
+
+	if (dev->iamthif_stall_timer) {
+		if (--dev->iamthif_stall_timer == 0) {
+			dev_err(dev->dev, "timer: amthif hung.\n");
+			mei_reset(dev);
+			dev->iamthif_canceled = false;
+			dev->iamthif_state = MEI_IAMTHIF_IDLE;
+			dev->iamthif_timer = 0;
+
+			mei_io_cb_free(dev->iamthif_current_cb);
+			dev->iamthif_current_cb = NULL;
+
+			dev->iamthif_file_object = NULL;
+			mei_amthif_run_next_cmd(dev);
+		}
+	}
+
+	if (dev->iamthif_timer) {
+
+		timeout = dev->iamthif_timer +
+			mei_secs_to_jiffies(MEI_IAMTHIF_READ_TIMER);
+
+		dev_dbg(dev->dev, "dev->iamthif_timer = %ld\n",
+			dev->iamthif_timer);
+		dev_dbg(dev->dev, "timeout = %ld\n", timeout);
+		dev_dbg(dev->dev, "jiffies = %ld\n", jiffies);
+		if (time_after(jiffies, timeout)) {
+			/*
+			 * User didn't read the AMTHI data within
+			 * MEI_IAMTHIF_READ_TIMER seconds;
+			 * freeing AMTHI for other requests
+			 */
+
+			dev_dbg(dev->dev, "freeing AMTHI for other requests\n");
+
+			mei_io_list_flush(&dev->amthif_rd_complete_list,
+					  &dev->iamthif_cl);
+			mei_io_cb_free(dev->iamthif_current_cb);
+			dev->iamthif_current_cb = NULL;
+
+			dev->iamthif_file_object->private_data = NULL;
+			dev->iamthif_file_object = NULL;
+			dev->iamthif_timer = 0;
+			mei_amthif_run_next_cmd(dev);
+
+		}
+	}
+out:
+	if (dev->dev_state != MEI_DEV_DISABLED)
+		schedule_delayed_work(&dev->timer_work, 2 * HZ);
+	mutex_unlock(&dev->device_lock);
+}
diff --git a/kernel/drivers/misc/mei/main.c b/kernel/drivers/misc/mei/main.c
new file mode 100644
index 000000000..3e2968159
--- /dev/null
+++ b/kernel/drivers/misc/mei/main.c
@@ -0,0 +1,788 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ * + */ +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/fcntl.h> +#include <linux/poll.h> +#include <linux/init.h> +#include <linux/ioctl.h> +#include <linux/cdev.h> +#include <linux/sched.h> +#include <linux/uuid.h> +#include <linux/compat.h> +#include <linux/jiffies.h> +#include <linux/interrupt.h> + +#include <linux/mei.h> + +#include "mei_dev.h" +#include "client.h" + +/** + * mei_open - the open function + * + * @inode: pointer to inode structure + * @file: pointer to file structure + * + * Return: 0 on success, <0 on error + */ +static int mei_open(struct inode *inode, struct file *file) +{ + struct mei_device *dev; + struct mei_cl *cl; + + int err; + + dev = container_of(inode->i_cdev, struct mei_device, cdev); + if (!dev) + return -ENODEV; + + mutex_lock(&dev->device_lock); + + if (dev->dev_state != MEI_DEV_ENABLED) { + dev_dbg(dev->dev, "dev_state != MEI_ENABLED dev_state = %s\n", + mei_dev_state_str(dev->dev_state)); + err = -ENODEV; + goto err_unlock; + } + + cl = mei_cl_alloc_linked(dev, MEI_HOST_CLIENT_ID_ANY); + if (IS_ERR(cl)) { + err = PTR_ERR(cl); + goto err_unlock; + } + + file->private_data = cl; + + mutex_unlock(&dev->device_lock); + + return nonseekable_open(inode, file); + +err_unlock: + mutex_unlock(&dev->device_lock); + return err; +} + +/** + * mei_release - the release function + * + * @inode: pointer to inode structure + * @file: pointer to file structure + * + * Return: 0 on success, <0 on error + */ +static int mei_release(struct inode *inode, struct file *file) +{ + struct mei_cl *cl = file->private_data; + struct mei_device *dev; + int rets = 0; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + mutex_lock(&dev->device_lock); + if (cl == &dev->iamthif_cl) { + rets = mei_amthif_release(dev, file); + goto out; + } + if (mei_cl_is_connected(cl)) { + cl->state = MEI_FILE_DISCONNECTING; + cl_dbg(dev, cl, "disconnecting\n"); + rets = mei_cl_disconnect(cl); + } + mei_cl_flush_queues(cl, file); + cl_dbg(dev, cl, "removing\n"); + + mei_cl_unlink(cl); + + file->private_data = NULL; + + kfree(cl); +out: + mutex_unlock(&dev->device_lock); + return rets; +} + + +/** + * mei_read - the read function. 
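+ *
+ * Unless O_NONBLOCK is set, blocks until a read callback completes or the
+ * client is disconnected. *offset only walks within a single message and
+ * is reset to zero when a new message is started.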
+ *
+ * @file: pointer to file structure
+ * @ubuf: pointer to user buffer
+ * @length: buffer length
+ * @offset: data offset in buffer
+ *
+ * Return: >=0 data length on success, <0 on error
+ */
+static ssize_t mei_read(struct file *file, char __user *ubuf,
+			size_t length, loff_t *offset)
+{
+	struct mei_cl *cl = file->private_data;
+	struct mei_device *dev;
+	struct mei_cl_cb *cb = NULL;
+	int rets;
+	int err;
+
+
+	if (WARN_ON(!cl || !cl->dev))
+		return -ENODEV;
+
+	dev = cl->dev;
+
+
+	mutex_lock(&dev->device_lock);
+	if (dev->dev_state != MEI_DEV_ENABLED) {
+		rets = -ENODEV;
+		goto out;
+	}
+
+	if (length == 0) {
+		rets = 0;
+		goto out;
+	}
+
+	if (cl == &dev->iamthif_cl) {
+		rets = mei_amthif_read(dev, file, ubuf, length, offset);
+		goto out;
+	}
+
+	cb = mei_cl_read_cb(cl, file);
+	if (cb) {
+		/* read what is left */
+		if (cb->buf_idx > *offset)
+			goto copy_buffer;
+		/* offset is beyond buf_idx; we have no more data, return 0 */
+		if (cb->buf_idx > 0 && cb->buf_idx <= *offset) {
+			rets = 0;
+			goto free;
+		}
+		/* offset needs to be cleaned for contiguous reads */
+		if (cb->buf_idx == 0 && *offset > 0)
+			*offset = 0;
+	} else if (*offset > 0) {
+		*offset = 0;
+	}
+
+	err = mei_cl_read_start(cl, length, file);
+	if (err && err != -EBUSY) {
+		dev_dbg(dev->dev,
+			"mei start read failure with status = %d\n", err);
+		rets = err;
+		goto out;
+	}
+
+	if (list_empty(&cl->rd_completed) && !waitqueue_active(&cl->rx_wait)) {
+		if (file->f_flags & O_NONBLOCK) {
+			rets = -EAGAIN;
+			goto out;
+		}
+
+		mutex_unlock(&dev->device_lock);
+
+		if (wait_event_interruptible(cl->rx_wait,
+				(!list_empty(&cl->rd_completed)) ||
+				(!mei_cl_is_connected(cl)))) {
+
+			if (signal_pending(current))
+				return -EINTR;
+			return -ERESTARTSYS;
+		}
+
+		mutex_lock(&dev->device_lock);
+		if (!mei_cl_is_connected(cl)) {
+			rets = -EBUSY;
+			goto out;
+		}
+	}
+
+	cb = mei_cl_read_cb(cl, file);
+	if (!cb) {
+		rets = 0;
+		goto out;
+	}
+
+copy_buffer:
+	/* now copy the data to user space */
+	if (cb->status) {
+		rets = cb->status;
+		dev_dbg(dev->dev, "read operation failed %d\n", rets);
+		goto free;
+	}
+
+	dev_dbg(dev->dev, "buf.size = %d buf.idx = %ld\n",
+		cb->buf.size, cb->buf_idx);
+	if (length == 0 || ubuf == NULL || *offset > cb->buf_idx) {
+		rets = -EMSGSIZE;
+		goto free;
+	}
+
+	/* length is being truncated to PAGE_SIZE,
+	 * however buf_idx may point beyond that */
+	length = min_t(size_t, length, cb->buf_idx - *offset);
+
+	if (copy_to_user(ubuf, cb->buf.data + *offset, length)) {
+		dev_dbg(dev->dev, "failed to copy data to userland\n");
+		rets = -EFAULT;
+		goto free;
+	}
+
+	rets = length;
+	*offset += length;
+	if ((unsigned long)*offset < cb->buf_idx)
+		goto out;
+
+free:
+	mei_io_cb_free(cb);
+
+out:
+	dev_dbg(dev->dev, "end mei read rets = %d\n", rets);
+	mutex_unlock(&dev->device_lock);
+	return rets;
+}
+
+/**
+ * mei_write - the write function.
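+ *
+ * For the amthif client the request is queued via mei_amthif_write() and
+ * the call returns the full length right away; regular clients are written
+ * through mei_cl_write().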
+ * + * @file: pointer to file structure + * @ubuf: pointer to user buffer + * @length: buffer length + * @offset: data offset in buffer + * + * Return: >=0 data length on success , <0 on error + */ +static ssize_t mei_write(struct file *file, const char __user *ubuf, + size_t length, loff_t *offset) +{ + struct mei_cl *cl = file->private_data; + struct mei_me_client *me_cl = NULL; + struct mei_cl_cb *write_cb = NULL; + struct mei_device *dev; + unsigned long timeout = 0; + int rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + mutex_lock(&dev->device_lock); + + if (dev->dev_state != MEI_DEV_ENABLED) { + rets = -ENODEV; + goto out; + } + + me_cl = mei_me_cl_by_uuid_id(dev, &cl->cl_uuid, cl->me_client_id); + if (!me_cl) { + rets = -ENOTTY; + goto out; + } + + if (length == 0) { + rets = 0; + goto out; + } + + if (length > me_cl->props.max_msg_length) { + rets = -EFBIG; + goto out; + } + + if (!mei_cl_is_connected(cl)) { + cl_err(dev, cl, "is not connected"); + rets = -ENODEV; + goto out; + } + if (cl == &dev->iamthif_cl) { + write_cb = mei_amthif_find_read_list_entry(dev, file); + + if (write_cb) { + timeout = write_cb->read_time + + mei_secs_to_jiffies(MEI_IAMTHIF_READ_TIMER); + + if (time_after(jiffies, timeout)) { + *offset = 0; + mei_io_cb_free(write_cb); + write_cb = NULL; + } + } + } + + *offset = 0; + write_cb = mei_cl_alloc_cb(cl, length, MEI_FOP_WRITE, file); + if (!write_cb) { + rets = -ENOMEM; + goto out; + } + + rets = copy_from_user(write_cb->buf.data, ubuf, length); + if (rets) { + dev_dbg(dev->dev, "failed to copy data from userland\n"); + rets = -EFAULT; + goto out; + } + + if (cl == &dev->iamthif_cl) { + rets = mei_amthif_write(cl, write_cb); + + if (rets) { + dev_err(dev->dev, + "amthif write failed with status = %d\n", rets); + goto out; + } + mei_me_cl_put(me_cl); + mutex_unlock(&dev->device_lock); + return length; + } + + rets = mei_cl_write(cl, write_cb, false); +out: + mei_me_cl_put(me_cl); + mutex_unlock(&dev->device_lock); + if (rets < 0) + mei_io_cb_free(write_cb); + return rets; +} + +/** + * mei_ioctl_connect_client - the connect to fw client IOCTL function + * + * @file: private data of the file object + * @data: IOCTL connect data, input and output parameters + * + * Locking: called under "dev->device_lock" lock + * + * Return: 0 on success, <0 on failure. 
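+ *
+ * Userspace sketch (hypothetical: the device node name, uuid and buffers
+ * are assumptions; error handling elided):
+ *
+ *	struct mei_connect_client_data data = { .in_client_uuid = uuid };
+ *	int fd = open("/dev/mei0", O_RDWR);
+ *	ioctl(fd, IOCTL_MEI_CONNECT_CLIENT, &data);
+ *	write(fd, req, req_len);
+ *	read(fd, rsp, data.out_client_properties.max_msg_length);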
+ */ +static int mei_ioctl_connect_client(struct file *file, + struct mei_connect_client_data *data) +{ + struct mei_device *dev; + struct mei_client *client; + struct mei_me_client *me_cl; + struct mei_cl *cl; + int rets; + + cl = file->private_data; + dev = cl->dev; + + if (dev->dev_state != MEI_DEV_ENABLED) + return -ENODEV; + + if (cl->state != MEI_FILE_INITIALIZING && + cl->state != MEI_FILE_DISCONNECTED) + return -EBUSY; + + /* find ME client we're trying to connect to */ + me_cl = mei_me_cl_by_uuid(dev, &data->in_client_uuid); + if (!me_cl || me_cl->props.fixed_address) { + dev_dbg(dev->dev, "Cannot connect to FW Client UUID = %pUl\n", + &data->in_client_uuid); + return -ENOTTY; + } + + cl->me_client_id = me_cl->client_id; + cl->cl_uuid = me_cl->props.protocol_name; + + dev_dbg(dev->dev, "Connect to FW Client ID = %d\n", + cl->me_client_id); + dev_dbg(dev->dev, "FW Client - Protocol Version = %d\n", + me_cl->props.protocol_version); + dev_dbg(dev->dev, "FW Client - Max Msg Len = %d\n", + me_cl->props.max_msg_length); + + /* if we're connecting to amthif client then we will use the + * existing connection + */ + if (uuid_le_cmp(data->in_client_uuid, mei_amthif_guid) == 0) { + dev_dbg(dev->dev, "FW Client is amthi\n"); + if (!mei_cl_is_connected(&dev->iamthif_cl)) { + rets = -ENODEV; + goto end; + } + mei_cl_unlink(cl); + + kfree(cl); + cl = NULL; + dev->iamthif_open_count++; + file->private_data = &dev->iamthif_cl; + + client = &data->out_client_properties; + client->max_msg_length = me_cl->props.max_msg_length; + client->protocol_version = me_cl->props.protocol_version; + rets = dev->iamthif_cl.status; + + goto end; + } + + /* prepare the output buffer */ + client = &data->out_client_properties; + client->max_msg_length = me_cl->props.max_msg_length; + client->protocol_version = me_cl->props.protocol_version; + dev_dbg(dev->dev, "Can connect?\n"); + + rets = mei_cl_connect(cl, file); + +end: + mei_me_cl_put(me_cl); + return rets; +} + +/** + * mei_ioctl - the IOCTL function + * + * @file: pointer to file structure + * @cmd: ioctl command + * @data: pointer to mei message structure + * + * Return: 0 on success , <0 on error + */ +static long mei_ioctl(struct file *file, unsigned int cmd, unsigned long data) +{ + struct mei_device *dev; + struct mei_cl *cl = file->private_data; + struct mei_connect_client_data connect_data; + int rets; + + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + dev_dbg(dev->dev, "IOCTL cmd = 0x%x", cmd); + + mutex_lock(&dev->device_lock); + if (dev->dev_state != MEI_DEV_ENABLED) { + rets = -ENODEV; + goto out; + } + + switch (cmd) { + case IOCTL_MEI_CONNECT_CLIENT: + dev_dbg(dev->dev, ": IOCTL_MEI_CONNECT_CLIENT.\n"); + if (copy_from_user(&connect_data, (char __user *)data, + sizeof(struct mei_connect_client_data))) { + dev_dbg(dev->dev, "failed to copy data from userland\n"); + rets = -EFAULT; + goto out; + } + + rets = mei_ioctl_connect_client(file, &connect_data); + if (rets) + goto out; + + /* if all is ok, copying the data back to user. 
*/ + if (copy_to_user((char __user *)data, &connect_data, + sizeof(struct mei_connect_client_data))) { + dev_dbg(dev->dev, "failed to copy data to userland\n"); + rets = -EFAULT; + goto out; + } + + break; + + default: + dev_err(dev->dev, ": unsupported ioctl %d.\n", cmd); + rets = -ENOIOCTLCMD; + } + +out: + mutex_unlock(&dev->device_lock); + return rets; +} + +/** + * mei_compat_ioctl - the compat IOCTL function + * + * @file: pointer to file structure + * @cmd: ioctl command + * @data: pointer to mei message structure + * + * Return: 0 on success , <0 on error + */ +#ifdef CONFIG_COMPAT +static long mei_compat_ioctl(struct file *file, + unsigned int cmd, unsigned long data) +{ + return mei_ioctl(file, cmd, (unsigned long)compat_ptr(data)); +} +#endif + + +/** + * mei_poll - the poll function + * + * @file: pointer to file structure + * @wait: pointer to poll_table structure + * + * Return: poll mask + */ +static unsigned int mei_poll(struct file *file, poll_table *wait) +{ + unsigned long req_events = poll_requested_events(wait); + struct mei_cl *cl = file->private_data; + struct mei_device *dev; + unsigned int mask = 0; + + if (WARN_ON(!cl || !cl->dev)) + return POLLERR; + + dev = cl->dev; + + mutex_lock(&dev->device_lock); + + + if (dev->dev_state != MEI_DEV_ENABLED || + !mei_cl_is_connected(cl)) { + mask = POLLERR; + goto out; + } + + if (cl == &dev->iamthif_cl) { + mask = mei_amthif_poll(dev, file, wait); + goto out; + } + + if (req_events & (POLLIN | POLLRDNORM)) { + poll_wait(file, &cl->rx_wait, wait); + + if (!list_empty(&cl->rd_completed)) + mask |= POLLIN | POLLRDNORM; + else + mei_cl_read_start(cl, 0, file); + } + +out: + mutex_unlock(&dev->device_lock); + return mask; +} + +/** + * fw_status_show - mei device attribute show method + * + * @device: device pointer + * @attr: attribute pointer + * @buf: char out buffer + * + * Return: number of the bytes printed into buf or error + */ +static ssize_t fw_status_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct mei_device *dev = dev_get_drvdata(device); + struct mei_fw_status fw_status; + int err, i; + ssize_t cnt = 0; + + mutex_lock(&dev->device_lock); + err = mei_fw_status(dev, &fw_status); + mutex_unlock(&dev->device_lock); + if (err) { + dev_err(device, "read fw_status error = %d\n", err); + return err; + } + + for (i = 0; i < fw_status.count; i++) + cnt += scnprintf(buf + cnt, PAGE_SIZE - cnt, "%08X\n", + fw_status.status[i]); + return cnt; +} +static DEVICE_ATTR_RO(fw_status); + +static struct attribute *mei_attrs[] = { + &dev_attr_fw_status.attr, + NULL +}; +ATTRIBUTE_GROUPS(mei); + +/* + * file operations structure will be used for mei char device. 
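+ *
+ * read/write are backed by mei_read/mei_write, connection management is
+ * done through mei_ioctl (IOCTL_MEI_CONNECT_CLIENT), readiness via
+ * mei_poll; llseek is rejected with no_llseek as the device is a
+ * message stream.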
+ */ +static const struct file_operations mei_fops = { + .owner = THIS_MODULE, + .read = mei_read, + .unlocked_ioctl = mei_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = mei_compat_ioctl, +#endif + .open = mei_open, + .release = mei_release, + .write = mei_write, + .poll = mei_poll, + .llseek = no_llseek +}; + +static struct class *mei_class; +static dev_t mei_devt; +#define MEI_MAX_DEVS MINORMASK +static DEFINE_MUTEX(mei_minor_lock); +static DEFINE_IDR(mei_idr); + +/** + * mei_minor_get - obtain next free device minor number + * + * @dev: device pointer + * + * Return: allocated minor, or -ENOSPC if no free minor left + */ +static int mei_minor_get(struct mei_device *dev) +{ + int ret; + + mutex_lock(&mei_minor_lock); + ret = idr_alloc(&mei_idr, dev, 0, MEI_MAX_DEVS, GFP_KERNEL); + if (ret >= 0) + dev->minor = ret; + else if (ret == -ENOSPC) + dev_err(dev->dev, "too many mei devices\n"); + + mutex_unlock(&mei_minor_lock); + return ret; +} + +/** + * mei_minor_free - mark device minor number as free + * + * @dev: device pointer + */ +static void mei_minor_free(struct mei_device *dev) +{ + mutex_lock(&mei_minor_lock); + idr_remove(&mei_idr, dev->minor); + mutex_unlock(&mei_minor_lock); +} + +int mei_register(struct mei_device *dev, struct device *parent) +{ + struct device *clsdev; /* class device */ + int ret, devno; + + ret = mei_minor_get(dev); + if (ret < 0) + return ret; + + /* Fill in the data structures */ + devno = MKDEV(MAJOR(mei_devt), dev->minor); + cdev_init(&dev->cdev, &mei_fops); + dev->cdev.owner = mei_fops.owner; + + /* Add the device */ + ret = cdev_add(&dev->cdev, devno, 1); + if (ret) { + dev_err(parent, "unable to add device %d:%d\n", + MAJOR(mei_devt), dev->minor); + goto err_dev_add; + } + + clsdev = device_create_with_groups(mei_class, parent, devno, + dev, mei_groups, + "mei%d", dev->minor); + + if (IS_ERR(clsdev)) { + dev_err(parent, "unable to create device %d:%d\n", + MAJOR(mei_devt), dev->minor); + ret = PTR_ERR(clsdev); + goto err_dev_create; + } + + ret = mei_dbgfs_register(dev, dev_name(clsdev)); + if (ret) { + dev_err(clsdev, "cannot register debugfs ret = %d\n", ret); + goto err_dev_dbgfs; + } + + return 0; + +err_dev_dbgfs: + device_destroy(mei_class, devno); +err_dev_create: + cdev_del(&dev->cdev); +err_dev_add: + mei_minor_free(dev); + return ret; +} +EXPORT_SYMBOL_GPL(mei_register); + +void mei_deregister(struct mei_device *dev) +{ + int devno; + + devno = dev->cdev.dev; + cdev_del(&dev->cdev); + + mei_dbgfs_deregister(dev); + + device_destroy(mei_class, devno); + + mei_minor_free(dev); +} +EXPORT_SYMBOL_GPL(mei_deregister); + +static int __init mei_init(void) +{ + int ret; + + mei_class = class_create(THIS_MODULE, "mei"); + if (IS_ERR(mei_class)) { + pr_err("couldn't create class\n"); + ret = PTR_ERR(mei_class); + goto err; + } + + ret = alloc_chrdev_region(&mei_devt, 0, MEI_MAX_DEVS, "mei"); + if (ret < 0) { + pr_err("unable to allocate char dev region\n"); + goto err_class; + } + + ret = mei_cl_bus_init(); + if (ret < 0) { + pr_err("unable to initialize bus\n"); + goto err_chrdev; + } + + return 0; + +err_chrdev: + unregister_chrdev_region(mei_devt, MEI_MAX_DEVS); +err_class: + class_destroy(mei_class); +err: + return ret; +} + +static void __exit mei_exit(void) +{ + unregister_chrdev_region(mei_devt, MEI_MAX_DEVS); + class_destroy(mei_class); + mei_cl_bus_exit(); +} + +module_init(mei_init); +module_exit(mei_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) Management Engine Interface"); +MODULE_LICENSE("GPL v2"); + diff 
--git a/kernel/drivers/misc/mei/mei-trace.c b/kernel/drivers/misc/mei/mei-trace.c new file mode 100644 index 000000000..388efb519 --- /dev/null +++ b/kernel/drivers/misc/mei/mei-trace.c @@ -0,0 +1,25 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ +#include <linux/module.h> + +/* sparse doesn't like tracepoint macros */ +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "mei-trace.h" + +EXPORT_TRACEPOINT_SYMBOL(mei_reg_read); +EXPORT_TRACEPOINT_SYMBOL(mei_reg_write); +#endif /* __CHECKER__ */ diff --git a/kernel/drivers/misc/mei/mei-trace.h b/kernel/drivers/misc/mei/mei-trace.h new file mode 100644 index 000000000..47e1bc655 --- /dev/null +++ b/kernel/drivers/misc/mei/mei-trace.h @@ -0,0 +1,74 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#if !defined(_MEI_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _MEI_TRACE_H_ + +#include <linux/stringify.h> +#include <linux/types.h> +#include <linux/tracepoint.h> + +#include <linux/device.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mei + +TRACE_EVENT(mei_reg_read, + TP_PROTO(const struct device *dev, const char *reg, u32 offs, u32 val), + TP_ARGS(dev, reg, offs, val), + TP_STRUCT__entry( + __string(dev, dev_name(dev)) + __field(const char *, reg) + __field(u32, offs) + __field(u32, val) + ), + TP_fast_assign( + __assign_str(dev, dev_name(dev)) + __entry->reg = reg; + __entry->offs = offs; + __entry->val = val; + ), + TP_printk("[%s] read %s:[%#x] = %#x", + __get_str(dev), __entry->reg, __entry->offs, __entry->val) +); + +TRACE_EVENT(mei_reg_write, + TP_PROTO(const struct device *dev, const char *reg, u32 offs, u32 val), + TP_ARGS(dev, reg, offs, val), + TP_STRUCT__entry( + __string(dev, dev_name(dev)) + __field(const char *, reg) + __field(u32, offs) + __field(u32, val) + ), + TP_fast_assign( + __assign_str(dev, dev_name(dev)) + __entry->reg = reg; + __entry->offs = offs; + __entry->val = val; + ), + TP_printk("[%s] write %s[%#x] = %#x)", + __get_str(dev), __entry->reg, __entry->offs, __entry->val) +); + +#endif /* _MEI_TRACE_H_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . 
+#define TRACE_INCLUDE_FILE mei-trace +#include <trace/define_trace.h> diff --git a/kernel/drivers/misc/mei/mei_dev.h b/kernel/drivers/misc/mei/mei_dev.h new file mode 100644 index 000000000..f84c39ee2 --- /dev/null +++ b/kernel/drivers/misc/mei/mei_dev.h @@ -0,0 +1,865 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2012, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef _MEI_DEV_H_ +#define _MEI_DEV_H_ + +#include <linux/types.h> +#include <linux/watchdog.h> +#include <linux/poll.h> +#include <linux/mei.h> +#include <linux/mei_cl_bus.h> + +#include "hw.h" +#include "hbm.h" + +/* + * watch dog definition + */ +#define MEI_WD_HDR_SIZE 4 +#define MEI_WD_STOP_MSG_SIZE MEI_WD_HDR_SIZE +#define MEI_WD_START_MSG_SIZE (MEI_WD_HDR_SIZE + 16) + +#define MEI_WD_DEFAULT_TIMEOUT 120 /* seconds */ +#define MEI_WD_MIN_TIMEOUT 120 /* seconds */ +#define MEI_WD_MAX_TIMEOUT 65535 /* seconds */ + +#define MEI_WD_STOP_TIMEOUT 10 /* msecs */ + +#define MEI_WD_STATE_INDEPENDENCE_MSG_SENT (1 << 0) + +#define MEI_RD_MSG_BUF_SIZE (128 * sizeof(u32)) + + +/* + * AMTHI Client UUID + */ +extern const uuid_le mei_amthif_guid; + +/* + * Watchdog Client UUID + */ +extern const uuid_le mei_wd_guid; + +/* + * Number of Maximum MEI Clients + */ +#define MEI_CLIENTS_MAX 256 + +/* + * maximum number of consecutive resets + */ +#define MEI_MAX_CONSEC_RESET 3 + +/* + * Number of File descriptors/handles + * that can be opened to the driver. 
+ * + * Limit to 255: 256 Total Clients + * minus internal client for MEI Bus Messages + */ +#define MEI_MAX_OPEN_HANDLE_COUNT (MEI_CLIENTS_MAX - 1) + +/* + * Internal Clients Number + */ +#define MEI_HOST_CLIENT_ID_ANY (-1) +#define MEI_HBM_HOST_CLIENT_ID 0 /* not used, just for documentation */ +#define MEI_WD_HOST_CLIENT_ID 1 +#define MEI_IAMTHIF_HOST_CLIENT_ID 2 + + +/* File state */ +enum file_state { + MEI_FILE_INITIALIZING = 0, + MEI_FILE_CONNECTING, + MEI_FILE_CONNECTED, + MEI_FILE_DISCONNECTING, + MEI_FILE_DISCONNECTED +}; + +/* MEI device states */ +enum mei_dev_state { + MEI_DEV_INITIALIZING = 0, + MEI_DEV_INIT_CLIENTS, + MEI_DEV_ENABLED, + MEI_DEV_RESETTING, + MEI_DEV_DISABLED, + MEI_DEV_POWER_DOWN, + MEI_DEV_POWER_UP +}; + +const char *mei_dev_state_str(int state); + +enum iamthif_states { + MEI_IAMTHIF_IDLE, + MEI_IAMTHIF_WRITING, + MEI_IAMTHIF_FLOW_CONTROL, + MEI_IAMTHIF_READING, + MEI_IAMTHIF_READ_COMPLETE +}; + +enum mei_file_transaction_states { + MEI_IDLE, + MEI_WRITING, + MEI_WRITE_COMPLETE, + MEI_FLOW_CONTROL, + MEI_READING, + MEI_READ_COMPLETE +}; + +enum mei_wd_states { + MEI_WD_IDLE, + MEI_WD_RUNNING, + MEI_WD_STOPPING, +}; + +/** + * enum mei_cb_file_ops - file operation associated with the callback + * @MEI_FOP_READ: read + * @MEI_FOP_WRITE: write + * @MEI_FOP_CONNECT: connect + * @MEI_FOP_DISCONNECT: disconnect + * @MEI_FOP_DISCONNECT_RSP: disconnect response + */ +enum mei_cb_file_ops { + MEI_FOP_READ = 0, + MEI_FOP_WRITE, + MEI_FOP_CONNECT, + MEI_FOP_DISCONNECT, + MEI_FOP_DISCONNECT_RSP, +}; + +/* + * Intel MEI message data struct + */ +struct mei_msg_data { + u32 size; + unsigned char *data; +}; + +/* Maximum number of processed FW status registers */ +#define MEI_FW_STATUS_MAX 6 +/* Minimal buffer for FW status string (8 bytes in dw + space or '\0') */ +#define MEI_FW_STATUS_STR_SZ (MEI_FW_STATUS_MAX * (8 + 1)) + + +/* + * struct mei_fw_status - storage of FW status data + * + * @count: number of actually available elements in array + * @status: FW status registers + */ +struct mei_fw_status { + int count; + u32 status[MEI_FW_STATUS_MAX]; +}; + +/** + * struct mei_me_client - representation of me (fw) client + * + * @list: link in me client list + * @refcnt: struct reference count + * @props: client properties + * @client_id: me client id + * @mei_flow_ctrl_creds: flow control credits + */ +struct mei_me_client { + struct list_head list; + struct kref refcnt; + struct mei_client_properties props; + u8 client_id; + u8 mei_flow_ctrl_creds; +}; + + +struct mei_cl; + +/** + * struct mei_cl_cb - file operation callback structure + * + * @list: link in callback queue + * @cl: file client who is running this operation + * @fop_type: file operation type + * @buf: buffer for data associated with the callback + * @buf_idx: last read index + * @read_time: last read operation time stamp (iamthif) + * @file_object: pointer to file structure + * @status: io status of the cb + * @internal: communication between driver and FW flag + * @completed: the transfer or reception has completed + */ +struct mei_cl_cb { + struct list_head list; + struct mei_cl *cl; + enum mei_cb_file_ops fop_type; + struct mei_msg_data buf; + unsigned long buf_idx; + unsigned long read_time; + struct file *file_object; + int status; + u32 internal:1; + u32 completed:1; +}; + +/** + * struct mei_cl - me client host representation + * carried in file->private_data + * + * @link: link in the clients list + * @dev: mei parent device + * @state: file operation state + * @tx_wait: wait queue for tx 
completion + * @rx_wait: wait queue for rx completion + * @wait: wait queue for management operation + * @status: connection status + * @cl_uuid: client uuid name + * @host_client_id: host id + * @me_client_id: me/fw id + * @mei_flow_ctrl_creds: transmit flow credentials + * @timer_count: watchdog timer for operation completion + * @writing_state: state of the tx + * @rd_pending: pending read credits + * @rd_completed: completed read + * + * @device: device on the mei client bus + * @device_link: link to bus clients + */ +struct mei_cl { + struct list_head link; + struct mei_device *dev; + enum file_state state; + wait_queue_head_t tx_wait; + wait_queue_head_t rx_wait; + wait_queue_head_t wait; + int status; + uuid_le cl_uuid; + u8 host_client_id; + u8 me_client_id; + u8 mei_flow_ctrl_creds; + u8 timer_count; + enum mei_file_transaction_states writing_state; + struct list_head rd_pending; + struct list_head rd_completed; + + /* MEI CL bus data */ + struct mei_cl_device *device; + struct list_head device_link; +}; + +/** struct mei_hw_ops + * + * @host_is_ready : query for host readiness + + * @hw_is_ready : query if hw is ready + * @hw_reset : reset hw + * @hw_start : start hw after reset + * @hw_config : configure hw + + * @fw_status : get fw status registers + * @pg_state : power gating state of the device + * @pg_in_transition : is device now in pg transition + * @pg_is_enabled : is power gating enabled + + * @intr_clear : clear pending interrupts + * @intr_enable : enable interrupts + * @intr_disable : disable interrupts + + * @hbuf_free_slots : query for write buffer empty slots + * @hbuf_is_ready : query if write buffer is empty + * @hbuf_max_len : query for write buffer max len + + * @write : write a message to FW + + * @rdbuf_full_slots : query how many slots are filled + + * @read_hdr : get first 4 bytes (header) + * @read : read a buffer from the FW + */ +struct mei_hw_ops { + + bool (*host_is_ready)(struct mei_device *dev); + + bool (*hw_is_ready)(struct mei_device *dev); + int (*hw_reset)(struct mei_device *dev, bool enable); + int (*hw_start)(struct mei_device *dev); + void (*hw_config)(struct mei_device *dev); + + + int (*fw_status)(struct mei_device *dev, struct mei_fw_status *fw_sts); + enum mei_pg_state (*pg_state)(struct mei_device *dev); + bool (*pg_in_transition)(struct mei_device *dev); + bool (*pg_is_enabled)(struct mei_device *dev); + + void (*intr_clear)(struct mei_device *dev); + void (*intr_enable)(struct mei_device *dev); + void (*intr_disable)(struct mei_device *dev); + + int (*hbuf_free_slots)(struct mei_device *dev); + bool (*hbuf_is_ready)(struct mei_device *dev); + size_t (*hbuf_max_len)(const struct mei_device *dev); + + int (*write)(struct mei_device *dev, + struct mei_msg_hdr *hdr, + unsigned char *buf); + + int (*rdbuf_full_slots)(struct mei_device *dev); + + u32 (*read_hdr)(const struct mei_device *dev); + int (*read)(struct mei_device *dev, + unsigned char *buf, unsigned long len); +}; + +/* MEI bus API*/ + +/** + * struct mei_cl_ops - MEI CL device ops + * This structure allows ME host clients to implement technology + * specific operations. + * + * @enable: Enable an MEI CL device. Some devices require specific + * HECI commands to initialize completely. + * @disable: Disable an MEI CL device. + * @send: Tx hook for the device. This allows ME host clients to trap + * the device driver buffers before actually physically + * pushing it to the ME. + * @recv: Rx hook for the device. 
This allows ME host clients to trap the + * ME buffers before forwarding them to the device driver. + */ +struct mei_cl_ops { + int (*enable)(struct mei_cl_device *device); + int (*disable)(struct mei_cl_device *device); + int (*send)(struct mei_cl_device *device, u8 *buf, size_t length); + int (*recv)(struct mei_cl_device *device, u8 *buf, size_t length); +}; + +struct mei_cl_device *mei_cl_add_device(struct mei_device *dev, + uuid_le uuid, char *name, + struct mei_cl_ops *ops); +void mei_cl_remove_device(struct mei_cl_device *device); + +ssize_t __mei_cl_async_send(struct mei_cl *cl, u8 *buf, size_t length); +ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length); +ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length); +void mei_cl_bus_rx_event(struct mei_cl *cl); +void mei_cl_bus_remove_devices(struct mei_device *dev); +int mei_cl_bus_init(void); +void mei_cl_bus_exit(void); +struct mei_cl *mei_cl_bus_find_cl_by_uuid(struct mei_device *dev, uuid_le uuid); + + +/** + * struct mei_cl_device - MEI device handle + * An mei_cl_device pointer is returned from mei_add_device() + * and links MEI bus clients to their actual ME host client pointer. + * Drivers for MEI devices will get an mei_cl_device pointer + * when being probed and shall use it for doing ME bus I/O. + * + * @dev: linux driver model device pointer + * @cl: mei client + * @ops: ME transport ops + * @event_work: async work to execute event callback + * @event_cb: Drivers register this callback to get asynchronous ME + * events (e.g. Rx buffer pending) notifications. + * @event_context: event callback run context + * @events: Events bitmask sent to the driver. + * @priv_data: client private data + */ +struct mei_cl_device { + struct device dev; + + struct mei_cl *cl; + + const struct mei_cl_ops *ops; + + struct work_struct event_work; + mei_cl_event_cb_t event_cb; + void *event_context; + unsigned long events; + + void *priv_data; +}; + + +/** + * enum mei_pg_event - power gating transition events + * + * @MEI_PG_EVENT_IDLE: the driver is not in power gating transition + * @MEI_PG_EVENT_WAIT: the driver is waiting for a pg event to complete + * @MEI_PG_EVENT_RECEIVED: the driver received pg event + * @MEI_PG_EVENT_INTR_WAIT: the driver is waiting for a pg event interrupt + * @MEI_PG_EVENT_INTR_RECEIVED: the driver received pg event interrupt + */ +enum mei_pg_event { + MEI_PG_EVENT_IDLE, + MEI_PG_EVENT_WAIT, + MEI_PG_EVENT_RECEIVED, + MEI_PG_EVENT_INTR_WAIT, + MEI_PG_EVENT_INTR_RECEIVED, +}; + +/** + * enum mei_pg_state - device internal power gating state + * + * @MEI_PG_OFF: device is not power gated - it is active + * @MEI_PG_ON: device is power gated - it is in lower power state + */ +enum mei_pg_state { + MEI_PG_OFF = 0, + MEI_PG_ON = 1, +}; + +const char *mei_pg_state_str(enum mei_pg_state state); + +/** + * struct mei_device - MEI private device struct + * + * @dev : device on a bus + * @cdev : character device + * @minor : minor number allocated for device + * + * @write_list : write pending list + * @write_waiting_list : write completion list + * @ctrl_wr_list : pending control write list + * @ctrl_rd_list : pending control read list + * + * @file_list : list of opened handles + * @open_handle_count: number of opened handles + * + * @device_lock : big device lock + * @timer_work : MEI timer delayed work (timeouts) + * + * @recvd_hw_ready : hw ready message received flag + * + * @wait_hw_ready : wait queue for receive HW ready message form FW + * @wait_pg : wait queue for receive PG message from FW + * 
@wait_hbm_start : wait queue for receive HBM start message from FW + * @wait_stop_wd : wait queue for receive WD stop message from FW + * + * @reset_count : number of consecutive resets + * @dev_state : device state + * @hbm_state : state of host bus message protocol + * @init_clients_timer : HBM init handshake timeout + * + * @pg_event : power gating event + * @pg_domain : runtime PM domain + * + * @rd_msg_buf : control messages buffer + * @rd_msg_hdr : read message header storage + * + * @hbuf_depth : depth of hardware host/write buffer in slots + * @hbuf_is_ready : query if the host/write buffer is ready + * @wr_msg : the buffer for hbm control messages + * + * @version : HBM protocol version in use + * @hbm_f_pg_supported : hbm feature pgi protocol + * + * @me_clients_rwsem: rw lock over me_clients list + * @me_clients : list of FW clients + * @me_clients_map : FW clients bit map + * @host_clients_map : host clients id pool + * @me_client_index : last FW client index in enumeration + * + * @wd_cl : watchdog client + * @wd_state : watchdog client state + * @wd_pending : watchdog command is pending + * @wd_timeout : watchdog expiration timeout + * @wd_data : watchdog message buffer + * + * @amthif_cmd_list : amthif list for cmd waiting + * @amthif_rd_complete_list : amthif list for reading completed cmd data + * @iamthif_file_object : file for current amthif operation + * @iamthif_cl : amthif host client + * @iamthif_current_cb : amthif current operation callback + * @iamthif_open_count : number of opened amthif connections + * @iamthif_mtu : amthif client max message length + * @iamthif_timer : time stamp of current amthif command completion + * @iamthif_stall_timer : timer to detect amthif hang + * @iamthif_state : amthif processor state + * @iamthif_canceled : current amthif command is canceled + * + * @init_work : work item for the device init + * @reset_work : work item for the device reset + * + * @device_list : mei client bus list + * + * @dbgfs_dir : debugfs mei root directory + * + * @ops : hw specific operations + * @hw : hw specific data + */ +struct mei_device { + struct device *dev; + struct cdev cdev; + int minor; + + struct mei_cl_cb write_list; + struct mei_cl_cb write_waiting_list; + struct mei_cl_cb ctrl_wr_list; + struct mei_cl_cb ctrl_rd_list; + + struct list_head file_list; + long open_handle_count; + + struct mutex device_lock; + struct delayed_work timer_work; + + bool recvd_hw_ready; + /* + * waiting queue for receive message from FW + */ + wait_queue_head_t wait_hw_ready; + wait_queue_head_t wait_pg; + wait_queue_head_t wait_hbm_start; + wait_queue_head_t wait_stop_wd; + + /* + * mei device states + */ + unsigned long reset_count; + enum mei_dev_state dev_state; + enum mei_hbm_state hbm_state; + u16 init_clients_timer; + + /* + * Power Gating support + */ + enum mei_pg_event pg_event; +#ifdef CONFIG_PM + struct dev_pm_domain pg_domain; +#endif /* CONFIG_PM */ + + unsigned char rd_msg_buf[MEI_RD_MSG_BUF_SIZE]; + u32 rd_msg_hdr; + + /* write buffer */ + u8 hbuf_depth; + bool hbuf_is_ready; + + /* used for control messages */ + struct { + struct mei_msg_hdr hdr; + unsigned char data[128]; + } wr_msg; + + struct hbm_version version; + unsigned int hbm_f_pg_supported:1; + + struct rw_semaphore me_clients_rwsem; + struct list_head me_clients; + DECLARE_BITMAP(me_clients_map, MEI_CLIENTS_MAX); + DECLARE_BITMAP(host_clients_map, MEI_CLIENTS_MAX); + unsigned long me_client_index; + + struct mei_cl wd_cl; + enum mei_wd_states wd_state; + bool wd_pending; + u16
wd_timeout; + unsigned char wd_data[MEI_WD_START_MSG_SIZE]; + + + /* amthif list for cmd waiting */ + struct mei_cl_cb amthif_cmd_list; + /* driver managed amthif list for reading completed amthif cmd data */ + struct mei_cl_cb amthif_rd_complete_list; + struct file *iamthif_file_object; + struct mei_cl iamthif_cl; + struct mei_cl_cb *iamthif_current_cb; + long iamthif_open_count; + int iamthif_mtu; + unsigned long iamthif_timer; + u32 iamthif_stall_timer; + enum iamthif_states iamthif_state; + bool iamthif_canceled; + + struct work_struct init_work; + struct work_struct reset_work; + + /* List of bus devices */ + struct list_head device_list; + +#if IS_ENABLED(CONFIG_DEBUG_FS) + struct dentry *dbgfs_dir; +#endif /* CONFIG_DEBUG_FS */ + + + const struct mei_hw_ops *ops; + char hw[0] __aligned(sizeof(void *)); +}; + +static inline unsigned long mei_secs_to_jiffies(unsigned long sec) +{ + return msecs_to_jiffies(sec * MSEC_PER_SEC); +} + +/** + * mei_data2slots - get the number of slots (dwords) needed for a message + * of the given length plus the size of the mei header + * + * @length: size of the message in bytes + * + * Return: number of slots + */ +static inline u32 mei_data2slots(size_t length) +{ + return DIV_ROUND_UP(sizeof(struct mei_msg_hdr) + length, 4); +} + +/** + * mei_slots2data - get data in slots - bytes from slots + * + * @slots: number of available slots + * + * Return: number of bytes in slots + */ +static inline u32 mei_slots2data(int slots) +{ + return slots * 4; +} + +/* + * mei init function prototypes + */ +void mei_device_init(struct mei_device *dev, + struct device *device, + const struct mei_hw_ops *hw_ops); +int mei_reset(struct mei_device *dev); +int mei_start(struct mei_device *dev); +int mei_restart(struct mei_device *dev); +void mei_stop(struct mei_device *dev); +void mei_cancel_work(struct mei_device *dev); + +/* + * MEI interrupt function prototypes + */ + +void mei_timer(struct work_struct *work); +int mei_irq_read_handler(struct mei_device *dev, + struct mei_cl_cb *cmpl_list, s32 *slots); + +int mei_irq_write_handler(struct mei_device *dev, struct mei_cl_cb *cmpl_list); +void mei_irq_compl_handler(struct mei_device *dev, struct mei_cl_cb *cmpl_list); + +/* + * AMTHIF - AMT Host Interface Functions + */ +void mei_amthif_reset_params(struct mei_device *dev); + +int mei_amthif_host_init(struct mei_device *dev); + +int mei_amthif_read(struct mei_device *dev, struct file *file, + char __user *ubuf, size_t length, loff_t *offset); + +unsigned int mei_amthif_poll(struct mei_device *dev, + struct file *file, poll_table *wait); + +int mei_amthif_release(struct mei_device *dev, struct file *file); + +struct mei_cl_cb *mei_amthif_find_read_list_entry(struct mei_device *dev, + struct file *file); + +int mei_amthif_write(struct mei_cl *cl, struct mei_cl_cb *cb); +int mei_amthif_run_next_cmd(struct mei_device *dev); +int mei_amthif_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, + struct mei_cl_cb *cmpl_list); + +void mei_amthif_complete(struct mei_device *dev, struct mei_cl_cb *cb); +int mei_amthif_irq_read_msg(struct mei_cl *cl, + struct mei_msg_hdr *mei_hdr, + struct mei_cl_cb *complete_list); +int mei_amthif_irq_read(struct mei_device *dev, s32 *slots); + +/* + * NFC functions + */ +int mei_nfc_host_init(struct mei_device *dev); +void mei_nfc_host_exit(struct mei_device *dev); + +/* + * NFC Client UUID + */ +extern const uuid_le mei_nfc_guid; + +int mei_wd_send(struct mei_device *dev); +int mei_wd_stop(struct mei_device *dev); +int mei_wd_host_init(struct mei_device *dev);
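The two slot helpers above do all of the host-buffer accounting in 4-byte units. A self-contained sketch of the same arithmetic (a hypothetical stand-alone check, not part of the driver; it assumes only that struct mei_msg_hdr is 4 bytes, as its bitfield layout elsewhere in this driver implies):

#include <stdio.h>

#define MEI_HDR_BYTES 4 /* assumed sizeof(struct mei_msg_hdr) */

/* mirrors mei_data2slots(): header + payload, rounded up to whole slots */
static unsigned int data2slots(size_t length)
{
	return (MEI_HDR_BYTES + length + 3) / 4;
}

/* mirrors mei_slots2data(): every slot carries 4 bytes */
static unsigned int slots2data(int slots)
{
	return slots * 4;
}

int main(void)
{
	/* a 6-byte payload needs DIV_ROUND_UP(4 + 6, 4) = 3 slots = 12 bytes */
	printf("6 bytes -> %u slots -> %u buffer bytes\n",
	       data2slots(6), slots2data((int)data2slots(6)));
	return 0;
}

+/* + *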
mei_watchdog_register - Registering watchdog interface + * once we got connection to the WD Client + * @dev: mei device + */ +int mei_watchdog_register(struct mei_device *dev); +/* + * mei_watchdog_unregister - Unregistering watchdog interface + * @dev: mei device + */ +void mei_watchdog_unregister(struct mei_device *dev); + +/* + * Register Access Function + */ + + +static inline void mei_hw_config(struct mei_device *dev) +{ + dev->ops->hw_config(dev); +} + +static inline enum mei_pg_state mei_pg_state(struct mei_device *dev) +{ + return dev->ops->pg_state(dev); +} + +static inline bool mei_pg_in_transition(struct mei_device *dev) +{ + return dev->ops->pg_in_transition(dev); +} + +static inline bool mei_pg_is_enabled(struct mei_device *dev) +{ + return dev->ops->pg_is_enabled(dev); +} + +static inline int mei_hw_reset(struct mei_device *dev, bool enable) +{ + return dev->ops->hw_reset(dev, enable); +} + +static inline int mei_hw_start(struct mei_device *dev) +{ + return dev->ops->hw_start(dev); +} + +static inline void mei_clear_interrupts(struct mei_device *dev) +{ + dev->ops->intr_clear(dev); +} + +static inline void mei_enable_interrupts(struct mei_device *dev) +{ + dev->ops->intr_enable(dev); +} + +static inline void mei_disable_interrupts(struct mei_device *dev) +{ + dev->ops->intr_disable(dev); +} + +static inline bool mei_host_is_ready(struct mei_device *dev) +{ + return dev->ops->host_is_ready(dev); +} +static inline bool mei_hw_is_ready(struct mei_device *dev) +{ + return dev->ops->hw_is_ready(dev); +} + +static inline bool mei_hbuf_is_ready(struct mei_device *dev) +{ + return dev->ops->hbuf_is_ready(dev); +} + +static inline int mei_hbuf_empty_slots(struct mei_device *dev) +{ + return dev->ops->hbuf_free_slots(dev); +} + +static inline size_t mei_hbuf_max_len(const struct mei_device *dev) +{ + return dev->ops->hbuf_max_len(dev); +} + +static inline int mei_write_message(struct mei_device *dev, + struct mei_msg_hdr *hdr, + unsigned char *buf) +{ + return dev->ops->write(dev, hdr, buf); +} + +static inline u32 mei_read_hdr(const struct mei_device *dev) +{ + return dev->ops->read_hdr(dev); +} + +static inline void mei_read_slots(struct mei_device *dev, + unsigned char *buf, unsigned long len) +{ + dev->ops->read(dev, buf, len); +} + +static inline int mei_count_full_read_slots(struct mei_device *dev) +{ + return dev->ops->rdbuf_full_slots(dev); +} + +static inline int mei_fw_status(struct mei_device *dev, + struct mei_fw_status *fw_status) +{ + return dev->ops->fw_status(dev, fw_status); +} + +bool mei_hbuf_acquire(struct mei_device *dev); + +bool mei_write_is_idle(struct mei_device *dev); + +#if IS_ENABLED(CONFIG_DEBUG_FS) +int mei_dbgfs_register(struct mei_device *dev, const char *name); +void mei_dbgfs_deregister(struct mei_device *dev); +#else +static inline int mei_dbgfs_register(struct mei_device *dev, const char *name) +{ + return 0; +} +static inline void mei_dbgfs_deregister(struct mei_device *dev) {} +#endif /* CONFIG_DEBUG_FS */ + +int mei_register(struct mei_device *dev, struct device *parent); +void mei_deregister(struct mei_device *dev); + +#define MEI_HDR_FMT "hdr:host=%02d me=%02d len=%d internal=%1d comp=%1d" +#define MEI_HDR_PRM(hdr) \ + (hdr)->host_addr, (hdr)->me_addr, \ + (hdr)->length, (hdr)->internal, (hdr)->msg_complete + +ssize_t mei_fw_status2str(struct mei_fw_status *fw_sts, char *buf, size_t len); +/** + * mei_fw_status_str - fetch and convert fw status registers to printable string + * + * @dev: the device structure + * @buf: string buffer at minimal 
size MEI_FW_STATUS_STR_SZ + * @len: buffer len must be >= MEI_FW_STATUS_STR_SZ + * + * Return: number of bytes written or < 0 on failure + */ +static inline ssize_t mei_fw_status_str(struct mei_device *dev, + char *buf, size_t len) +{ + struct mei_fw_status fw_status; + int ret; + + buf[0] = '\0'; + + ret = mei_fw_status(dev, &fw_status); + if (ret) + return ret; + + ret = mei_fw_status2str(&fw_status, buf, MEI_FW_STATUS_STR_SZ); + + return ret; +} + + +#endif diff --git a/kernel/drivers/misc/mei/nfc.c b/kernel/drivers/misc/mei/nfc.c new file mode 100644 index 000000000..c3bcb6368 --- /dev/null +++ b/kernel/drivers/misc/mei/nfc.c @@ -0,0 +1,593 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2013, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/device.h> +#include <linux/slab.h> + +#include <linux/mei_cl_bus.h> + +#include "mei_dev.h" +#include "client.h" + +struct mei_nfc_cmd { + u8 command; + u8 status; + u16 req_id; + u32 reserved; + u16 data_size; + u8 sub_command; + u8 data[]; +} __packed; + +struct mei_nfc_reply { + u8 command; + u8 status; + u16 req_id; + u32 reserved; + u16 data_size; + u8 sub_command; + u8 reply_status; + u8 data[]; +} __packed; + +struct mei_nfc_if_version { + u8 radio_version_sw[3]; + u8 reserved[3]; + u8 radio_version_hw[3]; + u8 i2c_addr; + u8 fw_ivn; + u8 vendor_id; + u8 radio_type; +} __packed; + +struct mei_nfc_connect { + u8 fw_ivn; + u8 vendor_id; +} __packed; + +struct mei_nfc_connect_resp { + u8 fw_ivn; + u8 vendor_id; + u16 me_major; + u16 me_minor; + u16 me_hotfix; + u16 me_build; +} __packed; + +struct mei_nfc_hci_hdr { + u8 cmd; + u8 status; + u16 req_id; + u32 reserved; + u16 data_size; +} __packed; + +#define MEI_NFC_CMD_MAINTENANCE 0x00 +#define MEI_NFC_CMD_HCI_SEND 0x01 +#define MEI_NFC_CMD_HCI_RECV 0x02 + +#define MEI_NFC_SUBCMD_CONNECT 0x00 +#define MEI_NFC_SUBCMD_IF_VERSION 0x01 + +#define MEI_NFC_HEADER_SIZE 10 + +/** + * struct mei_nfc_dev - NFC mei device + * + * @cl: NFC host client + * @cl_info: NFC info host client + * @init_work: perform connection to the info client + * @send_wq: send completion wait queue + * @fw_ivn: NFC Interface Version Number + * @vendor_id: NFC manufacturer ID + * @radio_type: NFC radio type + * @bus_name: bus name + * + * @req_id: message counter + * @recv_req_id: reception message counter + */ +struct mei_nfc_dev { + struct mei_cl *cl; + struct mei_cl *cl_info; + struct work_struct init_work; + wait_queue_head_t send_wq; + u8 fw_ivn; + u8 vendor_id; + u8 radio_type; + char *bus_name; + + u16 req_id; + u16 recv_req_id; +}; + +/* UUIDs for NFC F/W clients */ +const uuid_le mei_nfc_guid = UUID_LE(0x0bb17a78, 0x2a8e, 0x4c50, + 0x94, 0xd4, 0x50, 0x26, + 0x67, 0x23, 0x77, 0x5c); + +static const uuid_le mei_nfc_info_guid = UUID_LE(0xd2de1625, 0x382d, 0x417d, + 0x48, 0xa4, 0xef, 0xab, + 0xba, 0x8a, 0x12, 0x06); + +/* Vendors */ +#define MEI_NFC_VENDOR_INSIDE 0x00 +#define MEI_NFC_VENDOR_NXP 0x01 + +/* Radio 
types */ +#define MEI_NFC_VENDOR_INSIDE_UREAD 0x00 +#define MEI_NFC_VENDOR_NXP_PN544 0x01 + +static void mei_nfc_free(struct mei_nfc_dev *ndev) +{ + if (!ndev) + return; + + if (ndev->cl) { + list_del(&ndev->cl->device_link); + mei_cl_unlink(ndev->cl); + kfree(ndev->cl); + } + + if (ndev->cl_info) { + list_del(&ndev->cl_info->device_link); + mei_cl_unlink(ndev->cl_info); + kfree(ndev->cl_info); + } + + kfree(ndev); +} + +static int mei_nfc_build_bus_name(struct mei_nfc_dev *ndev) +{ + struct mei_device *dev; + + if (!ndev->cl) + return -ENODEV; + + dev = ndev->cl->dev; + + switch (ndev->vendor_id) { + case MEI_NFC_VENDOR_INSIDE: + switch (ndev->radio_type) { + case MEI_NFC_VENDOR_INSIDE_UREAD: + ndev->bus_name = "microread"; + return 0; + + default: + dev_err(dev->dev, "Unknown radio type 0x%x\n", + ndev->radio_type); + + return -EINVAL; + } + + case MEI_NFC_VENDOR_NXP: + switch (ndev->radio_type) { + case MEI_NFC_VENDOR_NXP_PN544: + ndev->bus_name = "pn544"; + return 0; + default: + dev_err(dev->dev, "Unknown radio type 0x%x\n", + ndev->radio_type); + + return -EINVAL; + } + + default: + dev_err(dev->dev, "Unknown vendor ID 0x%x\n", + ndev->vendor_id); + + return -EINVAL; + } + + return 0; +} + +static int mei_nfc_connect(struct mei_nfc_dev *ndev) +{ + struct mei_device *dev; + struct mei_cl *cl; + struct mei_nfc_cmd *cmd, *reply; + struct mei_nfc_connect *connect; + struct mei_nfc_connect_resp *connect_resp; + size_t connect_length, connect_resp_length; + int bytes_recv, ret; + + cl = ndev->cl; + dev = cl->dev; + + connect_length = sizeof(struct mei_nfc_cmd) + + sizeof(struct mei_nfc_connect); + + connect_resp_length = sizeof(struct mei_nfc_cmd) + + sizeof(struct mei_nfc_connect_resp); + + cmd = kzalloc(connect_length, GFP_KERNEL); + if (!cmd) + return -ENOMEM; + connect = (struct mei_nfc_connect *)cmd->data; + + reply = kzalloc(connect_resp_length, GFP_KERNEL); + if (!reply) { + kfree(cmd); + return -ENOMEM; + } + + connect_resp = (struct mei_nfc_connect_resp *)reply->data; + + cmd->command = MEI_NFC_CMD_MAINTENANCE; + cmd->data_size = 3; + cmd->sub_command = MEI_NFC_SUBCMD_CONNECT; + connect->fw_ivn = ndev->fw_ivn; + connect->vendor_id = ndev->vendor_id; + + ret = __mei_cl_send(cl, (u8 *)cmd, connect_length); + if (ret < 0) { + dev_err(dev->dev, "Could not send connect cmd\n"); + goto err; + } + + bytes_recv = __mei_cl_recv(cl, (u8 *)reply, connect_resp_length); + if (bytes_recv < 0) { + dev_err(dev->dev, "Could not read connect response\n"); + ret = bytes_recv; + goto err; + } + + dev_info(dev->dev, "IVN 0x%x Vendor ID 0x%x\n", + connect_resp->fw_ivn, connect_resp->vendor_id); + + dev_info(dev->dev, "ME FW %d.%d.%d.%d\n", + connect_resp->me_major, connect_resp->me_minor, + connect_resp->me_hotfix, connect_resp->me_build); + + ret = 0; + +err: + kfree(reply); + kfree(cmd); + + return ret; +} + +static int mei_nfc_if_version(struct mei_nfc_dev *ndev) +{ + struct mei_device *dev; + struct mei_cl *cl; + + struct mei_nfc_cmd cmd; + struct mei_nfc_reply *reply = NULL; + struct mei_nfc_if_version *version; + size_t if_version_length; + int bytes_recv, ret; + + cl = ndev->cl_info; + dev = cl->dev; + + memset(&cmd, 0, sizeof(struct mei_nfc_cmd)); + cmd.command = MEI_NFC_CMD_MAINTENANCE; + cmd.data_size = 1; + cmd.sub_command = MEI_NFC_SUBCMD_IF_VERSION; + + ret = __mei_cl_send(cl, (u8 *)&cmd, sizeof(struct mei_nfc_cmd)); + if (ret < 0) { + dev_err(dev->dev, "Could not send IF version cmd\n"); + return ret; + } + + /* to be sure on the stack we alloc memory */ + if_version_length = 
sizeof(struct mei_nfc_reply) + + sizeof(struct mei_nfc_if_version); + + reply = kzalloc(if_version_length, GFP_KERNEL); + if (!reply) + return -ENOMEM; + + bytes_recv = __mei_cl_recv(cl, (u8 *)reply, if_version_length); + if (bytes_recv < 0 || bytes_recv < sizeof(struct mei_nfc_reply)) { + dev_err(dev->dev, "Could not read IF version\n"); + ret = -EIO; + goto err; + } + + version = (struct mei_nfc_if_version *)reply->data; + + ndev->fw_ivn = version->fw_ivn; + ndev->vendor_id = version->vendor_id; + ndev->radio_type = version->radio_type; + +err: + kfree(reply); + return ret; +} + +static int mei_nfc_enable(struct mei_cl_device *cldev) +{ + struct mei_device *dev; + struct mei_nfc_dev *ndev; + int ret; + + ndev = (struct mei_nfc_dev *)cldev->priv_data; + dev = ndev->cl->dev; + + ret = mei_nfc_connect(ndev); + if (ret < 0) { + dev_err(dev->dev, "Could not connect to NFC"); + return ret; + } + + return 0; +} + +static int mei_nfc_disable(struct mei_cl_device *cldev) +{ + return 0; +} + +static int mei_nfc_send(struct mei_cl_device *cldev, u8 *buf, size_t length) +{ + struct mei_device *dev; + struct mei_nfc_dev *ndev; + struct mei_nfc_hci_hdr *hdr; + u8 *mei_buf; + int err; + + ndev = (struct mei_nfc_dev *) cldev->priv_data; + dev = ndev->cl->dev; + + err = -ENOMEM; + mei_buf = kzalloc(length + MEI_NFC_HEADER_SIZE, GFP_KERNEL); + if (!mei_buf) + goto out; + + hdr = (struct mei_nfc_hci_hdr *) mei_buf; + hdr->cmd = MEI_NFC_CMD_HCI_SEND; + hdr->status = 0; + hdr->req_id = ndev->req_id; + hdr->reserved = 0; + hdr->data_size = length; + + memcpy(mei_buf + MEI_NFC_HEADER_SIZE, buf, length); + err = __mei_cl_send(ndev->cl, mei_buf, length + MEI_NFC_HEADER_SIZE); + if (err < 0) + goto out; + + if (!wait_event_interruptible_timeout(ndev->send_wq, + ndev->recv_req_id == ndev->req_id, HZ)) { + dev_err(dev->dev, "NFC MEI command timeout\n"); + err = -ETIME; + } else { + ndev->req_id++; + } +out: + kfree(mei_buf); + return err; +} + +static int mei_nfc_recv(struct mei_cl_device *cldev, u8 *buf, size_t length) +{ + struct mei_nfc_dev *ndev; + struct mei_nfc_hci_hdr *hci_hdr; + int received_length; + + ndev = (struct mei_nfc_dev *)cldev->priv_data; + + received_length = __mei_cl_recv(ndev->cl, buf, length); + if (received_length < 0) + return received_length; + + hci_hdr = (struct mei_nfc_hci_hdr *) buf; + + if (hci_hdr->cmd == MEI_NFC_CMD_HCI_SEND) { + ndev->recv_req_id = hci_hdr->req_id; + wake_up(&ndev->send_wq); + + return 0; + } + + return received_length; +} + +static struct mei_cl_ops nfc_ops = { + .enable = mei_nfc_enable, + .disable = mei_nfc_disable, + .send = mei_nfc_send, + .recv = mei_nfc_recv, +}; + +static void mei_nfc_init(struct work_struct *work) +{ + struct mei_device *dev; + struct mei_cl_device *cldev; + struct mei_nfc_dev *ndev; + struct mei_cl *cl_info; + + ndev = container_of(work, struct mei_nfc_dev, init_work); + + cl_info = ndev->cl_info; + dev = cl_info->dev; + + mutex_lock(&dev->device_lock); + + if (mei_cl_connect(cl_info, NULL) < 0) { + mutex_unlock(&dev->device_lock); + dev_err(dev->dev, "Could not connect to the NFC INFO ME client"); + + goto err; + } + + mutex_unlock(&dev->device_lock); + + if (mei_nfc_if_version(ndev) < 0) { + dev_err(dev->dev, "Could not get the NFC interface version"); + + goto err; + } + + dev_info(dev->dev, "NFC MEI VERSION: IVN 0x%x Vendor ID 0x%x Type 0x%x\n", + ndev->fw_ivn, ndev->vendor_id, ndev->radio_type); + + mutex_lock(&dev->device_lock); + + if (mei_cl_disconnect(cl_info) < 0) { + mutex_unlock(&dev->device_lock); + dev_err(dev->dev, "Could 
not disconnect the NFC INFO ME client"); + + goto err; + } + + mutex_unlock(&dev->device_lock); + + if (mei_nfc_build_bus_name(ndev) < 0) { + dev_err(dev->dev, "Could not build the bus ID name\n"); + return; + } + + cldev = mei_cl_add_device(dev, mei_nfc_guid, ndev->bus_name, &nfc_ops); + if (!cldev) { + dev_err(dev->dev, "Could not add the NFC device to the MEI bus\n"); + + goto err; + } + + cldev->priv_data = ndev; + + + return; + +err: + mutex_lock(&dev->device_lock); + mei_nfc_free(ndev); + mutex_unlock(&dev->device_lock); + +} + + +int mei_nfc_host_init(struct mei_device *dev) +{ + struct mei_nfc_dev *ndev; + struct mei_cl *cl_info, *cl; + struct mei_me_client *me_cl = NULL; + int ret; + + + /* in case of internal reset bail out + * as the device is already setup + */ + cl = mei_cl_bus_find_cl_by_uuid(dev, mei_nfc_guid); + if (cl) + return 0; + + ndev = kzalloc(sizeof(struct mei_nfc_dev), GFP_KERNEL); + if (!ndev) { + ret = -ENOMEM; + goto err; + } + + /* check for valid client id */ + me_cl = mei_me_cl_by_uuid(dev, &mei_nfc_info_guid); + if (!me_cl) { + dev_info(dev->dev, "nfc: failed to find the client\n"); + ret = -ENOTTY; + goto err; + } + + cl_info = mei_cl_alloc_linked(dev, MEI_HOST_CLIENT_ID_ANY); + if (IS_ERR(cl_info)) { + ret = PTR_ERR(cl_info); + goto err; + } + + cl_info->me_client_id = me_cl->client_id; + cl_info->cl_uuid = me_cl->props.protocol_name; + mei_me_cl_put(me_cl); + me_cl = NULL; + + list_add_tail(&cl_info->device_link, &dev->device_list); + + ndev->cl_info = cl_info; + + /* check for valid client id */ + me_cl = mei_me_cl_by_uuid(dev, &mei_nfc_guid); + if (!me_cl) { + dev_info(dev->dev, "nfc: failed to find the client\n"); + ret = -ENOTTY; + goto err; + } + + cl = mei_cl_alloc_linked(dev, MEI_HOST_CLIENT_ID_ANY); + if (IS_ERR(cl)) { + ret = PTR_ERR(cl); + goto err; + } + + cl->me_client_id = me_cl->client_id; + cl->cl_uuid = me_cl->props.protocol_name; + mei_me_cl_put(me_cl); + me_cl = NULL; + + list_add_tail(&cl->device_link, &dev->device_list); + + ndev->cl = cl; + + ndev->req_id = 1; + + INIT_WORK(&ndev->init_work, mei_nfc_init); + init_waitqueue_head(&ndev->send_wq); + schedule_work(&ndev->init_work); + + return 0; + +err: + mei_me_cl_put(me_cl); + mei_nfc_free(ndev); + + return ret; +} + +void mei_nfc_host_exit(struct mei_device *dev) +{ + struct mei_nfc_dev *ndev; + struct mei_cl *cl; + struct mei_cl_device *cldev; + + cl = mei_cl_bus_find_cl_by_uuid(dev, mei_nfc_guid); + if (!cl) + return; + + cldev = cl->device; + if (!cldev) + return; + + ndev = (struct mei_nfc_dev *)cldev->priv_data; + if (ndev) + cancel_work_sync(&ndev->init_work); + + cldev->priv_data = NULL; + + mutex_lock(&dev->device_lock); + /* Need to remove the device here + * since mei_nfc_free will unlink the clients + */ + mei_cl_remove_device(cldev); + mei_nfc_free(ndev); + mutex_unlock(&dev->device_lock); +} + + diff --git a/kernel/drivers/misc/mei/pci-me.c b/kernel/drivers/misc/mei/pci-me.c new file mode 100644 index 000000000..23f71f5ce --- /dev/null +++ b/kernel/drivers/misc/mei/pci-me.c @@ -0,0 +1,485 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2012, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/fs.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/fcntl.h> +#include <linux/pci.h> +#include <linux/poll.h> +#include <linux/ioctl.h> +#include <linux/cdev.h> +#include <linux/sched.h> +#include <linux/uuid.h> +#include <linux/compat.h> +#include <linux/jiffies.h> +#include <linux/interrupt.h> + +#include <linux/pm_runtime.h> + +#include <linux/mei.h> + +#include "mei_dev.h" +#include "client.h" +#include "hw-me-regs.h" +#include "hw-me.h" + +/* mei_me_pci_tbl - PCI Device ID Table */ +static const struct pci_device_id mei_me_pci_tbl[] = { + {MEI_PCI_DEVICE(MEI_DEV_ID_82946GZ, mei_me_legacy_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_82G35, mei_me_legacy_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_82Q965, mei_me_legacy_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_82G965, mei_me_legacy_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_82GM965, mei_me_legacy_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_82GME965, mei_me_legacy_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82Q35, mei_me_legacy_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82G33, mei_me_legacy_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82Q33, mei_me_legacy_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82X38, mei_me_legacy_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_3200, mei_me_legacy_cfg)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_6, mei_me_legacy_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_7, mei_me_legacy_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_8, mei_me_legacy_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_9, mei_me_legacy_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_10, mei_me_legacy_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_1, mei_me_legacy_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_2, mei_me_legacy_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_3, mei_me_legacy_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_4, mei_me_legacy_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_1, mei_me_ich_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_2, mei_me_ich_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_3, mei_me_ich_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_4, mei_me_ich_cfg)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_1, mei_me_pch_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_2, mei_me_pch_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CPT_1, mei_me_pch_cpt_pbg_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PBG_1, mei_me_pch_cpt_pbg_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_1, mei_me_pch_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_2, mei_me_pch_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_3, mei_me_pch_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_H, mei_me_pch8_sps_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_W, mei_me_pch8_sps_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_LP, mei_me_pch8_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_HR, mei_me_pch8_sps_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_WPT_LP, mei_me_pch8_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_WPT_LP_2, mei_me_pch8_cfg)}, + + /* required last entry */ + {0, } +}; + +MODULE_DEVICE_TABLE(pci, mei_me_pci_tbl); + +#ifdef CONFIG_PM +static inline void mei_me_set_pm_domain(struct mei_device *dev); +static inline void mei_me_unset_pm_domain(struct mei_device *dev); +#else +static inline void mei_me_set_pm_domain(struct mei_device *dev) {} +static inline void mei_me_unset_pm_domain(struct mei_device *dev) {} +#endif /* CONFIG_PM */ +
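Each entry in the table above stashes a pointer to a per-generation struct mei_cfg in its driver_data field, and mei_me_probe() below casts it back out. A minimal sketch of that pattern (hypothetical IDs, names and config, not the driver's own):

#include <linux/module.h>
#include <linux/pci.h>

struct demo_cfg {	/* stand-in for struct mei_cfg */
	int quirks;
};

static const struct demo_cfg demo_cfg_a = { .quirks = 1 };

/* 0xffff/0xabcd are placeholder vendor/device IDs */
static const struct pci_device_id demo_pci_tbl[] = {
	{ PCI_DEVICE(0xffff, 0xabcd),
	  .driver_data = (kernel_ulong_t)&demo_cfg_a },
	{ 0, }
};
MODULE_DEVICE_TABLE(pci, demo_pci_tbl);

static int demo_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	/* same recovery cast mei_me_probe() performs below */
	const struct demo_cfg *cfg = (const struct demo_cfg *)ent->driver_data;

	return cfg->quirks ? 0 : -ENODEV;
}

static struct pci_driver demo_driver = {
	.name = "demo_pci",
	.id_table = demo_pci_tbl,
	.probe = demo_probe,
};
module_pci_driver(demo_driver);
MODULE_LICENSE("GPL");

+/** + *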
mei_me_quirk_probe - probe for devices without a valid ME interface + * + * @pdev: PCI device structure + * @cfg: per generation config + * + * Return: true if ME Interface is valid, false otherwise + */ +static bool mei_me_quirk_probe(struct pci_dev *pdev, + const struct mei_cfg *cfg) +{ + if (cfg->quirk_probe && cfg->quirk_probe(pdev)) { + dev_info(&pdev->dev, "Device doesn't have valid ME Interface\n"); + return false; + } + + return true; +} + +/** + * mei_me_probe - Device Initialization Routine + * + * @pdev: PCI device structure + * @ent: entry in mei_me_pci_tbl + * + * Return: 0 on success, <0 on failure. + */ +static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + const struct mei_cfg *cfg = (struct mei_cfg *)(ent->driver_data); + struct mei_device *dev; + struct mei_me_hw *hw; + int err; + + + if (!mei_me_quirk_probe(pdev, cfg)) + return -ENODEV; + + /* enable pci dev */ + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "failed to enable pci device.\n"); + goto end; + } + /* set PCI host mastering */ + pci_set_master(pdev); + /* pci request regions for mei driver */ + err = pci_request_regions(pdev, KBUILD_MODNAME); + if (err) { + dev_err(&pdev->dev, "failed to get pci regions.\n"); + goto disable_device; + } + + if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) || + dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64))) { + + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + if (err) + err = dma_set_coherent_mask(&pdev->dev, + DMA_BIT_MASK(32)); + } + if (err) { + dev_err(&pdev->dev, "No usable DMA configuration, aborting\n"); + goto release_regions; + } + + + /* allocates and initializes the mei dev structure */ + dev = mei_me_dev_init(pdev, cfg); + if (!dev) { + err = -ENOMEM; + goto release_regions; + } + hw = to_me_hw(dev); + /* mapping IO device memory */ + hw->mem_addr = pci_iomap(pdev, 0, 0); + if (!hw->mem_addr) { + dev_err(&pdev->dev, "mapping I/O device memory failure.\n"); + err = -ENOMEM; + goto free_device; + } + pci_enable_msi(pdev); + + /* request and enable interrupt */ + if (pci_dev_msi_enabled(pdev)) + err = request_threaded_irq(pdev->irq, + NULL, + mei_me_irq_thread_handler, + IRQF_ONESHOT, KBUILD_MODNAME, dev); + else + err = request_threaded_irq(pdev->irq, + mei_me_irq_quick_handler, + mei_me_irq_thread_handler, + IRQF_SHARED, KBUILD_MODNAME, dev); + + if (err) { + dev_err(&pdev->dev, "request_threaded_irq failure. irq = %d\n", + pdev->irq); + goto disable_msi; + } + + if (mei_start(dev)) { + dev_err(&pdev->dev, "init hw failure.\n"); + err = -ENODEV; + goto release_irq; + } + + pm_runtime_set_autosuspend_delay(&pdev->dev, MEI_ME_RPM_TIMEOUT); + pm_runtime_use_autosuspend(&pdev->dev); + + err = mei_register(dev, &pdev->dev); + if (err) + goto release_irq; + + pci_set_drvdata(pdev, dev); + + schedule_delayed_work(&dev->timer_work, HZ); + + /* + * For non-wakeable HW the runtime pm framework + * can't be used at the pci device level. + * Use domain runtime pm callbacks instead.
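+ *
+ * The override itself happens in mei_me_set_pm_domain() below: when the
+ * bus provides dev_pm_ops, it copies them into dev->pg_domain, swaps in
+ * the MEI runtime suspend/resume/idle callbacks, and points the device's
+ * pm_domain at the result.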
+ */ + if (!pci_dev_run_wake(pdev)) + mei_me_set_pm_domain(dev); + + if (mei_pg_is_enabled(dev)) + pm_runtime_put_noidle(&pdev->dev); + + dev_dbg(&pdev->dev, "initialization successful.\n"); + + return 0; + +release_irq: + mei_cancel_work(dev); + mei_disable_interrupts(dev); + free_irq(pdev->irq, dev); +disable_msi: + pci_disable_msi(pdev); + pci_iounmap(pdev, hw->mem_addr); +free_device: + kfree(dev); +release_regions: + pci_release_regions(pdev); +disable_device: + pci_disable_device(pdev); +end: + dev_err(&pdev->dev, "initialization failed.\n"); + return err; +} + +/** + * mei_me_remove - Device Removal Routine + * + * @pdev: PCI device structure + * + * mei_remove is called by the PCI subsystem to alert the driver + * that it should release a PCI device. + */ +static void mei_me_remove(struct pci_dev *pdev) +{ + struct mei_device *dev; + struct mei_me_hw *hw; + + dev = pci_get_drvdata(pdev); + if (!dev) + return; + + if (mei_pg_is_enabled(dev)) + pm_runtime_get_noresume(&pdev->dev); + + hw = to_me_hw(dev); + + + dev_dbg(&pdev->dev, "stop\n"); + mei_stop(dev); + + if (!pci_dev_run_wake(pdev)) + mei_me_unset_pm_domain(dev); + + /* disable interrupts */ + mei_disable_interrupts(dev); + + free_irq(pdev->irq, dev); + pci_disable_msi(pdev); + + if (hw->mem_addr) + pci_iounmap(pdev, hw->mem_addr); + + mei_deregister(dev); + + kfree(dev); + + pci_release_regions(pdev); + pci_disable_device(pdev); + + +} +#ifdef CONFIG_PM_SLEEP +static int mei_me_pci_suspend(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev = pci_get_drvdata(pdev); + + if (!dev) + return -ENODEV; + + dev_dbg(&pdev->dev, "suspend\n"); + + mei_stop(dev); + + mei_disable_interrupts(dev); + + free_irq(pdev->irq, dev); + pci_disable_msi(pdev); + + return 0; +} + +static int mei_me_pci_resume(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev; + int err; + + dev = pci_get_drvdata(pdev); + if (!dev) + return -ENODEV; + + pci_enable_msi(pdev); + + /* request and enable interrupt */ + if (pci_dev_msi_enabled(pdev)) + err = request_threaded_irq(pdev->irq, + NULL, + mei_me_irq_thread_handler, + IRQF_ONESHOT, KBUILD_MODNAME, dev); + else + err = request_threaded_irq(pdev->irq, + mei_me_irq_quick_handler, + mei_me_irq_thread_handler, + IRQF_SHARED, KBUILD_MODNAME, dev); + + if (err) { + dev_err(&pdev->dev, "request_threaded_irq failed: irq = %d.\n", + pdev->irq); + return err; + } + + err = mei_restart(dev); + if (err) + return err; + + /* Start timer if stopped in suspend */ + schedule_delayed_work(&dev->timer_work, HZ); + + return 0; +} +#endif /* CONFIG_PM_SLEEP */ + +#ifdef CONFIG_PM +static int mei_me_pm_runtime_idle(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev; + + dev_dbg(&pdev->dev, "rpm: me: runtime_idle\n"); + + dev = pci_get_drvdata(pdev); + if (!dev) + return -ENODEV; + if (mei_write_is_idle(dev)) + pm_runtime_autosuspend(device); + + return -EBUSY; +} + +static int mei_me_pm_runtime_suspend(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev; + int ret; + + dev_dbg(&pdev->dev, "rpm: me: runtime suspend\n"); + + dev = pci_get_drvdata(pdev); + if (!dev) + return -ENODEV; + + mutex_lock(&dev->device_lock); + + if (mei_write_is_idle(dev)) + ret = mei_me_pg_enter_sync(dev); + else + ret = -EAGAIN; + + mutex_unlock(&dev->device_lock); + + dev_dbg(&pdev->dev, "rpm: me: runtime suspend ret=%d\n", ret); + + return ret; +} + +static int 
mei_me_pm_runtime_resume(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev; + int ret; + + dev_dbg(&pdev->dev, "rpm: me: runtime resume\n"); + + dev = pci_get_drvdata(pdev); + if (!dev) + return -ENODEV; + + mutex_lock(&dev->device_lock); + + ret = mei_me_pg_exit_sync(dev); + + mutex_unlock(&dev->device_lock); + + dev_dbg(&pdev->dev, "rpm: me: runtime resume ret = %d\n", ret); + + return ret; +} + +/** + * mei_me_set_pm_domain - fill and set pm domain structure for device + * + * @dev: mei_device + */ +static inline void mei_me_set_pm_domain(struct mei_device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + + if (pdev->dev.bus && pdev->dev.bus->pm) { + dev->pg_domain.ops = *pdev->dev.bus->pm; + + dev->pg_domain.ops.runtime_suspend = mei_me_pm_runtime_suspend; + dev->pg_domain.ops.runtime_resume = mei_me_pm_runtime_resume; + dev->pg_domain.ops.runtime_idle = mei_me_pm_runtime_idle; + + pdev->dev.pm_domain = &dev->pg_domain; + } +} + +/** + * mei_me_unset_pm_domain - clean pm domain structure for device + * + * @dev: mei_device + */ +static inline void mei_me_unset_pm_domain(struct mei_device *dev) +{ + /* stop using pm callbacks if any */ + dev->dev->pm_domain = NULL; +} + +static const struct dev_pm_ops mei_me_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(mei_me_pci_suspend, + mei_me_pci_resume) + SET_RUNTIME_PM_OPS( + mei_me_pm_runtime_suspend, + mei_me_pm_runtime_resume, + mei_me_pm_runtime_idle) +}; + +#define MEI_ME_PM_OPS (&mei_me_pm_ops) +#else +#define MEI_ME_PM_OPS NULL +#endif /* CONFIG_PM */ +/* + * PCI driver structure + */ +static struct pci_driver mei_me_driver = { + .name = KBUILD_MODNAME, + .id_table = mei_me_pci_tbl, + .probe = mei_me_probe, + .remove = mei_me_remove, + .shutdown = mei_me_remove, + .driver.pm = MEI_ME_PM_OPS, +}; + +module_pci_driver(mei_me_driver); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) Management Engine Interface"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/misc/mei/pci-txe.c b/kernel/drivers/misc/mei/pci-txe.c new file mode 100644 index 000000000..dcfcba44b --- /dev/null +++ b/kernel/drivers/misc/mei/pci-txe.c @@ -0,0 +1,436 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/fs.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/uuid.h> +#include <linux/jiffies.h> +#include <linux/interrupt.h> +#include <linux/workqueue.h> +#include <linux/pm_runtime.h> + +#include <linux/mei.h> + + +#include "mei_dev.h" +#include "hw-txe.h" + +static const struct pci_device_id mei_txe_pci_tbl[] = { + {PCI_VDEVICE(INTEL, 0x0F18)}, /* Baytrail */ + {PCI_VDEVICE(INTEL, 0x2298)}, /* Cherrytrail */ + + {0, } +}; +MODULE_DEVICE_TABLE(pci, mei_txe_pci_tbl); + +#ifdef CONFIG_PM +static inline void mei_txe_set_pm_domain(struct mei_device *dev); +static inline void mei_txe_unset_pm_domain(struct mei_device *dev); +#else +static inline void mei_txe_set_pm_domain(struct mei_device *dev) {} +static inline void mei_txe_unset_pm_domain(struct mei_device *dev) {} +#endif /* CONFIG_PM */ + +static void mei_txe_pci_iounmap(struct pci_dev *pdev, struct mei_txe_hw *hw) +{ + int i; + + for (i = SEC_BAR; i < NUM_OF_MEM_BARS; i++) { + if (hw->mem_addr[i]) { + pci_iounmap(pdev, hw->mem_addr[i]); + hw->mem_addr[i] = NULL; + } + } +} +/** + * mei_txe_probe - Device Initialization Routine + * + * @pdev: PCI device structure + * @ent: entry in mei_txe_pci_tbl + * + * Return: 0 on success, <0 on failure. + */ +static int mei_txe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct mei_device *dev; + struct mei_txe_hw *hw; + int err; + int i; + + /* enable pci dev */ + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "failed to enable pci device.\n"); + goto end; + } + /* set PCI host mastering */ + pci_set_master(pdev); + /* pci request regions for mei driver */ + err = pci_request_regions(pdev, KBUILD_MODNAME); + if (err) { + dev_err(&pdev->dev, "failed to get pci regions.\n"); + goto disable_device; + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(36)); + if (err) { + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, "No suitable DMA available.\n"); + goto release_regions; + } + } + + /* allocates and initializes the mei dev structure */ + dev = mei_txe_dev_init(pdev); + if (!dev) { + err = -ENOMEM; + goto release_regions; + } + hw = to_txe_hw(dev); + + /* mapping IO device memory */ + for (i = SEC_BAR; i < NUM_OF_MEM_BARS; i++) { + hw->mem_addr[i] = pci_iomap(pdev, i, 0); + if (!hw->mem_addr[i]) { + dev_err(&pdev->dev, "mapping I/O device memory failure.\n"); + err = -ENOMEM; + goto free_device; + } + } + + + pci_enable_msi(pdev); + + /* clear spurious interrupts */ + mei_clear_interrupts(dev); + + /* request and enable interrupt */ + if (pci_dev_msi_enabled(pdev)) + err = request_threaded_irq(pdev->irq, + NULL, + mei_txe_irq_thread_handler, + IRQF_ONESHOT, KBUILD_MODNAME, dev); + else + err = request_threaded_irq(pdev->irq, + mei_txe_irq_quick_handler, + mei_txe_irq_thread_handler, + IRQF_SHARED, KBUILD_MODNAME, dev); + if (err) { + dev_err(&pdev->dev, "mei: request_threaded_irq failure. 
irq = %d\n", + pdev->irq); + goto free_device; + } + + if (mei_start(dev)) { + dev_err(&pdev->dev, "init hw failure.\n"); + err = -ENODEV; + goto release_irq; + } + + pm_runtime_set_autosuspend_delay(&pdev->dev, MEI_TXI_RPM_TIMEOUT); + pm_runtime_use_autosuspend(&pdev->dev); + + err = mei_register(dev, &pdev->dev); + if (err) + goto release_irq; + + pci_set_drvdata(pdev, dev); + + /* + * For not wake-able HW runtime pm framework + * can't be used on pci device level. + * Use domain runtime pm callbacks instead. + */ + if (!pci_dev_run_wake(pdev)) + mei_txe_set_pm_domain(dev); + + pm_runtime_put_noidle(&pdev->dev); + + return 0; + +release_irq: + + mei_cancel_work(dev); + + /* disable interrupts */ + mei_disable_interrupts(dev); + + free_irq(pdev->irq, dev); + pci_disable_msi(pdev); + +free_device: + mei_txe_pci_iounmap(pdev, hw); + + kfree(dev); +release_regions: + pci_release_regions(pdev); +disable_device: + pci_disable_device(pdev); +end: + dev_err(&pdev->dev, "initialization failed.\n"); + return err; +} + +/** + * mei_txe_remove - Device Removal Routine + * + * @pdev: PCI device structure + * + * mei_remove is called by the PCI subsystem to alert the driver + * that it should release a PCI device. + */ +static void mei_txe_remove(struct pci_dev *pdev) +{ + struct mei_device *dev; + struct mei_txe_hw *hw; + + dev = pci_get_drvdata(pdev); + if (!dev) { + dev_err(&pdev->dev, "mei: dev =NULL\n"); + return; + } + + pm_runtime_get_noresume(&pdev->dev); + + hw = to_txe_hw(dev); + + mei_stop(dev); + + if (!pci_dev_run_wake(pdev)) + mei_txe_unset_pm_domain(dev); + + /* disable interrupts */ + mei_disable_interrupts(dev); + free_irq(pdev->irq, dev); + pci_disable_msi(pdev); + + pci_set_drvdata(pdev, NULL); + + mei_txe_pci_iounmap(pdev, hw); + + mei_deregister(dev); + + kfree(dev); + + pci_release_regions(pdev); + pci_disable_device(pdev); +} + + +#ifdef CONFIG_PM_SLEEP +static int mei_txe_pci_suspend(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev = pci_get_drvdata(pdev); + + if (!dev) + return -ENODEV; + + dev_dbg(&pdev->dev, "suspend\n"); + + mei_stop(dev); + + mei_disable_interrupts(dev); + + free_irq(pdev->irq, dev); + pci_disable_msi(pdev); + + return 0; +} + +static int mei_txe_pci_resume(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev; + int err; + + dev = pci_get_drvdata(pdev); + if (!dev) + return -ENODEV; + + pci_enable_msi(pdev); + + mei_clear_interrupts(dev); + + /* request and enable interrupt */ + if (pci_dev_msi_enabled(pdev)) + err = request_threaded_irq(pdev->irq, + NULL, + mei_txe_irq_thread_handler, + IRQF_ONESHOT, KBUILD_MODNAME, dev); + else + err = request_threaded_irq(pdev->irq, + mei_txe_irq_quick_handler, + mei_txe_irq_thread_handler, + IRQF_SHARED, KBUILD_MODNAME, dev); + if (err) { + dev_err(&pdev->dev, "request_threaded_irq failed: irq = %d.\n", + pdev->irq); + return err; + } + + err = mei_restart(dev); + + return err; +} +#endif /* CONFIG_PM_SLEEP */ + +#ifdef CONFIG_PM +static int mei_txe_pm_runtime_idle(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev; + + dev_dbg(&pdev->dev, "rpm: txe: runtime_idle\n"); + + dev = pci_get_drvdata(pdev); + if (!dev) + return -ENODEV; + if (mei_write_is_idle(dev)) + pm_runtime_autosuspend(device); + + return -EBUSY; +} +static int mei_txe_pm_runtime_suspend(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev; + int ret; + + dev_dbg(&pdev->dev, 
"rpm: txe: runtime suspend\n"); + + dev = pci_get_drvdata(pdev); + if (!dev) + return -ENODEV; + + mutex_lock(&dev->device_lock); + + if (mei_write_is_idle(dev)) + ret = mei_txe_aliveness_set_sync(dev, 0); + else + ret = -EAGAIN; + + /* + * If everything is okay we're about to enter PCI low + * power state (D3) therefor we need to disable the + * interrupts towards host. + * However if device is not wakeable we do not enter + * D-low state and we need to keep the interrupt kicking + */ + if (!ret && pci_dev_run_wake(pdev)) + mei_disable_interrupts(dev); + + dev_dbg(&pdev->dev, "rpm: txe: runtime suspend ret=%d\n", ret); + + mutex_unlock(&dev->device_lock); + return ret; +} + +static int mei_txe_pm_runtime_resume(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev; + int ret; + + dev_dbg(&pdev->dev, "rpm: txe: runtime resume\n"); + + dev = pci_get_drvdata(pdev); + if (!dev) + return -ENODEV; + + mutex_lock(&dev->device_lock); + + mei_enable_interrupts(dev); + + ret = mei_txe_aliveness_set_sync(dev, 1); + + mutex_unlock(&dev->device_lock); + + dev_dbg(&pdev->dev, "rpm: txe: runtime resume ret = %d\n", ret); + + return ret; +} + +/** + * mei_txe_set_pm_domain - fill and set pm domain structure for device + * + * @dev: mei_device + */ +static inline void mei_txe_set_pm_domain(struct mei_device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + + if (pdev->dev.bus && pdev->dev.bus->pm) { + dev->pg_domain.ops = *pdev->dev.bus->pm; + + dev->pg_domain.ops.runtime_suspend = mei_txe_pm_runtime_suspend; + dev->pg_domain.ops.runtime_resume = mei_txe_pm_runtime_resume; + dev->pg_domain.ops.runtime_idle = mei_txe_pm_runtime_idle; + + pdev->dev.pm_domain = &dev->pg_domain; + } +} + +/** + * mei_txe_unset_pm_domain - clean pm domain structure for device + * + * @dev: mei_device + */ +static inline void mei_txe_unset_pm_domain(struct mei_device *dev) +{ + /* stop using pm callbacks if any */ + dev->dev->pm_domain = NULL; +} + +static const struct dev_pm_ops mei_txe_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(mei_txe_pci_suspend, + mei_txe_pci_resume) + SET_RUNTIME_PM_OPS( + mei_txe_pm_runtime_suspend, + mei_txe_pm_runtime_resume, + mei_txe_pm_runtime_idle) +}; + +#define MEI_TXE_PM_OPS (&mei_txe_pm_ops) +#else +#define MEI_TXE_PM_OPS NULL +#endif /* CONFIG_PM */ + +/* + * PCI driver structure + */ +static struct pci_driver mei_txe_driver = { + .name = KBUILD_MODNAME, + .id_table = mei_txe_pci_tbl, + .probe = mei_txe_probe, + .remove = mei_txe_remove, + .shutdown = mei_txe_remove, + .driver.pm = MEI_TXE_PM_OPS, +}; + +module_pci_driver(mei_txe_driver); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) Trusted Execution Environment Interface"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/misc/mei/wd.c b/kernel/drivers/misc/mei/wd.c new file mode 100644 index 000000000..2725f865c --- /dev/null +++ b/kernel/drivers/misc/mei/wd.c @@ -0,0 +1,404 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2012, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/device.h> +#include <linux/sched.h> +#include <linux/watchdog.h> + +#include <linux/mei.h> + +#include "mei_dev.h" +#include "hbm.h" +#include "client.h" + +static const u8 mei_start_wd_params[] = { 0x02, 0x12, 0x13, 0x10 }; +static const u8 mei_stop_wd_params[] = { 0x02, 0x02, 0x14, 0x10 }; + +/* + * AMT Watchdog Device + */ +#define INTEL_AMT_WATCHDOG_ID "INTCAMT" + +/* UUIDs for AMT F/W clients */ +const uuid_le mei_wd_guid = UUID_LE(0x05B79A6F, 0x4628, 0x4D7F, 0x89, + 0x9D, 0xA9, 0x15, 0x14, 0xCB, + 0x32, 0xAB); + +static void mei_wd_set_start_timeout(struct mei_device *dev, u16 timeout) +{ + dev_dbg(dev->dev, "wd: set timeout=%d.\n", timeout); + memcpy(dev->wd_data, mei_start_wd_params, MEI_WD_HDR_SIZE); + memcpy(dev->wd_data + MEI_WD_HDR_SIZE, &timeout, sizeof(u16)); +} + +/** + * mei_wd_host_init - connect to the watchdog client + * + * @dev: the device structure + * + * Return: -ENOTTY if wd client cannot be found + * -EIO if write has failed + * 0 on success + */ +int mei_wd_host_init(struct mei_device *dev) +{ + struct mei_cl *cl = &dev->wd_cl; + struct mei_me_client *me_cl; + int ret; + + mei_cl_init(cl, dev); + + dev->wd_timeout = MEI_WD_DEFAULT_TIMEOUT; + dev->wd_state = MEI_WD_IDLE; + + + /* check for valid client id */ + me_cl = mei_me_cl_by_uuid(dev, &mei_wd_guid); + if (!me_cl) { + dev_info(dev->dev, "wd: failed to find the client\n"); + return -ENOTTY; + } + + cl->me_client_id = me_cl->client_id; + cl->cl_uuid = me_cl->props.protocol_name; + mei_me_cl_put(me_cl); + + ret = mei_cl_link(cl, MEI_WD_HOST_CLIENT_ID); + + if (ret < 0) { + dev_info(dev->dev, "wd: failed to link client\n"); + return ret; + } + + ret = mei_cl_connect(cl, NULL); + + if (ret) { + dev_err(dev->dev, "wd: failed to connect = %d\n", ret); + mei_cl_unlink(cl); + return ret; + } + + ret = mei_watchdog_register(dev); + if (ret) { + mei_cl_disconnect(cl); + mei_cl_unlink(cl); + } + return ret; +} + +/** + * mei_wd_send - sends a watchdog message to the fw. + * + * @dev: the device structure + * + * Return: 0 if success, + * -EIO when message send fails + * -EINVAL when invalid message is to be sent + * -ENODEV on flow control failure + */ +int mei_wd_send(struct mei_device *dev) +{ + struct mei_cl *cl = &dev->wd_cl; + struct mei_msg_hdr hdr; + int ret; + + hdr.host_addr = cl->host_client_id; + hdr.me_addr = cl->me_client_id; + hdr.msg_complete = 1; + hdr.reserved = 0; + hdr.internal = 0; + + if (!memcmp(dev->wd_data, mei_start_wd_params, MEI_WD_HDR_SIZE)) + hdr.length = MEI_WD_START_MSG_SIZE; + else if (!memcmp(dev->wd_data, mei_stop_wd_params, MEI_WD_HDR_SIZE)) + hdr.length = MEI_WD_STOP_MSG_SIZE; + else { + dev_err(dev->dev, "wd: invalid message is to be sent, aborting\n"); + return -EINVAL; + } + + ret = mei_write_message(dev, &hdr, dev->wd_data); + if (ret) { + dev_err(dev->dev, "wd: write message failed\n"); + return ret; + } + + ret = mei_cl_flow_ctrl_reduce(cl); + if (ret) { + dev_err(dev->dev, "wd: flow_ctrl_reduce failed.\n"); + return ret; + } + + return 0; +} + +/** + * mei_wd_stop - sends watchdog stop message to fw.
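+ *
+ * Builds the stop message in dev->wd_data, sends it immediately when a
+ * host buffer credit is available (otherwise leaves it pending), and then
+ * drops device_lock while waiting up to MEI_WD_STOP_TIMEOUT for wd_state
+ * to return to MEI_WD_IDLE.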
+ * + * @dev: the device structure + * + * Return: 0 if success + * on error: + * -EIO when message send fails + * -EINVAL when invalid message is to be sent + * -ETIME on message timeout + */ +int mei_wd_stop(struct mei_device *dev) +{ + struct mei_cl *cl = &dev->wd_cl; + int ret; + + if (!mei_cl_is_connected(cl) || + dev->wd_state != MEI_WD_RUNNING) + return 0; + + memcpy(dev->wd_data, mei_stop_wd_params, MEI_WD_STOP_MSG_SIZE); + + dev->wd_state = MEI_WD_STOPPING; + + ret = mei_cl_flow_ctrl_creds(cl); + if (ret < 0) + goto err; + + if (ret && mei_hbuf_acquire(dev)) { + ret = mei_wd_send(dev); + if (ret) + goto err; + dev->wd_pending = false; + } else { + dev->wd_pending = true; + } + + mutex_unlock(&dev->device_lock); + + ret = wait_event_timeout(dev->wait_stop_wd, + dev->wd_state == MEI_WD_IDLE, + msecs_to_jiffies(MEI_WD_STOP_TIMEOUT)); + mutex_lock(&dev->device_lock); + if (dev->wd_state != MEI_WD_IDLE) { + /* timeout */ + ret = -ETIME; + dev_warn(dev->dev, "wd: stop failed to complete ret=%d\n", ret); + goto err; + } + dev_dbg(dev->dev, "wd: stop completed after %u msec\n", + MEI_WD_STOP_TIMEOUT - jiffies_to_msecs(ret)); + return 0; +err: + return ret; +} + +/** + * mei_wd_ops_start - wd start command from the watchdog core. + * + * @wd_dev: watchdog device struct + * + * Return: 0 if success, negative errno code for failure + */ +static int mei_wd_ops_start(struct watchdog_device *wd_dev) +{ + struct mei_device *dev; + struct mei_cl *cl; + int err = -ENODEV; + + dev = watchdog_get_drvdata(wd_dev); + if (!dev) + return -ENODEV; + + cl = &dev->wd_cl; + + mutex_lock(&dev->device_lock); + + if (dev->dev_state != MEI_DEV_ENABLED) { + dev_dbg(dev->dev, "wd: dev_state != MEI_DEV_ENABLED dev_state = %s\n", + mei_dev_state_str(dev->dev_state)); + goto end_unlock; + } + + if (!mei_cl_is_connected(cl)) { + cl_dbg(dev, cl, "MEI Driver is not connected to Watchdog Client\n"); + goto end_unlock; + } + + mei_wd_set_start_timeout(dev, dev->wd_timeout); + + err = 0; +end_unlock: + mutex_unlock(&dev->device_lock); + return err; +} + +/** + * mei_wd_ops_stop - wd stop command from the watchdog core. + * + * @wd_dev: watchdog device struct + * + * Return: 0 if success, negative errno code for failure + */ +static int mei_wd_ops_stop(struct watchdog_device *wd_dev) +{ + struct mei_device *dev; + + dev = watchdog_get_drvdata(wd_dev); + if (!dev) + return -ENODEV; + + mutex_lock(&dev->device_lock); + mei_wd_stop(dev); + mutex_unlock(&dev->device_lock); + + return 0; +} + +/** + * mei_wd_ops_ping - wd ping command from the watchdog core. + * + * @wd_dev: watchdog device struct + * + * Return: 0 if success, negative errno code for failure + */ +static int mei_wd_ops_ping(struct watchdog_device *wd_dev) +{ + struct mei_device *dev; + struct mei_cl *cl; + int ret; + + dev = watchdog_get_drvdata(wd_dev); + if (!dev) + return -ENODEV; + + cl = &dev->wd_cl; + + mutex_lock(&dev->device_lock); + + if (!mei_cl_is_connected(cl)) { + cl_err(dev, cl, "wd: not connected.\n"); + ret = -ENODEV; + goto end; + } + + dev->wd_state = MEI_WD_RUNNING; + + ret = mei_cl_flow_ctrl_creds(cl); + if (ret < 0) + goto end; + + /* Check if we can send the ping to HW*/ + if (ret && mei_hbuf_acquire(dev)) { + dev_dbg(dev->dev, "wd: sending ping\n"); + + ret = mei_wd_send(dev); + if (ret) + goto end; + dev->wd_pending = false; + } else { + dev->wd_pending = true; + } + +end: + mutex_unlock(&dev->device_lock); + return ret; +} + +/** + * mei_wd_ops_set_timeout - wd set timeout command from the watchdog core. 
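+ *
+ * The new value is validated against MEI_WD_MIN_TIMEOUT and
+ * MEI_WD_MAX_TIMEOUT, mirrored into both dev->wd_timeout and
+ * wd_dev->timeout, and the start message is rebuilt with it.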
+ * + * @wd_dev: watchdog device struct + * @timeout: timeout value to set + * + * Return: 0 if success, negative errno code for failure + */ +static int mei_wd_ops_set_timeout(struct watchdog_device *wd_dev, + unsigned int timeout) +{ + struct mei_device *dev; + + dev = watchdog_get_drvdata(wd_dev); + if (!dev) + return -ENODEV; + + /* Check Timeout value */ + if (timeout < MEI_WD_MIN_TIMEOUT || timeout > MEI_WD_MAX_TIMEOUT) + return -EINVAL; + + mutex_lock(&dev->device_lock); + + dev->wd_timeout = timeout; + wd_dev->timeout = timeout; + mei_wd_set_start_timeout(dev, dev->wd_timeout); + + mutex_unlock(&dev->device_lock); + + return 0; +} + +/* + * Watchdog Device structs + */ +static const struct watchdog_ops wd_ops = { + .owner = THIS_MODULE, + .start = mei_wd_ops_start, + .stop = mei_wd_ops_stop, + .ping = mei_wd_ops_ping, + .set_timeout = mei_wd_ops_set_timeout, +}; +static const struct watchdog_info wd_info = { + .identity = INTEL_AMT_WATCHDOG_ID, + .options = WDIOF_KEEPALIVEPING | + WDIOF_SETTIMEOUT | + WDIOF_ALARMONLY, +}; + +static struct watchdog_device amt_wd_dev = { + .info = &wd_info, + .ops = &wd_ops, + .timeout = MEI_WD_DEFAULT_TIMEOUT, + .min_timeout = MEI_WD_MIN_TIMEOUT, + .max_timeout = MEI_WD_MAX_TIMEOUT, +}; + + +int mei_watchdog_register(struct mei_device *dev) +{ + + int ret; + + /* unlock to preserve correct locking order */ + mutex_unlock(&dev->device_lock); + ret = watchdog_register_device(&amt_wd_dev); + mutex_lock(&dev->device_lock); + if (ret) { + dev_err(dev->dev, "wd: unable to register watchdog device = %d.\n", + ret); + return ret; + } + + dev_dbg(dev->dev, "wd: successfully registered watchdog interface.\n"); + watchdog_set_drvdata(&amt_wd_dev, dev); + return 0; +} + +void mei_watchdog_unregister(struct mei_device *dev) +{ + if (watchdog_get_drvdata(&amt_wd_dev) == NULL) + return; + + watchdog_set_drvdata(&amt_wd_dev, NULL); + watchdog_unregister_device(&amt_wd_dev); +} +
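Since amt_wd_dev above is registered as a standard watchdog chardev, it can be driven through the stock /dev/watchdog interface. A minimal, hypothetical user-space keeper (only the standard linux/watchdog.h ioctls are assumed; nothing MEI-specific):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/watchdog.h>

int main(void)
{
	int timeout = 120;	/* seconds; must fall in the MEI min/max range */
	int fd = open("/dev/watchdog", O_WRONLY);

	if (fd < 0) {
		perror("open /dev/watchdog");
		return 1;
	}

	/* routed to mei_wd_ops_set_timeout() by the watchdog core */
	ioctl(fd, WDIOC_SETTIMEOUT, &timeout);

	/* each keepalive ends up in mei_wd_ops_ping() */
	ioctl(fd, WDIOC_KEEPALIVE, 0);

	/* magic close: 'V' disarms the watchdog when nowayout is off */
	write(fd, "V", 1);
	close(fd);
	return 0;
}

diff --git a/kernel/drivers/misc/mic/Kconfig b/kernel/drivers/misc/mic/Kconfig new file mode 100644 index 000000000..cc4eef040 --- /dev/null +++ b/kernel/drivers/misc/mic/Kconfig @@ -0,0 +1,54 @@ +comment "Intel MIC Bus Driver" + +config INTEL_MIC_BUS + tristate "Intel MIC Bus Driver" + depends on 64BIT && PCI && X86 && X86_DEV_DMA_OPS + help + This option is selected by any driver which registers a + device or driver on the MIC Bus, such as CONFIG_INTEL_MIC_HOST, + CONFIG_INTEL_MIC_CARD, CONFIG_INTEL_MIC_X100_DMA etc. + + If you are building a host/card kernel with an Intel MIC device + then say M (recommended) or Y, else say N. If unsure say N. + + More information about the Intel MIC family as well as the Linux + OS and tools for MIC to use with this driver are available from + <http://software.intel.com/en-us/mic-developer>. + +comment "Intel MIC Host Driver" + +config INTEL_MIC_HOST + tristate "Intel MIC Host Driver" + depends on 64BIT && PCI && X86 && INTEL_MIC_BUS + select VHOST_RING + help + This enables Host Driver support for the Intel Many Integrated + Core (MIC) family of PCIe form factor coprocessor devices that + run a 64 bit Linux OS. The driver manages card OS state and + enables communication between host and card. Intel MIC X100 + devices are currently supported. + + If you are building a host kernel with an Intel MIC device then + say M (recommended) or Y, else say N. If unsure say N. + + More information about the Intel MIC family as well as the Linux + OS and tools for MIC to use with this driver are available from + <http://software.intel.com/en-us/mic-developer>.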
+
+comment "Intel MIC Card Driver"
+
+config INTEL_MIC_CARD
+ tristate "Intel MIC Card Driver"
+ depends on 64BIT && X86 && INTEL_MIC_BUS
+ select VIRTIO
+ help
+ This enables card driver support for the Intel Many Integrated
+ Core (MIC) device family. The card driver communicates shutdown/
+ crash events to the host and allows registration/configuration of
+ virtio devices. Intel MIC X100 devices are currently supported.
+
+ If you are building a card kernel for an Intel MIC device then
+ say M (recommended) or Y, else say N. If unsure say N.
+
+ For more information see
+ <http://software.intel.com/en-us/mic-developer>.
diff --git a/kernel/drivers/misc/mic/Makefile b/kernel/drivers/misc/mic/Makefile
new file mode 100644
index 000000000..e9bf14875
--- /dev/null
+++ b/kernel/drivers/misc/mic/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile - Intel MIC Linux driver.
+# Copyright(c) 2013, Intel Corporation.
+#
+obj-$(CONFIG_INTEL_MIC_HOST) += host/
+obj-$(CONFIG_INTEL_MIC_CARD) += card/
+obj-$(CONFIG_INTEL_MIC_BUS) += bus/
diff --git a/kernel/drivers/misc/mic/bus/Makefile b/kernel/drivers/misc/mic/bus/Makefile
new file mode 100644
index 000000000..d85c7f2a0
--- /dev/null
+++ b/kernel/drivers/misc/mic/bus/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile - Intel MIC Linux driver.
+# Copyright(c) 2014, Intel Corporation.
+#
+obj-$(CONFIG_INTEL_MIC_BUS) += mic_bus.o
diff --git a/kernel/drivers/misc/mic/bus/mic_bus.c b/kernel/drivers/misc/mic/bus/mic_bus.c
new file mode 100644
index 000000000..961ae90aa
--- /dev/null
+++ b/kernel/drivers/misc/mic/bus/mic_bus.c
@@ -0,0 +1,218 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Bus driver.
+ *
+ * This implementation is very similar to the virtio bus driver
+ * implementation @ drivers/virtio/virtio.c
+ */
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/idr.h>
+#include <linux/mic_bus.h>
+
+/* Unique numbering for mbus devices.
*/ +static DEFINE_IDA(mbus_index_ida); + +static ssize_t device_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct mbus_device *dev = dev_to_mbus(d); + return sprintf(buf, "0x%04x\n", dev->id.device); +} +static DEVICE_ATTR_RO(device); + +static ssize_t vendor_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct mbus_device *dev = dev_to_mbus(d); + return sprintf(buf, "0x%04x\n", dev->id.vendor); +} +static DEVICE_ATTR_RO(vendor); + +static ssize_t modalias_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct mbus_device *dev = dev_to_mbus(d); + return sprintf(buf, "mbus:d%08Xv%08X\n", + dev->id.device, dev->id.vendor); +} +static DEVICE_ATTR_RO(modalias); + +static struct attribute *mbus_dev_attrs[] = { + &dev_attr_device.attr, + &dev_attr_vendor.attr, + &dev_attr_modalias.attr, + NULL, +}; +ATTRIBUTE_GROUPS(mbus_dev); + +static inline int mbus_id_match(const struct mbus_device *dev, + const struct mbus_device_id *id) +{ + if (id->device != dev->id.device && id->device != MBUS_DEV_ANY_ID) + return 0; + + return id->vendor == MBUS_DEV_ANY_ID || id->vendor == dev->id.vendor; +} + +/* + * This looks through all the IDs a driver claims to support. If any of them + * match, we return 1 and the kernel will call mbus_dev_probe(). + */ +static int mbus_dev_match(struct device *dv, struct device_driver *dr) +{ + unsigned int i; + struct mbus_device *dev = dev_to_mbus(dv); + const struct mbus_device_id *ids; + + ids = drv_to_mbus(dr)->id_table; + for (i = 0; ids[i].device; i++) + if (mbus_id_match(dev, &ids[i])) + return 1; + return 0; +} + +static int mbus_uevent(struct device *dv, struct kobj_uevent_env *env) +{ + struct mbus_device *dev = dev_to_mbus(dv); + + return add_uevent_var(env, "MODALIAS=mbus:d%08Xv%08X", + dev->id.device, dev->id.vendor); +} + +static int mbus_dev_probe(struct device *d) +{ + int err; + struct mbus_device *dev = dev_to_mbus(d); + struct mbus_driver *drv = drv_to_mbus(dev->dev.driver); + + err = drv->probe(dev); + if (!err) + if (drv->scan) + drv->scan(dev); + return err; +} + +static int mbus_dev_remove(struct device *d) +{ + struct mbus_device *dev = dev_to_mbus(d); + struct mbus_driver *drv = drv_to_mbus(dev->dev.driver); + + drv->remove(dev); + return 0; +} + +static struct bus_type mic_bus = { + .name = "mic_bus", + .match = mbus_dev_match, + .dev_groups = mbus_dev_groups, + .uevent = mbus_uevent, + .probe = mbus_dev_probe, + .remove = mbus_dev_remove, +}; + +int mbus_register_driver(struct mbus_driver *driver) +{ + driver->driver.bus = &mic_bus; + return driver_register(&driver->driver); +} +EXPORT_SYMBOL_GPL(mbus_register_driver); + +void mbus_unregister_driver(struct mbus_driver *driver) +{ + driver_unregister(&driver->driver); +} +EXPORT_SYMBOL_GPL(mbus_unregister_driver); + +static void mbus_release_dev(struct device *d) +{ + struct mbus_device *mbdev = dev_to_mbus(d); + kfree(mbdev); +} + +struct mbus_device * +mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops, + struct mbus_hw_ops *hw_ops, void __iomem *mmio_va) +{ + int ret; + struct mbus_device *mbdev; + + mbdev = kzalloc(sizeof(*mbdev), GFP_KERNEL); + if (!mbdev) + return ERR_PTR(-ENOMEM); + + mbdev->mmio_va = mmio_va; + mbdev->dev.parent = pdev; + mbdev->id.device = id; + mbdev->id.vendor = MBUS_DEV_ANY_ID; + mbdev->dev.archdata.dma_ops = dma_ops; + mbdev->dev.dma_mask = &mbdev->dev.coherent_dma_mask; + dma_set_mask(&mbdev->dev, DMA_BIT_MASK(64)); + mbdev->dev.release = mbus_release_dev; + 
mbdev->hw_ops = hw_ops; + mbdev->dev.bus = &mic_bus; + + /* Assign a unique device index and hence name. */ + ret = ida_simple_get(&mbus_index_ida, 0, 0, GFP_KERNEL); + if (ret < 0) + goto free_mbdev; + + mbdev->index = ret; + dev_set_name(&mbdev->dev, "mbus-dev%u", mbdev->index); + /* + * device_register() causes the bus infrastructure to look for a + * matching driver. + */ + ret = device_register(&mbdev->dev); + if (ret) + goto ida_remove; + return mbdev; +ida_remove: + ida_simple_remove(&mbus_index_ida, mbdev->index); +free_mbdev: + kfree(mbdev); + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(mbus_register_device); + +void mbus_unregister_device(struct mbus_device *mbdev) +{ + int index = mbdev->index; /* save for after device release */ + + device_unregister(&mbdev->dev); + ida_simple_remove(&mbus_index_ida, index); +} +EXPORT_SYMBOL_GPL(mbus_unregister_device); + +static int __init mbus_init(void) +{ + return bus_register(&mic_bus); +} + +static void __exit mbus_exit(void) +{ + bus_unregister(&mic_bus); + ida_destroy(&mbus_index_ida); +} + +core_initcall(mbus_init); +module_exit(mbus_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) MIC Bus driver"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/misc/mic/card/Makefile b/kernel/drivers/misc/mic/card/Makefile new file mode 100644 index 000000000..69d58bef9 --- /dev/null +++ b/kernel/drivers/misc/mic/card/Makefile @@ -0,0 +1,11 @@ +# +# Makefile - Intel MIC Linux driver. +# Copyright(c) 2013, Intel Corporation. +# +ccflags-y += -DINTEL_MIC_CARD + +obj-$(CONFIG_INTEL_MIC_CARD) += mic_card.o +mic_card-y += mic_x100.o +mic_card-y += mic_device.o +mic_card-y += mic_debugfs.o +mic_card-y += mic_virtio.o diff --git a/kernel/drivers/misc/mic/card/mic_debugfs.c b/kernel/drivers/misc/mic/card/mic_debugfs.c new file mode 100644 index 000000000..421b3d791 --- /dev/null +++ b/kernel/drivers/misc/mic/card/mic_debugfs.c @@ -0,0 +1,130 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed: Knights Ferry, and + * the Intel product codenamed: Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel MIC Card driver. + * + */ +#include <linux/debugfs.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/interrupt.h> +#include <linux/device.h> + +#include "../common/mic_dev.h" +#include "mic_device.h" + +/* Debugfs parent dir */ +static struct dentry *mic_dbg; + +/** + * mic_intr_test - Send interrupts to host. 
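+ *
+ * Editor's note (not part of the original source): the test runs when
+ * this seq_file is read, so once the card module is loaded it can be
+ * fired with something like
+ *
+ *	cat <debugfs mount>/<module dir>/<mdrv->name>/intr_test
+ *
+ * (debugfs is commonly mounted at /sys/kernel/debug). Each of doorbells
+ * 0-3 is rung with a one second pause in between.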
+ */
+static int mic_intr_test(struct seq_file *s, void *unused)
+{
+ struct mic_driver *mdrv = s->private;
+ struct mic_device *mdev = &mdrv->mdev;
+
+ mic_send_intr(mdev, 0);
+ msleep(1000);
+ mic_send_intr(mdev, 1);
+ msleep(1000);
+ mic_send_intr(mdev, 2);
+ msleep(1000);
+ mic_send_intr(mdev, 3);
+ msleep(1000);
+
+ return 0;
+}
+
+static int mic_intr_test_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, mic_intr_test, inode->i_private);
+}
+
+static int mic_intr_test_release(struct inode *inode, struct file *file)
+{
+ return single_release(inode, file);
+}
+
+static const struct file_operations intr_test_ops = {
+ .owner = THIS_MODULE,
+ .open = mic_intr_test_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = mic_intr_test_release
+};
+
+/**
+ * mic_create_card_debug_dir - Initialize MIC debugfs entries.
+ */
+void __init mic_create_card_debug_dir(struct mic_driver *mdrv)
+{
+ struct dentry *d;
+
+ if (!mic_dbg)
+ return;
+
+ mdrv->dbg_dir = debugfs_create_dir(mdrv->name, mic_dbg);
+ if (!mdrv->dbg_dir) {
+ dev_err(mdrv->dev, "Can't create dbg_dir %s\n", mdrv->name);
+ return;
+ }
+
+ d = debugfs_create_file("intr_test", 0444, mdrv->dbg_dir,
+ mdrv, &intr_test_ops);
+
+ if (!d) {
+ dev_err(mdrv->dev,
+ "Can't create dbg intr_test %s\n", mdrv->name);
+ return;
+ }
+}
+
+/**
+ * mic_delete_card_debug_dir - Uninitialize MIC debugfs entries.
+ */
+void mic_delete_card_debug_dir(struct mic_driver *mdrv)
+{
+ if (!mdrv->dbg_dir)
+ return;
+
+ debugfs_remove_recursive(mdrv->dbg_dir);
+}
+
+/**
+ * mic_init_card_debugfs - Initialize global debugfs entry.
+ */
+void __init mic_init_card_debugfs(void)
+{
+ mic_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
+ if (!mic_dbg)
+ pr_err("can't create debugfs dir\n");
+}
+
+/**
+ * mic_exit_card_debugfs - Uninitialize global debugfs entry.
+ */
+void mic_exit_card_debugfs(void)
+{
+ debugfs_remove(mic_dbg);
+}
diff --git a/kernel/drivers/misc/mic/card/mic_device.c b/kernel/drivers/misc/mic/card/mic_device.c
new file mode 100644
index 000000000..83819eee5
--- /dev/null
+++ b/kernel/drivers/misc/mic/card/mic_device.c
@@ -0,0 +1,308 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Disclaimer: The codes contained in these modules may be specific to
+ * the Intel Software Development Platform codenamed: Knights Ferry, and
+ * the Intel product codenamed: Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel MIC Card driver.
+ *
+ */
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/reboot.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_virtio.h"
+
+static struct mic_driver *g_drv;
+static struct mic_irq *shutdown_cookie;
+
+static void mic_notify_host(u8 state)
+{
+ struct mic_driver *mdrv = g_drv;
+ struct mic_bootparam __iomem *bootparam = mdrv->dp;
+
+ iowrite8(state, &bootparam->shutdown_status);
+ dev_dbg(mdrv->dev, "%s %d system_state %d\n",
+ __func__, __LINE__, state);
+ mic_send_intr(&mdrv->mdev, ioread8(&bootparam->c2h_shutdown_db));
+}
+
+static int mic_panic_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct mic_driver *mdrv = g_drv;
+ struct mic_bootparam __iomem *bootparam = mdrv->dp;
+
+ iowrite8(-1, &bootparam->h2c_config_db);
+ iowrite8(-1, &bootparam->h2c_shutdown_db);
+ mic_notify_host(MIC_CRASHED);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block mic_panic = {
+ .notifier_call = mic_panic_event,
+};
+
+static irqreturn_t mic_shutdown_isr(int irq, void *data)
+{
+ struct mic_driver *mdrv = g_drv;
+ struct mic_bootparam __iomem *bootparam = mdrv->dp;
+
+ mic_ack_interrupt(&g_drv->mdev);
+ if (ioread8(&bootparam->shutdown_card))
+ orderly_poweroff(true);
+ return IRQ_HANDLED;
+}
+
+static int mic_shutdown_init(void)
+{
+ int rc = 0;
+ struct mic_driver *mdrv = g_drv;
+ struct mic_bootparam __iomem *bootparam = mdrv->dp;
+ int shutdown_db;
+
+ shutdown_db = mic_next_card_db();
+ shutdown_cookie = mic_request_card_irq(mic_shutdown_isr, NULL,
+ "Shutdown", mdrv, shutdown_db);
+ if (IS_ERR(shutdown_cookie))
+ rc = PTR_ERR(shutdown_cookie);
+ else
+ iowrite8(shutdown_db, &bootparam->h2c_shutdown_db);
+ return rc;
+}
+
+static void mic_shutdown_uninit(void)
+{
+ struct mic_driver *mdrv = g_drv;
+ struct mic_bootparam __iomem *bootparam = mdrv->dp;
+
+ iowrite8(-1, &bootparam->h2c_shutdown_db);
+ mic_free_card_irq(shutdown_cookie, mdrv);
+}
+
+static int __init mic_dp_init(void)
+{
+ struct mic_driver *mdrv = g_drv;
+ struct mic_device *mdev = &mdrv->mdev;
+ struct mic_bootparam __iomem *bootparam;
+ u64 lo, hi, dp_dma_addr;
+ u32 magic;
+
+ lo = mic_read_spad(&mdrv->mdev, MIC_DPLO_SPAD);
+ hi = mic_read_spad(&mdrv->mdev, MIC_DPHI_SPAD);
+
+ dp_dma_addr = lo | (hi << 32);
+ mdrv->dp = mic_card_map(mdev, dp_dma_addr, MIC_DP_SIZE);
+ if (!mdrv->dp) {
+ dev_err(mdrv->dev, "Cannot remap Aperture BAR\n");
+ return -ENOMEM;
+ }
+ bootparam = mdrv->dp;
+ magic = ioread32(&bootparam->magic);
+ if (MIC_MAGIC != magic) {
+ dev_err(mdrv->dev, "bootparam magic mismatch 0x%x\n", magic);
+ return -EIO;
+ }
+ return 0;
+}
+
+/* Uninitialize the device page */
+static void mic_dp_uninit(void)
+{
+ mic_card_unmap(&g_drv->mdev, g_drv->dp);
+}
+
+/**
+ * mic_request_card_irq - request an irq.
+ *
+ * @handler: interrupt handler passed to request_threaded_irq.
+ * @thread_fn: thread fn. passed to request_threaded_irq.
+ * @name: The ASCII name of the callee requesting the irq.
+ * @data: private data that is returned back when calling the
+ * function handler.
+ * @index: The doorbell index of the requester.
+ *
+ * returns: The cookie that is transparent to the caller. Passed
+ * back when calling mic_free_card_irq. An appropriate error code
+ * is returned on failure. Caller needs to use IS_ERR(return_val)
+ * to check for failure and PTR_ERR(return_val) to obtain the
+ * error code.
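+ *
+ * Editor's example (illustrative only; my_isr and my_data are names
+ * invented for this sketch, not symbols defined in this driver):
+ *
+ *	int db = mic_next_card_db();
+ *	struct mic_irq *cookie;
+ *
+ *	cookie = mic_request_card_irq(my_isr, NULL, "my-db", my_data, db);
+ *	if (IS_ERR(cookie))
+ *		return PTR_ERR(cookie);
+ *	...
+ *	mic_free_card_irq(cookie, my_data);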
+ *
+ */
+struct mic_irq *
+mic_request_card_irq(irq_handler_t handler,
+ irq_handler_t thread_fn, const char *name,
+ void *data, int index)
+{
+ int rc = 0;
+ unsigned long cookie;
+ struct mic_driver *mdrv = g_drv;
+
+ rc = request_threaded_irq(mic_db_to_irq(mdrv, index), handler,
+ thread_fn, 0, name, data);
+ if (rc) {
+ dev_err(mdrv->dev, "request_threaded_irq failed rc = %d\n", rc);
+ goto err;
+ }
+ mdrv->irq_info.irq_usage_count[index]++;
+ cookie = index;
+ return (struct mic_irq *)cookie;
+err:
+ return ERR_PTR(rc);
+}
+
+/**
+ * mic_free_card_irq - free irq.
+ *
+ * @cookie: cookie obtained during a successful call to mic_request_card_irq
+ * @data: private data specified by the calling function during the
+ * mic_request_card_irq
+ *
+ * returns: none.
+ */
+void mic_free_card_irq(struct mic_irq *cookie, void *data)
+{
+ int index;
+ struct mic_driver *mdrv = g_drv;
+
+ index = (unsigned long)cookie & 0xFFFFU;
+ free_irq(mic_db_to_irq(mdrv, index), data);
+ mdrv->irq_info.irq_usage_count[index]--;
+}
+
+/**
+ * mic_next_card_db - Get the doorbell with minimum usage count.
+ *
+ * Returns the doorbell index.
+ */
+int mic_next_card_db(void)
+{
+ int i;
+ int index = 0;
+ struct mic_driver *mdrv = g_drv;
+
+ for (i = 0; i < mdrv->intr_info.num_intr; i++) {
+ if (mdrv->irq_info.irq_usage_count[i] <
+ mdrv->irq_info.irq_usage_count[index])
+ index = i;
+ }
+
+ return index;
+}
+
+/**
+ * mic_init_irq - Initialize irq information.
+ *
+ * Returns 0 on success. Appropriate error code on failure.
+ */
+static int mic_init_irq(void)
+{
+ struct mic_driver *mdrv = g_drv;
+
+ mdrv->irq_info.irq_usage_count = kzalloc((sizeof(u32) *
+ mdrv->intr_info.num_intr),
+ GFP_KERNEL);
+ if (!mdrv->irq_info.irq_usage_count)
+ return -ENOMEM;
+ return 0;
+}
+
+/**
+ * mic_uninit_irq - Uninitialize irq information.
+ *
+ * None.
+ */
+static void mic_uninit_irq(void)
+{
+ struct mic_driver *mdrv = g_drv;
+
+ kfree(mdrv->irq_info.irq_usage_count);
+}
+
+/*
+ * mic_driver_init - MIC driver initialization tasks.
+ *
+ * Returns 0 on success. Appropriate error code on failure.
+ */
+int __init mic_driver_init(struct mic_driver *mdrv)
+{
+ int rc;
+
+ g_drv = mdrv;
+ /*
+ * Unloading the card module is not supported. The MIC card module
+ * handles fundamental operations like host/card initiated shutdowns
+ * and informing the host about card crashes and cannot be unloaded.
+ */
+ if (!try_module_get(mdrv->dev->driver->owner)) {
+ rc = -ENODEV;
+ goto done;
+ }
+ rc = mic_dp_init();
+ if (rc)
+ goto put;
+ rc = mic_init_irq();
+ if (rc)
+ goto dp_uninit;
+ rc = mic_shutdown_init();
+ if (rc)
+ goto irq_uninit;
+ rc = mic_devices_init(mdrv);
+ if (rc)
+ goto shutdown_uninit;
+ mic_create_card_debug_dir(mdrv);
+ atomic_notifier_chain_register(&panic_notifier_list, &mic_panic);
+done:
+ return rc;
+shutdown_uninit:
+ mic_shutdown_uninit();
+irq_uninit:
+ mic_uninit_irq();
+dp_uninit:
+ mic_dp_uninit();
+put:
+ module_put(mdrv->dev->driver->owner);
+ return rc;
+}
+
+/*
+ * mic_driver_uninit - MIC driver uninitialization tasks.
+ *
+ * Returns None
+ */
+void mic_driver_uninit(struct mic_driver *mdrv)
+{
+ mic_delete_card_debug_dir(mdrv);
+ mic_devices_uninit(mdrv);
+ /*
+ * Inform the host about the shutdown status i.e. poweroff/restart etc.
+ * The module cannot be unloaded so the only code path to call
+ * mic_devices_uninit(..) is the shutdown callback.
+ */ + mic_notify_host(system_state); + mic_shutdown_uninit(); + mic_uninit_irq(); + mic_dp_uninit(); + module_put(mdrv->dev->driver->owner); +} diff --git a/kernel/drivers/misc/mic/card/mic_device.h b/kernel/drivers/misc/mic/card/mic_device.h new file mode 100644 index 000000000..844be8fc9 --- /dev/null +++ b/kernel/drivers/misc/mic/card/mic_device.h @@ -0,0 +1,139 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed: Knights Ferry, and + * the Intel product codenamed: Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel MIC Card driver. + * + */ +#ifndef _MIC_CARD_DEVICE_H_ +#define _MIC_CARD_DEVICE_H_ + +#include <linux/workqueue.h> +#include <linux/io.h> +#include <linux/irqreturn.h> +#include <linux/interrupt.h> +#include <linux/mic_bus.h> + +/** + * struct mic_intr_info - Contains h/w specific interrupt sources info + * + * @num_intr: The number of irqs available + */ +struct mic_intr_info { + u32 num_intr; +}; + +/** + * struct mic_irq_info - OS specific irq information + * + * @irq_usage_count: usage count array tracking the number of sources + * assigned for each irq. + */ +struct mic_irq_info { + int *irq_usage_count; +}; + +/** + * struct mic_device - MIC device information. + * + * @mmio: MMIO bar information. + */ +struct mic_device { + struct mic_mw mmio; +}; + +/** + * struct mic_driver - MIC card driver information. + * + * @name: Name for MIC driver. + * @dbg_dir: debugfs directory of this MIC device. + * @dev: The device backing this MIC. + * @dp: The pointer to the virtio device page. + * @mdev: MIC device information for the host. + * @hotplug_work: Hot plug work for adding/removing virtio devices. + * @irq_info: The OS specific irq information + * @intr_info: H/W specific interrupt information. + * @dma_mbdev: dma device on the MIC virtual bus. + */ +struct mic_driver { + char name[20]; + struct dentry *dbg_dir; + struct device *dev; + void __iomem *dp; + struct mic_device mdev; + struct work_struct hotplug_work; + struct mic_irq_info irq_info; + struct mic_intr_info intr_info; + struct mbus_device *dma_mbdev; +}; + +/** + * struct mic_irq - opaque pointer used as cookie + */ +struct mic_irq; + +/** + * mic_mmio_read - read from an MMIO register. + * @mw: MMIO register base virtual address. + * @offset: register offset. + * + * RETURNS: register value. + */ +static inline u32 mic_mmio_read(struct mic_mw *mw, u32 offset) +{ + return ioread32(mw->va + offset); +} + +/** + * mic_mmio_write - write to an MMIO register. + * @mw: MMIO register base virtual address. + * @val: the data value to put into the register + * @offset: register offset. + * + * RETURNS: none. 
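+ *
+ * Editor's example (illustrative; MY_BIT is a placeholder, not a
+ * register bit defined in this driver): the two accessors compose into
+ * the usual read-modify-write sequence,
+ *
+ *	u32 val = mic_mmio_read(mw, offset);
+ *
+ *	mic_mmio_write(mw, val | MY_BIT, offset);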
+ */ +static inline void +mic_mmio_write(struct mic_mw *mw, u32 val, u32 offset) +{ + iowrite32(val, mw->va + offset); +} + +int mic_driver_init(struct mic_driver *mdrv); +void mic_driver_uninit(struct mic_driver *mdrv); +int mic_next_card_db(void); +struct mic_irq * +mic_request_card_irq(irq_handler_t handler, irq_handler_t thread_fn, + const char *name, void *data, int intr_src); +void mic_free_card_irq(struct mic_irq *cookie, void *data); +u32 mic_read_spad(struct mic_device *mdev, unsigned int idx); +void mic_send_intr(struct mic_device *mdev, int doorbell); +int mic_db_to_irq(struct mic_driver *mdrv, int db); +u32 mic_ack_interrupt(struct mic_device *mdev); +void mic_hw_intr_init(struct mic_driver *mdrv); +void __iomem * +mic_card_map(struct mic_device *mdev, dma_addr_t addr, size_t size); +void mic_card_unmap(struct mic_device *mdev, void __iomem *addr); +void __init mic_create_card_debug_dir(struct mic_driver *mdrv); +void mic_delete_card_debug_dir(struct mic_driver *mdrv); +void __init mic_init_card_debugfs(void); +void mic_exit_card_debugfs(void); +#endif diff --git a/kernel/drivers/misc/mic/card/mic_virtio.c b/kernel/drivers/misc/mic/card/mic_virtio.c new file mode 100644 index 000000000..e486a0c26 --- /dev/null +++ b/kernel/drivers/misc/mic/card/mic_virtio.c @@ -0,0 +1,634 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed: Knights Ferry, and + * the Intel product codenamed: Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Adapted from: + * + * virtio for kvm on s390 + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> + * + * Intel MIC Card driver. + * + */ +#include <linux/delay.h> +#include <linux/slab.h> +#include <linux/virtio_config.h> + +#include "../common/mic_dev.h" +#include "mic_virtio.h" + +#define VIRTIO_SUBCODE_64 0x0D00 + +#define MIC_MAX_VRINGS 4 +struct mic_vdev { + struct virtio_device vdev; + struct mic_device_desc __iomem *desc; + struct mic_device_ctrl __iomem *dc; + struct mic_device *mdev; + void __iomem *vr[MIC_MAX_VRINGS]; + int used_size[MIC_MAX_VRINGS]; + struct completion reset_done; + struct mic_irq *virtio_cookie; + int c2h_vdev_db; +}; + +static struct mic_irq *virtio_config_cookie; +#define to_micvdev(vd) container_of(vd, struct mic_vdev, vdev) + +/* Helper API to obtain the parent of the virtio device */ +static inline struct device *mic_dev(struct mic_vdev *mvdev) +{ + return mvdev->vdev.dev.parent; +} + +/* This gets the device's feature bits. 
*/ +static u64 mic_get_features(struct virtio_device *vdev) +{ + unsigned int i, bits; + u32 features = 0; + struct mic_device_desc __iomem *desc = to_micvdev(vdev)->desc; + u8 __iomem *in_features = mic_vq_features(desc); + int feature_len = ioread8(&desc->feature_len); + + bits = min_t(unsigned, feature_len, sizeof(features)) * 8; + for (i = 0; i < bits; i++) + if (ioread8(&in_features[i / 8]) & (BIT(i % 8))) + features |= BIT(i); + + return features; +} + +static int mic_finalize_features(struct virtio_device *vdev) +{ + unsigned int i, bits; + struct mic_device_desc __iomem *desc = to_micvdev(vdev)->desc; + u8 feature_len = ioread8(&desc->feature_len); + /* Second half of bitmap is features we accept. */ + u8 __iomem *out_features = + mic_vq_features(desc) + feature_len; + + /* Give virtio_ring a chance to accept features. */ + vring_transport_features(vdev); + + /* Make sure we don't have any features > 32 bits! */ + BUG_ON((u32)vdev->features != vdev->features); + + memset_io(out_features, 0, feature_len); + bits = min_t(unsigned, feature_len, + sizeof(vdev->features)) * 8; + for (i = 0; i < bits; i++) { + if (__virtio_test_bit(vdev, i)) + iowrite8(ioread8(&out_features[i / 8]) | (1 << (i % 8)), + &out_features[i / 8]); + } + + return 0; +} + +/* + * Reading and writing elements in config space + */ +static void mic_get(struct virtio_device *vdev, unsigned int offset, + void *buf, unsigned len) +{ + struct mic_device_desc __iomem *desc = to_micvdev(vdev)->desc; + + if (offset + len > ioread8(&desc->config_len)) + return; + memcpy_fromio(buf, mic_vq_configspace(desc) + offset, len); +} + +static void mic_set(struct virtio_device *vdev, unsigned int offset, + const void *buf, unsigned len) +{ + struct mic_device_desc __iomem *desc = to_micvdev(vdev)->desc; + + if (offset + len > ioread8(&desc->config_len)) + return; + memcpy_toio(mic_vq_configspace(desc) + offset, buf, len); +} + +/* + * The operations to get and set the status word just access the status + * field of the device descriptor. set_status also interrupts the host + * to tell about status changes. + */ +static u8 mic_get_status(struct virtio_device *vdev) +{ + return ioread8(&to_micvdev(vdev)->desc->status); +} + +static void mic_set_status(struct virtio_device *vdev, u8 status) +{ + struct mic_vdev *mvdev = to_micvdev(vdev); + if (!status) + return; + iowrite8(status, &mvdev->desc->status); + mic_send_intr(mvdev->mdev, mvdev->c2h_vdev_db); +} + +/* Inform host on a virtio device reset and wait for ack from host */ +static void mic_reset_inform_host(struct virtio_device *vdev) +{ + struct mic_vdev *mvdev = to_micvdev(vdev); + struct mic_device_ctrl __iomem *dc = mvdev->dc; + int retry; + + iowrite8(0, &dc->host_ack); + iowrite8(1, &dc->vdev_reset); + mic_send_intr(mvdev->mdev, mvdev->c2h_vdev_db); + + /* Wait till host completes all card accesses and acks the reset */ + for (retry = 100; retry--;) { + if (ioread8(&dc->host_ack)) + break; + msleep(100); + }; + + dev_dbg(mic_dev(mvdev), "%s: retry: %d\n", __func__, retry); + + /* Reset status to 0 in case we timed out */ + iowrite8(0, &mvdev->desc->status); +} + +static void mic_reset(struct virtio_device *vdev) +{ + struct mic_vdev *mvdev = to_micvdev(vdev); + + dev_dbg(mic_dev(mvdev), "%s: virtio id %d\n", + __func__, vdev->id.device); + + mic_reset_inform_host(vdev); + complete_all(&mvdev->reset_done); +} + +/* + * The virtio_ring code calls this API when it wants to notify the Host. 
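+ *
+ * Editor's note (not part of the original source): "notify" here just
+ * rings the card-to-host doorbell that the host assigned to this virtio
+ * device (c2h_vdev_db, read from the device control page when the
+ * device was added) via mic_send_intr(), the same path set_status uses.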
+ */ +static bool mic_notify(struct virtqueue *vq) +{ + struct mic_vdev *mvdev = vq->priv; + + mic_send_intr(mvdev->mdev, mvdev->c2h_vdev_db); + return true; +} + +static void mic_del_vq(struct virtqueue *vq, int n) +{ + struct mic_vdev *mvdev = to_micvdev(vq->vdev); + struct vring *vr = (struct vring *)(vq + 1); + + free_pages((unsigned long) vr->used, get_order(mvdev->used_size[n])); + vring_del_virtqueue(vq); + mic_card_unmap(mvdev->mdev, mvdev->vr[n]); + mvdev->vr[n] = NULL; +} + +static void mic_del_vqs(struct virtio_device *vdev) +{ + struct mic_vdev *mvdev = to_micvdev(vdev); + struct virtqueue *vq, *n; + int idx = 0; + + dev_dbg(mic_dev(mvdev), "%s\n", __func__); + + list_for_each_entry_safe(vq, n, &vdev->vqs, list) + mic_del_vq(vq, idx++); +} + +/* + * This routine will assign vring's allocated in host/io memory. Code in + * virtio_ring.c however continues to access this io memory as if it were local + * memory without io accessors. + */ +static struct virtqueue *mic_find_vq(struct virtio_device *vdev, + unsigned index, + void (*callback)(struct virtqueue *vq), + const char *name) +{ + struct mic_vdev *mvdev = to_micvdev(vdev); + struct mic_vqconfig __iomem *vqconfig; + struct mic_vqconfig config; + struct virtqueue *vq; + void __iomem *va; + struct _mic_vring_info __iomem *info; + void *used; + int vr_size, _vr_size, err, magic; + struct vring *vr; + u8 type = ioread8(&mvdev->desc->type); + + if (index >= ioread8(&mvdev->desc->num_vq)) + return ERR_PTR(-ENOENT); + + if (!name) + return ERR_PTR(-ENOENT); + + /* First assign the vring's allocated in host memory */ + vqconfig = mic_vq_config(mvdev->desc) + index; + memcpy_fromio(&config, vqconfig, sizeof(config)); + _vr_size = vring_size(le16_to_cpu(config.num), MIC_VIRTIO_RING_ALIGN); + vr_size = PAGE_ALIGN(_vr_size + sizeof(struct _mic_vring_info)); + va = mic_card_map(mvdev->mdev, le64_to_cpu(config.address), vr_size); + if (!va) + return ERR_PTR(-ENOMEM); + mvdev->vr[index] = va; + memset_io(va, 0x0, _vr_size); + vq = vring_new_virtqueue(index, le16_to_cpu(config.num), + MIC_VIRTIO_RING_ALIGN, vdev, false, + (void __force *)va, mic_notify, callback, + name); + if (!vq) { + err = -ENOMEM; + goto unmap; + } + info = va + _vr_size; + magic = ioread32(&info->magic); + + if (WARN(magic != MIC_MAGIC + type + index, "magic mismatch")) { + err = -EIO; + goto unmap; + } + + /* Allocate and reassign used ring now */ + mvdev->used_size[index] = PAGE_ALIGN(sizeof(__u16) * 3 + + sizeof(struct vring_used_elem) * + le16_to_cpu(config.num)); + used = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(mvdev->used_size[index])); + if (!used) { + err = -ENOMEM; + dev_err(mic_dev(mvdev), "%s %d err %d\n", + __func__, __LINE__, err); + goto del_vq; + } + iowrite64(virt_to_phys(used), &vqconfig->used_address); + + /* + * To reassign the used ring here we are directly accessing + * struct vring_virtqueue which is a private data structure + * in virtio_ring.c. 
At the minimum, a BUILD_BUG_ON() in + * vring_new_virtqueue() would ensure that + * (&vq->vring == (struct vring *) (&vq->vq + 1)); + */ + vr = (struct vring *)(vq + 1); + vr->used = used; + + vq->priv = mvdev; + return vq; +del_vq: + vring_del_virtqueue(vq); +unmap: + mic_card_unmap(mvdev->mdev, mvdev->vr[index]); + return ERR_PTR(err); +} + +static int mic_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char *names[]) +{ + struct mic_vdev *mvdev = to_micvdev(vdev); + struct mic_device_ctrl __iomem *dc = mvdev->dc; + int i, err, retry; + + /* We must have this many virtqueues. */ + if (nvqs > ioread8(&mvdev->desc->num_vq)) + return -ENOENT; + + for (i = 0; i < nvqs; ++i) { + dev_dbg(mic_dev(mvdev), "%s: %d: %s\n", + __func__, i, names[i]); + vqs[i] = mic_find_vq(vdev, i, callbacks[i], names[i]); + if (IS_ERR(vqs[i])) { + err = PTR_ERR(vqs[i]); + goto error; + } + } + + iowrite8(1, &dc->used_address_updated); + /* + * Send an interrupt to the host to inform it that used + * rings have been re-assigned. + */ + mic_send_intr(mvdev->mdev, mvdev->c2h_vdev_db); + for (retry = 100; retry--;) { + if (!ioread8(&dc->used_address_updated)) + break; + msleep(100); + }; + + dev_dbg(mic_dev(mvdev), "%s: retry: %d\n", __func__, retry); + if (!retry) { + err = -ENODEV; + goto error; + } + + return 0; +error: + mic_del_vqs(vdev); + return err; +} + +/* + * The config ops structure as defined by virtio config + */ +static struct virtio_config_ops mic_vq_config_ops = { + .get_features = mic_get_features, + .finalize_features = mic_finalize_features, + .get = mic_get, + .set = mic_set, + .get_status = mic_get_status, + .set_status = mic_set_status, + .reset = mic_reset, + .find_vqs = mic_find_vqs, + .del_vqs = mic_del_vqs, +}; + +static irqreturn_t +mic_virtio_intr_handler(int irq, void *data) +{ + struct mic_vdev *mvdev = data; + struct virtqueue *vq; + + mic_ack_interrupt(mvdev->mdev); + list_for_each_entry(vq, &mvdev->vdev.vqs, list) + vring_interrupt(0, vq); + + return IRQ_HANDLED; +} + +static void mic_virtio_release_dev(struct device *_d) +{ + /* + * No need for a release method similar to virtio PCI. + * Provide an empty one to avoid getting a warning from core. 
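+ *
+ * Editor's note (not part of the original source): the device core
+ * insists on a release callback because unregister_virtio_device()
+ * only drops the final reference. The containing mic_vdev is instead
+ * freed explicitly by mic_remove_device() once the reset handshake has
+ * completed, so there is genuinely nothing left to do here.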
+ */
+}
+
+/*
+ * adds a new device and registers it with virtio;
+ * appropriate drivers are then loaded by the device model
+ */
+static int mic_add_device(struct mic_device_desc __iomem *d,
+ unsigned int offset, struct mic_driver *mdrv)
+{
+ struct mic_vdev *mvdev;
+ int ret;
+ int virtio_db;
+ u8 type = ioread8(&d->type);
+
+ mvdev = kzalloc(sizeof(*mvdev), GFP_KERNEL);
+ if (!mvdev) {
+ dev_err(mdrv->dev, "Cannot allocate mic dev %u type %u\n",
+ offset, type);
+ return -ENOMEM;
+ }
+
+ mvdev->mdev = &mdrv->mdev;
+ mvdev->vdev.dev.parent = mdrv->dev;
+ mvdev->vdev.dev.release = mic_virtio_release_dev;
+ mvdev->vdev.id.device = type;
+ mvdev->vdev.config = &mic_vq_config_ops;
+ mvdev->desc = d;
+ mvdev->dc = (void __iomem *)d + mic_aligned_desc_size(d);
+ init_completion(&mvdev->reset_done);
+
+ virtio_db = mic_next_card_db();
+ mvdev->virtio_cookie = mic_request_card_irq(mic_virtio_intr_handler,
+ NULL, "virtio intr", mvdev, virtio_db);
+ if (IS_ERR(mvdev->virtio_cookie)) {
+ ret = PTR_ERR(mvdev->virtio_cookie);
+ goto kfree;
+ }
+ iowrite8((u8)virtio_db, &mvdev->dc->h2c_vdev_db);
+ mvdev->c2h_vdev_db = ioread8(&mvdev->dc->c2h_vdev_db);
+
+ ret = register_virtio_device(&mvdev->vdev);
+ if (ret) {
+ dev_err(mic_dev(mvdev),
+ "Failed to register mic device %u type %u\n",
+ offset, type);
+ goto free_irq;
+ }
+ iowrite64((u64)mvdev, &mvdev->dc->vdev);
+ dev_dbg(mic_dev(mvdev), "%s: registered mic device %u type %u mvdev %p\n",
+ __func__, offset, type, mvdev);
+
+ return 0;
+
+free_irq:
+ mic_free_card_irq(mvdev->virtio_cookie, mvdev);
+kfree:
+ kfree(mvdev);
+ return ret;
+}
+
+/*
+ * match for a mic device with a specific desc pointer
+ */
+static int mic_match_desc(struct device *dev, void *data)
+{
+ struct virtio_device *vdev = dev_to_virtio(dev);
+ struct mic_vdev *mvdev = to_micvdev(vdev);
+
+ return mvdev->desc == (void __iomem *)data;
+}
+
+static void mic_handle_config_change(struct mic_device_desc __iomem *d,
+ unsigned int offset, struct mic_driver *mdrv)
+{
+ struct mic_device_ctrl __iomem *dc
+ = (void __iomem *)d + mic_aligned_desc_size(d);
+ struct mic_vdev *mvdev = (struct mic_vdev *)ioread64(&dc->vdev);
+
+ if (ioread8(&dc->config_change) != MIC_VIRTIO_PARAM_CONFIG_CHANGED)
+ return;
+
+ dev_dbg(mdrv->dev, "%s %d\n", __func__, __LINE__);
+ virtio_config_changed(&mvdev->vdev);
+ iowrite8(1, &dc->guest_ack);
+}
+
+/*
+ * removes a virtio device if a hot remove event has been
+ * requested by the host.
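+ *
+ * Editor's summary of the handshake implemented below (not part of the
+ * original source): the host stores MIC_VIRTIO_PARAM_DEV_REMOVE in
+ * config_change and interrupts the card; the card unregisters the
+ * virtio device, frees its doorbell irq, waits out the reset handshake
+ * if a driver had attached (VIRTIO_CONFIG_S_DRIVER_OK), and finally
+ * writes guest_ack so the host knows the descriptor can be reused.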
+ */ +static int mic_remove_device(struct mic_device_desc __iomem *d, + unsigned int offset, struct mic_driver *mdrv) +{ + struct mic_device_ctrl __iomem *dc + = (void __iomem *)d + mic_aligned_desc_size(d); + struct mic_vdev *mvdev = (struct mic_vdev *)ioread64(&dc->vdev); + u8 status; + int ret = -1; + + if (ioread8(&dc->config_change) == MIC_VIRTIO_PARAM_DEV_REMOVE) { + dev_dbg(mdrv->dev, + "%s %d config_change %d type %d mvdev %p\n", + __func__, __LINE__, + ioread8(&dc->config_change), ioread8(&d->type), mvdev); + + status = ioread8(&d->status); + reinit_completion(&mvdev->reset_done); + unregister_virtio_device(&mvdev->vdev); + mic_free_card_irq(mvdev->virtio_cookie, mvdev); + if (status & VIRTIO_CONFIG_S_DRIVER_OK) + wait_for_completion(&mvdev->reset_done); + kfree(mvdev); + iowrite8(1, &dc->guest_ack); + dev_dbg(mdrv->dev, "%s %d guest_ack %d\n", + __func__, __LINE__, ioread8(&dc->guest_ack)); + ret = 0; + } + + return ret; +} + +#define REMOVE_DEVICES true + +static void mic_scan_devices(struct mic_driver *mdrv, bool remove) +{ + s8 type; + unsigned int i; + struct mic_device_desc __iomem *d; + struct mic_device_ctrl __iomem *dc; + struct device *dev; + int ret; + + for (i = sizeof(struct mic_bootparam); i < MIC_DP_SIZE; + i += mic_total_desc_size(d)) { + d = mdrv->dp + i; + dc = (void __iomem *)d + mic_aligned_desc_size(d); + /* + * This read barrier is paired with the corresponding write + * barrier on the host which is inserted before adding or + * removing a virtio device descriptor, by updating the type. + */ + rmb(); + type = ioread8(&d->type); + + /* end of list */ + if (type == 0) + break; + + if (type == -1) + continue; + + /* device already exists */ + dev = device_find_child(mdrv->dev, (void __force *)d, + mic_match_desc); + if (dev) { + if (remove) + iowrite8(MIC_VIRTIO_PARAM_DEV_REMOVE, + &dc->config_change); + put_device(dev); + mic_handle_config_change(d, i, mdrv); + ret = mic_remove_device(d, i, mdrv); + if (!ret && !remove) + iowrite8(-1, &d->type); + if (remove) { + iowrite8(0, &dc->config_change); + iowrite8(0, &dc->guest_ack); + } + continue; + } + + /* new device */ + dev_dbg(mdrv->dev, "%s %d Adding new virtio device %p\n", + __func__, __LINE__, d); + if (!remove) + mic_add_device(d, i, mdrv); + } +} + +/* + * mic_hotplug_device tries to find changes in the device page. + */ +static void mic_hotplug_devices(struct work_struct *work) +{ + struct mic_driver *mdrv = container_of(work, + struct mic_driver, hotplug_work); + + mic_scan_devices(mdrv, !REMOVE_DEVICES); +} + +/* + * Interrupt handler for hot plug/config changes etc. 
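+ *
+ * Editor's note (not part of the original source): this is the usual
+ * ack-then-defer shape. The doorbell is acknowledged in hard interrupt
+ * context and the actual device page scan is pushed to process context
+ * via schedule_work(), since mic_scan_devices() can sleep (it allocates
+ * with GFP_KERNEL and takes device core locks).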
+ */ +static irqreturn_t +mic_extint_handler(int irq, void *data) +{ + struct mic_driver *mdrv = (struct mic_driver *)data; + + dev_dbg(mdrv->dev, "%s %d hotplug work\n", + __func__, __LINE__); + mic_ack_interrupt(&mdrv->mdev); + schedule_work(&mdrv->hotplug_work); + return IRQ_HANDLED; +} + +/* + * Init function for virtio + */ +int mic_devices_init(struct mic_driver *mdrv) +{ + int rc; + struct mic_bootparam __iomem *bootparam; + int config_db; + + INIT_WORK(&mdrv->hotplug_work, mic_hotplug_devices); + mic_scan_devices(mdrv, !REMOVE_DEVICES); + + config_db = mic_next_card_db(); + virtio_config_cookie = mic_request_card_irq(mic_extint_handler, NULL, + "virtio_config_intr", mdrv, + config_db); + if (IS_ERR(virtio_config_cookie)) { + rc = PTR_ERR(virtio_config_cookie); + goto exit; + } + + bootparam = mdrv->dp; + iowrite8(config_db, &bootparam->h2c_config_db); + return 0; +exit: + return rc; +} + +/* + * Uninit function for virtio + */ +void mic_devices_uninit(struct mic_driver *mdrv) +{ + struct mic_bootparam __iomem *bootparam = mdrv->dp; + iowrite8(-1, &bootparam->h2c_config_db); + mic_free_card_irq(virtio_config_cookie, mdrv); + flush_work(&mdrv->hotplug_work); + mic_scan_devices(mdrv, REMOVE_DEVICES); +} diff --git a/kernel/drivers/misc/mic/card/mic_virtio.h b/kernel/drivers/misc/mic/card/mic_virtio.h new file mode 100644 index 000000000..d0407ba53 --- /dev/null +++ b/kernel/drivers/misc/mic/card/mic_virtio.h @@ -0,0 +1,76 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed: Knights Ferry, and + * the Intel product codenamed: Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel MIC Card driver. 
+ *
+ */
+#ifndef __MIC_CARD_VIRTIO_H
+#define __MIC_CARD_VIRTIO_H
+
+#include <linux/mic_common.h>
+#include "mic_device.h"
+
+/*
+ * 64 bit I/O access
+ */
+#ifndef ioread64
+#define ioread64 readq
+#endif
+#ifndef iowrite64
+#define iowrite64 writeq
+#endif
+
+static inline unsigned mic_desc_size(struct mic_device_desc __iomem *desc)
+{
+ return sizeof(*desc) +
+ ioread8(&desc->num_vq) * sizeof(struct mic_vqconfig) +
+ ioread8(&desc->feature_len) * 2 +
+ ioread8(&desc->config_len);
+}
+
+static inline struct mic_vqconfig __iomem *
+mic_vq_config(struct mic_device_desc __iomem *desc)
+{
+ return (struct mic_vqconfig __iomem *)(desc + 1);
+}
+
+static inline __u8 __iomem *
+mic_vq_features(struct mic_device_desc __iomem *desc)
+{
+ return (__u8 __iomem *)(mic_vq_config(desc) + ioread8(&desc->num_vq));
+}
+
+static inline __u8 __iomem *
+mic_vq_configspace(struct mic_device_desc __iomem *desc)
+{
+ return mic_vq_features(desc) + ioread8(&desc->feature_len) * 2;
+}
+
+static inline unsigned mic_total_desc_size(struct mic_device_desc __iomem *desc)
+{
+ return mic_aligned_desc_size(desc) + sizeof(struct mic_device_ctrl);
+}
+
+int mic_devices_init(struct mic_driver *mdrv);
+void mic_devices_uninit(struct mic_driver *mdrv);
+
+#endif
diff --git a/kernel/drivers/misc/mic/card/mic_x100.c b/kernel/drivers/misc/mic/card/mic_x100.c
new file mode 100644
index 000000000..e98e537d6
--- /dev/null
+++ b/kernel/drivers/misc/mic/card/mic_x100.c
@@ -0,0 +1,305 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Disclaimer: The codes contained in these modules may be specific to
+ * the Intel Software Development Platform codenamed: Knights Ferry, and
+ * the Intel product codenamed: Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel MIC Card driver.
+ *
+ */
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_x100.h"
+
+static const char mic_driver_name[] = "mic";
+
+static struct mic_driver g_drv;
+
+/**
+ * mic_read_spad - read from the scratchpad register
+ * @mdev: pointer to mic_device instance
+ * @idx: index to scratchpad register, 0 based
+ *
+ * This function allows reading of the 32bit scratchpad register.
+ *
+ * RETURNS: the value read from the scratchpad register.
+ */
+u32 mic_read_spad(struct mic_device *mdev, unsigned int idx)
+{
+ return mic_mmio_read(&mdev->mmio,
+ MIC_X100_SBOX_BASE_ADDRESS +
+ MIC_X100_SBOX_SPAD0 + idx * 4);
+}
+
+/**
+ * mic_send_intr - Send interrupt to Host.
+ * @mdev: pointer to mic_device instance
+ * @doorbell: Doorbell number.
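+ *
+ * Editor's note (not part of the original source): doorbells 0 through
+ * MIC_X100_MAX_DOORBELL_IDX map onto the SBOX SDBIC registers at 4-byte
+ * strides; anything above the maximum is silently dropped by the range
+ * check below. A typical caller reads the doorbell number from the
+ * bootparam page first, as mic_notify_host() does in mic_device.c:
+ *
+ *	mic_send_intr(&mdrv->mdev, ioread8(&bootparam->c2h_shutdown_db));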
+ */ +void mic_send_intr(struct mic_device *mdev, int doorbell) +{ + struct mic_mw *mw = &mdev->mmio; + + if (doorbell > MIC_X100_MAX_DOORBELL_IDX) + return; + /* Ensure that the interrupt is ordered w.r.t previous stores. */ + wmb(); + mic_mmio_write(mw, MIC_X100_SBOX_SDBIC0_DBREQ_BIT, + MIC_X100_SBOX_BASE_ADDRESS + + (MIC_X100_SBOX_SDBIC0 + (4 * doorbell))); +} + +/** + * mic_ack_interrupt - Device specific interrupt handling. + * @mdev: pointer to mic_device instance + * + * Returns: bitmask of doorbell events triggered. + */ +u32 mic_ack_interrupt(struct mic_device *mdev) +{ + return 0; +} + +static inline int mic_get_sbox_irq(int db) +{ + return MIC_X100_IRQ_BASE + db; +} + +static inline int mic_get_rdmasr_irq(int index) +{ + return MIC_X100_RDMASR_IRQ_BASE + index; +} + +/** + * mic_hw_intr_init - Initialize h/w specific interrupt + * information. + * @mdrv: pointer to mic_driver + */ +void mic_hw_intr_init(struct mic_driver *mdrv) +{ + mdrv->intr_info.num_intr = MIC_X100_NUM_SBOX_IRQ + + MIC_X100_NUM_RDMASR_IRQ; +} + +/** + * mic_db_to_irq - Retrieve irq number corresponding to a doorbell. + * @mdrv: pointer to mic_driver + * @db: The doorbell obtained for which the irq is needed. Doorbell + * may correspond to an sbox doorbell or an rdmasr index. + * + * Returns the irq corresponding to the doorbell. + */ +int mic_db_to_irq(struct mic_driver *mdrv, int db) +{ + int rdmasr_index; + if (db < MIC_X100_NUM_SBOX_IRQ) { + return mic_get_sbox_irq(db); + } else { + rdmasr_index = db - MIC_X100_NUM_SBOX_IRQ + + MIC_X100_RDMASR_IRQ_BASE; + return mic_get_rdmasr_irq(rdmasr_index); + } +} + +/* + * mic_card_map - Allocate virtual address for a remote memory region. + * @mdev: pointer to mic_device instance. + * @addr: Remote DMA address. + * @size: Size of the region. + * + * Returns: Virtual address backing the remote memory region. + */ +void __iomem * +mic_card_map(struct mic_device *mdev, dma_addr_t addr, size_t size) +{ + return ioremap(addr, size); +} + +/* + * mic_card_unmap - Unmap the virtual address for a remote memory region. + * @mdev: pointer to mic_device instance. + * @addr: Virtual address for remote memory region. + * + * Returns: None. 
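+ *
+ * Editor's example (illustrative): every successful mic_card_map() is
+ * paired with a mic_card_unmap(), as in the vring setup in mic_virtio.c:
+ *
+ *	va = mic_card_map(mdev, dma_addr, size);
+ *	if (!va)
+ *		return -ENOMEM;
+ *	...
+ *	mic_card_unmap(mdev, va);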
+ */ +void mic_card_unmap(struct mic_device *mdev, void __iomem *addr) +{ + iounmap(addr); +} + +static inline struct mic_driver *mbdev_to_mdrv(struct mbus_device *mbdev) +{ + return dev_get_drvdata(mbdev->dev.parent); +} + +static struct mic_irq * +_mic_request_threaded_irq(struct mbus_device *mbdev, + irq_handler_t handler, irq_handler_t thread_fn, + const char *name, void *data, int intr_src) +{ + int rc = 0; + unsigned int irq = intr_src; + unsigned long cookie = irq; + + rc = request_threaded_irq(irq, handler, thread_fn, 0, name, data); + if (rc) { + dev_err(mbdev_to_mdrv(mbdev)->dev, + "request_threaded_irq failed rc = %d\n", rc); + return ERR_PTR(rc); + } + return (struct mic_irq *)cookie; +} + +static void _mic_free_irq(struct mbus_device *mbdev, + struct mic_irq *cookie, void *data) +{ + unsigned long irq = (unsigned long)cookie; + free_irq(irq, data); +} + +static void _mic_ack_interrupt(struct mbus_device *mbdev, int num) +{ + mic_ack_interrupt(&mbdev_to_mdrv(mbdev)->mdev); +} + +static struct mbus_hw_ops mbus_hw_ops = { + .request_threaded_irq = _mic_request_threaded_irq, + .free_irq = _mic_free_irq, + .ack_interrupt = _mic_ack_interrupt, +}; + +static int __init mic_probe(struct platform_device *pdev) +{ + struct mic_driver *mdrv = &g_drv; + struct mic_device *mdev = &mdrv->mdev; + int rc = 0; + + mdrv->dev = &pdev->dev; + snprintf(mdrv->name, sizeof(mic_driver_name), mic_driver_name); + + mdev->mmio.pa = MIC_X100_MMIO_BASE; + mdev->mmio.len = MIC_X100_MMIO_LEN; + mdev->mmio.va = devm_ioremap(&pdev->dev, MIC_X100_MMIO_BASE, + MIC_X100_MMIO_LEN); + if (!mdev->mmio.va) { + dev_err(&pdev->dev, "Cannot remap MMIO BAR\n"); + rc = -EIO; + goto done; + } + mic_hw_intr_init(mdrv); + platform_set_drvdata(pdev, mdrv); + mdrv->dma_mbdev = mbus_register_device(mdrv->dev, MBUS_DEV_DMA_MIC, + NULL, &mbus_hw_ops, + mdrv->mdev.mmio.va); + if (IS_ERR(mdrv->dma_mbdev)) { + rc = PTR_ERR(mdrv->dma_mbdev); + dev_err(&pdev->dev, "mbus_add_device failed rc %d\n", rc); + goto done; + } + rc = mic_driver_init(mdrv); + if (rc) { + dev_err(&pdev->dev, "mic_driver_init failed rc %d\n", rc); + goto remove_dma; + } +done: + return rc; +remove_dma: + mbus_unregister_device(mdrv->dma_mbdev); + return rc; +} + +static int mic_remove(struct platform_device *pdev) +{ + struct mic_driver *mdrv = &g_drv; + + mic_driver_uninit(mdrv); + mbus_unregister_device(mdrv->dma_mbdev); + return 0; +} + +static void mic_platform_shutdown(struct platform_device *pdev) +{ + mic_remove(pdev); +} + +static struct platform_device mic_platform_dev = { + .name = mic_driver_name, + .id = 0, + .num_resources = 0, +}; + +static struct platform_driver __refdata mic_platform_driver = { + .probe = mic_probe, + .remove = mic_remove, + .shutdown = mic_platform_shutdown, + .driver = { + .name = mic_driver_name, + }, +}; + +static int __init mic_init(void) +{ + int ret; + struct cpuinfo_x86 *c = &cpu_data(0); + + if (!(c->x86 == 11 && c->x86_model == 1)) { + ret = -ENODEV; + pr_err("%s not running on X100 ret %d\n", __func__, ret); + goto done; + } + + mic_init_card_debugfs(); + ret = platform_device_register(&mic_platform_dev); + if (ret) { + pr_err("platform_device_register ret %d\n", ret); + goto cleanup_debugfs; + } + ret = platform_driver_register(&mic_platform_driver); + if (ret) { + pr_err("platform_driver_register ret %d\n", ret); + goto device_unregister; + } + return ret; + +device_unregister: + platform_device_unregister(&mic_platform_dev); +cleanup_debugfs: + mic_exit_card_debugfs(); +done: + return ret; +} + +static void __exit 
mic_exit(void) +{ + platform_driver_unregister(&mic_platform_driver); + platform_device_unregister(&mic_platform_dev); + mic_exit_card_debugfs(); +} + +module_init(mic_init); +module_exit(mic_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) MIC X100 Card driver"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/misc/mic/card/mic_x100.h b/kernel/drivers/misc/mic/card/mic_x100.h new file mode 100644 index 000000000..d66ea5563 --- /dev/null +++ b/kernel/drivers/misc/mic/card/mic_x100.h @@ -0,0 +1,48 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed: Knights Ferry, and + * the Intel product codenamed: Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel MIC Card driver. + * + */ +#ifndef _MIC_X100_CARD_H_ +#define _MIC_X100_CARD_H_ + +#define MIC_X100_MMIO_BASE 0x08007C0000ULL +#define MIC_X100_MMIO_LEN 0x00020000ULL +#define MIC_X100_SBOX_BASE_ADDRESS 0x00010000ULL + +#define MIC_X100_SBOX_SPAD0 0x0000AB20 +#define MIC_X100_SBOX_SDBIC0 0x0000CC90 +#define MIC_X100_SBOX_SDBIC0_DBREQ_BIT 0x80000000 +#define MIC_X100_SBOX_RDMASR0 0x0000B180 + +#define MIC_X100_MAX_DOORBELL_IDX 8 + +#define MIC_X100_NUM_SBOX_IRQ 8 +#define MIC_X100_NUM_RDMASR_IRQ 8 +#define MIC_X100_SBOX_IRQ_BASE 0 +#define MIC_X100_RDMASR_IRQ_BASE 17 + +#define MIC_X100_IRQ_BASE 26 + +#endif diff --git a/kernel/drivers/misc/mic/common/mic_dev.h b/kernel/drivers/misc/mic/common/mic_dev.h new file mode 100644 index 000000000..92999c2bb --- /dev/null +++ b/kernel/drivers/misc/mic/common/mic_dev.h @@ -0,0 +1,51 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC driver. + * + */ +#ifndef __MIC_DEV_H__ +#define __MIC_DEV_H__ + +/** + * struct mic_mw - MIC memory window + * + * @pa: Base physical address. + * @va: Base ioremap'd virtual address. + * @len: Size of the memory window. + */ +struct mic_mw { + phys_addr_t pa; + void __iomem *va; + resource_size_t len; +}; + +/* + * Scratch pad register offsets used by the host to communicate + * device page DMA address to the card. 
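+ *
+ * Editor's example (illustrative): the host splits the 64-bit device
+ * page address across these two 32-bit scratchpads and the card
+ * recombines it, as mic_dp_init() does in card/mic_device.c:
+ *
+ *	u64 lo = mic_read_spad(&mdrv->mdev, MIC_DPLO_SPAD);
+ *	u64 hi = mic_read_spad(&mdrv->mdev, MIC_DPHI_SPAD);
+ *	dma_addr_t dp_dma_addr = lo | (hi << 32);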
+ */ +#define MIC_DPLO_SPAD 14 +#define MIC_DPHI_SPAD 15 + +/* + * These values are supposed to be in the config_change field of the + * device page when the host sends a config change interrupt to the card. + */ +#define MIC_VIRTIO_PARAM_DEV_REMOVE 0x1 +#define MIC_VIRTIO_PARAM_CONFIG_CHANGED 0x2 + +#endif diff --git a/kernel/drivers/misc/mic/host/Makefile b/kernel/drivers/misc/mic/host/Makefile new file mode 100644 index 000000000..c2197f999 --- /dev/null +++ b/kernel/drivers/misc/mic/host/Makefile @@ -0,0 +1,14 @@ +# +# Makefile - Intel MIC Linux driver. +# Copyright(c) 2013, Intel Corporation. +# +obj-$(CONFIG_INTEL_MIC_HOST) += mic_host.o +mic_host-objs := mic_main.o +mic_host-objs += mic_x100.o +mic_host-objs += mic_sysfs.o +mic_host-objs += mic_smpt.o +mic_host-objs += mic_intr.o +mic_host-objs += mic_boot.o +mic_host-objs += mic_debugfs.o +mic_host-objs += mic_fops.o +mic_host-objs += mic_virtio.o diff --git a/kernel/drivers/misc/mic/host/mic_boot.c b/kernel/drivers/misc/mic/host/mic_boot.c new file mode 100644 index 000000000..d9fa609da --- /dev/null +++ b/kernel/drivers/misc/mic/host/mic_boot.c @@ -0,0 +1,381 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. 
+ * + */ +#include <linux/delay.h> +#include <linux/firmware.h> +#include <linux/pci.h> + +#include <linux/mic_common.h> +#include <linux/mic_bus.h> +#include "../common/mic_dev.h" +#include "mic_device.h" +#include "mic_smpt.h" +#include "mic_virtio.h" + +static inline struct mic_device *mbdev_to_mdev(struct mbus_device *mbdev) +{ + return dev_get_drvdata(mbdev->dev.parent); +} + +static dma_addr_t +mic_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + void *va = phys_to_virt(page_to_phys(page)) + offset; + struct mic_device *mdev = dev_get_drvdata(dev->parent); + + return mic_map_single(mdev, va, size); +} + +static void +mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct mic_device *mdev = dev_get_drvdata(dev->parent); + mic_unmap_single(mdev, dma_addr, size); +} + +static struct dma_map_ops mic_dma_ops = { + .map_page = mic_dma_map_page, + .unmap_page = mic_dma_unmap_page, +}; + +static struct mic_irq * +_mic_request_threaded_irq(struct mbus_device *mbdev, + irq_handler_t handler, irq_handler_t thread_fn, + const char *name, void *data, int intr_src) +{ + return mic_request_threaded_irq(mbdev_to_mdev(mbdev), handler, + thread_fn, name, data, + intr_src, MIC_INTR_DMA); +} + +static void _mic_free_irq(struct mbus_device *mbdev, + struct mic_irq *cookie, void *data) +{ + return mic_free_irq(mbdev_to_mdev(mbdev), cookie, data); +} + +static void _mic_ack_interrupt(struct mbus_device *mbdev, int num) +{ + struct mic_device *mdev = mbdev_to_mdev(mbdev); + mdev->ops->intr_workarounds(mdev); +} + +static struct mbus_hw_ops mbus_hw_ops = { + .request_threaded_irq = _mic_request_threaded_irq, + .free_irq = _mic_free_irq, + .ack_interrupt = _mic_ack_interrupt, +}; + +/** + * mic_reset - Reset the MIC device. + * @mdev: pointer to mic_device instance + */ +static void mic_reset(struct mic_device *mdev) +{ + int i; + +#define MIC_RESET_TO (45) + + reinit_completion(&mdev->reset_wait); + mdev->ops->reset_fw_ready(mdev); + mdev->ops->reset(mdev); + + for (i = 0; i < MIC_RESET_TO; i++) { + if (mdev->ops->is_fw_ready(mdev)) + goto done; + /* + * Resets typically take 10s of seconds to complete. + * Since an MMIO read is required to check if the + * firmware is ready or not, a 1 second delay works nicely. + */ + msleep(1000); + } + mic_set_state(mdev, MIC_RESET_FAILED); +done: + complete_all(&mdev->reset_wait); +} + +/* Initialize the MIC bootparams */ +void mic_bootparam_init(struct mic_device *mdev) +{ + struct mic_bootparam *bootparam = mdev->dp; + + bootparam->magic = cpu_to_le32(MIC_MAGIC); + bootparam->c2h_shutdown_db = mdev->shutdown_db; + bootparam->h2c_shutdown_db = -1; + bootparam->h2c_config_db = -1; + bootparam->shutdown_status = 0; + bootparam->shutdown_card = 0; +} + +/** + * mic_start - Start the MIC. + * @mdev: pointer to mic_device instance + * @buf: buffer containing boot string including firmware/ramdisk path. + * + * This function prepares an MIC for boot and initiates boot. + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. 
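+ *
+ * A hypothetical caller (the real ones live in the boot/sysfs paths of
+ * this driver) would simply check the return value:
+ *
+ *	rc = mic_start(mdev, buf);
+ *	if (rc)
+ *		dev_err(mdev->sdev->parent, "boot failed rc %d\n", rc);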
+ */ +int mic_start(struct mic_device *mdev, const char *buf) +{ + int rc; + mutex_lock(&mdev->mic_mutex); +retry: + if (MIC_OFFLINE != mdev->state) { + rc = -EINVAL; + goto unlock_ret; + } + if (!mdev->ops->is_fw_ready(mdev)) { + mic_reset(mdev); + /* + * The state will either be MIC_OFFLINE if the reset succeeded + * or MIC_RESET_FAILED if the firmware reset failed. + */ + goto retry; + } + mdev->dma_mbdev = mbus_register_device(mdev->sdev->parent, + MBUS_DEV_DMA_HOST, &mic_dma_ops, + &mbus_hw_ops, mdev->mmio.va); + if (IS_ERR(mdev->dma_mbdev)) { + rc = PTR_ERR(mdev->dma_mbdev); + goto unlock_ret; + } + mdev->dma_ch = mic_request_dma_chan(mdev); + if (!mdev->dma_ch) { + rc = -ENXIO; + goto dma_remove; + } + rc = mdev->ops->load_mic_fw(mdev, buf); + if (rc) + goto dma_release; + mic_smpt_restore(mdev); + mic_intr_restore(mdev); + mdev->intr_ops->enable_interrupts(mdev); + mdev->ops->write_spad(mdev, MIC_DPLO_SPAD, mdev->dp_dma_addr); + mdev->ops->write_spad(mdev, MIC_DPHI_SPAD, mdev->dp_dma_addr >> 32); + mdev->ops->send_firmware_intr(mdev); + mic_set_state(mdev, MIC_ONLINE); + goto unlock_ret; +dma_release: + dma_release_channel(mdev->dma_ch); +dma_remove: + mbus_unregister_device(mdev->dma_mbdev); +unlock_ret: + mutex_unlock(&mdev->mic_mutex); + return rc; +} + +/** + * mic_stop - Prepare the MIC for reset and trigger reset. + * @mdev: pointer to mic_device instance + * @force: force a MIC to reset even if it is already offline. + * + * RETURNS: None. + */ +void mic_stop(struct mic_device *mdev, bool force) +{ + mutex_lock(&mdev->mic_mutex); + if (MIC_OFFLINE != mdev->state || force) { + mic_virtio_reset_devices(mdev); + if (mdev->dma_ch) { + dma_release_channel(mdev->dma_ch); + mdev->dma_ch = NULL; + } + mbus_unregister_device(mdev->dma_mbdev); + mic_bootparam_init(mdev); + mic_reset(mdev); + if (MIC_RESET_FAILED == mdev->state) + goto unlock; + mic_set_shutdown_status(mdev, MIC_NOP); + if (MIC_SUSPENDED != mdev->state) + mic_set_state(mdev, MIC_OFFLINE); + } +unlock: + mutex_unlock(&mdev->mic_mutex); +} + +/** + * mic_shutdown - Initiate MIC shutdown. + * @mdev: pointer to mic_device instance + * + * RETURNS: None. + */ +void mic_shutdown(struct mic_device *mdev) +{ + struct mic_bootparam *bootparam = mdev->dp; + s8 db = bootparam->h2c_shutdown_db; + + mutex_lock(&mdev->mic_mutex); + if (MIC_ONLINE == mdev->state && db != -1) { + bootparam->shutdown_card = 1; + mdev->ops->send_intr(mdev, db); + mic_set_state(mdev, MIC_SHUTTING_DOWN); + } + mutex_unlock(&mdev->mic_mutex); +} + +/** + * mic_shutdown_work - Handle shutdown interrupt from MIC. + * @work: The work structure. + * + * This work is scheduled whenever the host has received a shutdown + * interrupt from the MIC. + */ +void mic_shutdown_work(struct work_struct *work) +{ + struct mic_device *mdev = container_of(work, struct mic_device, + shutdown_work); + struct mic_bootparam *bootparam = mdev->dp; + + mutex_lock(&mdev->mic_mutex); + mic_set_shutdown_status(mdev, bootparam->shutdown_status); + bootparam->shutdown_status = 0; + + /* + * if state is MIC_SUSPENDED, OSPM suspend is in progress. We do not + * change the state here so as to prevent users from booting the card + * during and after the suspend operation. + */ + if (MIC_SHUTTING_DOWN != mdev->state && + MIC_SUSPENDED != mdev->state) + mic_set_state(mdev, MIC_SHUTTING_DOWN); + mutex_unlock(&mdev->mic_mutex); +} + +/** + * mic_reset_trigger_work - Trigger MIC reset. + * @work: The work structure. + * + * This work is scheduled whenever the host wants to reset the MIC. 
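+ *
+ * A reset request is therefore kicked off with a plain (sketch, assuming
+ * the work item was initialized elsewhere with this handler):
+ *
+ *	schedule_work(&mdev->reset_trigger_work);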
+ */ +void mic_reset_trigger_work(struct work_struct *work) +{ + struct mic_device *mdev = container_of(work, struct mic_device, + reset_trigger_work); + + mic_stop(mdev, false); +} + +/** + * mic_complete_resume - Complete MIC Resume after an OSPM suspend/hibernate + * event. + * @mdev: pointer to mic_device instance + * + * RETURNS: None. + */ +void mic_complete_resume(struct mic_device *mdev) +{ + if (mdev->state != MIC_SUSPENDED) { + dev_warn(mdev->sdev->parent, "state %d should be %d\n", + mdev->state, MIC_SUSPENDED); + return; + } + + /* Make sure firmware is ready */ + if (!mdev->ops->is_fw_ready(mdev)) + mic_stop(mdev, true); + + mutex_lock(&mdev->mic_mutex); + mic_set_state(mdev, MIC_OFFLINE); + mutex_unlock(&mdev->mic_mutex); +} + +/** + * mic_prepare_suspend - Handle suspend notification for the MIC device. + * @mdev: pointer to mic_device instance + * + * RETURNS: None. + */ +void mic_prepare_suspend(struct mic_device *mdev) +{ + unsigned long timeout; + +#define MIC_SUSPEND_TIMEOUT (60 * HZ) + + mutex_lock(&mdev->mic_mutex); + switch (mdev->state) { + case MIC_OFFLINE: + /* + * Card is already offline. Set state to MIC_SUSPENDED + * to prevent users from booting the card. + */ + mic_set_state(mdev, MIC_SUSPENDED); + mutex_unlock(&mdev->mic_mutex); + break; + case MIC_ONLINE: + /* + * Card is online. Set state to MIC_SUSPENDING and notify + * MIC user space daemon which will issue card + * shutdown and reset. + */ + mic_set_state(mdev, MIC_SUSPENDING); + mutex_unlock(&mdev->mic_mutex); + timeout = wait_for_completion_timeout(&mdev->reset_wait, + MIC_SUSPEND_TIMEOUT); + /* Force reset the card if the shutdown completion timed out */ + if (!timeout) { + mutex_lock(&mdev->mic_mutex); + mic_set_state(mdev, MIC_SUSPENDED); + mutex_unlock(&mdev->mic_mutex); + mic_stop(mdev, true); + } + break; + case MIC_SHUTTING_DOWN: + /* + * Card is shutting down. Set state to MIC_SUSPENDED + * to prevent further boot of the card. + */ + mic_set_state(mdev, MIC_SUSPENDED); + mutex_unlock(&mdev->mic_mutex); + timeout = wait_for_completion_timeout(&mdev->reset_wait, + MIC_SUSPEND_TIMEOUT); + /* Force reset the card if the shutdown completion timed out */ + if (!timeout) + mic_stop(mdev, true); + break; + default: + mutex_unlock(&mdev->mic_mutex); + break; + } +} + +/** + * mic_suspend - Initiate MIC suspend. Suspend merely issues card shutdown. + * @mdev: pointer to mic_device instance + * + * RETURNS: None. + */ +void mic_suspend(struct mic_device *mdev) +{ + struct mic_bootparam *bootparam = mdev->dp; + s8 db = bootparam->h2c_shutdown_db; + + mutex_lock(&mdev->mic_mutex); + if (MIC_SUSPENDING == mdev->state && db != -1) { + bootparam->shutdown_card = 1; + mdev->ops->send_intr(mdev, db); + mic_set_state(mdev, MIC_SUSPENDED); + } + mutex_unlock(&mdev->mic_mutex); +} diff --git a/kernel/drivers/misc/mic/host/mic_debugfs.c b/kernel/drivers/misc/mic/host/mic_debugfs.c new file mode 100644 index 000000000..687e9aacf --- /dev/null +++ b/kernel/drivers/misc/mic/host/mic_debugfs.c @@ -0,0 +1,497 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. + * + */ +#include <linux/debugfs.h> +#include <linux/pci.h> +#include <linux/seq_file.h> + +#include <linux/mic_common.h> +#include "../common/mic_dev.h" +#include "mic_device.h" +#include "mic_smpt.h" +#include "mic_virtio.h" + +/* Debugfs parent dir */ +static struct dentry *mic_dbg; + +/** + * mic_log_buf_show - Display MIC kernel log buffer. + * + * log_buf addr/len is read from System.map by user space + * and populated in sysfs entries. + */ +static int mic_log_buf_show(struct seq_file *s, void *unused) +{ + void __iomem *log_buf_va; + int __iomem *log_buf_len_va; + struct mic_device *mdev = s->private; + void *kva; + int size; + unsigned long aper_offset; + + if (!mdev || !mdev->log_buf_addr || !mdev->log_buf_len) + goto done; + /* + * Card kernel will never be relocated and any kernel text/data mapping + * can be translated to phys address by subtracting __START_KERNEL_map. + */ + aper_offset = (unsigned long)mdev->log_buf_len - __START_KERNEL_map; + log_buf_len_va = mdev->aper.va + aper_offset; + aper_offset = (unsigned long)mdev->log_buf_addr - __START_KERNEL_map; + log_buf_va = mdev->aper.va + aper_offset; + size = ioread32(log_buf_len_va); + + kva = kmalloc(size, GFP_KERNEL); + if (!kva) + goto done; + mutex_lock(&mdev->mic_mutex); + memcpy_fromio(kva, log_buf_va, size); + switch (mdev->state) { + case MIC_ONLINE: + /* Fall through */ + case MIC_SHUTTING_DOWN: + seq_write(s, kva, size); + break; + default: + break; + } + mutex_unlock(&mdev->mic_mutex); + kfree(kva); +done: + return 0; +} + +static int mic_log_buf_open(struct inode *inode, struct file *file) +{ + return single_open(file, mic_log_buf_show, inode->i_private); +} + +static int mic_log_buf_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static const struct file_operations log_buf_ops = { + .owner = THIS_MODULE, + .open = mic_log_buf_open, + .read = seq_read, + .llseek = seq_lseek, + .release = mic_log_buf_release +}; + +static int mic_smpt_show(struct seq_file *s, void *pos) +{ + int i; + struct mic_device *mdev = s->private; + unsigned long flags; + + seq_printf(s, "MIC %-2d |%-10s| %-14s %-10s\n", + mdev->id, "SMPT entry", "SW DMA addr", "RefCount"); + seq_puts(s, "====================================================\n"); + + if (mdev->smpt) { + struct mic_smpt_info *smpt_info = mdev->smpt; + spin_lock_irqsave(&smpt_info->smpt_lock, flags); + for (i = 0; i < smpt_info->info.num_reg; i++) { + seq_printf(s, "%9s|%-10d| %-#14llx %-10lld\n", + " ", i, smpt_info->entry[i].dma_addr, + smpt_info->entry[i].ref_count); + } + spin_unlock_irqrestore(&smpt_info->smpt_lock, flags); + } + seq_puts(s, "====================================================\n"); + return 0; +} + +static int mic_smpt_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, mic_smpt_show, inode->i_private); +} + +static int mic_smpt_debug_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static const struct file_operations smpt_file_ops = { + .owner = THIS_MODULE, + .open = mic_smpt_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = mic_smpt_debug_release +}; + +static int mic_soft_reset_show(struct seq_file *s, void *pos) +{ + struct mic_device *mdev = s->private; + + mic_stop(mdev, true); + return 0; +} + +static int 
mic_soft_reset_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, mic_soft_reset_show, inode->i_private); +} + +static int mic_soft_reset_debug_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static const struct file_operations soft_reset_ops = { + .owner = THIS_MODULE, + .open = mic_soft_reset_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = mic_soft_reset_debug_release +}; + +static int mic_post_code_show(struct seq_file *s, void *pos) +{ + struct mic_device *mdev = s->private; + u32 reg = mdev->ops->get_postcode(mdev); + + seq_printf(s, "%c%c", reg & 0xff, (reg >> 8) & 0xff); + return 0; +} + +static int mic_post_code_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, mic_post_code_show, inode->i_private); +} + +static int mic_post_code_debug_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static const struct file_operations post_code_ops = { + .owner = THIS_MODULE, + .open = mic_post_code_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = mic_post_code_debug_release +}; + +static int mic_dp_show(struct seq_file *s, void *pos) +{ + struct mic_device *mdev = s->private; + struct mic_device_desc *d; + struct mic_device_ctrl *dc; + struct mic_vqconfig *vqconfig; + __u32 *features; + __u8 *config; + struct mic_bootparam *bootparam = mdev->dp; + int i, j; + + seq_printf(s, "Bootparam: magic 0x%x\n", + bootparam->magic); + seq_printf(s, "Bootparam: h2c_shutdown_db %d\n", + bootparam->h2c_shutdown_db); + seq_printf(s, "Bootparam: h2c_config_db %d\n", + bootparam->h2c_config_db); + seq_printf(s, "Bootparam: c2h_shutdown_db %d\n", + bootparam->c2h_shutdown_db); + seq_printf(s, "Bootparam: shutdown_status %d\n", + bootparam->shutdown_status); + seq_printf(s, "Bootparam: shutdown_card %d\n", + bootparam->shutdown_card); + + for (i = sizeof(*bootparam); i < MIC_DP_SIZE; + i += mic_total_desc_size(d)) { + d = mdev->dp + i; + dc = (void *)d + mic_aligned_desc_size(d); + + /* end of list */ + if (d->type == 0) + break; + + if (d->type == -1) + continue; + + seq_printf(s, "Type %d ", d->type); + seq_printf(s, "Num VQ %d ", d->num_vq); + seq_printf(s, "Feature Len %d\n", d->feature_len); + seq_printf(s, "Config Len %d ", d->config_len); + seq_printf(s, "Shutdown Status %d\n", d->status); + + for (j = 0; j < d->num_vq; j++) { + vqconfig = mic_vq_config(d) + j; + seq_printf(s, "vqconfig[%d]: ", j); + seq_printf(s, "address 0x%llx ", vqconfig->address); + seq_printf(s, "num %d ", vqconfig->num); + seq_printf(s, "used address 0x%llx\n", + vqconfig->used_address); + } + + features = (__u32 *)mic_vq_features(d); + seq_printf(s, "Features: Host 0x%x ", features[0]); + seq_printf(s, "Guest 0x%x\n", features[1]); + + config = mic_vq_configspace(d); + for (j = 0; j < d->config_len; j++) + seq_printf(s, "config[%d]=%d\n", j, config[j]); + + seq_puts(s, "Device control:\n"); + seq_printf(s, "Config Change %d ", dc->config_change); + seq_printf(s, "Vdev reset %d\n", dc->vdev_reset); + seq_printf(s, "Guest Ack %d ", dc->guest_ack); + seq_printf(s, "Host ack %d\n", dc->host_ack); + seq_printf(s, "Used address updated %d ", + dc->used_address_updated); + seq_printf(s, "Vdev 0x%llx\n", dc->vdev); + seq_printf(s, "c2h doorbell %d ", dc->c2h_vdev_db); + seq_printf(s, "h2c doorbell %d\n", dc->h2c_vdev_db); + } + + return 0; +} + +static int mic_dp_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, 
mic_dp_show, inode->i_private); +} + +static int mic_dp_debug_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static const struct file_operations dp_ops = { + .owner = THIS_MODULE, + .open = mic_dp_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = mic_dp_debug_release +}; + +static int mic_vdev_info_show(struct seq_file *s, void *unused) +{ + struct mic_device *mdev = s->private; + struct list_head *pos, *tmp; + struct mic_vdev *mvdev; + int i, j; + + mutex_lock(&mdev->mic_mutex); + list_for_each_safe(pos, tmp, &mdev->vdev_list) { + mvdev = list_entry(pos, struct mic_vdev, list); + seq_printf(s, "VDEV type %d state %s in %ld out %ld\n", + mvdev->virtio_id, + mic_vdevup(mvdev) ? "UP" : "DOWN", + mvdev->in_bytes, + mvdev->out_bytes); + for (i = 0; i < MIC_MAX_VRINGS; i++) { + struct vring_desc *desc; + struct vring_avail *avail; + struct vring_used *used; + struct mic_vringh *mvr = &mvdev->mvr[i]; + struct vringh *vrh = &mvr->vrh; + int num = vrh->vring.num; + if (!num) + continue; + desc = vrh->vring.desc; + seq_printf(s, "vring i %d avail_idx %d", + i, mvr->vring.info->avail_idx & (num - 1)); + seq_printf(s, " vring i %d avail_idx %d\n", + i, mvr->vring.info->avail_idx); + seq_printf(s, "vrh i %d weak_barriers %d", + i, vrh->weak_barriers); + seq_printf(s, " last_avail_idx %d last_used_idx %d", + vrh->last_avail_idx, vrh->last_used_idx); + seq_printf(s, " completed %d\n", vrh->completed); + for (j = 0; j < num; j++) { + seq_printf(s, "desc[%d] addr 0x%llx len %d", + j, desc->addr, desc->len); + seq_printf(s, " flags 0x%x next %d\n", + desc->flags, desc->next); + desc++; + } + avail = vrh->vring.avail; + seq_printf(s, "avail flags 0x%x idx %d\n", + vringh16_to_cpu(vrh, avail->flags), + vringh16_to_cpu(vrh, avail->idx) & (num - 1)); + seq_printf(s, "avail flags 0x%x idx %d\n", + vringh16_to_cpu(vrh, avail->flags), + vringh16_to_cpu(vrh, avail->idx)); + for (j = 0; j < num; j++) + seq_printf(s, "avail ring[%d] %d\n", + j, avail->ring[j]); + used = vrh->vring.used; + seq_printf(s, "used flags 0x%x idx %d\n", + vringh16_to_cpu(vrh, used->flags), + vringh16_to_cpu(vrh, used->idx) & (num - 1)); + seq_printf(s, "used flags 0x%x idx %d\n", + vringh16_to_cpu(vrh, used->flags), + vringh16_to_cpu(vrh, used->idx)); + for (j = 0; j < num; j++) + seq_printf(s, "used ring[%d] id %d len %d\n", + j, vringh32_to_cpu(vrh, + used->ring[j].id), + vringh32_to_cpu(vrh, + used->ring[j].len)); + } + } + mutex_unlock(&mdev->mic_mutex); + + return 0; +} + +static int mic_vdev_info_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, mic_vdev_info_show, inode->i_private); +} + +static int mic_vdev_info_debug_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static const struct file_operations vdev_info_ops = { + .owner = THIS_MODULE, + .open = mic_vdev_info_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = mic_vdev_info_debug_release +}; + +static int mic_msi_irq_info_show(struct seq_file *s, void *pos) +{ + struct mic_device *mdev = s->private; + int reg; + int i, j; + u16 entry; + u16 vector; + struct pci_dev *pdev = container_of(mdev->sdev->parent, + struct pci_dev, dev); + + if (pci_dev_msi_enabled(pdev)) { + for (i = 0; i < mdev->irq_info.num_vectors; i++) { + if (pdev->msix_enabled) { + entry = mdev->irq_info.msix_entries[i].entry; + vector = mdev->irq_info.msix_entries[i].vector; + } else { + entry = 0; + vector = pdev->irq; + } + + reg = 
mdev->intr_ops->read_msi_to_src_map(mdev, entry); + + seq_printf(s, "%s %-10d %s %-10d MXAR[%d]: %08X\n", + "IRQ:", vector, "Entry:", entry, i, reg); + + seq_printf(s, "%-10s", "offset:"); + for (j = (MIC_NUM_OFFSETS - 1); j >= 0; j--) + seq_printf(s, "%4d ", j); + seq_puts(s, "\n"); + + + seq_printf(s, "%-10s", "count:"); + for (j = (MIC_NUM_OFFSETS - 1); j >= 0; j--) + seq_printf(s, "%4d ", + (mdev->irq_info.mic_msi_map[i] & + BIT(j)) ? 1 : 0); + seq_puts(s, "\n\n"); + } + } else { + seq_puts(s, "MSI/MSIx interrupts not enabled\n"); + } + + return 0; +} + +static int mic_msi_irq_info_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, mic_msi_irq_info_show, inode->i_private); +} + +static int +mic_msi_irq_info_debug_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static const struct file_operations msi_irq_info_ops = { + .owner = THIS_MODULE, + .open = mic_msi_irq_info_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = mic_msi_irq_info_debug_release +}; + +/** + * mic_create_debug_dir - Initialize MIC debugfs entries. + */ +void mic_create_debug_dir(struct mic_device *mdev) +{ + if (!mic_dbg) + return; + + mdev->dbg_dir = debugfs_create_dir(dev_name(mdev->sdev), mic_dbg); + if (!mdev->dbg_dir) + return; + + debugfs_create_file("log_buf", 0444, mdev->dbg_dir, mdev, &log_buf_ops); + + debugfs_create_file("smpt", 0444, mdev->dbg_dir, mdev, &smpt_file_ops); + + debugfs_create_file("soft_reset", 0444, mdev->dbg_dir, mdev, + &soft_reset_ops); + + debugfs_create_file("post_code", 0444, mdev->dbg_dir, mdev, + &post_code_ops); + + debugfs_create_file("dp", 0444, mdev->dbg_dir, mdev, &dp_ops); + + debugfs_create_file("vdev_info", 0444, mdev->dbg_dir, mdev, + &vdev_info_ops); + + debugfs_create_file("msi_irq_info", 0444, mdev->dbg_dir, mdev, + &msi_irq_info_ops); +} + +/** + * mic_delete_debug_dir - Uninitialize MIC debugfs entries. + */ +void mic_delete_debug_dir(struct mic_device *mdev) +{ + if (!mdev->dbg_dir) + return; + + debugfs_remove_recursive(mdev->dbg_dir); +} + +/** + * mic_init_debugfs - Initialize global debugfs entry. + */ +void __init mic_init_debugfs(void) +{ + mic_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL); + if (!mic_dbg) + pr_err("can't create debugfs dir\n"); +} + +/** + * mic_exit_debugfs - Uninitialize global debugfs entry + */ +void mic_exit_debugfs(void) +{ + debugfs_remove(mic_dbg); +} diff --git a/kernel/drivers/misc/mic/host/mic_device.h b/kernel/drivers/misc/mic/host/mic_device.h new file mode 100644 index 000000000..016bd15a7 --- /dev/null +++ b/kernel/drivers/misc/mic/host/mic_device.h @@ -0,0 +1,231 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. 
+ * + */ +#ifndef _MIC_DEVICE_H_ +#define _MIC_DEVICE_H_ + +#include <linux/cdev.h> +#include <linux/idr.h> +#include <linux/notifier.h> +#include <linux/irqreturn.h> +#include <linux/dmaengine.h> +#include <linux/mic_bus.h> + +#include "mic_intr.h" + +/* The maximum number of MIC devices supported in a single host system. */ +#define MIC_MAX_NUM_DEVS 256 + +/** + * enum mic_hw_family - The hardware family to which a device belongs. + */ +enum mic_hw_family { + MIC_FAMILY_X100 = 0, + MIC_FAMILY_UNKNOWN +}; + +/** + * enum mic_stepping - MIC stepping ids. + */ +enum mic_stepping { + MIC_A0_STEP = 0x0, + MIC_B0_STEP = 0x10, + MIC_B1_STEP = 0x11, + MIC_C0_STEP = 0x20, +}; + +/** + * struct mic_device - MIC device information for each card. + * + * @mmio: MMIO bar information. + * @aper: Aperture bar information. + * @family: The MIC family to which this device belongs. + * @ops: MIC HW specific operations. + * @id: The unique device id for this MIC device. + * @stepping: Stepping ID. + * @attr_group: Pointer to list of sysfs attribute groups. + * @sdev: Device for sysfs entries. + * @mic_mutex: Mutex for synchronizing access to mic_device. + * @intr_ops: HW specific interrupt operations. + * @smpt_ops: Hardware specific SMPT operations. + * @smpt: MIC SMPT information. + * @intr_info: H/W specific interrupt information. + * @irq_info: The OS specific irq information + * @dbg_dir: debugfs directory of this MIC device. + * @cmdline: Kernel command line. + * @firmware: Firmware file name. + * @ramdisk: Ramdisk file name. + * @bootmode: Boot mode i.e. "linux" or "elf" for flash updates. + * @bootaddr: MIC boot address. + * @reset_trigger_work: Work for triggering reset requests. + * @shutdown_work: Work for handling shutdown interrupts. + * @state: MIC state. + * @shutdown_status: MIC status reported by card for shutdown/crashes. + * @state_sysfs: Sysfs dirent for notifying ring 3 about MIC state changes. + * @reset_wait: Waitqueue for sleeping while reset completes. + * @log_buf_addr: Log buffer address for MIC. + * @log_buf_len: Log buffer length address for MIC. + * @dp: virtio device page + * @dp_dma_addr: virtio device page DMA address. + * @shutdown_db: shutdown doorbell. + * @shutdown_cookie: shutdown cookie. + * @cdev: Character device for MIC. + * @vdev_list: list of virtio devices. + * @pm_notifier: Handles PM notifications from the OS. + * @dma_mbdev: MIC BUS DMA device. + * @dma_ch: DMA channel reserved by this driver for use by virtio devices. + */ +struct mic_device { + struct mic_mw mmio; + struct mic_mw aper; + enum mic_hw_family family; + struct mic_hw_ops *ops; + int id; + enum mic_stepping stepping; + const struct attribute_group **attr_group; + struct device *sdev; + struct mutex mic_mutex; + struct mic_hw_intr_ops *intr_ops; + struct mic_smpt_ops *smpt_ops; + struct mic_smpt_info *smpt; + struct mic_intr_info *intr_info; + struct mic_irq_info irq_info; + struct dentry *dbg_dir; + char *cmdline; + char *firmware; + char *ramdisk; + char *bootmode; + u32 bootaddr; + struct work_struct reset_trigger_work; + struct work_struct shutdown_work; + u8 state; + u8 shutdown_status; + struct kernfs_node *state_sysfs; + struct completion reset_wait; + void *log_buf_addr; + int *log_buf_len; + void *dp; + dma_addr_t dp_dma_addr; + int shutdown_db; + struct mic_irq *shutdown_cookie; + struct cdev cdev; + struct list_head vdev_list; + struct notifier_block pm_notifier; + struct mbus_device *dma_mbdev; + struct dma_chan *dma_ch; +}; + +/** + * struct mic_hw_ops - MIC HW specific operations. 
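+ *
+ * Each MIC family provides its own instance of these ops (for X100,
+ * presumably in mic_x100.c), and generic code invokes them through the
+ * device, e.g. (sketch):
+ *
+ *	u32 lo = mdev->ops->read_spad(mdev, MIC_DPLO_SPAD);
+ *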
+ * @aper_bar: Aperture bar resource number. + * @mmio_bar: MMIO bar resource number. + * @read_spad: Read from scratch pad register. + * @write_spad: Write to scratch pad register. + * @send_intr: Send an interrupt for a particular doorbell on the card. + * @ack_interrupt: Hardware specific operations to ack the h/w on + * receipt of an interrupt. + * @intr_workarounds: Hardware specific workarounds needed after + * handling an interrupt. + * @reset: Reset the remote processor. + * @reset_fw_ready: Reset firmware ready field. + * @is_fw_ready: Check if firmware is ready for OS download. + * @send_firmware_intr: Send an interrupt to the card firmware. + * @load_mic_fw: Load firmware segments required to boot the card + * into card memory. This includes the kernel, command line, ramdisk etc. + * @get_postcode: Get post code status from firmware. + * @dma_filter: DMA filter function to be used. + */ +struct mic_hw_ops { + u8 aper_bar; + u8 mmio_bar; + u32 (*read_spad)(struct mic_device *mdev, unsigned int idx); + void (*write_spad)(struct mic_device *mdev, unsigned int idx, u32 val); + void (*send_intr)(struct mic_device *mdev, int doorbell); + u32 (*ack_interrupt)(struct mic_device *mdev); + void (*intr_workarounds)(struct mic_device *mdev); + void (*reset)(struct mic_device *mdev); + void (*reset_fw_ready)(struct mic_device *mdev); + bool (*is_fw_ready)(struct mic_device *mdev); + void (*send_firmware_intr)(struct mic_device *mdev); + int (*load_mic_fw)(struct mic_device *mdev, const char *buf); + u32 (*get_postcode)(struct mic_device *mdev); + bool (*dma_filter)(struct dma_chan *chan, void *param); +}; + +/** + * mic_mmio_read - read from an MMIO register. + * @mw: MMIO register base virtual address. + * @offset: register offset. + * + * RETURNS: register value. + */ +static inline u32 mic_mmio_read(struct mic_mw *mw, u32 offset) +{ + return ioread32(mw->va + offset); +} + +/** + * mic_mmio_write - write to an MMIO register. + * @mw: MMIO register base virtual address. + * @val: the data value to put into the register + * @offset: register offset. + * + * RETURNS: none. 
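+ *
+ * A minimal read-modify-write sketch using both helpers (offset is a
+ * hypothetical register offset):
+ *
+ *	u32 v = mic_mmio_read(&mdev->mmio, offset);
+ *	mic_mmio_write(&mdev->mmio, v | BIT(0), offset);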
+ */ +static inline void +mic_mmio_write(struct mic_mw *mw, u32 val, u32 offset) +{ + iowrite32(val, mw->va + offset); +} + +static inline struct dma_chan *mic_request_dma_chan(struct mic_device *mdev) +{ + dma_cap_mask_t mask; + struct dma_chan *chan; + + dma_cap_zero(mask); + dma_cap_set(DMA_MEMCPY, mask); + chan = dma_request_channel(mask, mdev->ops->dma_filter, + mdev->sdev->parent); + if (chan) + return chan; + dev_err(mdev->sdev->parent, "%s %d unable to acquire channel\n", + __func__, __LINE__); + return NULL; +} + +void mic_sysfs_init(struct mic_device *mdev); +int mic_start(struct mic_device *mdev, const char *buf); +void mic_stop(struct mic_device *mdev, bool force); +void mic_shutdown(struct mic_device *mdev); +void mic_reset_delayed_work(struct work_struct *work); +void mic_reset_trigger_work(struct work_struct *work); +void mic_shutdown_work(struct work_struct *work); +void mic_bootparam_init(struct mic_device *mdev); +void mic_set_state(struct mic_device *mdev, u8 state); +void mic_set_shutdown_status(struct mic_device *mdev, u8 status); +void mic_create_debug_dir(struct mic_device *dev); +void mic_delete_debug_dir(struct mic_device *dev); +void __init mic_init_debugfs(void); +void mic_exit_debugfs(void); +void mic_prepare_suspend(struct mic_device *mdev); +void mic_complete_resume(struct mic_device *mdev); +void mic_suspend(struct mic_device *mdev); +#endif diff --git a/kernel/drivers/misc/mic/host/mic_fops.c b/kernel/drivers/misc/mic/host/mic_fops.c new file mode 100644 index 000000000..85776d732 --- /dev/null +++ b/kernel/drivers/misc/mic/host/mic_fops.c @@ -0,0 +1,222 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. 
+ *
+ */
+#include <linux/poll.h>
+#include <linux/pci.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_fops.h"
+#include "mic_virtio.h"
+
+int mic_open(struct inode *inode, struct file *f)
+{
+	struct mic_vdev *mvdev;
+	struct mic_device *mdev = container_of(inode->i_cdev,
+		struct mic_device, cdev);
+
+	mvdev = kzalloc(sizeof(*mvdev), GFP_KERNEL);
+	if (!mvdev)
+		return -ENOMEM;
+
+	init_waitqueue_head(&mvdev->waitq);
+	INIT_LIST_HEAD(&mvdev->list);
+	mvdev->mdev = mdev;
+	mvdev->virtio_id = -1;
+
+	f->private_data = mvdev;
+	return 0;
+}
+
+int mic_release(struct inode *inode, struct file *f)
+{
+	struct mic_vdev *mvdev = (struct mic_vdev *)f->private_data;
+
+	if (-1 != mvdev->virtio_id)
+		mic_virtio_del_device(mvdev);
+	f->private_data = NULL;
+	kfree(mvdev);
+	return 0;
+}
+
+long mic_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+	struct mic_vdev *mvdev = (struct mic_vdev *)f->private_data;
+	void __user *argp = (void __user *)arg;
+	int ret;
+
+	switch (cmd) {
+	case MIC_VIRTIO_ADD_DEVICE:
+	{
+		ret = mic_virtio_add_device(mvdev, argp);
+		if (ret < 0) {
+			dev_err(mic_dev(mvdev),
+				"%s %d errno ret %d\n",
+				__func__, __LINE__, ret);
+			return ret;
+		}
+		break;
+	}
+	case MIC_VIRTIO_COPY_DESC:
+	{
+		struct mic_copy_desc copy;
+
+		ret = mic_vdev_inited(mvdev);
+		if (ret)
+			return ret;
+
+		if (copy_from_user(&copy, argp, sizeof(copy)))
+			return -EFAULT;
+
+		dev_dbg(mic_dev(mvdev),
+			"%s %d === iovcnt 0x%x vr_idx 0x%x update_used %d\n",
+			__func__, __LINE__, copy.iovcnt, copy.vr_idx,
+			copy.update_used);
+
+		ret = mic_virtio_copy_desc(mvdev, &copy);
+		if (ret < 0) {
+			dev_err(mic_dev(mvdev),
+				"%s %d errno ret %d\n",
+				__func__, __LINE__, ret);
+			return ret;
+		}
+		if (copy_to_user(
+			&((struct mic_copy_desc __user *)argp)->out_len,
+			&copy.out_len, sizeof(copy.out_len))) {
+			dev_err(mic_dev(mvdev), "%s %d errno ret %d\n",
+				__func__, __LINE__, -EFAULT);
+			return -EFAULT;
+		}
+		break;
+	}
+	case MIC_VIRTIO_CONFIG_CHANGE:
+	{
+		ret = mic_vdev_inited(mvdev);
+		if (ret)
+			return ret;
+
+		ret = mic_virtio_config_change(mvdev, argp);
+		if (ret < 0) {
+			dev_err(mic_dev(mvdev),
+				"%s %d errno ret %d\n",
+				__func__, __LINE__, ret);
+			return ret;
+		}
+		break;
+	}
+	default:
+		return -ENOIOCTLCMD;
+	}
+	return 0;
+}
+
+/*
+ * We return POLLIN | POLLOUT from poll when new buffers are enqueued, and
+ * not when previously enqueued buffers may be available. This means that
+ * in the card->host (TX) path, when userspace is unblocked by poll it
+ * must drain all available descriptors or it can stall.
+ */
+unsigned int mic_poll(struct file *f, poll_table *wait)
+{
+	struct mic_vdev *mvdev = (struct mic_vdev *)f->private_data;
+	int mask = 0;
+
+	poll_wait(f, &mvdev->waitq, wait);
+
+	if (mic_vdev_inited(mvdev)) {
+		mask = POLLERR;
+	} else if (mvdev->poll_wake) {
+		mvdev->poll_wake = 0;
+		mask = POLLIN | POLLOUT;
+	}
+
+	return mask;
+}
+
+static inline int
+mic_query_offset(struct mic_vdev *mvdev, unsigned long offset,
+		 unsigned long *size, unsigned long *pa)
+{
+	struct mic_device *mdev = mvdev->mdev;
+	unsigned long start = MIC_DP_SIZE;
+	int i;
+
+	/*
+	 * MMAP interface is as follows:
+	 * offset				region
+	 * 0x0					virtio device_page
+	 * 0x1000				first vring
+	 * 0x1000 + size of 1st vring		second vring
+	 * ....
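+	 *
+	 * Given this layout, a userspace client would map the (read-only)
+	 * device page with a hypothetical snippet such as:
+	 *
+	 *	dp = mmap(NULL, 0x1000, PROT_READ, MAP_SHARED, fd, 0);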
+ */ + if (!offset) { + *pa = virt_to_phys(mdev->dp); + *size = MIC_DP_SIZE; + return 0; + } + + for (i = 0; i < mvdev->dd->num_vq; i++) { + struct mic_vringh *mvr = &mvdev->mvr[i]; + if (offset == start) { + *pa = virt_to_phys(mvr->vring.va); + *size = mvr->vring.len; + return 0; + } + start += mvr->vring.len; + } + return -1; +} + +/* + * Maps the device page and virtio rings to user space for readonly access. + */ +int +mic_mmap(struct file *f, struct vm_area_struct *vma) +{ + struct mic_vdev *mvdev = (struct mic_vdev *)f->private_data; + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size; + int i, err; + + err = mic_vdev_inited(mvdev); + if (err) + return err; + + if (vma->vm_flags & VM_WRITE) + return -EACCES; + + while (size_rem) { + i = mic_query_offset(mvdev, offset, &size, &pa); + if (i < 0) + return -EINVAL; + err = remap_pfn_range(vma, vma->vm_start + offset, + pa >> PAGE_SHIFT, size, vma->vm_page_prot); + if (err) + return err; + dev_dbg(mic_dev(mvdev), + "%s %d type %d size 0x%lx off 0x%lx pa 0x%lx vma 0x%lx\n", + __func__, __LINE__, mvdev->virtio_id, size, offset, + pa, vma->vm_start + offset); + size_rem -= size; + offset += size; + } + return 0; +} diff --git a/kernel/drivers/misc/mic/host/mic_fops.h b/kernel/drivers/misc/mic/host/mic_fops.h new file mode 100644 index 000000000..dc3893dff --- /dev/null +++ b/kernel/drivers/misc/mic/host/mic_fops.h @@ -0,0 +1,32 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. + * + */ +#ifndef _MIC_FOPS_H_ +#define _MIC_FOPS_H_ + +int mic_open(struct inode *inode, struct file *filp); +int mic_release(struct inode *inode, struct file *filp); +ssize_t mic_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos); +long mic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); +int mic_mmap(struct file *f, struct vm_area_struct *vma); +unsigned int mic_poll(struct file *f, poll_table *wait); + +#endif diff --git a/kernel/drivers/misc/mic/host/mic_intr.c b/kernel/drivers/misc/mic/host/mic_intr.c new file mode 100644 index 000000000..b4ca6c884 --- /dev/null +++ b/kernel/drivers/misc/mic/host/mic_intr.c @@ -0,0 +1,651 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. 
+ * + */ +#include <linux/pci.h> +#include <linux/interrupt.h> + +#include "../common/mic_dev.h" +#include "mic_device.h" + +static irqreturn_t mic_thread_fn(int irq, void *dev) +{ + struct mic_device *mdev = dev; + struct mic_intr_info *intr_info = mdev->intr_info; + struct mic_irq_info *irq_info = &mdev->irq_info; + struct mic_intr_cb *intr_cb; + struct pci_dev *pdev = container_of(mdev->sdev->parent, + struct pci_dev, dev); + int i; + + spin_lock(&irq_info->mic_thread_lock); + for (i = intr_info->intr_start_idx[MIC_INTR_DB]; + i < intr_info->intr_len[MIC_INTR_DB]; i++) + if (test_and_clear_bit(i, &irq_info->mask)) { + list_for_each_entry(intr_cb, &irq_info->cb_list[i], + list) + if (intr_cb->thread_fn) + intr_cb->thread_fn(pdev->irq, + intr_cb->data); + } + spin_unlock(&irq_info->mic_thread_lock); + return IRQ_HANDLED; +} +/** + * mic_interrupt - Generic interrupt handler for + * MSI and INTx based interrupts. + */ +static irqreturn_t mic_interrupt(int irq, void *dev) +{ + struct mic_device *mdev = dev; + struct mic_intr_info *intr_info = mdev->intr_info; + struct mic_irq_info *irq_info = &mdev->irq_info; + struct mic_intr_cb *intr_cb; + struct pci_dev *pdev = container_of(mdev->sdev->parent, + struct pci_dev, dev); + u32 mask; + int i; + + mask = mdev->ops->ack_interrupt(mdev); + if (!mask) + return IRQ_NONE; + + spin_lock(&irq_info->mic_intr_lock); + for (i = intr_info->intr_start_idx[MIC_INTR_DB]; + i < intr_info->intr_len[MIC_INTR_DB]; i++) + if (mask & BIT(i)) { + list_for_each_entry(intr_cb, &irq_info->cb_list[i], + list) + if (intr_cb->handler) + intr_cb->handler(pdev->irq, + intr_cb->data); + set_bit(i, &irq_info->mask); + } + spin_unlock(&irq_info->mic_intr_lock); + return IRQ_WAKE_THREAD; +} + +/* Return the interrupt offset from the index. Index is 0 based. */ +static u16 mic_map_src_to_offset(struct mic_device *mdev, + int intr_src, enum mic_intr_type type) +{ + if (type >= MIC_NUM_INTR_TYPES) + return MIC_NUM_OFFSETS; + if (intr_src >= mdev->intr_info->intr_len[type]) + return MIC_NUM_OFFSETS; + + return mdev->intr_info->intr_start_idx[type] + intr_src; +} + +/* Return next available msix_entry. */ +static struct msix_entry *mic_get_available_vector(struct mic_device *mdev) +{ + int i; + struct mic_irq_info *info = &mdev->irq_info; + + for (i = 0; i < info->num_vectors; i++) + if (!info->mic_msi_map[i]) + return &info->msix_entries[i]; + return NULL; +} + +/** + * mic_register_intr_callback - Register a callback handler for the + * given source id. + * + * @mdev: pointer to the mic_device instance + * @idx: The source id to be registered. + * @handler: The function to be called when the source id receives + * the interrupt. + * @thread_fn: thread fn. corresponding to the handler + * @data: Private data of the requester. + * Return the callback structure that was registered or an + * appropriate error on failure. 
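+ *
+ * Callers check the result with IS_ERR(), as the single-vector path in
+ * mic_request_threaded_irq() below does:
+ *
+ *	intr_cb = mic_register_intr_callback(mdev, offset, handler,
+ *					     thread_fn, data);
+ *	if (IS_ERR(intr_cb))
+ *		return PTR_ERR(intr_cb);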
+ */ +static struct mic_intr_cb *mic_register_intr_callback(struct mic_device *mdev, + u8 idx, irq_handler_t handler, irq_handler_t thread_fn, + void *data) +{ + struct mic_intr_cb *intr_cb; + unsigned long flags; + int rc; + intr_cb = kmalloc(sizeof(*intr_cb), GFP_KERNEL); + + if (!intr_cb) + return ERR_PTR(-ENOMEM); + + intr_cb->handler = handler; + intr_cb->thread_fn = thread_fn; + intr_cb->data = data; + intr_cb->cb_id = ida_simple_get(&mdev->irq_info.cb_ida, + 0, 0, GFP_KERNEL); + if (intr_cb->cb_id < 0) { + rc = intr_cb->cb_id; + goto ida_fail; + } + + spin_lock(&mdev->irq_info.mic_thread_lock); + spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags); + list_add_tail(&intr_cb->list, &mdev->irq_info.cb_list[idx]); + spin_unlock_irqrestore(&mdev->irq_info.mic_intr_lock, flags); + spin_unlock(&mdev->irq_info.mic_thread_lock); + + return intr_cb; +ida_fail: + kfree(intr_cb); + return ERR_PTR(rc); +} + +/** + * mic_unregister_intr_callback - Unregister the callback handler + * identified by its callback id. + * + * @mdev: pointer to the mic_device instance + * @idx: The callback structure id to be unregistered. + * Return the source id that was unregistered or MIC_NUM_OFFSETS if no + * such callback handler was found. + */ +static u8 mic_unregister_intr_callback(struct mic_device *mdev, u32 idx) +{ + struct list_head *pos, *tmp; + struct mic_intr_cb *intr_cb; + unsigned long flags; + int i; + + spin_lock(&mdev->irq_info.mic_thread_lock); + spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags); + for (i = 0; i < MIC_NUM_OFFSETS; i++) { + list_for_each_safe(pos, tmp, &mdev->irq_info.cb_list[i]) { + intr_cb = list_entry(pos, struct mic_intr_cb, list); + if (intr_cb->cb_id == idx) { + list_del(pos); + ida_simple_remove(&mdev->irq_info.cb_ida, + intr_cb->cb_id); + kfree(intr_cb); + spin_unlock_irqrestore( + &mdev->irq_info.mic_intr_lock, flags); + spin_unlock(&mdev->irq_info.mic_thread_lock); + return i; + } + } + } + spin_unlock_irqrestore(&mdev->irq_info.mic_intr_lock, flags); + spin_unlock(&mdev->irq_info.mic_thread_lock); + return MIC_NUM_OFFSETS; +} + +/** + * mic_setup_msix - Initializes MSIx interrupts. + * + * @mdev: pointer to mic_device instance + * + * + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. + */ +static int mic_setup_msix(struct mic_device *mdev, struct pci_dev *pdev) +{ + int rc, i; + int entry_size = sizeof(*mdev->irq_info.msix_entries); + + mdev->irq_info.msix_entries = kmalloc_array(MIC_MIN_MSIX, + entry_size, GFP_KERNEL); + if (!mdev->irq_info.msix_entries) { + rc = -ENOMEM; + goto err_nomem1; + } + + for (i = 0; i < MIC_MIN_MSIX; i++) + mdev->irq_info.msix_entries[i].entry = i; + + rc = pci_enable_msix_exact(pdev, mdev->irq_info.msix_entries, + MIC_MIN_MSIX); + if (rc) { + dev_dbg(&pdev->dev, "Error enabling MSIx. rc = %d\n", rc); + goto err_enable_msix; + } + + mdev->irq_info.num_vectors = MIC_MIN_MSIX; + mdev->irq_info.mic_msi_map = kzalloc((sizeof(u32) * + mdev->irq_info.num_vectors), GFP_KERNEL); + + if (!mdev->irq_info.mic_msi_map) { + rc = -ENOMEM; + goto err_nomem2; + } + + dev_dbg(mdev->sdev->parent, + "%d MSIx irqs setup\n", mdev->irq_info.num_vectors); + return 0; +err_nomem2: + pci_disable_msix(pdev); +err_enable_msix: + kfree(mdev->irq_info.msix_entries); +err_nomem1: + mdev->irq_info.num_vectors = 0; + return rc; +} + +/** + * mic_setup_callbacks - Initialize data structures needed + * to handle callbacks. 
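+ * Called from mic_setup_msi() and mic_setup_intx() below before the irq
+ * is requested; the MSI-x path dispatches per vector and does not use
+ * these callback lists.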
+ *
+ * @mdev: pointer to mic_device instance
+ */
+static int mic_setup_callbacks(struct mic_device *mdev)
+{
+	int i;
+
+	mdev->irq_info.cb_list = kmalloc_array(MIC_NUM_OFFSETS,
+					       sizeof(*mdev->irq_info.cb_list),
+					       GFP_KERNEL);
+	if (!mdev->irq_info.cb_list)
+		return -ENOMEM;
+
+	for (i = 0; i < MIC_NUM_OFFSETS; i++)
+		INIT_LIST_HEAD(&mdev->irq_info.cb_list[i]);
+	ida_init(&mdev->irq_info.cb_ida);
+	spin_lock_init(&mdev->irq_info.mic_intr_lock);
+	spin_lock_init(&mdev->irq_info.mic_thread_lock);
+	return 0;
+}
+
+/**
+ * mic_release_callbacks - Uninitialize data structures needed
+ * to handle callbacks.
+ *
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_release_callbacks(struct mic_device *mdev)
+{
+	unsigned long flags;
+	struct list_head *pos, *tmp;
+	struct mic_intr_cb *intr_cb;
+	int i;
+
+	spin_lock(&mdev->irq_info.mic_thread_lock);
+	spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags);
+	for (i = 0; i < MIC_NUM_OFFSETS; i++) {
+
+		if (list_empty(&mdev->irq_info.cb_list[i]))
+			break;
+
+		list_for_each_safe(pos, tmp, &mdev->irq_info.cb_list[i]) {
+			intr_cb = list_entry(pos, struct mic_intr_cb, list);
+			list_del(pos);
+			ida_simple_remove(&mdev->irq_info.cb_ida,
+					  intr_cb->cb_id);
+			kfree(intr_cb);
+		}
+	}
+	spin_unlock_irqrestore(&mdev->irq_info.mic_intr_lock, flags);
+	spin_unlock(&mdev->irq_info.mic_thread_lock);
+	ida_destroy(&mdev->irq_info.cb_ida);
+	kfree(mdev->irq_info.cb_list);
+}
+
+/**
+ * mic_setup_msi - Initializes MSI interrupts.
+ *
+ * @mdev: pointer to mic_device instance
+ * @pdev: PCI device structure
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int mic_setup_msi(struct mic_device *mdev, struct pci_dev *pdev)
+{
+	int rc;
+
+	rc = pci_enable_msi(pdev);
+	if (rc) {
+		dev_dbg(&pdev->dev, "Error enabling MSI. rc = %d\n", rc);
+		return rc;
+	}
+
+	mdev->irq_info.num_vectors = 1;
+	mdev->irq_info.mic_msi_map = kzalloc((sizeof(u32) *
+		mdev->irq_info.num_vectors), GFP_KERNEL);
+
+	if (!mdev->irq_info.mic_msi_map) {
+		rc = -ENOMEM;
+		goto err_nomem1;
+	}
+
+	rc = mic_setup_callbacks(mdev);
+	if (rc) {
+		dev_err(&pdev->dev, "Error setting up callbacks\n");
+		goto err_nomem2;
+	}
+
+	rc = request_threaded_irq(pdev->irq, mic_interrupt, mic_thread_fn,
+				  0, "mic-msi", mdev);
+	if (rc) {
+		dev_err(&pdev->dev, "Error allocating MSI interrupt\n");
+		goto err_irq_req_fail;
+	}
+
+	dev_dbg(&pdev->dev, "%d MSI irqs setup\n", mdev->irq_info.num_vectors);
+	return 0;
+err_irq_req_fail:
+	mic_release_callbacks(mdev);
+err_nomem2:
+	kfree(mdev->irq_info.mic_msi_map);
+err_nomem1:
+	pci_disable_msi(pdev);
+	mdev->irq_info.num_vectors = 0;
+	return rc;
+}
+
+/**
+ * mic_setup_intx - Initializes legacy interrupts.
+ *
+ * @mdev: pointer to mic_device instance
+ * @pdev: PCI device structure
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int mic_setup_intx(struct mic_device *mdev, struct pci_dev *pdev)
+{
+	int rc;
+
+	/* Enable intx */
+	pci_intx(pdev, 1);
+	rc = mic_setup_callbacks(mdev);
+	if (rc) {
+		dev_err(&pdev->dev, "Error setting up callbacks\n");
+		goto err_nomem;
+	}
+
+	rc = request_threaded_irq(pdev->irq, mic_interrupt, mic_thread_fn,
+				  IRQF_SHARED, "mic-intx", mdev);
+	if (rc)
+		goto err;
+
+	dev_dbg(&pdev->dev, "intx irq setup\n");
+	return 0;
+err:
+	mic_release_callbacks(mdev);
+err_nomem:
+	return rc;
+}
+
+/**
+ * mic_next_db - Retrieve the next doorbell interrupt source id.
+ * The id is picked sequentially from the available pool of
+ * doorbell ids.
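+ * Note that the id wraps modulo intr_len[MIC_INTR_DB], so doorbells are
+ * handed out round-robin and may be reused once the pool is exhausted.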
+ *
+ * @mdev: pointer to the mic_device instance.
+ *
+ * Returns the next doorbell interrupt source.
+ */
+int mic_next_db(struct mic_device *mdev)
+{
+	int next_db;
+
+	next_db = mdev->irq_info.next_avail_src %
+		mdev->intr_info->intr_len[MIC_INTR_DB];
+	mdev->irq_info.next_avail_src++;
+	return next_db;
+}
+
+#define COOKIE_ID_SHIFT 16
+#define GET_ENTRY(cookie) ((cookie) & 0xFFFF)
+#define GET_OFFSET(cookie) ((cookie) >> COOKIE_ID_SHIFT)
+#define MK_COOKIE(x, y) ((x) | (y) << COOKIE_ID_SHIFT)
+
+/**
+ * mic_request_threaded_irq - request an irq. mic_mutex needs
+ * to be held before calling this function.
+ *
+ * @mdev: pointer to mic_device instance
+ * @handler: The callback function that handles the interrupt.
+ * The function needs to call ack_interrupts
+ * (mdev->ops->ack_interrupt(mdev)) when handling the interrupts.
+ * @thread_fn: thread fn required by request_threaded_irq.
+ * @name: The ASCII name of the callee requesting the irq.
+ * @data: private data that is returned back when calling the
+ * function handler.
+ * @intr_src: The source id of the requester. It's the doorbell id
+ * for Doorbell interrupts and DMA channel id for DMA interrupts.
+ * @type: The type of interrupt. Values defined in mic_intr_type
+ *
+ * returns: The cookie that is transparent to the caller. Passed
+ * back when calling mic_free_irq. An appropriate error code
+ * is returned on failure. Caller needs to use IS_ERR(return_val)
+ * to check for failure and PTR_ERR(return_val) to obtain the
+ * error code.
+ *
+ */
+struct mic_irq *
+mic_request_threaded_irq(struct mic_device *mdev,
+			 irq_handler_t handler, irq_handler_t thread_fn,
+			 const char *name, void *data, int intr_src,
+			 enum mic_intr_type type)
+{
+	u16 offset;
+	int rc = 0;
+	struct msix_entry *msix = NULL;
+	unsigned long cookie = 0;
+	u16 entry;
+	struct mic_intr_cb *intr_cb;
+	struct pci_dev *pdev = container_of(mdev->sdev->parent,
+		struct pci_dev, dev);
+
+	offset = mic_map_src_to_offset(mdev, intr_src, type);
+	if (offset >= MIC_NUM_OFFSETS) {
+		dev_err(mdev->sdev->parent,
+			"Error mapping index %d to a valid source id.\n",
+			intr_src);
+		rc = -EINVAL;
+		goto err;
+	}
+
+	if (mdev->irq_info.num_vectors > 1) {
+		msix = mic_get_available_vector(mdev);
+		if (!msix) {
+			dev_err(mdev->sdev->parent,
+				"No MSIx vectors available for use.\n");
+			rc = -ENOSPC;
+			goto err;
+		}
+
+		rc = request_threaded_irq(msix->vector, handler, thread_fn,
+					  0, name, data);
+		if (rc) {
+			dev_dbg(mdev->sdev->parent,
+				"request irq failed rc = %d\n", rc);
+			goto err;
+		}
+		entry = msix->entry;
+		mdev->irq_info.mic_msi_map[entry] |= BIT(offset);
+		mdev->intr_ops->program_msi_to_src_map(mdev,
+			entry, offset, true);
+		cookie = MK_COOKIE(entry, offset);
+		dev_dbg(mdev->sdev->parent, "irq: %d assigned for src: %d\n",
+			msix->vector, intr_src);
+	} else {
+		intr_cb = mic_register_intr_callback(mdev, offset, handler,
+						     thread_fn, data);
+		if (IS_ERR(intr_cb)) {
+			dev_err(mdev->sdev->parent,
+				"No available callback entries for use\n");
+			rc = PTR_ERR(intr_cb);
+			goto err;
+		}
+
+		entry = 0;
+		if (pci_dev_msi_enabled(pdev)) {
+			mdev->irq_info.mic_msi_map[entry] |= (1 << offset);
+			mdev->intr_ops->program_msi_to_src_map(mdev,
+				entry, offset, true);
+		}
+		cookie = MK_COOKIE(entry, intr_cb->cb_id);
+		dev_dbg(mdev->sdev->parent, "callback %d registered for src: %d\n",
+			intr_cb->cb_id, intr_src);
+	}
+	return (struct mic_irq *)cookie;
+err:
+	return ERR_PTR(rc);
+}
+
+/**
+ * mic_free_irq - free irq. mic_mutex
+ * needs to be held before calling this function.
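+ * The cookie passed in encodes the MSI-x entry and the source offset
+ * (or callback id) via MK_COOKIE() above; teardown is simply:
+ *
+ *	mic_free_irq(mdev, cookie, data);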
+ * + * @mdev: pointer to mic_device instance + * @cookie: cookie obtained during a successful call to mic_request_threaded_irq + * @data: private data specified by the calling function during the + * mic_request_threaded_irq + * + * returns: none. + */ +void mic_free_irq(struct mic_device *mdev, + struct mic_irq *cookie, void *data) +{ + u32 offset; + u32 entry; + u8 src_id; + unsigned int irq; + struct pci_dev *pdev = container_of(mdev->sdev->parent, + struct pci_dev, dev); + + entry = GET_ENTRY((unsigned long)cookie); + offset = GET_OFFSET((unsigned long)cookie); + if (mdev->irq_info.num_vectors > 1) { + if (entry >= mdev->irq_info.num_vectors) { + dev_warn(mdev->sdev->parent, + "entry %d should be < num_irq %d\n", + entry, mdev->irq_info.num_vectors); + return; + } + irq = mdev->irq_info.msix_entries[entry].vector; + free_irq(irq, data); + mdev->irq_info.mic_msi_map[entry] &= ~(BIT(offset)); + mdev->intr_ops->program_msi_to_src_map(mdev, + entry, offset, false); + + dev_dbg(mdev->sdev->parent, "irq: %d freed\n", irq); + } else { + irq = pdev->irq; + src_id = mic_unregister_intr_callback(mdev, offset); + if (src_id >= MIC_NUM_OFFSETS) { + dev_warn(mdev->sdev->parent, "Error unregistering callback\n"); + return; + } + if (pci_dev_msi_enabled(pdev)) { + mdev->irq_info.mic_msi_map[entry] &= ~(BIT(src_id)); + mdev->intr_ops->program_msi_to_src_map(mdev, + entry, src_id, false); + } + dev_dbg(mdev->sdev->parent, "callback %d unregistered for src: %d\n", + offset, src_id); + } +} + +/** + * mic_setup_interrupts - Initializes interrupts. + * + * @mdev: pointer to mic_device instance + * @pdev: PCI device structure + * + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. + */ +int mic_setup_interrupts(struct mic_device *mdev, struct pci_dev *pdev) +{ + int rc; + + rc = mic_setup_msix(mdev, pdev); + if (!rc) + goto done; + + rc = mic_setup_msi(mdev, pdev); + if (!rc) + goto done; + + rc = mic_setup_intx(mdev, pdev); + if (rc) { + dev_err(mdev->sdev->parent, "no usable interrupts\n"); + return rc; + } +done: + mdev->intr_ops->enable_interrupts(mdev); + return 0; +} + +/** + * mic_free_interrupts - Frees interrupts setup by mic_setup_interrupts + * + * @mdev: pointer to mic_device instance + * @pdev: PCI device structure + * + * returns none. + */ +void mic_free_interrupts(struct mic_device *mdev, struct pci_dev *pdev) +{ + int i; + + mdev->intr_ops->disable_interrupts(mdev); + if (mdev->irq_info.num_vectors > 1) { + for (i = 0; i < mdev->irq_info.num_vectors; i++) { + if (mdev->irq_info.mic_msi_map[i]) + dev_warn(&pdev->dev, "irq %d may still be in use.\n", + mdev->irq_info.msix_entries[i].vector); + } + kfree(mdev->irq_info.mic_msi_map); + kfree(mdev->irq_info.msix_entries); + pci_disable_msix(pdev); + } else { + if (pci_dev_msi_enabled(pdev)) { + free_irq(pdev->irq, mdev); + kfree(mdev->irq_info.mic_msi_map); + pci_disable_msi(pdev); + } else { + free_irq(pdev->irq, mdev); + } + mic_release_callbacks(mdev); + } +} + +/** + * mic_intr_restore - Restore MIC interrupt registers. + * + * @mdev: pointer to mic_device instance. + * + * Restore the interrupt registers to values previously + * stored in the SW data structures. mic_mutex needs to + * be held before calling this function. + * + * returns None. 
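+ *
+ * In this driver it is called from mic_start() (see mic_boot.c above),
+ * between mic_smpt_restore() and enabling interrupts.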
+ */
+void mic_intr_restore(struct mic_device *mdev)
+{
+	int entry, offset;
+	struct pci_dev *pdev = container_of(mdev->sdev->parent,
+		struct pci_dev, dev);
+
+	if (!pci_dev_msi_enabled(pdev))
+		return;
+
+	for (entry = 0; entry < mdev->irq_info.num_vectors; entry++) {
+		for (offset = 0; offset < MIC_NUM_OFFSETS; offset++) {
+			if (mdev->irq_info.mic_msi_map[entry] & BIT(offset))
+				mdev->intr_ops->program_msi_to_src_map(mdev,
+					entry, offset, true);
+		}
+	}
+} diff --git a/kernel/drivers/misc/mic/host/mic_intr.h b/kernel/drivers/misc/mic/host/mic_intr.h new file mode 100644 index 000000000..9f783d4ad --- /dev/null +++ b/kernel/drivers/misc/mic/host/mic_intr.h @@ -0,0 +1,148 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#ifndef _MIC_INTR_H_
+#define _MIC_INTR_H_
+
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+/*
+ * The minimum number of msix vectors required for normal operation.
+ * 3 for virtio network, console and block devices.
+ * 1 for card shutdown notifications.
+ * 4 for host owned DMA channels.
+ */
+#define MIC_MIN_MSIX 8
+#define MIC_NUM_OFFSETS 32
+
+/**
+ * enum mic_intr_type - The type of source that will generate
+ * the interrupt. The number of types needs to be in sync with
+ * MIC_NUM_INTR_TYPES
+ *
+ * MIC_INTR_DB: The source is a doorbell
+ * MIC_INTR_DMA: The source is a DMA channel
+ * MIC_INTR_ERR: The source is an error interrupt e.g. SBOX ERR
+ * MIC_NUM_INTR_TYPES: Total number of interrupt sources.
+ */
+enum mic_intr_type {
+	MIC_INTR_DB = 0,
+	MIC_INTR_DMA,
+	MIC_INTR_ERR,
+	MIC_NUM_INTR_TYPES
+};
+
+/**
+ * struct mic_intr_info - Contains h/w specific interrupt sources
+ * information.
+ *
+ * @intr_start_idx: Contains the starting indexes of the
+ * interrupt types.
+ * @intr_len: Contains the length of the interrupt types.
+ */
+struct mic_intr_info {
+	u16 intr_start_idx[MIC_NUM_INTR_TYPES];
+	u16 intr_len[MIC_NUM_INTR_TYPES];
+};
+
+/**
+ * struct mic_irq_info - OS specific irq information
+ *
+ * @next_avail_src: next available doorbell that can be assigned.
+ * @msix_entries: msix entries allocated while setting up MSI-x
+ * @mic_msi_map: The MSI/MSI-x mapping information.
+ * @num_vectors: The number of MSI/MSI-x vectors that have been allocated.
+ * @cb_ida: callback ID allocator to track the callbacks registered.
+ * @mic_intr_lock: spinlock to protect the interrupt callback list.
+ * @mic_thread_lock: spinlock to protect the thread callback list.
+ * This lock is used to protect against thread_fn while
+ * mic_intr_lock is used to protect against interrupt handler.
+ * @cb_list: Array of callback lists one for each source.
+ * @mask: Mask used by the main thread fn to call the underlying thread fns.
+ */ +struct mic_irq_info { + int next_avail_src; + struct msix_entry *msix_entries; + u32 *mic_msi_map; + u16 num_vectors; + struct ida cb_ida; + spinlock_t mic_intr_lock; + spinlock_t mic_thread_lock; + struct list_head *cb_list; + unsigned long mask; +}; + +/** + * struct mic_intr_cb - Interrupt callback structure. + * + * @handler: The callback function + * @thread_fn: The thread_fn. + * @data: Private data of the requester. + * @cb_id: The callback id. Identifies this callback. + * @list: list head pointing to the next callback structure. + */ +struct mic_intr_cb { + irq_handler_t handler; + irq_handler_t thread_fn; + void *data; + int cb_id; + struct list_head list; +}; + +/** + * struct mic_irq - opaque pointer used as cookie + */ +struct mic_irq; + +/* Forward declaration */ +struct mic_device; + +/** + * struct mic_hw_intr_ops: MIC HW specific interrupt operations + * @intr_init: Initialize H/W specific interrupt information. + * @enable_interrupts: Enable interrupts from the hardware. + * @disable_interrupts: Disable interrupts from the hardware. + * @program_msi_to_src_map: Update MSI mapping registers with + * irq information. + * @read_msi_to_src_map: Read MSI mapping registers containing + * irq information. + */ +struct mic_hw_intr_ops { + void (*intr_init)(struct mic_device *mdev); + void (*enable_interrupts)(struct mic_device *mdev); + void (*disable_interrupts)(struct mic_device *mdev); + void (*program_msi_to_src_map) (struct mic_device *mdev, + int idx, int intr_src, bool set); + u32 (*read_msi_to_src_map) (struct mic_device *mdev, + int idx); +}; + +int mic_next_db(struct mic_device *mdev); +struct mic_irq * +mic_request_threaded_irq(struct mic_device *mdev, + irq_handler_t handler, irq_handler_t thread_fn, + const char *name, void *data, int intr_src, + enum mic_intr_type type); +void mic_free_irq(struct mic_device *mdev, + struct mic_irq *cookie, void *data); +int mic_setup_interrupts(struct mic_device *mdev, struct pci_dev *pdev); +void mic_free_interrupts(struct mic_device *mdev, struct pci_dev *pdev); +void mic_intr_restore(struct mic_device *mdev); +#endif diff --git a/kernel/drivers/misc/mic/host/mic_main.c b/kernel/drivers/misc/mic/host/mic_main.c new file mode 100644 index 000000000..ab37a3117 --- /dev/null +++ b/kernel/drivers/misc/mic/host/mic_main.c @@ -0,0 +1,537 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. + * + * Global TODO's across the driver to be added after initial base + * patches are accepted upstream: + * 1) Enable DMA support. + * 2) Enable per vring interrupt support. 
+ */ +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/poll.h> +#include <linux/suspend.h> + +#include <linux/mic_common.h> +#include "../common/mic_dev.h" +#include "mic_device.h" +#include "mic_x100.h" +#include "mic_smpt.h" +#include "mic_fops.h" +#include "mic_virtio.h" + +static const char mic_driver_name[] = "mic"; + +static const struct pci_device_id mic_pci_tbl[] = { + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2250)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2251)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2252)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2253)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2254)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2255)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2256)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2257)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2258)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2259)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225a)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225b)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225c)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225d)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225e)}, + + /* required last entry */ + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, mic_pci_tbl); + +/* ID allocator for MIC devices */ +static struct ida g_mic_ida; +/* Class of MIC devices for sysfs accessibility. */ +static struct class *g_mic_class; +/* Base device node number for MIC devices */ +static dev_t g_mic_devno; + +static const struct file_operations mic_fops = { + .open = mic_open, + .release = mic_release, + .unlocked_ioctl = mic_ioctl, + .poll = mic_poll, + .mmap = mic_mmap, + .owner = THIS_MODULE, +}; + +/* Initialize the device page */ +static int mic_dp_init(struct mic_device *mdev) +{ + mdev->dp = kzalloc(MIC_DP_SIZE, GFP_KERNEL); + if (!mdev->dp) { + dev_err(mdev->sdev->parent, "%s %d err %d\n", + __func__, __LINE__, -ENOMEM); + return -ENOMEM; + } + + mdev->dp_dma_addr = mic_map_single(mdev, + mdev->dp, MIC_DP_SIZE); + if (mic_map_error(mdev->dp_dma_addr)) { + kfree(mdev->dp); + dev_err(mdev->sdev->parent, "%s %d err %d\n", + __func__, __LINE__, -ENOMEM); + return -ENOMEM; + } + mdev->ops->write_spad(mdev, MIC_DPLO_SPAD, mdev->dp_dma_addr); + mdev->ops->write_spad(mdev, MIC_DPHI_SPAD, mdev->dp_dma_addr >> 32); + return 0; +} + +/* Uninitialize the device page */ +static void mic_dp_uninit(struct mic_device *mdev) +{ + mic_unmap_single(mdev, mdev->dp_dma_addr, MIC_DP_SIZE); + kfree(mdev->dp); +} + +/** + * mic_shutdown_db - Shutdown doorbell interrupt handler. + */ +static irqreturn_t mic_shutdown_db(int irq, void *data) +{ + struct mic_device *mdev = data; + struct mic_bootparam *bootparam = mdev->dp; + + mdev->ops->intr_workarounds(mdev); + + switch (bootparam->shutdown_status) { + case MIC_HALTED: + case MIC_POWER_OFF: + case MIC_RESTART: + /* Fall through */ + case MIC_CRASHED: + schedule_work(&mdev->shutdown_work); + break; + default: + break; + } + return IRQ_HANDLED; +} + +/** + * mic_ops_init - Initialize HW specific operation tables. + * + * @mdev: pointer to mic_device instance + * + * returns none.
+ */ +static void mic_ops_init(struct mic_device *mdev) +{ + switch (mdev->family) { + case MIC_FAMILY_X100: + mdev->ops = &mic_x100_ops; + mdev->intr_ops = &mic_x100_intr_ops; + mdev->smpt_ops = &mic_x100_smpt_ops; + break; + default: + break; + } +} + +/** + * mic_get_family - Determine hardware family to which this MIC belongs. + * + * @pdev: The pci device structure + * + * returns family. + */ +static enum mic_hw_family mic_get_family(struct pci_dev *pdev) +{ + enum mic_hw_family family; + + switch (pdev->device) { + case MIC_X100_PCI_DEVICE_2250: + case MIC_X100_PCI_DEVICE_2251: + case MIC_X100_PCI_DEVICE_2252: + case MIC_X100_PCI_DEVICE_2253: + case MIC_X100_PCI_DEVICE_2254: + case MIC_X100_PCI_DEVICE_2255: + case MIC_X100_PCI_DEVICE_2256: + case MIC_X100_PCI_DEVICE_2257: + case MIC_X100_PCI_DEVICE_2258: + case MIC_X100_PCI_DEVICE_2259: + case MIC_X100_PCI_DEVICE_225a: + case MIC_X100_PCI_DEVICE_225b: + case MIC_X100_PCI_DEVICE_225c: + case MIC_X100_PCI_DEVICE_225d: + case MIC_X100_PCI_DEVICE_225e: + family = MIC_FAMILY_X100; + break; + default: + family = MIC_FAMILY_UNKNOWN; + break; + } + return family; +} + +/** + * mic_pm_notifier - Notifier callback function that handles + * PM notifications. + * + * @notifier: The notifier structure. + * @pm_event: The event for which the driver was notified. + * @unused: Meaningless. Always NULL. + * + * returns NOTIFY_DONE + */ +static int mic_pm_notifier(struct notifier_block *notifier, + unsigned long pm_event, void *unused) +{ + struct mic_device *mdev = container_of(notifier, + struct mic_device, pm_notifier); + + switch (pm_event) { + case PM_HIBERNATION_PREPARE: + /* Fall through */ + case PM_SUSPEND_PREPARE: + mic_prepare_suspend(mdev); + break; + case PM_POST_HIBERNATION: + /* Fall through */ + case PM_POST_SUSPEND: + /* Fall through */ + case PM_POST_RESTORE: + mic_complete_resume(mdev); + break; + case PM_RESTORE_PREPARE: + break; + default: + break; + } + return NOTIFY_DONE; +} + +/** + * mic_device_init - Allocates and initializes the MIC device structure + * + * @mdev: pointer to mic_device instance + * @pdev: The pci device structure + * + * returns 0 on success, < 0 on failure. + */ +static int +mic_device_init(struct mic_device *mdev, struct pci_dev *pdev) +{ + int rc; + + mdev->family = mic_get_family(pdev); + mdev->stepping = pdev->revision; + mic_ops_init(mdev); + mic_sysfs_init(mdev); + mutex_init(&mdev->mic_mutex); + mdev->irq_info.next_avail_src = 0; + INIT_WORK(&mdev->reset_trigger_work, mic_reset_trigger_work); + INIT_WORK(&mdev->shutdown_work, mic_shutdown_work); + init_completion(&mdev->reset_wait); + INIT_LIST_HEAD(&mdev->vdev_list); + mdev->pm_notifier.notifier_call = mic_pm_notifier; + rc = register_pm_notifier(&mdev->pm_notifier); + if (rc) { + dev_err(&pdev->dev, "register_pm_notifier failed rc %d\n", + rc); + goto register_pm_notifier_fail; + } + return 0; +register_pm_notifier_fail: + flush_work(&mdev->shutdown_work); + flush_work(&mdev->reset_trigger_work); + return rc; +} + +/** + * mic_device_uninit - Frees resources allocated during mic_device_init(..)
+ * + * @mdev: pointer to mic_device instance + * + * returns none + */ +static void mic_device_uninit(struct mic_device *mdev) +{ + /* The cmdline sysfs entry might have allocated cmdline */ + kfree(mdev->cmdline); + kfree(mdev->firmware); + kfree(mdev->ramdisk); + kfree(mdev->bootmode); + flush_work(&mdev->reset_trigger_work); + flush_work(&mdev->shutdown_work); + unregister_pm_notifier(&mdev->pm_notifier); +} + +/** + * mic_probe - Device Initialization Routine + * + * @pdev: PCI device structure + * @ent: entry in mic_pci_tbl + * + * returns 0 on success, < 0 on failure. + */ +static int mic_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int rc; + struct mic_device *mdev; + + mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); + if (!mdev) { + rc = -ENOMEM; + dev_err(&pdev->dev, "mdev kmalloc failed rc %d\n", rc); + goto mdev_alloc_fail; + } + mdev->id = ida_simple_get(&g_mic_ida, 0, MIC_MAX_NUM_DEVS, GFP_KERNEL); + if (mdev->id < 0) { + rc = mdev->id; + dev_err(&pdev->dev, "ida_simple_get failed rc %d\n", rc); + goto ida_fail; + } + + rc = mic_device_init(mdev, pdev); + if (rc) { + dev_err(&pdev->dev, "mic_device_init failed rc %d\n", rc); + goto device_init_fail; + } + + rc = pci_enable_device(pdev); + if (rc) { + dev_err(&pdev->dev, "failed to enable pci device.\n"); + goto uninit_device; + } + + pci_set_master(pdev); + + rc = pci_request_regions(pdev, mic_driver_name); + if (rc) { + dev_err(&pdev->dev, "failed to get pci regions.\n"); + goto disable_device; + } + + rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (rc) { + dev_err(&pdev->dev, "Cannot set DMA mask\n"); + goto release_regions; + } + + mdev->mmio.pa = pci_resource_start(pdev, mdev->ops->mmio_bar); + mdev->mmio.len = pci_resource_len(pdev, mdev->ops->mmio_bar); + mdev->mmio.va = pci_ioremap_bar(pdev, mdev->ops->mmio_bar); + if (!mdev->mmio.va) { + dev_err(&pdev->dev, "Cannot remap MMIO BAR\n"); + rc = -EIO; + goto release_regions; + } + + mdev->aper.pa = pci_resource_start(pdev, mdev->ops->aper_bar); + mdev->aper.len = pci_resource_len(pdev, mdev->ops->aper_bar); + mdev->aper.va = ioremap_wc(mdev->aper.pa, mdev->aper.len); + if (!mdev->aper.va) { + dev_err(&pdev->dev, "Cannot remap Aperture BAR\n"); + rc = -EIO; + goto unmap_mmio; + } + + mdev->intr_ops->intr_init(mdev); + rc = mic_setup_interrupts(mdev, pdev); + if (rc) { + dev_err(&pdev->dev, "mic_setup_interrupts failed %d\n", rc); + goto unmap_aper; + } + rc = mic_smpt_init(mdev); + if (rc) { + dev_err(&pdev->dev, "smpt_init failed %d\n", rc); + goto free_interrupts; + } + + pci_set_drvdata(pdev, mdev); + + mdev->sdev = device_create_with_groups(g_mic_class, &pdev->dev, + MKDEV(MAJOR(g_mic_devno), mdev->id), NULL, + mdev->attr_group, "mic%d", mdev->id); + if (IS_ERR(mdev->sdev)) { + rc = PTR_ERR(mdev->sdev); + dev_err(&pdev->dev, + "device_create_with_groups failed rc %d\n", rc); + goto smpt_uninit; + } + mdev->state_sysfs = sysfs_get_dirent(mdev->sdev->kobj.sd, "state"); + if (!mdev->state_sysfs) { + rc = -ENODEV; + dev_err(&pdev->dev, "sysfs_get_dirent failed rc %d\n", rc); + goto destroy_device; + } + + rc = mic_dp_init(mdev); + if (rc) { + dev_err(&pdev->dev, "mic_dp_init failed rc %d\n", rc); + goto sysfs_put; + } + mutex_lock(&mdev->mic_mutex); + + mdev->shutdown_db = mic_next_db(mdev); + mdev->shutdown_cookie = mic_request_threaded_irq(mdev, mic_shutdown_db, + NULL, "shutdown-interrupt", mdev, + mdev->shutdown_db, MIC_INTR_DB); + if (IS_ERR(mdev->shutdown_cookie)) { + rc = PTR_ERR(mdev->shutdown_cookie); + mutex_unlock(&mdev->mic_mutex); + goto 
dp_uninit; + } + mutex_unlock(&mdev->mic_mutex); + mic_bootparam_init(mdev); + + mic_create_debug_dir(mdev); + cdev_init(&mdev->cdev, &mic_fops); + mdev->cdev.owner = THIS_MODULE; + rc = cdev_add(&mdev->cdev, MKDEV(MAJOR(g_mic_devno), mdev->id), 1); + if (rc) { + dev_err(&pdev->dev, "cdev_add err id %d rc %d\n", mdev->id, rc); + goto cleanup_debug_dir; + } + return 0; +cleanup_debug_dir: + mic_delete_debug_dir(mdev); + mutex_lock(&mdev->mic_mutex); + mic_free_irq(mdev, mdev->shutdown_cookie, mdev); + mutex_unlock(&mdev->mic_mutex); +dp_uninit: + mic_dp_uninit(mdev); +sysfs_put: + sysfs_put(mdev->state_sysfs); +destroy_device: + device_destroy(g_mic_class, MKDEV(MAJOR(g_mic_devno), mdev->id)); +smpt_uninit: + mic_smpt_uninit(mdev); +free_interrupts: + mic_free_interrupts(mdev, pdev); +unmap_aper: + iounmap(mdev->aper.va); +unmap_mmio: + iounmap(mdev->mmio.va); +release_regions: + pci_release_regions(pdev); +disable_device: + pci_disable_device(pdev); +uninit_device: + mic_device_uninit(mdev); +device_init_fail: + ida_simple_remove(&g_mic_ida, mdev->id); +ida_fail: + kfree(mdev); +mdev_alloc_fail: + dev_err(&pdev->dev, "Probe failed rc %d\n", rc); + return rc; +} + +/** + * mic_remove - Device Removal Routine + * mic_remove is called by the PCI subsystem to alert the driver + * that it should release a PCI device. + * + * @pdev: PCI device structure + */ +static void mic_remove(struct pci_dev *pdev) +{ + struct mic_device *mdev; + + mdev = pci_get_drvdata(pdev); + if (!mdev) + return; + + mic_stop(mdev, false); + cdev_del(&mdev->cdev); + mic_delete_debug_dir(mdev); + mutex_lock(&mdev->mic_mutex); + mic_free_irq(mdev, mdev->shutdown_cookie, mdev); + mutex_unlock(&mdev->mic_mutex); + flush_work(&mdev->shutdown_work); + mic_dp_uninit(mdev); + sysfs_put(mdev->state_sysfs); + device_destroy(g_mic_class, MKDEV(MAJOR(g_mic_devno), mdev->id)); + mic_smpt_uninit(mdev); + mic_free_interrupts(mdev, pdev); + iounmap(mdev->mmio.va); + iounmap(mdev->aper.va); + mic_device_uninit(mdev); + pci_release_regions(pdev); + pci_disable_device(pdev); + ida_simple_remove(&g_mic_ida, mdev->id); + kfree(mdev); +} +static struct pci_driver mic_driver = { + .name = mic_driver_name, + .id_table = mic_pci_tbl, + .probe = mic_probe, + .remove = mic_remove +}; + +static int __init mic_init(void) +{ + int ret; + + ret = alloc_chrdev_region(&g_mic_devno, 0, + MIC_MAX_NUM_DEVS, mic_driver_name); + if (ret) { + pr_err("alloc_chrdev_region failed ret %d\n", ret); + goto error; + } + + g_mic_class = class_create(THIS_MODULE, mic_driver_name); + if (IS_ERR(g_mic_class)) { + ret = PTR_ERR(g_mic_class); + pr_err("class_create failed ret %d\n", ret); + goto cleanup_chrdev; + } + + mic_init_debugfs(); + ida_init(&g_mic_ida); + ret = pci_register_driver(&mic_driver); + if (ret) { + pr_err("pci_register_driver failed ret %d\n", ret); + goto cleanup_debugfs; + } + return ret; +cleanup_debugfs: + mic_exit_debugfs(); + class_destroy(g_mic_class); +cleanup_chrdev: + unregister_chrdev_region(g_mic_devno, MIC_MAX_NUM_DEVS); +error: + return ret; +} + +static void __exit mic_exit(void) +{ + pci_unregister_driver(&mic_driver); + ida_destroy(&g_mic_ida); + mic_exit_debugfs(); + class_destroy(g_mic_class); + unregister_chrdev_region(g_mic_devno, MIC_MAX_NUM_DEVS); +} + +module_init(mic_init); +module_exit(mic_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) MIC X100 Host driver"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/misc/mic/host/mic_smpt.c b/kernel/drivers/misc/mic/host/mic_smpt.c new file mode 
100644 index 000000000..fae474c48 --- /dev/null +++ b/kernel/drivers/misc/mic/host/mic_smpt.c @@ -0,0 +1,442 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. + * + */ +#include <linux/pci.h> + +#include "../common/mic_dev.h" +#include "mic_device.h" +#include "mic_smpt.h" + +static inline u64 mic_system_page_mask(struct mic_device *mdev) +{ + return (1ULL << mdev->smpt->info.page_shift) - 1ULL; +} + +static inline u8 mic_sys_addr_to_smpt(struct mic_device *mdev, dma_addr_t pa) +{ + return (pa - mdev->smpt->info.base) >> mdev->smpt->info.page_shift; +} + +static inline u64 mic_smpt_to_pa(struct mic_device *mdev, u8 index) +{ + return mdev->smpt->info.base + (index * mdev->smpt->info.page_size); +} + +static inline u64 mic_smpt_offset(struct mic_device *mdev, dma_addr_t pa) +{ + return pa & mic_system_page_mask(mdev); +} + +static inline u64 mic_smpt_align_low(struct mic_device *mdev, dma_addr_t pa) +{ + return ALIGN(pa - mic_system_page_mask(mdev), + mdev->smpt->info.page_size); +} + +static inline u64 mic_smpt_align_high(struct mic_device *mdev, dma_addr_t pa) +{ + return ALIGN(pa, mdev->smpt->info.page_size); +} + +/* Total Cumulative system memory accessible by MIC across all SMPT entries */ +static inline u64 mic_max_system_memory(struct mic_device *mdev) +{ + return mdev->smpt->info.num_reg * mdev->smpt->info.page_size; +} + +/* Maximum system memory address accessible by MIC */ +static inline u64 mic_max_system_addr(struct mic_device *mdev) +{ + return mdev->smpt->info.base + mic_max_system_memory(mdev) - 1ULL; +} + +/* Check if the DMA address is a MIC system memory address */ +static inline bool +mic_is_system_addr(struct mic_device *mdev, dma_addr_t pa) +{ + return pa >= mdev->smpt->info.base && pa <= mic_max_system_addr(mdev); +} + +/* Populate an SMPT entry and update the reference counts. */ +static void mic_add_smpt_entry(int spt, s64 *ref, u64 addr, + int entries, struct mic_device *mdev) +{ + struct mic_smpt_info *smpt_info = mdev->smpt; + int i; + + for (i = spt; i < spt + entries; i++, + addr += smpt_info->info.page_size) { + if (!smpt_info->entry[i].ref_count && + (smpt_info->entry[i].dma_addr != addr)) { + mdev->smpt_ops->set(mdev, addr, i); + smpt_info->entry[i].dma_addr = addr; + } + smpt_info->entry[i].ref_count += ref[i - spt]; + } +} + +/* + * Find an available MIC address in MIC SMPT address space + * for a given DMA address and size. 
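+ * For example, with the 16G page granularity mentioned in mic_map() + * below, a two byte request that straddles a page boundary still needs + * two entries, while a page aligned request one page long needs exactly + * one (illustrative sizes).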
+ */ +static dma_addr_t mic_smpt_op(struct mic_device *mdev, u64 dma_addr, + int entries, s64 *ref, size_t size) +{ + int spt; + int ae = 0; + int i; + unsigned long flags; + dma_addr_t mic_addr = 0; + dma_addr_t addr = dma_addr; + struct mic_smpt_info *smpt_info = mdev->smpt; + + spin_lock_irqsave(&smpt_info->smpt_lock, flags); + + /* find existing entries */ + for (i = 0; i < smpt_info->info.num_reg; i++) { + if (smpt_info->entry[i].dma_addr == addr) { + ae++; + addr += smpt_info->info.page_size; + } else if (ae) /* cannot find contiguous entries */ + goto not_found; + + if (ae == entries) + goto found; + } + + /* find free entry */ + for (ae = 0, i = 0; i < smpt_info->info.num_reg; i++) { + ae = (smpt_info->entry[i].ref_count == 0) ? ae + 1 : 0; + if (ae == entries) + goto found; + } + +not_found: + spin_unlock_irqrestore(&smpt_info->smpt_lock, flags); + return mic_addr; + +found: + spt = i - entries + 1; + mic_addr = mic_smpt_to_pa(mdev, spt); + mic_add_smpt_entry(spt, ref, dma_addr, entries, mdev); + smpt_info->map_count++; + smpt_info->ref_count += (s64)size; + spin_unlock_irqrestore(&smpt_info->smpt_lock, flags); + return mic_addr; +} + +/* + * Returns the number of smpt entries needed for dma_addr to dma_addr + size. + * It also returns the reference count array for each of those entries + * and the starting smpt address. + */ +static int mic_get_smpt_ref_count(struct mic_device *mdev, dma_addr_t dma_addr, + size_t size, s64 *ref, u64 *smpt_start) +{ + u64 start = dma_addr; + u64 end = dma_addr + size; + int i = 0; + + while (start < end) { + ref[i++] = min(mic_smpt_align_high(mdev, start + 1), + end) - start; + start = mic_smpt_align_high(mdev, start + 1); + } + + if (smpt_start) + *smpt_start = mic_smpt_align_low(mdev, dma_addr); + + return i; +} + +/* + * mic_to_dma_addr - Converts a MIC address to a DMA address. + * + * @mdev: pointer to mic_device instance. + * @mic_addr: MIC address. + * + * returns a DMA address. + */ +static dma_addr_t +mic_to_dma_addr(struct mic_device *mdev, dma_addr_t mic_addr) +{ + struct mic_smpt_info *smpt_info = mdev->smpt; + int spt; + dma_addr_t dma_addr; + + if (!mic_is_system_addr(mdev, mic_addr)) { + dev_err(mdev->sdev->parent, + "mic_addr is invalid. mic_addr = 0x%llx\n", mic_addr); + return -EINVAL; + } + spt = mic_sys_addr_to_smpt(mdev, mic_addr); + dma_addr = smpt_info->entry[spt].dma_addr + + mic_smpt_offset(mdev, mic_addr); + return dma_addr; +} + +/** + * mic_map - Maps a DMA address to a MIC physical address. + * + * @mdev: pointer to mic_device instance. + * @dma_addr: DMA address. + * @size: Size of the region to be mapped. + * + * This API converts the DMA address provided to a DMA address understood + * by MIC. Caller should check for errors by calling mic_map_error(..). + * + * returns DMA address as required by MIC. + */ +dma_addr_t mic_map(struct mic_device *mdev, dma_addr_t dma_addr, size_t size) +{ + dma_addr_t mic_addr = 0; + int num_entries; + s64 *ref; + u64 smpt_start; + + if (!size || size > mic_max_system_memory(mdev)) + return mic_addr; + + ref = kmalloc(mdev->smpt->info.num_reg * sizeof(s64), GFP_KERNEL); + if (!ref) + return mic_addr; + + num_entries = mic_get_smpt_ref_count(mdev, dma_addr, size, + ref, &smpt_start); + + /* Set the smpt table appropriately and get 16G aligned mic address */ + mic_addr = mic_smpt_op(mdev, smpt_start, num_entries, ref, size); + + kfree(ref); + + /* + * If mic_addr is zero then it's an error case + * since mic_addr can never be zero.
+ * else generate mic_addr by adding the 16G offset in dma_addr + */ + if (!mic_addr && MIC_FAMILY_X100 == mdev->family) { + dev_err(mdev->sdev->parent, + "mic_map failed dma_addr 0x%llx size 0x%lx\n", + dma_addr, size); + return mic_addr; + } else { + return mic_addr + mic_smpt_offset(mdev, dma_addr); + } +} + +/** + * mic_unmap - Unmaps a MIC physical address. + * + * @mdev: pointer to mic_device instance. + * @mic_addr: MIC physical address. + * @size: Size of the region to be unmapped. + * + * This API unmaps the mappings created by mic_map(..). + * + * returns None. + */ +void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size) +{ + struct mic_smpt_info *smpt_info = mdev->smpt; + s64 *ref; + int num_smpt; + int spt; + int i; + unsigned long flags; + + if (!size) + return; + + if (!mic_is_system_addr(mdev, mic_addr)) { + dev_err(mdev->sdev->parent, + "invalid address: 0x%llx\n", mic_addr); + return; + } + + spt = mic_sys_addr_to_smpt(mdev, mic_addr); + ref = kmalloc(mdev->smpt->info.num_reg * sizeof(s64), GFP_KERNEL); + if (!ref) + return; + + /* Get number of smpt entries to be mapped, ref count array */ + num_smpt = mic_get_smpt_ref_count(mdev, mic_addr, size, ref, NULL); + + spin_lock_irqsave(&smpt_info->smpt_lock, flags); + smpt_info->unmap_count++; + smpt_info->ref_count -= (s64)size; + + for (i = spt; i < spt + num_smpt; i++) { + smpt_info->entry[i].ref_count -= ref[i - spt]; + if (smpt_info->entry[i].ref_count < 0) + dev_warn(mdev->sdev->parent, + "ref count for entry %d is negative\n", i); + } + spin_unlock_irqrestore(&smpt_info->smpt_lock, flags); + kfree(ref); +} + +/** + * mic_map_single - Maps a virtual address to a MIC physical address. + * + * @mdev: pointer to mic_device instance. + * @va: Kernel direct mapped virtual address. + * @size: Size of the region to be mapped. + * + * This API calls pci_map_single(..) for the direct mapped virtual address + * and then converts the DMA address provided to a DMA address understood + * by MIC. Caller should check for errors by calling mic_map_error(..). + * + * returns DMA address as required by MIC. + */ +dma_addr_t mic_map_single(struct mic_device *mdev, void *va, size_t size) +{ + dma_addr_t mic_addr = 0; + struct pci_dev *pdev = container_of(mdev->sdev->parent, + struct pci_dev, dev); + dma_addr_t dma_addr = + pci_map_single(pdev, va, size, PCI_DMA_BIDIRECTIONAL); + + if (!pci_dma_mapping_error(pdev, dma_addr)) { + mic_addr = mic_map(mdev, dma_addr, size); + if (!mic_addr) { + dev_err(mdev->sdev->parent, + "mic_map failed dma_addr 0x%llx size 0x%lx\n", + dma_addr, size); + pci_unmap_single(pdev, dma_addr, + size, PCI_DMA_BIDIRECTIONAL); + } + } + return mic_addr; +} + +/** + * mic_unmap_single - Unmaps a MIC physical address. + * + * @mdev: pointer to mic_device instance. + * @mic_addr: MIC physical address. + * @size: Size of the region to be unmapped. + * + * This API unmaps the mappings created by mic_map_single(..). + * + * returns None. + */ +void +mic_unmap_single(struct mic_device *mdev, dma_addr_t mic_addr, size_t size) +{ + struct pci_dev *pdev = container_of(mdev->sdev->parent, + struct pci_dev, dev); + dma_addr_t dma_addr = mic_to_dma_addr(mdev, mic_addr); + mic_unmap(mdev, mic_addr, size); + pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); +} + +/** + * mic_smpt_init - Initialize MIC System Memory Page Tables. + * + * @mdev: pointer to mic_device instance. + * + * returns 0 for success and -errno for error. 
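+ * Note that the table starts out identity style: entry i initially maps + * host DMA address i * page_size with a zero reference count, as set up + * in the loop below.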
+ */ +int mic_smpt_init(struct mic_device *mdev) +{ + int i, err = 0; + dma_addr_t dma_addr; + struct mic_smpt_info *smpt_info; + + mdev->smpt = kmalloc(sizeof(*mdev->smpt), GFP_KERNEL); + if (!mdev->smpt) + return -ENOMEM; + + smpt_info = mdev->smpt; + mdev->smpt_ops->init(mdev); + smpt_info->entry = kmalloc_array(smpt_info->info.num_reg, + sizeof(*smpt_info->entry), GFP_KERNEL); + if (!smpt_info->entry) { + err = -ENOMEM; + goto free_smpt; + } + spin_lock_init(&smpt_info->smpt_lock); + for (i = 0; i < smpt_info->info.num_reg; i++) { + dma_addr = i * smpt_info->info.page_size; + smpt_info->entry[i].dma_addr = dma_addr; + smpt_info->entry[i].ref_count = 0; + mdev->smpt_ops->set(mdev, dma_addr, i); + } + smpt_info->ref_count = 0; + smpt_info->map_count = 0; + smpt_info->unmap_count = 0; + return 0; +free_smpt: + kfree(smpt_info); + return err; +} + +/** + * mic_smpt_uninit - Uninitialize MIC System Memory Page Tables. + * + * @mdev: pointer to mic_device instance. + * + * returns None. + */ +void mic_smpt_uninit(struct mic_device *mdev) +{ + struct mic_smpt_info *smpt_info = mdev->smpt; + int i; + + dev_dbg(mdev->sdev->parent, + "nodeid %d SMPT ref count %lld map %lld unmap %lld\n", + mdev->id, smpt_info->ref_count, + smpt_info->map_count, smpt_info->unmap_count); + + for (i = 0; i < smpt_info->info.num_reg; i++) { + dev_dbg(mdev->sdev->parent, + "SMPT entry[%d] dma_addr = 0x%llx ref_count = %lld\n", + i, smpt_info->entry[i].dma_addr, + smpt_info->entry[i].ref_count); + if (smpt_info->entry[i].ref_count) + dev_warn(mdev->sdev->parent, + "ref count for entry %d is not zero\n", i); + } + kfree(smpt_info->entry); + kfree(smpt_info); +} + +/** + * mic_smpt_restore - Restore MIC System Memory Page Tables. + * + * @mdev: pointer to mic_device instance. + * + * Restore the SMPT registers to values previously stored in the + * SW data structures. Some MIC steppings lose register state + * across resets and this API should be called for performing + * a restore operation if required. + * + * returns None. + */ +void mic_smpt_restore(struct mic_device *mdev) +{ + int i; + dma_addr_t dma_addr; + + for (i = 0; i < mdev->smpt->info.num_reg; i++) { + dma_addr = mdev->smpt->entry[i].dma_addr; + mdev->smpt_ops->set(mdev, dma_addr, i); + } +} diff --git a/kernel/drivers/misc/mic/host/mic_smpt.h b/kernel/drivers/misc/mic/host/mic_smpt.h new file mode 100644 index 000000000..51970abfe --- /dev/null +++ b/kernel/drivers/misc/mic/host/mic_smpt.h @@ -0,0 +1,98 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. + * + */ +#ifndef MIC_SMPT_H +#define MIC_SMPT_H +/** + * struct mic_smpt_ops - MIC HW specific SMPT operations. + * @init: Initialize hardware specific SMPT information in mic_smpt_hw_info. + * @set: Set the value for a particular SMPT entry.
+ */ +struct mic_smpt_ops { + void (*init)(struct mic_device *mdev); + void (*set)(struct mic_device *mdev, dma_addr_t dma_addr, u8 index); +}; + +/** + * struct mic_smpt - MIC SMPT entry information. + * @dma_addr: Base DMA address for this SMPT entry. + * @ref_count: Number of active mappings for this SMPT entry in bytes. + */ +struct mic_smpt { + dma_addr_t dma_addr; + s64 ref_count; +}; + +/** + * struct mic_smpt_hw_info - MIC SMPT hardware specific information. + * @num_reg: Number of SMPT registers. + * @page_shift: System memory page shift. + * @page_size: System memory page size. + * @base: System address base. + */ +struct mic_smpt_hw_info { + u8 num_reg; + u8 page_shift; + u64 page_size; + u64 base; +}; + +/** + * struct mic_smpt_info - MIC SMPT information. + * @entry: Array of SMPT entries. + * @smpt_lock: Spin lock protecting access to SMPT data structures. + * @info: Hardware specific SMPT information. + * @ref_count: Number of active SMPT mappings (for debug). + * @map_count: Number of SMPT mappings created (for debug). + * @unmap_count: Number of SMPT mappings destroyed (for debug). + */ +struct mic_smpt_info { + struct mic_smpt *entry; + spinlock_t smpt_lock; + struct mic_smpt_hw_info info; + s64 ref_count; + s64 map_count; + s64 unmap_count; +}; + +dma_addr_t mic_map_single(struct mic_device *mdev, void *va, size_t size); +void mic_unmap_single(struct mic_device *mdev, + dma_addr_t mic_addr, size_t size); +dma_addr_t mic_map(struct mic_device *mdev, + dma_addr_t dma_addr, size_t size); +void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size); + +/** + * mic_map_error - Check a MIC address for errors. + * + * @mdev: pointer to mic_device instance. + * + * returns Whether there was an error during mic_map..(..) APIs. + */ +static inline bool mic_map_error(dma_addr_t mic_addr) +{ + return !mic_addr; +} + +int mic_smpt_init(struct mic_device *mdev); +void mic_smpt_uninit(struct mic_device *mdev); +void mic_smpt_restore(struct mic_device *mdev); + +#endif diff --git a/kernel/drivers/misc/mic/host/mic_sysfs.c b/kernel/drivers/misc/mic/host/mic_sysfs.c new file mode 100644 index 000000000..6dd864e4a --- /dev/null +++ b/kernel/drivers/misc/mic/host/mic_sysfs.c @@ -0,0 +1,459 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. + * + */ +#include <linux/pci.h> + +#include <linux/mic_common.h> +#include "../common/mic_dev.h" +#include "mic_device.h" + +/* + * A state-to-string lookup table, for exposing a human readable state + * via sysfs. Always keep in sync with enum mic_states + */ +static const char * const mic_state_string[] = { + [MIC_OFFLINE] = "offline", + [MIC_ONLINE] = "online", + [MIC_SHUTTING_DOWN] = "shutting_down", + [MIC_RESET_FAILED] = "reset_failed", + [MIC_SUSPENDING] = "suspending", + [MIC_SUSPENDED] = "suspended", +}; + +/* + * A shutdown-status-to-string lookup table, for exposing a human + * readable state via sysfs. 
Always keep in sync with enum mic_shutdown_status + */ +static const char * const mic_shutdown_status_string[] = { + [MIC_NOP] = "nop", + [MIC_CRASHED] = "crashed", + [MIC_HALTED] = "halted", + [MIC_POWER_OFF] = "poweroff", + [MIC_RESTART] = "restart", +}; + +void mic_set_shutdown_status(struct mic_device *mdev, u8 shutdown_status) +{ + dev_dbg(mdev->sdev->parent, "Shutdown Status %s -> %s\n", + mic_shutdown_status_string[mdev->shutdown_status], + mic_shutdown_status_string[shutdown_status]); + mdev->shutdown_status = shutdown_status; +} + +void mic_set_state(struct mic_device *mdev, u8 state) +{ + dev_dbg(mdev->sdev->parent, "State %s -> %s\n", + mic_state_string[mdev->state], + mic_state_string[state]); + mdev->state = state; + sysfs_notify_dirent(mdev->state_sysfs); +} + +static ssize_t +family_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + static const char x100[] = "x100"; + static const char unknown[] = "Unknown"; + const char *card = NULL; + struct mic_device *mdev = dev_get_drvdata(dev->parent); + + if (!mdev) + return -EINVAL; + + switch (mdev->family) { + case MIC_FAMILY_X100: + card = x100; + break; + default: + card = unknown; + break; + } + return scnprintf(buf, PAGE_SIZE, "%s\n", card); +} +static DEVICE_ATTR_RO(family); + +static ssize_t +stepping_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct mic_device *mdev = dev_get_drvdata(dev->parent); + char *string = "??"; + + if (!mdev) + return -EINVAL; + + switch (mdev->stepping) { + case MIC_A0_STEP: + string = "A0"; + break; + case MIC_B0_STEP: + string = "B0"; + break; + case MIC_B1_STEP: + string = "B1"; + break; + case MIC_C0_STEP: + string = "C0"; + break; + default: + break; + } + return scnprintf(buf, PAGE_SIZE, "%s\n", string); +} +static DEVICE_ATTR_RO(stepping); + +static ssize_t +state_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct mic_device *mdev = dev_get_drvdata(dev->parent); + + if (!mdev || mdev->state >= MIC_LAST) + return -EINVAL; + + return scnprintf(buf, PAGE_SIZE, "%s\n", + mic_state_string[mdev->state]); +} + +static ssize_t +state_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + int rc = 0; + struct mic_device *mdev = dev_get_drvdata(dev->parent); + if (!mdev) + return -EINVAL; + if (sysfs_streq(buf, "boot")) { + rc = mic_start(mdev, buf); + if (rc) { + dev_err(mdev->sdev->parent, + "mic_boot failed rc %d\n", rc); + count = rc; + } + goto done; + } + + if (sysfs_streq(buf, "reset")) { + schedule_work(&mdev->reset_trigger_work); + goto done; + } + + if (sysfs_streq(buf, "shutdown")) { + mic_shutdown(mdev); + goto done; + } + + if (sysfs_streq(buf, "suspend")) { + mic_suspend(mdev); + goto done; + } + + count = -EINVAL; +done: + return count; +} +static DEVICE_ATTR_RW(state); + +static ssize_t shutdown_status_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mic_device *mdev = dev_get_drvdata(dev->parent); + + if (!mdev || mdev->shutdown_status >= MIC_STATUS_LAST) + return -EINVAL; + + return scnprintf(buf, PAGE_SIZE, "%s\n", + mic_shutdown_status_string[mdev->shutdown_status]); +} +static DEVICE_ATTR_RO(shutdown_status); + +static ssize_t +cmdline_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct mic_device *mdev = dev_get_drvdata(dev->parent); + char *cmdline; + + if (!mdev) + return -EINVAL; + + cmdline = mdev->cmdline; + + if (cmdline) + return scnprintf(buf, PAGE_SIZE, "%s\n", cmdline); + return 0; +} + +static 
ssize_t +cmdline_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mic_device *mdev = dev_get_drvdata(dev->parent); + + if (!mdev) + return -EINVAL; + + mutex_lock(&mdev->mic_mutex); + kfree(mdev->cmdline); + + mdev->cmdline = kmalloc(count + 1, GFP_KERNEL); + if (!mdev->cmdline) { + count = -ENOMEM; + goto unlock; + } + + strncpy(mdev->cmdline, buf, count); + + if (mdev->cmdline[count - 1] == '\n') + mdev->cmdline[count - 1] = '\0'; + else + mdev->cmdline[count] = '\0'; +unlock: + mutex_unlock(&mdev->mic_mutex); + return count; +} +static DEVICE_ATTR_RW(cmdline); + +static ssize_t +firmware_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct mic_device *mdev = dev_get_drvdata(dev->parent); + char *firmware; + + if (!mdev) + return -EINVAL; + + firmware = mdev->firmware; + + if (firmware) + return scnprintf(buf, PAGE_SIZE, "%s\n", firmware); + return 0; +} + +static ssize_t +firmware_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mic_device *mdev = dev_get_drvdata(dev->parent); + + if (!mdev) + return -EINVAL; + + mutex_lock(&mdev->mic_mutex); + kfree(mdev->firmware); + + mdev->firmware = kmalloc(count + 1, GFP_KERNEL); + if (!mdev->firmware) { + count = -ENOMEM; + goto unlock; + } + strncpy(mdev->firmware, buf, count); + + if (mdev->firmware[count - 1] == '\n') + mdev->firmware[count - 1] = '\0'; + else + mdev->firmware[count] = '\0'; +unlock: + mutex_unlock(&mdev->mic_mutex); + return count; +} +static DEVICE_ATTR_RW(firmware); + +static ssize_t +ramdisk_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct mic_device *mdev = dev_get_drvdata(dev->parent); + char *ramdisk; + + if (!mdev) + return -EINVAL; + + ramdisk = mdev->ramdisk; + + if (ramdisk) + return scnprintf(buf, PAGE_SIZE, "%s\n", ramdisk); + return 0; +} + +static ssize_t +ramdisk_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mic_device *mdev = dev_get_drvdata(dev->parent); + + if (!mdev) + return -EINVAL; + + mutex_lock(&mdev->mic_mutex); + kfree(mdev->ramdisk); + + mdev->ramdisk = kmalloc(count + 1, GFP_KERNEL); + if (!mdev->ramdisk) { + count = -ENOMEM; + goto unlock; + } + + strncpy(mdev->ramdisk, buf, count); + + if (mdev->ramdisk[count - 1] == '\n') + mdev->ramdisk[count - 1] = '\0'; + else + mdev->ramdisk[count] = '\0'; +unlock: + mutex_unlock(&mdev->mic_mutex); + return count; +} +static DEVICE_ATTR_RW(ramdisk); + +static ssize_t +bootmode_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct mic_device *mdev = dev_get_drvdata(dev->parent); + char *bootmode; + + if (!mdev) + return -EINVAL; + + bootmode = mdev->bootmode; + + if (bootmode) + return scnprintf(buf, PAGE_SIZE, "%s\n", bootmode); + return 0; +} + +static ssize_t +bootmode_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mic_device *mdev = dev_get_drvdata(dev->parent); + + if (!mdev) + return -EINVAL; + + if (!sysfs_streq(buf, "linux") && !sysfs_streq(buf, "elf")) + return -EINVAL; + + mutex_lock(&mdev->mic_mutex); + kfree(mdev->bootmode); + + mdev->bootmode = kmalloc(count + 1, GFP_KERNEL); + if (!mdev->bootmode) { + count = -ENOMEM; + goto unlock; + } + + strncpy(mdev->bootmode, buf, count); + + if (mdev->bootmode[count - 1] == '\n') + mdev->bootmode[count - 1] = '\0'; + else + mdev->bootmode[count] = '\0'; +unlock: + mutex_unlock(&mdev->mic_mutex); + return count; +} 
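+/*
+ * The four string attributes above (cmdline, firmware, ramdisk and
+ * bootmode) repeat the same store pattern. A minimal sketch of how the
+ * pattern could be factored out, using kstrndup(); the helper name
+ * mic_store_string() is hypothetical and nothing below is wired up:
+ *
+ *	static ssize_t mic_store_string(struct mic_device *mdev,
+ *					char **field, const char *buf,
+ *					size_t count)
+ *	{
+ *		char *str = kstrndup(buf, count, GFP_KERNEL);
+ *		size_t len;
+ *
+ *		if (!str)
+ *			return -ENOMEM;
+ *		len = strlen(str);
+ *		if (len && str[len - 1] == '\n')
+ *			str[len - 1] = '\0';
+ *		mutex_lock(&mdev->mic_mutex);
+ *		kfree(*field);
+ *		*field = str;
+ *		mutex_unlock(&mdev->mic_mutex);
+ *		return count;
+ *	}
+ */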
+static DEVICE_ATTR_RW(bootmode); + +static ssize_t +log_buf_addr_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct mic_device *mdev = dev_get_drvdata(dev->parent); + + if (!mdev) + return -EINVAL; + + return scnprintf(buf, PAGE_SIZE, "%p\n", mdev->log_buf_addr); +} + +static ssize_t +log_buf_addr_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mic_device *mdev = dev_get_drvdata(dev->parent); + int ret; + unsigned long addr; + + if (!mdev) + return -EINVAL; + + ret = kstrtoul(buf, 16, &addr); + if (ret) + goto exit; + + mdev->log_buf_addr = (void *)addr; + ret = count; +exit: + return ret; +} +static DEVICE_ATTR_RW(log_buf_addr); + +static ssize_t +log_buf_len_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct mic_device *mdev = dev_get_drvdata(dev->parent); + + if (!mdev) + return -EINVAL; + + return scnprintf(buf, PAGE_SIZE, "%p\n", mdev->log_buf_len); +} + +static ssize_t +log_buf_len_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mic_device *mdev = dev_get_drvdata(dev->parent); + int ret; + unsigned long addr; + + if (!mdev) + return -EINVAL; + + ret = kstrtoul(buf, 16, &addr); + if (ret) + goto exit; + + mdev->log_buf_len = (int *)addr; + ret = count; +exit: + return ret; +} +static DEVICE_ATTR_RW(log_buf_len); + +static struct attribute *mic_default_attrs[] = { + &dev_attr_family.attr, + &dev_attr_stepping.attr, + &dev_attr_state.attr, + &dev_attr_shutdown_status.attr, + &dev_attr_cmdline.attr, + &dev_attr_firmware.attr, + &dev_attr_ramdisk.attr, + &dev_attr_bootmode.attr, + &dev_attr_log_buf_addr.attr, + &dev_attr_log_buf_len.attr, + + NULL +}; + +ATTRIBUTE_GROUPS(mic_default); + +void mic_sysfs_init(struct mic_device *mdev) +{ + mdev->attr_group = mic_default_groups; +} diff --git a/kernel/drivers/misc/mic/host/mic_virtio.c b/kernel/drivers/misc/mic/host/mic_virtio.c new file mode 100644 index 000000000..a020e4eb4 --- /dev/null +++ b/kernel/drivers/misc/mic/host/mic_virtio.c @@ -0,0 +1,812 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. + * + */ +#include <linux/pci.h> +#include <linux/sched.h> +#include <linux/uaccess.h> +#include <linux/dmaengine.h> +#include <linux/mic_common.h> + +#include "../common/mic_dev.h" +#include "mic_device.h" +#include "mic_smpt.h" +#include "mic_virtio.h" + +/* + * Size of the internal buffer used during DMA's as an intermediate buffer + * for copy to/from user. 
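+ * Transfers are staged through this buffer in chunks of at most + * MIC_INT_DMA_BUF_SIZE (a page aligned 64K) by the copy helpers below.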
+ */ +#define MIC_INT_DMA_BUF_SIZE PAGE_ALIGN(64 * 1024ULL) + +static int mic_sync_dma(struct mic_device *mdev, dma_addr_t dst, + dma_addr_t src, size_t len) +{ + int err = 0; + struct dma_async_tx_descriptor *tx; + struct dma_chan *mic_ch = mdev->dma_ch; + + if (!mic_ch) { + err = -EBUSY; + goto error; + } + + tx = mic_ch->device->device_prep_dma_memcpy(mic_ch, dst, src, len, + DMA_PREP_FENCE); + if (!tx) { + err = -ENOMEM; + goto error; + } else { + dma_cookie_t cookie = tx->tx_submit(tx); + + err = dma_submit_error(cookie); + if (err) + goto error; + err = dma_sync_wait(mic_ch, cookie); + } +error: + if (err) + dev_err(mdev->sdev->parent, "%s %d err %d\n", + __func__, __LINE__, err); + return err; +} + +/* + * Initiates the copies across the PCIe bus from card memory to a user + * space buffer. When transfers are done using DMA, source/destination + * addresses and transfer length must follow the alignment requirements of + * the MIC DMA engine. + */ +static int mic_virtio_copy_to_user(struct mic_vdev *mvdev, void __user *ubuf, + size_t len, u64 daddr, size_t dlen, + int vr_idx) +{ + struct mic_device *mdev = mvdev->mdev; + void __iomem *dbuf = mdev->aper.va + daddr; + struct mic_vringh *mvr = &mvdev->mvr[vr_idx]; + size_t dma_alignment = 1 << mdev->dma_ch->device->copy_align; + size_t dma_offset; + size_t partlen; + int err; + + dma_offset = daddr - round_down(daddr, dma_alignment); + daddr -= dma_offset; + len += dma_offset; + + while (len) { + partlen = min_t(size_t, len, MIC_INT_DMA_BUF_SIZE); + + err = mic_sync_dma(mdev, mvr->buf_da, daddr, + ALIGN(partlen, dma_alignment)); + if (err) + goto err; + + if (copy_to_user(ubuf, mvr->buf + dma_offset, + partlen - dma_offset)) { + err = -EFAULT; + goto err; + } + daddr += partlen; + ubuf += partlen; + dbuf += partlen; + mvdev->in_bytes_dma += partlen; + mvdev->in_bytes += partlen; + len -= partlen; + dma_offset = 0; + } + return 0; +err: + dev_err(mic_dev(mvdev), "%s %d err %d\n", __func__, __LINE__, err); + return err; +} + +/* + * Initiates copies across the PCIe bus from a user space buffer to card + * memory. When transfers are done using DMA, source/destination addresses + * and transfer length must follow the alignment requirements of the MIC + * DMA engine. + */ +static int mic_virtio_copy_from_user(struct mic_vdev *mvdev, void __user *ubuf, + size_t len, u64 daddr, size_t dlen, + int vr_idx) +{ + struct mic_device *mdev = mvdev->mdev; + void __iomem *dbuf = mdev->aper.va + daddr; + struct mic_vringh *mvr = &mvdev->mvr[vr_idx]; + size_t dma_alignment = 1 << mdev->dma_ch->device->copy_align; + size_t partlen; + int err; + + if (daddr & (dma_alignment - 1)) { + mvdev->tx_dst_unaligned += len; + goto memcpy; + } else if (ALIGN(len, dma_alignment) > dlen) { + mvdev->tx_len_unaligned += len; + goto memcpy; + } + + while (len) { + partlen = min_t(size_t, len, MIC_INT_DMA_BUF_SIZE); + + if (copy_from_user(mvr->buf, ubuf, partlen)) { + err = -EFAULT; + goto err; + } + err = mic_sync_dma(mdev, daddr, mvr->buf_da, + ALIGN(partlen, dma_alignment)); + if (err) + goto err; + daddr += partlen; + ubuf += partlen; + dbuf += partlen; + mvdev->out_bytes_dma += partlen; + mvdev->out_bytes += partlen; + len -= partlen; + } +memcpy: + /* + * We are copying to IO below and should ideally use something + * like copy_from_user_toio(..) if it existed. 
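+ * (The destination is the write combined aperture mapping created with + * ioremap_wc() in mic_probe(), hence the __force cast below.)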
+ */ + if (copy_from_user((void __force *)dbuf, ubuf, len)) { + err = -EFAULT; + goto err; + } + mvdev->out_bytes += len; + return 0; +err: + dev_err(mic_dev(mvdev), "%s %d err %d\n", __func__, __LINE__, err); + return err; +} + +#define MIC_VRINGH_READ true + +/* The function to call to notify the card about added buffers */ +static void mic_notify(struct vringh *vrh) +{ + struct mic_vringh *mvrh = container_of(vrh, struct mic_vringh, vrh); + struct mic_vdev *mvdev = mvrh->mvdev; + s8 db = mvdev->dc->h2c_vdev_db; + + if (db != -1) + mvdev->mdev->ops->send_intr(mvdev->mdev, db); +} + +/* Determine the total number of bytes consumed in a VRINGH KIOV */ +static inline u32 mic_vringh_iov_consumed(struct vringh_kiov *iov) +{ + int i; + u32 total = iov->consumed; + + for (i = 0; i < iov->i; i++) + total += iov->iov[i].iov_len; + return total; +} + +/* + * Traverse the VRINGH KIOV and issue the APIs to trigger the copies. + * This API is heavily based on the vringh_iov_xfer(..) implementation + * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..) + * and vringh_iov_push_kern(..) directly is because there is no + * way to override the VRINGH xfer(..) routines as of v3.10. + */ +static int mic_vringh_copy(struct mic_vdev *mvdev, struct vringh_kiov *iov, + void __user *ubuf, size_t len, bool read, int vr_idx, + size_t *out_len) +{ + int ret = 0; + size_t partlen, tot_len = 0; + + while (len && iov->i < iov->used) { + partlen = min(iov->iov[iov->i].iov_len, len); + if (read) + ret = mic_virtio_copy_to_user(mvdev, ubuf, partlen, + (u64)iov->iov[iov->i].iov_base, + iov->iov[iov->i].iov_len, + vr_idx); + else + ret = mic_virtio_copy_from_user(mvdev, ubuf, partlen, + (u64)iov->iov[iov->i].iov_base, + iov->iov[iov->i].iov_len, + vr_idx); + if (ret) { + dev_err(mic_dev(mvdev), "%s %d err %d\n", + __func__, __LINE__, ret); + break; + } + len -= partlen; + ubuf += partlen; + tot_len += partlen; + iov->consumed += partlen; + iov->iov[iov->i].iov_len -= partlen; + iov->iov[iov->i].iov_base += partlen; + if (!iov->iov[iov->i].iov_len) { + /* Fix up old iov element then increment. */ + iov->iov[iov->i].iov_len = iov->consumed; + iov->iov[iov->i].iov_base -= iov->consumed; + + iov->consumed = 0; + iov->i++; + } + } + *out_len = tot_len; + return ret; +} + +/* + * Use the standard VRINGH infrastructure in the kernel to fetch new + * descriptors, initiate the copies and update the used ring. + */ +static int _mic_virtio_copy(struct mic_vdev *mvdev, + struct mic_copy_desc *copy) +{ + int ret = 0; + u32 iovcnt = copy->iovcnt; + struct iovec iov; + struct iovec __user *u_iov = copy->iov; + void __user *ubuf = NULL; + struct mic_vringh *mvr = &mvdev->mvr[copy->vr_idx]; + struct vringh_kiov *riov = &mvr->riov; + struct vringh_kiov *wiov = &mvr->wiov; + struct vringh *vrh = &mvr->vrh; + u16 *head = &mvr->head; + struct mic_vring *vr = &mvr->vring; + size_t len = 0, out_len; + + copy->out_len = 0; + /* Fetch a new IOVEC if all previous elements have been processed */ + if (riov->i == riov->used && wiov->i == wiov->used) { + ret = vringh_getdesc_kern(vrh, riov, wiov, + head, GFP_KERNEL); + /* Check if there are available descriptors */ + if (ret <= 0) + return ret; + } + while (iovcnt) { + if (!len) { + /* Copy over a new iovec from user space. 
*/ + ret = copy_from_user(&iov, u_iov, sizeof(*u_iov)); + if (ret) { + ret = -EINVAL; + dev_err(mic_dev(mvdev), "%s %d err %d\n", + __func__, __LINE__, ret); + break; + } + len = iov.iov_len; + ubuf = iov.iov_base; + } + /* Issue all the read descriptors first */ + ret = mic_vringh_copy(mvdev, riov, ubuf, len, MIC_VRINGH_READ, + copy->vr_idx, &out_len); + if (ret) { + dev_err(mic_dev(mvdev), "%s %d err %d\n", + __func__, __LINE__, ret); + break; + } + len -= out_len; + ubuf += out_len; + copy->out_len += out_len; + /* Issue the write descriptors next */ + ret = mic_vringh_copy(mvdev, wiov, ubuf, len, !MIC_VRINGH_READ, + copy->vr_idx, &out_len); + if (ret) { + dev_err(mic_dev(mvdev), "%s %d err %d\n", + __func__, __LINE__, ret); + break; + } + len -= out_len; + ubuf += out_len; + copy->out_len += out_len; + if (!len) { + /* One user space iovec is now completed */ + iovcnt--; + u_iov++; + } + /* Exit loop if all elements in KIOVs have been processed. */ + if (riov->i == riov->used && wiov->i == wiov->used) + break; + } + /* + * Update the used ring if a descriptor was available and some data was + * copied in/out and the user asked for a used ring update. + */ + if (*head != USHRT_MAX && copy->out_len && copy->update_used) { + u32 total = 0; + + /* Determine the total data consumed */ + total += mic_vringh_iov_consumed(riov); + total += mic_vringh_iov_consumed(wiov); + vringh_complete_kern(vrh, *head, total); + *head = USHRT_MAX; + if (vringh_need_notify_kern(vrh) > 0) + vringh_notify(vrh); + vringh_kiov_cleanup(riov); + vringh_kiov_cleanup(wiov); + /* Update avail idx for user space */ + vr->info->avail_idx = vrh->last_avail_idx; + } + return ret; +} + +static inline int mic_verify_copy_args(struct mic_vdev *mvdev, + struct mic_copy_desc *copy) +{ + if (copy->vr_idx >= mvdev->dd->num_vq) { + dev_err(mic_dev(mvdev), "%s %d err %d\n", + __func__, __LINE__, -EINVAL); + return -EINVAL; + } + return 0; +} + +/* Copy a specified number of virtio descriptors in a chain */ +int mic_virtio_copy_desc(struct mic_vdev *mvdev, + struct mic_copy_desc *copy) +{ + int err; + struct mic_vringh *mvr = &mvdev->mvr[copy->vr_idx]; + + err = mic_verify_copy_args(mvdev, copy); + if (err) + return err; + + mutex_lock(&mvr->vr_mutex); + if (!mic_vdevup(mvdev)) { + err = -ENODEV; + dev_err(mic_dev(mvdev), "%s %d err %d\n", + __func__, __LINE__, err); + goto err; + } + err = _mic_virtio_copy(mvdev, copy); + if (err) { + dev_err(mic_dev(mvdev), "%s %d err %d\n", + __func__, __LINE__, err); + } +err: + mutex_unlock(&mvr->vr_mutex); + return err; +} + +static void mic_virtio_init_post(struct mic_vdev *mvdev) +{ + struct mic_vqconfig *vqconfig = mic_vq_config(mvdev->dd); + int i; + + for (i = 0; i < mvdev->dd->num_vq; i++) { + if (!le64_to_cpu(vqconfig[i].used_address)) { + dev_warn(mic_dev(mvdev), "used_address zero??\n"); + continue; + } + mvdev->mvr[i].vrh.vring.used = + (void __force *)mvdev->mdev->aper.va + + le64_to_cpu(vqconfig[i].used_address); + } + + mvdev->dc->used_address_updated = 0; + + dev_dbg(mic_dev(mvdev), "%s: device type %d LINKUP\n", + __func__, mvdev->virtio_id); +} + +static inline void mic_virtio_device_reset(struct mic_vdev *mvdev) +{ + int i; + + dev_dbg(mic_dev(mvdev), "%s: status %d device type %d RESET\n", + __func__, mvdev->dd->status, mvdev->virtio_id); + + for (i = 0; i < mvdev->dd->num_vq; i++) + /* + * Avoid lockdep false positive. The + 1 is for the mic + * mutex which is held in the reset devices code path. 
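+ * With the subclasses used here, vring 0 locks with subclass 1, vring 1 + * with subclass 2, and so on, while subclass 0 is left for mic_mutex.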
+ */ + mutex_lock_nested(&mvdev->mvr[i].vr_mutex, i + 1); + + /* 0 status means "reset" */ + mvdev->dd->status = 0; + mvdev->dc->vdev_reset = 0; + mvdev->dc->host_ack = 1; + + for (i = 0; i < mvdev->dd->num_vq; i++) { + struct vringh *vrh = &mvdev->mvr[i].vrh; + mvdev->mvr[i].vring.info->avail_idx = 0; + vrh->completed = 0; + vrh->last_avail_idx = 0; + vrh->last_used_idx = 0; + } + + for (i = 0; i < mvdev->dd->num_vq; i++) + mutex_unlock(&mvdev->mvr[i].vr_mutex); +} + +void mic_virtio_reset_devices(struct mic_device *mdev) +{ + struct list_head *pos, *tmp; + struct mic_vdev *mvdev; + + dev_dbg(mdev->sdev->parent, "%s\n", __func__); + + list_for_each_safe(pos, tmp, &mdev->vdev_list) { + mvdev = list_entry(pos, struct mic_vdev, list); + mic_virtio_device_reset(mvdev); + mvdev->poll_wake = 1; + wake_up(&mvdev->waitq); + } +} + +void mic_bh_handler(struct work_struct *work) +{ + struct mic_vdev *mvdev = container_of(work, struct mic_vdev, + virtio_bh_work); + + if (mvdev->dc->used_address_updated) + mic_virtio_init_post(mvdev); + + if (mvdev->dc->vdev_reset) + mic_virtio_device_reset(mvdev); + + mvdev->poll_wake = 1; + wake_up(&mvdev->waitq); +} + +static irqreturn_t mic_virtio_intr_handler(int irq, void *data) +{ + struct mic_vdev *mvdev = data; + struct mic_device *mdev = mvdev->mdev; + + mdev->ops->intr_workarounds(mdev); + schedule_work(&mvdev->virtio_bh_work); + return IRQ_HANDLED; +} + +int mic_virtio_config_change(struct mic_vdev *mvdev, + void __user *argp) +{ + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake); + int ret = 0, retry, i; + struct mic_bootparam *bootparam = mvdev->mdev->dp; + s8 db = bootparam->h2c_config_db; + + mutex_lock(&mvdev->mdev->mic_mutex); + for (i = 0; i < mvdev->dd->num_vq; i++) + mutex_lock_nested(&mvdev->mvr[i].vr_mutex, i + 1); + + if (db == -1 || mvdev->dd->type == -1) { + ret = -EIO; + goto exit; + } + + if (copy_from_user(mic_vq_configspace(mvdev->dd), + argp, mvdev->dd->config_len)) { + dev_err(mic_dev(mvdev), "%s %d err %d\n", + __func__, __LINE__, -EFAULT); + ret = -EFAULT; + goto exit; + } + mvdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED; + mvdev->mdev->ops->send_intr(mvdev->mdev, db); + + for (retry = 100; retry--;) { + ret = wait_event_timeout(wake, + mvdev->dc->guest_ack, msecs_to_jiffies(100)); + if (ret) + break; + } + + dev_dbg(mic_dev(mvdev), + "%s %d retry: %d\n", __func__, __LINE__, retry); + mvdev->dc->config_change = 0; + mvdev->dc->guest_ack = 0; +exit: + for (i = 0; i < mvdev->dd->num_vq; i++) + mutex_unlock(&mvdev->mvr[i].vr_mutex); + mutex_unlock(&mvdev->mdev->mic_mutex); + return ret; +} + +static int mic_copy_dp_entry(struct mic_vdev *mvdev, + void __user *argp, + __u8 *type, + struct mic_device_desc **devpage) +{ + struct mic_device *mdev = mvdev->mdev; + struct mic_device_desc dd, *dd_config, *devp; + struct mic_vqconfig *vqconfig; + int ret = 0, i; + bool slot_found = false; + + if (copy_from_user(&dd, argp, sizeof(dd))) { + dev_err(mic_dev(mvdev), "%s %d err %d\n", + __func__, __LINE__, -EFAULT); + return -EFAULT; + } + + if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE || + dd.num_vq > MIC_MAX_VRINGS) { + dev_err(mic_dev(mvdev), "%s %d err %d\n", + __func__, __LINE__, -EINVAL); + return -EINVAL; + } + + dd_config = kmalloc(mic_desc_size(&dd), GFP_KERNEL); + if (dd_config == NULL) { + dev_err(mic_dev(mvdev), "%s %d err %d\n", + __func__, __LINE__, -ENOMEM); + return -ENOMEM; + } + if (copy_from_user(dd_config, argp, mic_desc_size(&dd))) { + ret = -EFAULT; + dev_err(mic_dev(mvdev), "%s %d err %d\n", + __func__, __LINE__, 
ret); + goto exit; + } + + vqconfig = mic_vq_config(dd_config); + for (i = 0; i < dd.num_vq; i++) { + if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) { + ret = -EINVAL; + dev_err(mic_dev(mvdev), "%s %d err %d\n", + __func__, __LINE__, ret); + goto exit; + } + } + + /* Find the first free device page entry */ + for (i = sizeof(struct mic_bootparam); + i < MIC_DP_SIZE - mic_total_desc_size(dd_config); + i += mic_total_desc_size(devp)) { + devp = mdev->dp + i; + if (devp->type == 0 || devp->type == -1) { + slot_found = true; + break; + } + } + if (!slot_found) { + ret = -EINVAL; + dev_err(mic_dev(mvdev), "%s %d err %d\n", + __func__, __LINE__, ret); + goto exit; + } + /* + * Save off the type before doing the memcpy. Type will be set in the + * end after completing all initialization for the new device. + */ + *type = dd_config->type; + dd_config->type = 0; + memcpy(devp, dd_config, mic_desc_size(dd_config)); + + *devpage = devp; +exit: + kfree(dd_config); + return ret; +} + +static void mic_init_device_ctrl(struct mic_vdev *mvdev, + struct mic_device_desc *devpage) +{ + struct mic_device_ctrl *dc; + + dc = (void *)devpage + mic_aligned_desc_size(devpage); + + dc->config_change = 0; + dc->guest_ack = 0; + dc->vdev_reset = 0; + dc->host_ack = 0; + dc->used_address_updated = 0; + dc->c2h_vdev_db = -1; + dc->h2c_vdev_db = -1; + mvdev->dc = dc; +} + +int mic_virtio_add_device(struct mic_vdev *mvdev, + void __user *argp) +{ + struct mic_device *mdev = mvdev->mdev; + struct mic_device_desc *dd = NULL; + struct mic_vqconfig *vqconfig; + int vr_size, i, j, ret; + u8 type = 0; + s8 db; + char irqname[10]; + struct mic_bootparam *bootparam = mdev->dp; + u16 num; + dma_addr_t vr_addr; + + mutex_lock(&mdev->mic_mutex); + + ret = mic_copy_dp_entry(mvdev, argp, &type, &dd); + if (ret) { + mutex_unlock(&mdev->mic_mutex); + return ret; + } + + mic_init_device_ctrl(mvdev, dd); + + mvdev->dd = dd; + mvdev->virtio_id = type; + vqconfig = mic_vq_config(dd); + INIT_WORK(&mvdev->virtio_bh_work, mic_bh_handler); + + for (i = 0; i < dd->num_vq; i++) { + struct mic_vringh *mvr = &mvdev->mvr[i]; + struct mic_vring *vr = &mvdev->mvr[i].vring; + num = le16_to_cpu(vqconfig[i].num); + mutex_init(&mvr->vr_mutex); + vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) + + sizeof(struct _mic_vring_info)); + vr->va = (void *) + __get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(vr_size)); + if (!vr->va) { + ret = -ENOMEM; + dev_err(mic_dev(mvdev), "%s %d err %d\n", + __func__, __LINE__, ret); + goto err; + } + vr->len = vr_size; + vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN); + vr->info->magic = cpu_to_le32(MIC_MAGIC + mvdev->virtio_id + i); + vr_addr = mic_map_single(mdev, vr->va, vr_size); + if (mic_map_error(vr_addr)) { + free_pages((unsigned long)vr->va, get_order(vr_size)); + ret = -ENOMEM; + dev_err(mic_dev(mvdev), "%s %d err %d\n", + __func__, __LINE__, ret); + goto err; + } + vqconfig[i].address = cpu_to_le64(vr_addr); + + vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN); + ret = vringh_init_kern(&mvr->vrh, + *(u32 *)mic_vq_features(mvdev->dd), num, false, + vr->vr.desc, vr->vr.avail, vr->vr.used); + if (ret) { + dev_err(mic_dev(mvdev), "%s %d err %d\n", + __func__, __LINE__, ret); + goto err; + } + vringh_kiov_init(&mvr->riov, NULL, 0); + vringh_kiov_init(&mvr->wiov, NULL, 0); + mvr->head = USHRT_MAX; + mvr->mvdev = mvdev; + mvr->vrh.notify = mic_notify; + dev_dbg(mdev->sdev->parent, + "%s %d index %d va %p info %p vr_size 0x%x\n", + __func__, __LINE__, i, vr->va, vr->info, 
vr_size); + mvr->buf = (void *)__get_free_pages(GFP_KERNEL, + get_order(MIC_INT_DMA_BUF_SIZE)); + mvr->buf_da = mic_map_single(mvdev->mdev, mvr->buf, + MIC_INT_DMA_BUF_SIZE); + } + + snprintf(irqname, sizeof(irqname), "mic%dvirtio%d", mdev->id, + mvdev->virtio_id); + mvdev->virtio_db = mic_next_db(mdev); + mvdev->virtio_cookie = mic_request_threaded_irq(mdev, + mic_virtio_intr_handler, + NULL, irqname, mvdev, + mvdev->virtio_db, MIC_INTR_DB); + if (IS_ERR(mvdev->virtio_cookie)) { + ret = PTR_ERR(mvdev->virtio_cookie); + dev_dbg(mdev->sdev->parent, "request irq failed\n"); + goto err; + } + + mvdev->dc->c2h_vdev_db = mvdev->virtio_db; + + list_add_tail(&mvdev->list, &mdev->vdev_list); + /* + * Order the type update with previous stores. This write barrier + * is paired with the corresponding read barrier before the uncached + * system memory read of the type, on the card while scanning the + * device page. + */ + smp_wmb(); + dd->type = type; + + dev_dbg(mdev->sdev->parent, "Added virtio device id %d\n", dd->type); + + db = bootparam->h2c_config_db; + if (db != -1) + mdev->ops->send_intr(mdev, db); + mutex_unlock(&mdev->mic_mutex); + return 0; +err: + vqconfig = mic_vq_config(dd); + for (j = 0; j < i; j++) { + struct mic_vringh *mvr = &mvdev->mvr[j]; + mic_unmap_single(mdev, le64_to_cpu(vqconfig[j].address), + mvr->vring.len); + free_pages((unsigned long)mvr->vring.va, + get_order(mvr->vring.len)); + } + mutex_unlock(&mdev->mic_mutex); + return ret; +} + +void mic_virtio_del_device(struct mic_vdev *mvdev) +{ + struct list_head *pos, *tmp; + struct mic_vdev *tmp_mvdev; + struct mic_device *mdev = mvdev->mdev; + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake); + int i, ret, retry; + struct mic_vqconfig *vqconfig; + struct mic_bootparam *bootparam = mdev->dp; + s8 db; + + mutex_lock(&mdev->mic_mutex); + db = bootparam->h2c_config_db; + if (db == -1) + goto skip_hot_remove; + dev_dbg(mdev->sdev->parent, + "Requesting hot remove id %d\n", mvdev->virtio_id); + mvdev->dc->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE; + mdev->ops->send_intr(mdev, db); + for (retry = 100; retry--;) { + ret = wait_event_timeout(wake, + mvdev->dc->guest_ack, msecs_to_jiffies(100)); + if (ret) + break; + } + dev_dbg(mdev->sdev->parent, + "Device id %d config_change %d guest_ack %d retry %d\n", + mvdev->virtio_id, mvdev->dc->config_change, + mvdev->dc->guest_ack, retry); + mvdev->dc->config_change = 0; + mvdev->dc->guest_ack = 0; +skip_hot_remove: + mic_free_irq(mdev, mvdev->virtio_cookie, mvdev); + flush_work(&mvdev->virtio_bh_work); + vqconfig = mic_vq_config(mvdev->dd); + for (i = 0; i < mvdev->dd->num_vq; i++) { + struct mic_vringh *mvr = &mvdev->mvr[i]; + + mic_unmap_single(mvdev->mdev, mvr->buf_da, + MIC_INT_DMA_BUF_SIZE); + free_pages((unsigned long)mvr->buf, + get_order(MIC_INT_DMA_BUF_SIZE)); + vringh_kiov_cleanup(&mvr->riov); + vringh_kiov_cleanup(&mvr->wiov); + mic_unmap_single(mdev, le64_to_cpu(vqconfig[i].address), + mvr->vring.len); + free_pages((unsigned long)mvr->vring.va, + get_order(mvr->vring.len)); + } + + list_for_each_safe(pos, tmp, &mdev->vdev_list) { + tmp_mvdev = list_entry(pos, struct mic_vdev, list); + if (tmp_mvdev == mvdev) { + list_del(pos); + dev_dbg(mdev->sdev->parent, + "Removing virtio device id %d\n", + mvdev->virtio_id); + break; + } + } + /* + * Order the type update with previous stores. This write barrier + * is paired with the corresponding read barrier before the uncached + * system memory read of the type, on the card while scanning the + * device page. 
+ */ + smp_wmb(); + mvdev->dd->type = -1; + mutex_unlock(&mdev->mic_mutex); +} diff --git a/kernel/drivers/misc/mic/host/mic_virtio.h b/kernel/drivers/misc/mic/host/mic_virtio.h new file mode 100644 index 000000000..d574efb85 --- /dev/null +++ b/kernel/drivers/misc/mic/host/mic_virtio.h @@ -0,0 +1,155 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. + * + */ +#ifndef MIC_VIRTIO_H +#define MIC_VIRTIO_H + +#include <linux/virtio_config.h> +#include <linux/mic_ioctl.h> + +/* + * Note on endianness. + * 1. Host can be both BE or LE + * 2. Guest/card is LE. Host uses le_to_cpu to access desc/avail + * rings and ioreadXX/iowriteXX to access used ring. + * 3. Device page exposed by host to guest contains LE values. Guest + * accesses these using ioreadXX/iowriteXX etc. This way in general we + * obey the virtio spec according to which guest works with native + * endianness and host is aware of guest endianness and does all + * required endianness conversion. + * 4. Data provided from user space to guest (in ADD_DEVICE and + * CONFIG_CHANGE ioctl's) is not interpreted by the driver and should be + * in guest endianness. + */ + +/** + * struct mic_vringh - Virtio ring host information. + * + * @vring: The MIC vring used for setting up user space mappings. + * @vrh: The host VRINGH used for accessing the card vrings. + * @riov: The VRINGH read kernel IOV. + * @wiov: The VRINGH write kernel IOV. + * @vr_mutex: Mutex for synchronizing access to the VRING. + * @buf: Temporary kernel buffer used to copy in/out data + * from/to the card via DMA. + * @buf_da: dma address of buf. + * @mvdev: Back pointer to MIC virtio device for vringh_notify(..). + * @head: The VRINGH head index address passed to vringh_getdesc_kern(..). + */ +struct mic_vringh { + struct mic_vring vring; + struct vringh vrh; + struct vringh_kiov riov; + struct vringh_kiov wiov; + struct mutex vr_mutex; + void *buf; + dma_addr_t buf_da; + struct mic_vdev *mvdev; + u16 head; +}; + +/** + * struct mic_vdev - Host information for a card Virtio device. + * + * @virtio_id - Virtio device id. + * @waitq - Waitqueue to allow ring3 apps to poll. + * @mdev - Back pointer to host MIC device. + * @poll_wake - Used for waking up threads blocked in poll. + * @out_bytes - Debug stats for number of bytes copied from host to card. + * @in_bytes - Debug stats for number of bytes copied from card to host. + * @out_bytes_dma - Debug stats for number of bytes copied from host to card + * using DMA. + * @in_bytes_dma - Debug stats for number of bytes copied from card to host + * using DMA. + * @tx_len_unaligned - Debug stats for number of bytes copied to the card where + * the transfer length did not have the required DMA alignment. + * @tx_dst_unaligned - Debug stats for number of bytes copied where the + * destination address on the card did not have the required DMA alignment. + * @mvr - Store per VRING data structures. 
+ * @virtio_bh_work - Work struct used to schedule virtio bottom half handling. + * @dd - Virtio device descriptor. + * @dc - Virtio device control fields. + * @list - List of Virtio devices. + * @virtio_db - The doorbell used by the card to interrupt the host. + * @virtio_cookie - The cookie returned while requesting interrupts. + */ +struct mic_vdev { + int virtio_id; + wait_queue_head_t waitq; + struct mic_device *mdev; + int poll_wake; + unsigned long out_bytes; + unsigned long in_bytes; + unsigned long out_bytes_dma; + unsigned long in_bytes_dma; + unsigned long tx_len_unaligned; + unsigned long tx_dst_unaligned; + struct mic_vringh mvr[MIC_MAX_VRINGS]; + struct work_struct virtio_bh_work; + struct mic_device_desc *dd; + struct mic_device_ctrl *dc; + struct list_head list; + int virtio_db; + struct mic_irq *virtio_cookie; +}; + +void mic_virtio_uninit(struct mic_device *mdev); +int mic_virtio_add_device(struct mic_vdev *mvdev, + void __user *argp); +void mic_virtio_del_device(struct mic_vdev *mvdev); +int mic_virtio_config_change(struct mic_vdev *mvdev, + void __user *argp); +int mic_virtio_copy_desc(struct mic_vdev *mvdev, + struct mic_copy_desc *request); +void mic_virtio_reset_devices(struct mic_device *mdev); +void mic_bh_handler(struct work_struct *work); + +/* Helper API to obtain the MIC PCIe device */ +static inline struct device *mic_dev(struct mic_vdev *mvdev) +{ + return mvdev->mdev->sdev->parent; +} + +/* Helper API to check if a virtio device is initialized */ +static inline int mic_vdev_inited(struct mic_vdev *mvdev) +{ + /* Device has not been created yet */ + if (!mvdev->dd || !mvdev->dd->type) { + dev_err(mic_dev(mvdev), "%s %d err %d\n", + __func__, __LINE__, -EINVAL); + return -EINVAL; + } + + /* Device has been removed/deleted */ + if (mvdev->dd->type == -1) { + dev_err(mic_dev(mvdev), "%s %d err %d\n", + __func__, __LINE__, -ENODEV); + return -ENODEV; + } + + return 0; +} + +/* Helper API to check if a virtio device is running */ +static inline bool mic_vdevup(struct mic_vdev *mvdev) +{ + return !!mvdev->dd->status; +} +#endif diff --git a/kernel/drivers/misc/mic/host/mic_x100.c b/kernel/drivers/misc/mic/host/mic_x100.c new file mode 100644 index 000000000..b7a21e11d --- /dev/null +++ b/kernel/drivers/misc/mic/host/mic_x100.c @@ -0,0 +1,582 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. + * + */ +#include <linux/fs.h> +#include <linux/pci.h> +#include <linux/sched.h> +#include <linux/firmware.h> +#include <linux/delay.h> + +#include "../common/mic_dev.h" +#include "mic_device.h" +#include "mic_x100.h" +#include "mic_smpt.h" + +/** + * mic_x100_write_spad - write to the scratchpad register + * @mdev: pointer to mic_device instance + * @idx: index to the scratchpad register, 0 based + * @val: the data value to put into the register + * + * This function allows writing of a 32bit value to the indexed scratchpad + * register. 
+ *
+ * RETURNS: none.
+ */
+static void
+mic_x100_write_spad(struct mic_device *mdev, unsigned int idx, u32 val)
+{
+	dev_dbg(mdev->sdev->parent, "Writing 0x%x to scratch pad index %d\n",
+		val, idx);
+	mic_mmio_write(&mdev->mmio, val,
+		       MIC_X100_SBOX_BASE_ADDRESS +
+		       MIC_X100_SBOX_SPAD0 + idx * 4);
+}
+
+/**
+ * mic_x100_read_spad - read from the scratchpad register
+ * @mdev: pointer to mic_device instance
+ * @idx: index to scratchpad register, 0 based
+ *
+ * This function allows reading of the 32bit scratchpad register.
+ *
+ * RETURNS: The 32bit value in the scratchpad register.
+ */
+static u32
+mic_x100_read_spad(struct mic_device *mdev, unsigned int idx)
+{
+	u32 val = mic_mmio_read(&mdev->mmio,
+				MIC_X100_SBOX_BASE_ADDRESS +
+				MIC_X100_SBOX_SPAD0 + idx * 4);
+
+	dev_dbg(mdev->sdev->parent,
+		"Reading 0x%x from scratch pad index %d\n", val, idx);
+	return val;
+}
+
+/**
+ * mic_x100_enable_interrupts - Enable interrupts.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_enable_interrupts(struct mic_device *mdev)
+{
+	u32 reg;
+	struct mic_mw *mw = &mdev->mmio;
+	u32 sice0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICE0;
+	u32 siac0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SIAC0;
+
+	reg = mic_mmio_read(mw, sice0);
+	reg |= MIC_X100_SBOX_DBR_BITS(0xf) | MIC_X100_SBOX_DMA_BITS(0xff);
+	mic_mmio_write(mw, reg, sice0);
+
+	/*
+	 * Enable auto-clear when enabling interrupts. Applicable only for
+	 * MSI-x. Legacy and MSI mode cannot have auto-clear enabled.
+	 */
+	if (mdev->irq_info.num_vectors > 1) {
+		reg = mic_mmio_read(mw, siac0);
+		reg |= MIC_X100_SBOX_DBR_BITS(0xf) |
+			MIC_X100_SBOX_DMA_BITS(0xff);
+		mic_mmio_write(mw, reg, siac0);
+	}
+}
+
+/**
+ * mic_x100_disable_interrupts - Disable interrupts.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_disable_interrupts(struct mic_device *mdev)
+{
+	u32 reg;
+	struct mic_mw *mw = &mdev->mmio;
+	u32 sice0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICE0;
+	u32 siac0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SIAC0;
+	u32 sicc0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICC0;
+
+	reg = mic_mmio_read(mw, sice0);
+	mic_mmio_write(mw, reg, sicc0);
+
+	if (mdev->irq_info.num_vectors > 1) {
+		reg = mic_mmio_read(mw, siac0);
+		reg &= ~(MIC_X100_SBOX_DBR_BITS(0xf) |
+			 MIC_X100_SBOX_DMA_BITS(0xff));
+		mic_mmio_write(mw, reg, siac0);
+	}
+}
+
+/**
+ * mic_x100_send_sbox_intr - Send an MIC_X100_SBOX interrupt to MIC.
+ * @mdev: pointer to mic_device instance
+ * @doorbell: doorbell number.
+ */
+static void mic_x100_send_sbox_intr(struct mic_device *mdev,
+				    int doorbell)
+{
+	struct mic_mw *mw = &mdev->mmio;
+	u64 apic_icr_offset = MIC_X100_SBOX_APICICR0 + doorbell * 8;
+	u32 apicicr_low = mic_mmio_read(mw, MIC_X100_SBOX_BASE_ADDRESS +
+					apic_icr_offset);
+
+	/* for MIC we need to make sure we "hit" the send_icr bit (13) */
+	apicicr_low = (apicicr_low | (1 << 13));
+
+	/* Ensure that the interrupt is ordered w.r.t. previous stores. */
+	wmb();
+	mic_mmio_write(mw, apicicr_low,
+		       MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset);
+}
+
+/**
+ * mic_x100_send_rdmasr_intr - Send an RDMASR interrupt to MIC.
+ * @mdev: pointer to mic_device instance
+ * @doorbell: doorbell number.
+ */
+static void mic_x100_send_rdmasr_intr(struct mic_device *mdev,
+				      int doorbell)
+{
+	int rdmasr_offset = MIC_X100_SBOX_RDMASR0 + (doorbell << 2);
+
+	/* Ensure that the interrupt is ordered w.r.t. previous stores. */
+	wmb();
+	mic_mmio_write(&mdev->mmio, 0,
+		       MIC_X100_SBOX_BASE_ADDRESS + rdmasr_offset);
+}
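The dispatch function below selects between the two helpers above based on the doorbell index: the first MIC_X100_NUM_SBOX_IRQ (8) doorbells map to APICICR registers 8 bytes apart, the rest are remapped into the RDMASR bank at 4-byte strides. A small standalone sketch of the register-offset arithmetic, using the constants from mic_x100.h later in this patch (editorial illustration, not part of the patch):

#include <stdint.h>
#include <stdio.h>

#define NUM_SBOX_IRQ    8		/* MIC_X100_NUM_SBOX_IRQ */
#define RDMASR_IRQ_BASE 17		/* MIC_X100_RDMASR_IRQ_BASE */
#define SBOX_APICICR0   0x0000A9D0	/* MIC_X100_SBOX_APICICR0 */
#define SBOX_RDMASR0    0x0000B180	/* MIC_X100_SBOX_RDMASR0 */

/* Mirrors the routing in mic_x100_send_intr() below. */
static uint32_t doorbell_reg_offset(int doorbell)
{
	if (doorbell < NUM_SBOX_IRQ)
		return SBOX_APICICR0 + doorbell * 8;
	return SBOX_RDMASR0 +
	       ((doorbell - NUM_SBOX_IRQ + RDMASR_IRQ_BASE) << 2);
}

int main(void)
{
	/* doorbell 9 -> rdmasr index 9 - 8 + 17 = 18
	 *            -> offset 0xB180 + 18 * 4 = 0xB1C8 */
	printf("0x%x\n", doorbell_reg_offset(9));
	return 0;
}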
+/**
+ * mic_x100_send_intr - Send interrupt to MIC.
+ * @mdev: pointer to mic_device instance
+ * @doorbell: doorbell number.
+ */
+static void mic_x100_send_intr(struct mic_device *mdev, int doorbell)
+{
+	int rdmasr_db;
+
+	if (doorbell < MIC_X100_NUM_SBOX_IRQ) {
+		mic_x100_send_sbox_intr(mdev, doorbell);
+	} else {
+		rdmasr_db = doorbell - MIC_X100_NUM_SBOX_IRQ +
+			MIC_X100_RDMASR_IRQ_BASE;
+		mic_x100_send_rdmasr_intr(mdev, rdmasr_db);
+	}
+}
+
+/**
+ * mic_x100_ack_interrupt - Read the interrupt sources register and
+ * clear it. This function will be called in the MSI/INTx case.
+ * @mdev: Pointer to mic_device instance.
+ *
+ * Returns: bitmask of interrupt sources triggered.
+ */
+static u32 mic_x100_ack_interrupt(struct mic_device *mdev)
+{
+	u32 sicr0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICR0;
+	u32 reg = mic_mmio_read(&mdev->mmio, sicr0);
+
+	mic_mmio_write(&mdev->mmio, reg, sicr0);
+	return reg;
+}
+
+/**
+ * mic_x100_intr_workarounds - These hardware specific workarounds are
+ * to be invoked every time an interrupt is handled.
+ * @mdev: Pointer to mic_device instance.
+ *
+ * Returns: none
+ */
+static void mic_x100_intr_workarounds(struct mic_device *mdev)
+{
+	struct mic_mw *mw = &mdev->mmio;
+
+	/* Clear pending bit array. */
+	if (MIC_A0_STEP == mdev->stepping)
+		mic_mmio_write(mw, 1, MIC_X100_SBOX_BASE_ADDRESS +
+			       MIC_X100_SBOX_MSIXPBACR);
+
+	if (mdev->stepping >= MIC_B0_STEP)
+		mdev->intr_ops->enable_interrupts(mdev);
+}
+
+/**
+ * mic_x100_hw_intr_init - Initialize h/w specific interrupt
+ * information.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_hw_intr_init(struct mic_device *mdev)
+{
+	mdev->intr_info = (struct mic_intr_info *)mic_x100_intr_init;
+}
+
+/**
+ * mic_x100_read_msi_to_src_map - read from the MSI mapping registers
+ * @mdev: pointer to mic_device instance
+ * @idx: index to the mapping register, 0 based
+ *
+ * This function allows reading of the 32bit MSI mapping register.
+ *
+ * RETURNS: The value in the register.
+ */
+static u32
+mic_x100_read_msi_to_src_map(struct mic_device *mdev, int idx)
+{
+	return mic_mmio_read(&mdev->mmio,
+			     MIC_X100_SBOX_BASE_ADDRESS +
+			     MIC_X100_SBOX_MXAR0 + idx * 4);
+}
+
+/**
+ * mic_x100_program_msi_to_src_map - program the MSI mapping registers
+ * @mdev: pointer to mic_device instance
+ * @idx: index to the mapping register, 0 based
+ * @offset: The bit offset in the register that needs to be updated.
+ * @set: boolean specifying if the bit in the specified offset needs
+ * to be set or cleared.
+ *
+ * RETURNS: None.
+ */
+static void
+mic_x100_program_msi_to_src_map(struct mic_device *mdev,
+				int idx, int offset, bool set)
+{
+	unsigned long reg;
+	struct mic_mw *mw = &mdev->mmio;
+	u32 mxar = MIC_X100_SBOX_BASE_ADDRESS +
+		MIC_X100_SBOX_MXAR0 + idx * 4;
+
+	reg = mic_mmio_read(mw, mxar);
+	if (set)
+		__set_bit(offset, &reg);
+	else
+		__clear_bit(offset, &reg);
+	mic_mmio_write(mw, reg, mxar);
+}
+
+/*
+ * mic_x100_reset_fw_ready - Reset Firmware ready status field.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_reset_fw_ready(struct mic_device *mdev)
+{
+	mdev->ops->write_spad(mdev, MIC_X100_DOWNLOAD_INFO, 0);
+}
+
+/*
+ * mic_x100_is_fw_ready - Check if firmware is ready.
+ * @mdev: pointer to mic_device instance
+ */
+static bool mic_x100_is_fw_ready(struct mic_device *mdev)
+{
+	u32 scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO);
+
+	return MIC_X100_SPAD2_DOWNLOAD_STATUS(scratch2) ? true : false;
+}
+
+/**
+ * mic_x100_get_apic_id - Get bootstrap APIC ID.
+ * @mdev: pointer to mic_device instance + */ +static u32 mic_x100_get_apic_id(struct mic_device *mdev) +{ + u32 scratch2 = 0; + + scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO); + return MIC_X100_SPAD2_APIC_ID(scratch2); +} + +/** + * mic_x100_send_firmware_intr - Send an interrupt to the firmware on MIC. + * @mdev: pointer to mic_device instance + */ +static void mic_x100_send_firmware_intr(struct mic_device *mdev) +{ + u32 apicicr_low; + u64 apic_icr_offset = MIC_X100_SBOX_APICICR7; + int vector = MIC_X100_BSP_INTERRUPT_VECTOR; + struct mic_mw *mw = &mdev->mmio; + + /* + * For MIC we need to make sure we "hit" + * the send_icr bit (13). + */ + apicicr_low = (vector | (1 << 13)); + + mic_mmio_write(mw, mic_x100_get_apic_id(mdev), + MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset + 4); + + /* Ensure that the interrupt is ordered w.r.t. previous stores. */ + wmb(); + mic_mmio_write(mw, apicicr_low, + MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset); +} + +/** + * mic_x100_hw_reset - Reset the MIC device. + * @mdev: pointer to mic_device instance + */ +static void mic_x100_hw_reset(struct mic_device *mdev) +{ + u32 reset_reg; + u32 rgcr = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_RGCR; + struct mic_mw *mw = &mdev->mmio; + + /* Ensure that the reset is ordered w.r.t. previous loads and stores */ + mb(); + /* Trigger reset */ + reset_reg = mic_mmio_read(mw, rgcr); + reset_reg |= 0x1; + mic_mmio_write(mw, reset_reg, rgcr); + /* + * It seems we really want to delay at least 1 second + * after touching reset to prevent a lot of problems. + */ + msleep(1000); +} + +/** + * mic_x100_load_command_line - Load command line to MIC. + * @mdev: pointer to mic_device instance + * @fw: the firmware image + * + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. + */ +static int +mic_x100_load_command_line(struct mic_device *mdev, const struct firmware *fw) +{ + u32 len = 0; + u32 boot_mem; + char *buf; + void __iomem *cmd_line_va = mdev->aper.va + mdev->bootaddr + fw->size; +#define CMDLINE_SIZE 2048 + + boot_mem = mdev->aper.len >> 20; + buf = kzalloc(CMDLINE_SIZE, GFP_KERNEL); + if (!buf) { + dev_err(mdev->sdev->parent, + "%s %d allocation failed\n", __func__, __LINE__); + return -ENOMEM; + } + len += snprintf(buf, CMDLINE_SIZE - len, + " mem=%dM", boot_mem); + if (mdev->cmdline) + snprintf(buf + len, CMDLINE_SIZE - len, " %s", mdev->cmdline); + memcpy_toio(cmd_line_va, buf, strlen(buf) + 1); + kfree(buf); + return 0; +} + +/** + * mic_x100_load_ramdisk - Load ramdisk to MIC. + * @mdev: pointer to mic_device instance + * + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. + */ +static int +mic_x100_load_ramdisk(struct mic_device *mdev) +{ + const struct firmware *fw; + int rc; + struct boot_params __iomem *bp = mdev->aper.va + mdev->bootaddr; + + rc = request_firmware(&fw, + mdev->ramdisk, mdev->sdev->parent); + if (rc < 0) { + dev_err(mdev->sdev->parent, + "ramdisk request_firmware failed: %d %s\n", + rc, mdev->ramdisk); + goto error; + } + /* + * Typically the bootaddr for card OS is 64M + * so copy over the ramdisk @ 128M. + */ + memcpy_toio(mdev->aper.va + (mdev->bootaddr << 1), fw->data, fw->size); + iowrite32(mdev->bootaddr << 1, &bp->hdr.ramdisk_image); + iowrite32(fw->size, &bp->hdr.ramdisk_size); + release_firmware(fw); +error: + return rc; +} + +/** + * mic_x100_get_boot_addr - Get MIC boot address. 
+ * @mdev: pointer to mic_device instance
+ *
+ * This function is called during firmware load to determine
+ * the address at which the OS should be downloaded in card
+ * memory i.e. GDDR.
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int
+mic_x100_get_boot_addr(struct mic_device *mdev)
+{
+	u32 scratch2, boot_addr;
+	int rc = 0;
+
+	scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO);
+	boot_addr = MIC_X100_SPAD2_DOWNLOAD_ADDR(scratch2);
+	dev_dbg(mdev->sdev->parent, "%s %d boot_addr 0x%x\n",
+		__func__, __LINE__, boot_addr);
+	if (boot_addr > (1U << 31)) {
+		dev_err(mdev->sdev->parent,
+			"incorrect bootaddr 0x%x\n",
+			boot_addr);
+		rc = -EINVAL;
+		goto error;
+	}
+	mdev->bootaddr = boot_addr;
+error:
+	return rc;
+}
+
+/**
+ * mic_x100_load_firmware - Load firmware to MIC.
+ * @mdev: pointer to mic_device instance
+ * @buf: buffer containing boot string including firmware/ramdisk path.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int
+mic_x100_load_firmware(struct mic_device *mdev, const char *buf)
+{
+	int rc;
+	const struct firmware *fw;
+
+	rc = mic_x100_get_boot_addr(mdev);
+	if (rc)
+		goto error;
+	/* load OS */
+	rc = request_firmware(&fw, mdev->firmware, mdev->sdev->parent);
+	if (rc < 0) {
+		dev_err(mdev->sdev->parent,
+			"firmware request_firmware failed: %d %s\n",
+			rc, mdev->firmware);
+		goto error;
+	}
+	if (mdev->bootaddr > mdev->aper.len - fw->size) {
+		rc = -EINVAL;
+		dev_err(mdev->sdev->parent, "%s %d rc %d bootaddr 0x%x\n",
+			__func__, __LINE__, rc, mdev->bootaddr);
+		release_firmware(fw);
+		goto error;
+	}
+	memcpy_toio(mdev->aper.va + mdev->bootaddr, fw->data, fw->size);
+	mdev->ops->write_spad(mdev, MIC_X100_FW_SIZE, fw->size);
+	if (!strcmp(mdev->bootmode, "elf")) {
+		/* Don't leak the OS image when skipping cmdline/ramdisk. */
+		release_firmware(fw);
+		goto done;
+	}
+	/* load command line */
+	rc = mic_x100_load_command_line(mdev, fw);
+	if (rc) {
+		dev_err(mdev->sdev->parent, "%s %d rc %d\n",
+			__func__, __LINE__, rc);
+		release_firmware(fw);
+		goto error;
+	}
+	release_firmware(fw);
+	/* load ramdisk */
+	if (mdev->ramdisk)
+		rc = mic_x100_load_ramdisk(mdev);
+error:
+	dev_dbg(mdev->sdev->parent, "%s %d rc %d\n", __func__, __LINE__, rc);
+done:
+	return rc;
+}
+
+/**
+ * mic_x100_get_postcode - Get postcode status from firmware.
+ * @mdev: pointer to mic_device instance
+ *
+ * RETURNS: postcode.
+ */
+static u32 mic_x100_get_postcode(struct mic_device *mdev)
+{
+	return mic_mmio_read(&mdev->mmio, MIC_X100_POSTCODE);
+}
+
+/**
+ * mic_x100_smpt_set - Update an SMPT entry with a DMA address.
+ * @mdev: pointer to mic_device instance
+ * @dma_addr: DMA address to program into the entry.
+ * @index: index of the SMPT register to program.
+ *
+ * RETURNS: none.
+ */
+static void
+mic_x100_smpt_set(struct mic_device *mdev, dma_addr_t dma_addr, u8 index)
+{
+#define SNOOP_ON  (0 << 0)
+#define SNOOP_OFF (1 << 0)
+/*
+ * Sbox Smpt Reg Bits:
+ * Bits	31:2	Host address
+ * Bits	1	RSVD
+ * Bits	0	No snoop
+ */
+#define BUILD_SMPT(NO_SNOOP, HOST_ADDR)  \
+	(u32)(((HOST_ADDR) << 2) | ((NO_SNOOP) & 0x01))
+
+	uint32_t smpt_reg_val = BUILD_SMPT(SNOOP_ON,
+			dma_addr >> mdev->smpt->info.page_shift);
+	mic_mmio_write(&mdev->mmio, smpt_reg_val,
+		       MIC_X100_SBOX_BASE_ADDRESS +
+		       MIC_X100_SBOX_SMPT00 + (4 * index));
+}
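The SMPT register layout above packs a huge-page number into bits 31:2. Since mic_x100_smpt_hw_init() below sets page_shift to 34, each SMPT entry covers a 16 GB window of host memory. A standalone worked example of the arithmetic (editorial illustration, not part of the patch):

#include <stdint.h>
#include <stdio.h>

#define SNOOP_ON (0 << 0)
#define BUILD_SMPT(NO_SNOOP, HOST_ADDR) \
	(uint32_t)(((HOST_ADDR) << 2) | ((NO_SNOOP) & 0x01))

int main(void)
{
	/* X100 SMPT pages are 16 GB (page_shift = 34, set in
	 * mic_x100_smpt_hw_init() below). */
	uint64_t dma_addr = 4ULL << 34;		/* 64 GB, i.e. page 4 */
	uint64_t page = dma_addr >> 34;		/* 16 GB page number: 4 */

	/* (4 << 2) | 0 == 0x10 */
	printf("SMPT reg value: 0x%x\n",
	       (unsigned)BUILD_SMPT(SNOOP_ON, page));
	return 0;
}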
+
+/**
+ * mic_x100_smpt_hw_init - Initialize SMPT X100 specific fields.
+ * @mdev: pointer to mic_device instance
+ *
+ * RETURNS: none.
+ */
+static void mic_x100_smpt_hw_init(struct mic_device *mdev)
+{
+	struct mic_smpt_hw_info *info = &mdev->smpt->info;
+
+	info->num_reg = 32;
+	info->page_shift = 34;
+	info->page_size = (1ULL << info->page_shift);
+	info->base = 0x8000000000ULL;
+}
+
+struct mic_smpt_ops mic_x100_smpt_ops = {
+	.init = mic_x100_smpt_hw_init,
+	.set = mic_x100_smpt_set,
+};
+
+static bool mic_x100_dma_filter(struct dma_chan *chan, void *param)
+{
+	return chan->device->dev->parent == (struct device *)param;
+}
+
+struct mic_hw_ops mic_x100_ops = {
+	.aper_bar = MIC_X100_APER_BAR,
+	.mmio_bar = MIC_X100_MMIO_BAR,
+	.read_spad = mic_x100_read_spad,
+	.write_spad = mic_x100_write_spad,
+	.send_intr = mic_x100_send_intr,
+	.ack_interrupt = mic_x100_ack_interrupt,
+	.intr_workarounds = mic_x100_intr_workarounds,
+	.reset = mic_x100_hw_reset,
+	.reset_fw_ready = mic_x100_reset_fw_ready,
+	.is_fw_ready = mic_x100_is_fw_ready,
+	.send_firmware_intr = mic_x100_send_firmware_intr,
+	.load_mic_fw = mic_x100_load_firmware,
+	.get_postcode = mic_x100_get_postcode,
+	.dma_filter = mic_x100_dma_filter,
+};
+
+struct mic_hw_intr_ops mic_x100_intr_ops = {
+	.intr_init = mic_x100_hw_intr_init,
+	.enable_interrupts = mic_x100_enable_interrupts,
+	.disable_interrupts = mic_x100_disable_interrupts,
+	.program_msi_to_src_map = mic_x100_program_msi_to_src_map,
+	.read_msi_to_src_map = mic_x100_read_msi_to_src_map,
+};
diff --git a/kernel/drivers/misc/mic/host/mic_x100.h b/kernel/drivers/misc/mic/host/mic_x100.h
new file mode 100644
index 000000000..8b7daa182
--- /dev/null
+++ b/kernel/drivers/misc/mic/host/mic_x100.h
@@ -0,0 +1,98 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ * + */ +#ifndef _MIC_X100_HW_H_ +#define _MIC_X100_HW_H_ + +#define MIC_X100_PCI_DEVICE_2250 0x2250 +#define MIC_X100_PCI_DEVICE_2251 0x2251 +#define MIC_X100_PCI_DEVICE_2252 0x2252 +#define MIC_X100_PCI_DEVICE_2253 0x2253 +#define MIC_X100_PCI_DEVICE_2254 0x2254 +#define MIC_X100_PCI_DEVICE_2255 0x2255 +#define MIC_X100_PCI_DEVICE_2256 0x2256 +#define MIC_X100_PCI_DEVICE_2257 0x2257 +#define MIC_X100_PCI_DEVICE_2258 0x2258 +#define MIC_X100_PCI_DEVICE_2259 0x2259 +#define MIC_X100_PCI_DEVICE_225a 0x225a +#define MIC_X100_PCI_DEVICE_225b 0x225b +#define MIC_X100_PCI_DEVICE_225c 0x225c +#define MIC_X100_PCI_DEVICE_225d 0x225d +#define MIC_X100_PCI_DEVICE_225e 0x225e + +#define MIC_X100_APER_BAR 0 +#define MIC_X100_MMIO_BAR 4 + +#define MIC_X100_SBOX_BASE_ADDRESS 0x00010000 +#define MIC_X100_SBOX_SPAD0 0x0000AB20 +#define MIC_X100_SBOX_SICR0_DBR(x) ((x) & 0xf) +#define MIC_X100_SBOX_SICR0_DMA(x) (((x) >> 8) & 0xff) +#define MIC_X100_SBOX_SICE0_DBR(x) ((x) & 0xf) +#define MIC_X100_SBOX_DBR_BITS(x) ((x) & 0xf) +#define MIC_X100_SBOX_SICE0_DMA(x) (((x) >> 8) & 0xff) +#define MIC_X100_SBOX_DMA_BITS(x) (((x) & 0xff) << 8) + +#define MIC_X100_SBOX_APICICR0 0x0000A9D0 +#define MIC_X100_SBOX_SICR0 0x00009004 +#define MIC_X100_SBOX_SICE0 0x0000900C +#define MIC_X100_SBOX_SICC0 0x00009010 +#define MIC_X100_SBOX_SIAC0 0x00009014 +#define MIC_X100_SBOX_MSIXPBACR 0x00009084 +#define MIC_X100_SBOX_MXAR0 0x00009044 +#define MIC_X100_SBOX_SMPT00 0x00003100 +#define MIC_X100_SBOX_RDMASR0 0x0000B180 + +#define MIC_X100_DOORBELL_IDX_START 0 +#define MIC_X100_NUM_DOORBELL 4 +#define MIC_X100_DMA_IDX_START 8 +#define MIC_X100_NUM_DMA 8 +#define MIC_X100_ERR_IDX_START 30 +#define MIC_X100_NUM_ERR 1 + +#define MIC_X100_NUM_SBOX_IRQ 8 +#define MIC_X100_NUM_RDMASR_IRQ 8 +#define MIC_X100_RDMASR_IRQ_BASE 17 +#define MIC_X100_SPAD2_DOWNLOAD_STATUS(x) ((x) & 0x1) +#define MIC_X100_SPAD2_APIC_ID(x) (((x) >> 1) & 0x1ff) +#define MIC_X100_SPAD2_DOWNLOAD_ADDR(x) ((x) & 0xfffff000) +#define MIC_X100_SBOX_APICICR7 0x0000AA08 +#define MIC_X100_SBOX_RGCR 0x00004010 +#define MIC_X100_SBOX_SDBIC0 0x0000CC90 +#define MIC_X100_DOWNLOAD_INFO 2 +#define MIC_X100_FW_SIZE 5 +#define MIC_X100_POSTCODE 0x242c + +static const u16 mic_x100_intr_init[] = { + MIC_X100_DOORBELL_IDX_START, + MIC_X100_DMA_IDX_START, + MIC_X100_ERR_IDX_START, + MIC_X100_NUM_DOORBELL, + MIC_X100_NUM_DMA, + MIC_X100_NUM_ERR, +}; + +/* Host->Card(bootstrap) Interrupt Vector */ +#define MIC_X100_BSP_INTERRUPT_VECTOR 229 + +extern struct mic_hw_ops mic_x100_ops; +extern struct mic_smpt_ops mic_x100_smpt_ops; +extern struct mic_hw_intr_ops mic_x100_intr_ops; + +#endif diff --git a/kernel/drivers/misc/pch_phub.c b/kernel/drivers/misc/pch_phub.c new file mode 100644 index 000000000..9a17a9bab --- /dev/null +++ b/kernel/drivers/misc/pch_phub.c @@ -0,0 +1,898 @@ +/* + * Copyright (C) 2011 LAPIS Semiconductor Co., Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/fs.h> +#include <linux/uaccess.h> +#include <linux/string.h> +#include <linux/pci.h> +#include <linux/io.h> +#include <linux/delay.h> +#include <linux/mutex.h> +#include <linux/if_ether.h> +#include <linux/ctype.h> +#include <linux/dmi.h> + +#define PHUB_STATUS 0x00 /* Status Register offset */ +#define PHUB_CONTROL 0x04 /* Control Register offset */ +#define PHUB_TIMEOUT 0x05 /* Time out value for Status Register */ +#define PCH_PHUB_ROM_WRITE_ENABLE 0x01 /* Enabling for writing ROM */ +#define PCH_PHUB_ROM_WRITE_DISABLE 0x00 /* Disabling for writing ROM */ +#define PCH_PHUB_MAC_START_ADDR_EG20T 0x14 /* MAC data area start address + offset */ +#define PCH_PHUB_MAC_START_ADDR_ML7223 0x20C /* MAC data area start address + offset */ +#define PCH_PHUB_ROM_START_ADDR_EG20T 0x80 /* ROM data area start address offset + (Intel EG20T PCH)*/ +#define PCH_PHUB_ROM_START_ADDR_ML7213 0x400 /* ROM data area start address + offset(LAPIS Semicon ML7213) + */ +#define PCH_PHUB_ROM_START_ADDR_ML7223 0x400 /* ROM data area start address + offset(LAPIS Semicon ML7223) + */ + +/* MAX number of INT_REDUCE_CONTROL registers */ +#define MAX_NUM_INT_REDUCE_CONTROL_REG 128 +#define PCI_DEVICE_ID_PCH1_PHUB 0x8801 +#define PCH_MINOR_NOS 1 +#define CLKCFG_CAN_50MHZ 0x12000000 +#define CLKCFG_CANCLK_MASK 0xFF000000 +#define CLKCFG_UART_MASK 0xFFFFFF + +/* CM-iTC */ +#define CLKCFG_UART_48MHZ (1 << 16) +#define CLKCFG_BAUDDIV (2 << 20) +#define CLKCFG_PLL2VCO (8 << 9) +#define CLKCFG_UARTCLKSEL (1 << 18) + +/* Macros for ML7213 */ +#define PCI_VENDOR_ID_ROHM 0x10db +#define PCI_DEVICE_ID_ROHM_ML7213_PHUB 0x801A + +/* Macros for ML7223 */ +#define PCI_DEVICE_ID_ROHM_ML7223_mPHUB 0x8012 /* for Bus-m */ +#define PCI_DEVICE_ID_ROHM_ML7223_nPHUB 0x8002 /* for Bus-n */ + +/* Macros for ML7831 */ +#define PCI_DEVICE_ID_ROHM_ML7831_PHUB 0x8801 + +/* SROM ACCESS Macro */ +#define PCH_WORD_ADDR_MASK (~((1 << 2) - 1)) + +/* Registers address offset */ +#define PCH_PHUB_ID_REG 0x0000 +#define PCH_PHUB_QUEUE_PRI_VAL_REG 0x0004 +#define PCH_PHUB_RC_QUEUE_MAXSIZE_REG 0x0008 +#define PCH_PHUB_BRI_QUEUE_MAXSIZE_REG 0x000C +#define PCH_PHUB_COMP_RESP_TIMEOUT_REG 0x0010 +#define PCH_PHUB_BUS_SLAVE_CONTROL_REG 0x0014 +#define PCH_PHUB_DEADLOCK_AVOID_TYPE_REG 0x0018 +#define PCH_PHUB_INTPIN_REG_WPERMIT_REG0 0x0020 +#define PCH_PHUB_INTPIN_REG_WPERMIT_REG1 0x0024 +#define PCH_PHUB_INTPIN_REG_WPERMIT_REG2 0x0028 +#define PCH_PHUB_INTPIN_REG_WPERMIT_REG3 0x002C +#define PCH_PHUB_INT_REDUCE_CONTROL_REG_BASE 0x0040 +#define CLKCFG_REG_OFFSET 0x500 +#define FUNCSEL_REG_OFFSET 0x508 + +#define PCH_PHUB_OROM_SIZE 15360 + +/** + * struct pch_phub_reg - PHUB register structure + * @phub_id_reg: PHUB_ID register val + * @q_pri_val_reg: QUEUE_PRI_VAL register val + * @rc_q_maxsize_reg: RC_QUEUE_MAXSIZE register val + * @bri_q_maxsize_reg: BRI_QUEUE_MAXSIZE register val + * @comp_resp_timeout_reg: COMP_RESP_TIMEOUT register val + * @bus_slave_control_reg: BUS_SLAVE_CONTROL_REG register val + * @deadlock_avoid_type_reg: DEADLOCK_AVOID_TYPE register val + * @intpin_reg_wpermit_reg0: INTPIN_REG_WPERMIT register 0 val + * @intpin_reg_wpermit_reg1: INTPIN_REG_WPERMIT register 1 val + * @intpin_reg_wpermit_reg2: INTPIN_REG_WPERMIT register 2 
val + * @intpin_reg_wpermit_reg3: INTPIN_REG_WPERMIT register 3 val + * @int_reduce_control_reg: INT_REDUCE_CONTROL registers val + * @clkcfg_reg: CLK CFG register val + * @funcsel_reg: Function select register value + * @pch_phub_base_address: Register base address + * @pch_phub_extrom_base_address: external rom base address + * @pch_mac_start_address: MAC address area start address + * @pch_opt_rom_start_address: Option ROM start address + * @ioh_type: Save IOH type + * @pdev: pointer to pci device struct + */ +struct pch_phub_reg { + u32 phub_id_reg; + u32 q_pri_val_reg; + u32 rc_q_maxsize_reg; + u32 bri_q_maxsize_reg; + u32 comp_resp_timeout_reg; + u32 bus_slave_control_reg; + u32 deadlock_avoid_type_reg; + u32 intpin_reg_wpermit_reg0; + u32 intpin_reg_wpermit_reg1; + u32 intpin_reg_wpermit_reg2; + u32 intpin_reg_wpermit_reg3; + u32 int_reduce_control_reg[MAX_NUM_INT_REDUCE_CONTROL_REG]; + u32 clkcfg_reg; + u32 funcsel_reg; + void __iomem *pch_phub_base_address; + void __iomem *pch_phub_extrom_base_address; + u32 pch_mac_start_address; + u32 pch_opt_rom_start_address; + int ioh_type; + struct pci_dev *pdev; +}; + +/* SROM SPEC for MAC address assignment offset */ +static const int pch_phub_mac_offset[ETH_ALEN] = {0x3, 0x2, 0x1, 0x0, 0xb, 0xa}; + +static DEFINE_MUTEX(pch_phub_mutex); + +/** + * pch_phub_read_modify_write_reg() - Reading modifying and writing register + * @reg_addr_offset: Register offset address value. + * @data: Writing value. + * @mask: Mask value. + */ +static void pch_phub_read_modify_write_reg(struct pch_phub_reg *chip, + unsigned int reg_addr_offset, + unsigned int data, unsigned int mask) +{ + void __iomem *reg_addr = chip->pch_phub_base_address + reg_addr_offset; + iowrite32(((ioread32(reg_addr) & ~mask)) | data, reg_addr); +} + +#ifdef CONFIG_PM +/* pch_phub_save_reg_conf - saves register configuration */ +static void pch_phub_save_reg_conf(struct pci_dev *pdev) +{ + unsigned int i; + struct pch_phub_reg *chip = pci_get_drvdata(pdev); + + void __iomem *p = chip->pch_phub_base_address; + + chip->phub_id_reg = ioread32(p + PCH_PHUB_ID_REG); + chip->q_pri_val_reg = ioread32(p + PCH_PHUB_QUEUE_PRI_VAL_REG); + chip->rc_q_maxsize_reg = ioread32(p + PCH_PHUB_RC_QUEUE_MAXSIZE_REG); + chip->bri_q_maxsize_reg = ioread32(p + PCH_PHUB_BRI_QUEUE_MAXSIZE_REG); + chip->comp_resp_timeout_reg = + ioread32(p + PCH_PHUB_COMP_RESP_TIMEOUT_REG); + chip->bus_slave_control_reg = + ioread32(p + PCH_PHUB_BUS_SLAVE_CONTROL_REG); + chip->deadlock_avoid_type_reg = + ioread32(p + PCH_PHUB_DEADLOCK_AVOID_TYPE_REG); + chip->intpin_reg_wpermit_reg0 = + ioread32(p + PCH_PHUB_INTPIN_REG_WPERMIT_REG0); + chip->intpin_reg_wpermit_reg1 = + ioread32(p + PCH_PHUB_INTPIN_REG_WPERMIT_REG1); + chip->intpin_reg_wpermit_reg2 = + ioread32(p + PCH_PHUB_INTPIN_REG_WPERMIT_REG2); + chip->intpin_reg_wpermit_reg3 = + ioread32(p + PCH_PHUB_INTPIN_REG_WPERMIT_REG3); + dev_dbg(&pdev->dev, "%s : " + "chip->phub_id_reg=%x, " + "chip->q_pri_val_reg=%x, " + "chip->rc_q_maxsize_reg=%x, " + "chip->bri_q_maxsize_reg=%x, " + "chip->comp_resp_timeout_reg=%x, " + "chip->bus_slave_control_reg=%x, " + "chip->deadlock_avoid_type_reg=%x, " + "chip->intpin_reg_wpermit_reg0=%x, " + "chip->intpin_reg_wpermit_reg1=%x, " + "chip->intpin_reg_wpermit_reg2=%x, " + "chip->intpin_reg_wpermit_reg3=%x\n", __func__, + chip->phub_id_reg, + chip->q_pri_val_reg, + chip->rc_q_maxsize_reg, + chip->bri_q_maxsize_reg, + chip->comp_resp_timeout_reg, + chip->bus_slave_control_reg, + chip->deadlock_avoid_type_reg, + chip->intpin_reg_wpermit_reg0, + 
chip->intpin_reg_wpermit_reg1, + chip->intpin_reg_wpermit_reg2, + chip->intpin_reg_wpermit_reg3); + for (i = 0; i < MAX_NUM_INT_REDUCE_CONTROL_REG; i++) { + chip->int_reduce_control_reg[i] = + ioread32(p + PCH_PHUB_INT_REDUCE_CONTROL_REG_BASE + 4 * i); + dev_dbg(&pdev->dev, "%s : " + "chip->int_reduce_control_reg[%d]=%x\n", + __func__, i, chip->int_reduce_control_reg[i]); + } + chip->clkcfg_reg = ioread32(p + CLKCFG_REG_OFFSET); + if ((chip->ioh_type == 2) || (chip->ioh_type == 4)) + chip->funcsel_reg = ioread32(p + FUNCSEL_REG_OFFSET); +} + +/* pch_phub_restore_reg_conf - restore register configuration */ +static void pch_phub_restore_reg_conf(struct pci_dev *pdev) +{ + unsigned int i; + struct pch_phub_reg *chip = pci_get_drvdata(pdev); + void __iomem *p; + p = chip->pch_phub_base_address; + + iowrite32(chip->phub_id_reg, p + PCH_PHUB_ID_REG); + iowrite32(chip->q_pri_val_reg, p + PCH_PHUB_QUEUE_PRI_VAL_REG); + iowrite32(chip->rc_q_maxsize_reg, p + PCH_PHUB_RC_QUEUE_MAXSIZE_REG); + iowrite32(chip->bri_q_maxsize_reg, p + PCH_PHUB_BRI_QUEUE_MAXSIZE_REG); + iowrite32(chip->comp_resp_timeout_reg, + p + PCH_PHUB_COMP_RESP_TIMEOUT_REG); + iowrite32(chip->bus_slave_control_reg, + p + PCH_PHUB_BUS_SLAVE_CONTROL_REG); + iowrite32(chip->deadlock_avoid_type_reg, + p + PCH_PHUB_DEADLOCK_AVOID_TYPE_REG); + iowrite32(chip->intpin_reg_wpermit_reg0, + p + PCH_PHUB_INTPIN_REG_WPERMIT_REG0); + iowrite32(chip->intpin_reg_wpermit_reg1, + p + PCH_PHUB_INTPIN_REG_WPERMIT_REG1); + iowrite32(chip->intpin_reg_wpermit_reg2, + p + PCH_PHUB_INTPIN_REG_WPERMIT_REG2); + iowrite32(chip->intpin_reg_wpermit_reg3, + p + PCH_PHUB_INTPIN_REG_WPERMIT_REG3); + dev_dbg(&pdev->dev, "%s : " + "chip->phub_id_reg=%x, " + "chip->q_pri_val_reg=%x, " + "chip->rc_q_maxsize_reg=%x, " + "chip->bri_q_maxsize_reg=%x, " + "chip->comp_resp_timeout_reg=%x, " + "chip->bus_slave_control_reg=%x, " + "chip->deadlock_avoid_type_reg=%x, " + "chip->intpin_reg_wpermit_reg0=%x, " + "chip->intpin_reg_wpermit_reg1=%x, " + "chip->intpin_reg_wpermit_reg2=%x, " + "chip->intpin_reg_wpermit_reg3=%x\n", __func__, + chip->phub_id_reg, + chip->q_pri_val_reg, + chip->rc_q_maxsize_reg, + chip->bri_q_maxsize_reg, + chip->comp_resp_timeout_reg, + chip->bus_slave_control_reg, + chip->deadlock_avoid_type_reg, + chip->intpin_reg_wpermit_reg0, + chip->intpin_reg_wpermit_reg1, + chip->intpin_reg_wpermit_reg2, + chip->intpin_reg_wpermit_reg3); + for (i = 0; i < MAX_NUM_INT_REDUCE_CONTROL_REG; i++) { + iowrite32(chip->int_reduce_control_reg[i], + p + PCH_PHUB_INT_REDUCE_CONTROL_REG_BASE + 4 * i); + dev_dbg(&pdev->dev, "%s : " + "chip->int_reduce_control_reg[%d]=%x\n", + __func__, i, chip->int_reduce_control_reg[i]); + } + + iowrite32(chip->clkcfg_reg, p + CLKCFG_REG_OFFSET); + if ((chip->ioh_type == 2) || (chip->ioh_type == 4)) + iowrite32(chip->funcsel_reg, p + FUNCSEL_REG_OFFSET); +} +#endif + +/** + * pch_phub_read_serial_rom() - Reading Serial ROM + * @offset_address: Serial ROM offset address to read. + * @data: Read buffer for specified Serial ROM value. + */ +static void pch_phub_read_serial_rom(struct pch_phub_reg *chip, + unsigned int offset_address, u8 *data) +{ + void __iomem *mem_addr = chip->pch_phub_extrom_base_address + + offset_address; + + *data = ioread8(mem_addr); +} + +/** + * pch_phub_write_serial_rom() - Writing Serial ROM + * @offset_address: Serial ROM offset address. + * @data: Serial ROM value to write. 
+ */
+static int pch_phub_write_serial_rom(struct pch_phub_reg *chip,
+				     unsigned int offset_address, u8 data)
+{
+	void __iomem *mem_addr = chip->pch_phub_extrom_base_address +
+					(offset_address & PCH_WORD_ADDR_MASK);
+	int i;
+	unsigned int word_data;
+	unsigned int pos;
+	unsigned int mask;
+
+	pos = (offset_address % 4) * 8;
+	mask = ~(0xFF << pos);
+
+	iowrite32(PCH_PHUB_ROM_WRITE_ENABLE,
+		  chip->pch_phub_extrom_base_address + PHUB_CONTROL);
+
+	word_data = ioread32(mem_addr);
+	iowrite32((word_data & mask) | (u32)data << pos, mem_addr);
+
+	i = 0;
+	while (ioread8(chip->pch_phub_extrom_base_address +
+		       PHUB_STATUS) != 0x00) {
+		msleep(1);
+		if (i == PHUB_TIMEOUT)
+			return -ETIMEDOUT;
+		i++;
+	}
+
+	iowrite32(PCH_PHUB_ROM_WRITE_DISABLE,
+		  chip->pch_phub_extrom_base_address + PHUB_CONTROL);
+
+	return 0;
+}
+
+/**
+ * pch_phub_read_serial_rom_val() - Read Serial ROM value
+ * @offset_address: Serial ROM address offset value.
+ * @data: Serial ROM value to read.
+ */
+static void pch_phub_read_serial_rom_val(struct pch_phub_reg *chip,
+					 unsigned int offset_address, u8 *data)
+{
+	unsigned int mem_addr;
+
+	mem_addr = chip->pch_mac_start_address +
+			pch_phub_mac_offset[offset_address];
+
+	pch_phub_read_serial_rom(chip, mem_addr, data);
+}
+
+/**
+ * pch_phub_write_serial_rom_val() - writing Serial ROM value
+ * @offset_address: Serial ROM address offset value.
+ * @data: Serial ROM value.
+ */
+static int pch_phub_write_serial_rom_val(struct pch_phub_reg *chip,
+					 unsigned int offset_address, u8 data)
+{
+	int retval;
+	unsigned int mem_addr;
+
+	mem_addr = chip->pch_mac_start_address +
+			pch_phub_mac_offset[offset_address];
+
+	retval = pch_phub_write_serial_rom(chip, mem_addr, data);
+
+	return retval;
+}
+
+/* pch_phub_gbe_serial_rom_conf - makes Serial ROM header format configuration
+ * for Gigabit Ethernet MAC address
+ */
+static int pch_phub_gbe_serial_rom_conf(struct pch_phub_reg *chip)
+{
+	int retval;
+
+	retval = pch_phub_write_serial_rom(chip, 0x0b, 0xbc);
+	retval |= pch_phub_write_serial_rom(chip, 0x0a, 0x10);
+	retval |= pch_phub_write_serial_rom(chip, 0x09, 0x01);
+	retval |= pch_phub_write_serial_rom(chip, 0x08, 0x02);
+
+	retval |= pch_phub_write_serial_rom(chip, 0x0f, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x0e, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x0d, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x0c, 0x80);
+
+	retval |= pch_phub_write_serial_rom(chip, 0x13, 0xbc);
+	retval |= pch_phub_write_serial_rom(chip, 0x12, 0x10);
+	retval |= pch_phub_write_serial_rom(chip, 0x11, 0x01);
+	retval |= pch_phub_write_serial_rom(chip, 0x10, 0x18);
+
+	retval |= pch_phub_write_serial_rom(chip, 0x1b, 0xbc);
+	retval |= pch_phub_write_serial_rom(chip, 0x1a, 0x10);
+	retval |= pch_phub_write_serial_rom(chip, 0x19, 0x01);
+	retval |= pch_phub_write_serial_rom(chip, 0x18, 0x19);
+
+	retval |= pch_phub_write_serial_rom(chip, 0x23, 0xbc);
+	retval |= pch_phub_write_serial_rom(chip, 0x22, 0x10);
+	retval |= pch_phub_write_serial_rom(chip, 0x21, 0x01);
+	retval |= pch_phub_write_serial_rom(chip, 0x20, 0x3a);
+
+	retval |= pch_phub_write_serial_rom(chip, 0x27, 0x01);
+	retval |= pch_phub_write_serial_rom(chip, 0x26, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x25, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x24, 0x00);
+
+	return retval;
+}
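pch_phub_write_serial_rom() above turns a single-byte ROM write into a 32-bit read-modify-write of the word containing the byte, using pos/mask to select the byte lane. A standalone worked example of that arithmetic (editorial illustration, not part of the patch):

#include <stdint.h>
#include <stdio.h>

/* Mirrors the byte-lane math in pch_phub_write_serial_rom() above. */
int main(void)
{
	unsigned int offset_address = 0x0b;		/* byte 3 of word 0x08 */
	unsigned int pos = (offset_address % 4) * 8;	/* 24 */
	unsigned int mask = ~(0xFF << pos);		/* 0x00FFFFFF */
	uint32_t word_data = 0x11223344;		/* current word */
	uint8_t data = 0xbc;				/* byte to store */

	/* (0x11223344 & 0x00FFFFFF) | (0xbc << 24) == 0xbc223344 */
	printf("0x%08x\n",
	       (unsigned)((word_data & mask) | (uint32_t)data << pos));
	return 0;
}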
+
+/* pch_phub_gbe_serial_rom_conf_mp - makes SerialROM header format
+ * configuration for Gigabit Ethernet MAC address
+ */
+static int pch_phub_gbe_serial_rom_conf_mp(struct pch_phub_reg *chip)
+{
+	int retval;
+	u32 offset_addr;
+
+	offset_addr = 0x200;
+	retval = pch_phub_write_serial_rom(chip, 0x03 + offset_addr, 0xbc);
+	retval |= pch_phub_write_serial_rom(chip, 0x02 + offset_addr, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x01 + offset_addr, 0x40);
+	retval |= pch_phub_write_serial_rom(chip, 0x00 + offset_addr, 0x02);
+
+	retval |= pch_phub_write_serial_rom(chip, 0x07 + offset_addr, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x06 + offset_addr, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x05 + offset_addr, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x04 + offset_addr, 0x80);
+
+	retval |= pch_phub_write_serial_rom(chip, 0x0b + offset_addr, 0xbc);
+	retval |= pch_phub_write_serial_rom(chip, 0x0a + offset_addr, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x09 + offset_addr, 0x40);
+	retval |= pch_phub_write_serial_rom(chip, 0x08 + offset_addr, 0x18);
+
+	retval |= pch_phub_write_serial_rom(chip, 0x13 + offset_addr, 0xbc);
+	retval |= pch_phub_write_serial_rom(chip, 0x12 + offset_addr, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x11 + offset_addr, 0x40);
+	retval |= pch_phub_write_serial_rom(chip, 0x10 + offset_addr, 0x19);
+
+	retval |= pch_phub_write_serial_rom(chip, 0x1b + offset_addr, 0xbc);
+	retval |= pch_phub_write_serial_rom(chip, 0x1a + offset_addr, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x19 + offset_addr, 0x40);
+	retval |= pch_phub_write_serial_rom(chip, 0x18 + offset_addr, 0x3a);
+
+	retval |= pch_phub_write_serial_rom(chip, 0x1f + offset_addr, 0x01);
+	retval |= pch_phub_write_serial_rom(chip, 0x1e + offset_addr, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x1d + offset_addr, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x1c + offset_addr, 0x00);
+
+	return retval;
+}
+
+/**
+ * pch_phub_read_gbe_mac_addr() - Read Gigabit Ethernet MAC address
+ * @data: Buffer of the Gigabit Ethernet MAC address value.
+ */
+static void pch_phub_read_gbe_mac_addr(struct pch_phub_reg *chip, u8 *data)
+{
+	int i;
+
+	for (i = 0; i < ETH_ALEN; i++)
+		pch_phub_read_serial_rom_val(chip, i, &data[i]);
+}
+
+/**
+ * pch_phub_write_gbe_mac_addr() - Write MAC address
+ * @data: Gigabit Ethernet MAC address value.
+ */
+static int pch_phub_write_gbe_mac_addr(struct pch_phub_reg *chip, u8 *data)
+{
+	int retval;
+	int i;
+
+	if ((chip->ioh_type == 1) || (chip->ioh_type == 5)) /* EG20T or ML7831*/
+		retval = pch_phub_gbe_serial_rom_conf(chip);
+	else	/* ML7223 */
+		retval = pch_phub_gbe_serial_rom_conf_mp(chip);
+	if (retval)
+		return retval;
+
+	for (i = 0; i < ETH_ALEN; i++) {
+		retval = pch_phub_write_serial_rom_val(chip, i, data[i]);
+		if (retval)
+			return retval;
+	}
+
+	return retval;
+}
+
+static ssize_t pch_phub_bin_read(struct file *filp, struct kobject *kobj,
+				 struct bin_attribute *attr, char *buf,
+				 loff_t off, size_t count)
+{
+	unsigned int rom_signature;
+	unsigned char rom_length;
+	unsigned int tmp;
+	unsigned int addr_offset;
+	unsigned int orom_size;
+	int ret;
+	int err;
+	ssize_t rom_size;
+
+	struct pch_phub_reg *chip =
+		dev_get_drvdata(container_of(kobj, struct device, kobj));
+
+	ret = mutex_lock_interruptible(&pch_phub_mutex);
+	if (ret) {
+		err = -ERESTARTSYS;
+		goto return_err_nomutex;
+	}
+
+	/* Get Rom signature */
+	chip->pch_phub_extrom_base_address = pci_map_rom(chip->pdev, &rom_size);
+	if (!chip->pch_phub_extrom_base_address) {
+		err = -ENOMEM;	/* was left uninitialized on this path */
+		goto exrom_map_err;
+	}
+
+	pch_phub_read_serial_rom(chip, chip->pch_opt_rom_start_address,
+				(unsigned char *)&rom_signature);
+	rom_signature &= 0xff;
+	pch_phub_read_serial_rom(chip, chip->pch_opt_rom_start_address + 1,
+				(unsigned char *)&tmp);
+	rom_signature |= (tmp & 0xff) << 8;
+	if (rom_signature == 0xAA55) {
+		pch_phub_read_serial_rom(chip,
+					 chip->pch_opt_rom_start_address + 2,
+					 &rom_length);
+		orom_size = rom_length * 512;
+		if (orom_size < off) {
+			addr_offset = 0;
+			goto return_ok;
+		}
+		if (orom_size < count) {
+			addr_offset = 0;
+			goto return_ok;
+		}
+
+		for (addr_offset = 0; addr_offset < count; addr_offset++) {
+			pch_phub_read_serial_rom(chip,
+			    chip->pch_opt_rom_start_address + addr_offset + off,
+			    &buf[addr_offset]);
+		}
+	} else {
+		err = -ENODATA;
+		goto return_err;
+	}
+return_ok:
+	pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address);
+	mutex_unlock(&pch_phub_mutex);
+	return addr_offset;
+
+return_err:
+	pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address);
+exrom_map_err:
+	mutex_unlock(&pch_phub_mutex);
+return_err_nomutex:
+	return err;
+}
+
+static ssize_t pch_phub_bin_write(struct file *filp, struct kobject *kobj,
+				  struct bin_attribute *attr,
+				  char *buf, loff_t off, size_t count)
+{
+	int err;
+	unsigned int addr_offset;
+	int ret;
+	ssize_t rom_size;
+	struct pch_phub_reg *chip =
+		dev_get_drvdata(container_of(kobj, struct device, kobj));
+
+	ret = mutex_lock_interruptible(&pch_phub_mutex);
+	if (ret)
+		return -ERESTARTSYS;
+
+	if (off > PCH_PHUB_OROM_SIZE) {
+		addr_offset = 0;
+		goto return_ok;
+	}
+	if (count > PCH_PHUB_OROM_SIZE) {
+		addr_offset = 0;
+		goto return_ok;
+	}
+
+	chip->pch_phub_extrom_base_address = pci_map_rom(chip->pdev, &rom_size);
+	if (!chip->pch_phub_extrom_base_address) {
+		err = -ENOMEM;
+		goto exrom_map_err;
+	}
+
+	for (addr_offset = 0; addr_offset < count; addr_offset++) {
+		if (PCH_PHUB_OROM_SIZE < off + addr_offset)
+			goto return_ok;
+
+		ret = pch_phub_write_serial_rom(chip,
+		    chip->pch_opt_rom_start_address + addr_offset + off,
+		    buf[addr_offset]);
+		if (ret) {
+			err = ret;
+			goto return_err;
+		}
+	}
+
+return_ok:
+	pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address);
+	mutex_unlock(&pch_phub_mutex);
+	return addr_offset;
+
+return_err:
+	pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address);
+
+exrom_map_err:
+	mutex_unlock(&pch_phub_mutex);
+	return err;
+}
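The two handlers above back the "pch_firmware" sysfs binary attribute registered further below, so the option ROM can be read and flashed from user space with plain file I/O. A hypothetical user-space sketch (editorial illustration, not part of the patch; the PCI address in the path is made up and depends on where the device enumerates):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	unsigned char buf[512];
	ssize_t n;
	int fd = open("/sys/bus/pci/devices/0000:02:00.2/pch_firmware",
		      O_RDONLY);

	if (fd < 0)
		return 1;
	/* This read() is dispatched to pch_phub_bin_read() above. */
	n = read(fd, buf, sizeof(buf));
	if (n >= 2)
		printf("ROM signature bytes: %02x %02x\n", buf[0], buf[1]);
	close(fd);
	return 0;
}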
+
+static ssize_t show_pch_mac(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	u8 mac[8];
+	struct pch_phub_reg *chip = dev_get_drvdata(dev);
+	ssize_t rom_size;
+
+	chip->pch_phub_extrom_base_address = pci_map_rom(chip->pdev, &rom_size);
+	if (!chip->pch_phub_extrom_base_address)
+		return -ENOMEM;
+
+	pch_phub_read_gbe_mac_addr(chip, mac);
+	pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address);
+
+	return sprintf(buf, "%pM\n", mac);
+}
+
+static ssize_t store_pch_mac(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	u8 mac[ETH_ALEN];
+	ssize_t rom_size;
+	struct pch_phub_reg *chip = dev_get_drvdata(dev);
+	int ret;
+
+	if (!mac_pton(buf, mac))
+		return -EINVAL;
+
+	chip->pch_phub_extrom_base_address = pci_map_rom(chip->pdev, &rom_size);
+	if (!chip->pch_phub_extrom_base_address)
+		return -ENOMEM;
+
+	ret = pch_phub_write_gbe_mac_addr(chip, mac);
+	pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address);
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+static DEVICE_ATTR(pch_mac, S_IRUGO | S_IWUSR, show_pch_mac, store_pch_mac);
+
+static struct bin_attribute pch_bin_attr = {
+	.attr = {
+		.name = "pch_firmware",
+		.mode = S_IRUGO | S_IWUSR,
+	},
+	.size = PCH_PHUB_OROM_SIZE + 1,
+	.read = pch_phub_bin_read,
+	.write = pch_phub_bin_write,
+};
+
+static int pch_phub_probe(struct pci_dev *pdev,
+			  const struct pci_device_id *id)
+{
+	int ret;
+	struct pch_phub_reg *chip;
+
+	chip = kzalloc(sizeof(struct pch_phub_reg), GFP_KERNEL);
+	if (chip == NULL)
+		return -ENOMEM;
+
+	ret = pci_enable_device(pdev);
+	if (ret) {
+		dev_err(&pdev->dev,
+			"%s : pci_enable_device FAILED(ret=%d)", __func__, ret);
+		goto err_pci_enable_dev;
+	}
+	dev_dbg(&pdev->dev, "%s : pci_enable_device returns %d\n", __func__,
+		ret);
+
+	ret = pci_request_regions(pdev, KBUILD_MODNAME);
+	if (ret) {
+		dev_err(&pdev->dev,
+			"%s : pci_request_regions FAILED(ret=%d)", __func__, ret);
+		goto err_req_regions;
+	}
+	dev_dbg(&pdev->dev, "%s : pci_request_regions returns %d\n",
+		__func__, ret);
+
+	chip->pch_phub_base_address = pci_iomap(pdev, 1, 0);
+
+	if (chip->pch_phub_base_address == NULL) {
+		dev_err(&pdev->dev, "%s : pci_iomap FAILED", __func__);
+		ret = -ENOMEM;
+		goto err_pci_iomap;
+	}
+	dev_dbg(&pdev->dev, "%s : pci_iomap SUCCESS and value "
+		"in pch_phub_base_address variable is %p\n", __func__,
+		chip->pch_phub_base_address);
+
+	chip->pdev = pdev; /* Save pci device struct */
+
+	if (id->driver_data == 1) { /* EG20T PCH */
+		const char *board_name;
+
+		ret = sysfs_create_file(&pdev->dev.kobj,
+					&dev_attr_pch_mac.attr);
+		if (ret)
+			goto err_sysfs_create;
+
+		ret = sysfs_create_bin_file(&pdev->dev.kobj, &pch_bin_attr);
+		if (ret)
+			goto exit_bin_attr;
+
+		pch_phub_read_modify_write_reg(chip,
+					       (unsigned int)CLKCFG_REG_OFFSET,
+					       CLKCFG_CAN_50MHZ,
+					       CLKCFG_CANCLK_MASK);
+
+		/* quirk for CM-iTC board */
+		board_name = dmi_get_system_info(DMI_BOARD_NAME);
+		if (board_name && strstr(board_name, "CM-iTC"))
+			pch_phub_read_modify_write_reg(chip,
+					(unsigned int)CLKCFG_REG_OFFSET,
+					CLKCFG_UART_48MHZ | CLKCFG_BAUDDIV |
+					CLKCFG_PLL2VCO | CLKCFG_UARTCLKSEL,
+					CLKCFG_UART_MASK);
+
+		/* set the prefetch value */
+		iowrite32(0x000affaa, chip->pch_phub_base_address + 0x14);
+		/* set the interrupt delay value */
+		iowrite32(0x25, chip->pch_phub_base_address + 0x44);
+		chip->pch_opt_rom_start_address = PCH_PHUB_ROM_START_ADDR_EG20T;
+		chip->pch_mac_start_address = PCH_PHUB_MAC_START_ADDR_EG20T;
+	} else if (id->driver_data == 2) { /* ML7213 IOH */
IOH */ + ret = sysfs_create_bin_file(&pdev->dev.kobj, &pch_bin_attr); + if (ret) + goto err_sysfs_create; + /* set the prefetch value + * Device2(USB OHCI #1/ USB EHCI #1/ USB Device):a + * Device4(SDIO #0,1,2):f + * Device6(SATA 2):f + * Device8(USB OHCI #0/ USB EHCI #0):a + */ + iowrite32(0x000affa0, chip->pch_phub_base_address + 0x14); + chip->pch_opt_rom_start_address =\ + PCH_PHUB_ROM_START_ADDR_ML7213; + } else if (id->driver_data == 3) { /* ML7223 IOH Bus-m*/ + /* set the prefetch value + * Device8(GbE) + */ + iowrite32(0x000a0000, chip->pch_phub_base_address + 0x14); + /* set the interrupt delay value */ + iowrite32(0x25, chip->pch_phub_base_address + 0x140); + chip->pch_opt_rom_start_address =\ + PCH_PHUB_ROM_START_ADDR_ML7223; + chip->pch_mac_start_address = PCH_PHUB_MAC_START_ADDR_ML7223; + } else if (id->driver_data == 4) { /* ML7223 IOH Bus-n*/ + ret = sysfs_create_file(&pdev->dev.kobj, + &dev_attr_pch_mac.attr); + if (ret) + goto err_sysfs_create; + ret = sysfs_create_bin_file(&pdev->dev.kobj, &pch_bin_attr); + if (ret) + goto exit_bin_attr; + /* set the prefetch value + * Device2(USB OHCI #0,1,2,3/ USB EHCI #0):a + * Device4(SDIO #0,1):f + * Device6(SATA 2):f + */ + iowrite32(0x0000ffa0, chip->pch_phub_base_address + 0x14); + chip->pch_opt_rom_start_address =\ + PCH_PHUB_ROM_START_ADDR_ML7223; + chip->pch_mac_start_address = PCH_PHUB_MAC_START_ADDR_ML7223; + } else if (id->driver_data == 5) { /* ML7831 */ + ret = sysfs_create_file(&pdev->dev.kobj, + &dev_attr_pch_mac.attr); + if (ret) + goto err_sysfs_create; + + ret = sysfs_create_bin_file(&pdev->dev.kobj, &pch_bin_attr); + if (ret) + goto exit_bin_attr; + + /* set the prefetch value */ + iowrite32(0x000affaa, chip->pch_phub_base_address + 0x14); + /* set the interrupt delay value */ + iowrite32(0x25, chip->pch_phub_base_address + 0x44); + chip->pch_opt_rom_start_address = PCH_PHUB_ROM_START_ADDR_EG20T; + chip->pch_mac_start_address = PCH_PHUB_MAC_START_ADDR_EG20T; + } + + chip->ioh_type = id->driver_data; + pci_set_drvdata(pdev, chip); + + return 0; +exit_bin_attr: + sysfs_remove_file(&pdev->dev.kobj, &dev_attr_pch_mac.attr); + +err_sysfs_create: + pci_iounmap(pdev, chip->pch_phub_base_address); +err_pci_iomap: + pci_release_regions(pdev); +err_req_regions: + pci_disable_device(pdev); +err_pci_enable_dev: + kfree(chip); + dev_err(&pdev->dev, "%s returns %d\n", __func__, ret); + return ret; +} + +static void pch_phub_remove(struct pci_dev *pdev) +{ + struct pch_phub_reg *chip = pci_get_drvdata(pdev); + + sysfs_remove_file(&pdev->dev.kobj, &dev_attr_pch_mac.attr); + sysfs_remove_bin_file(&pdev->dev.kobj, &pch_bin_attr); + pci_iounmap(pdev, chip->pch_phub_base_address); + pci_release_regions(pdev); + pci_disable_device(pdev); + kfree(chip); +} + +#ifdef CONFIG_PM + +static int pch_phub_suspend(struct pci_dev *pdev, pm_message_t state) +{ + int ret; + + pch_phub_save_reg_conf(pdev); + ret = pci_save_state(pdev); + if (ret) { + dev_err(&pdev->dev, + " %s -pci_save_state returns %d\n", __func__, ret); + return ret; + } + pci_enable_wake(pdev, PCI_D3hot, 0); + pci_disable_device(pdev); + pci_set_power_state(pdev, pci_choose_state(pdev, state)); + + return 0; +} + +static int pch_phub_resume(struct pci_dev *pdev) +{ + int ret; + + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + ret = pci_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, + "%s-pci_enable_device failed(ret=%d) ", __func__, ret); + return ret; + } + + pci_enable_wake(pdev, PCI_D3hot, 0); + pch_phub_restore_reg_conf(pdev); + + return 0; +} +#else
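+/* + * Without CONFIG_PM, the .suspend/.resume hooks in the pci_driver below + * are left NULL and the PCI core simply skips them. + */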
+#define pch_phub_suspend NULL +#define pch_phub_resume NULL +#endif /* CONFIG_PM */ + +static struct pci_device_id pch_phub_pcidev_id[] = { + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PCH1_PHUB), 1, }, + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ROHM_ML7213_PHUB), 2, }, + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ROHM_ML7223_mPHUB), 3, }, + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ROHM_ML7223_nPHUB), 4, }, + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ROHM_ML7831_PHUB), 5, }, + { } +}; +MODULE_DEVICE_TABLE(pci, pch_phub_pcidev_id); + +static struct pci_driver pch_phub_driver = { + .name = "pch_phub", + .id_table = pch_phub_pcidev_id, + .probe = pch_phub_probe, + .remove = pch_phub_remove, + .suspend = pch_phub_suspend, + .resume = pch_phub_resume +}; + +module_pci_driver(pch_phub_driver); + +MODULE_DESCRIPTION("Intel EG20T PCH/LAPIS Semiconductor IOH(ML7213/ML7223) PHUB"); +MODULE_LICENSE("GPL"); diff --git a/kernel/drivers/misc/phantom.c b/kernel/drivers/misc/phantom.c new file mode 100644 index 000000000..30754927f --- /dev/null +++ b/kernel/drivers/misc/phantom.c @@ -0,0 +1,571 @@ +/* + * Copyright (C) 2005-2007 Jiri Slaby <jirislaby@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * You need a userspace library to cooperate with this driver. It (and other + * info) may be obtained here: + * http://www.fi.muni.cz/~xslaby/phantom.html + * or alternatively, you might use OpenHaptics provided by Sensable. + */ + +#include <linux/compat.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/pci.h> +#include <linux/fs.h> +#include <linux/poll.h> +#include <linux/interrupt.h> +#include <linux/cdev.h> +#include <linux/slab.h> +#include <linux/phantom.h> +#include <linux/sched.h> +#include <linux/mutex.h> + +#include <linux/atomic.h> +#include <asm/io.h> + +#define PHANTOM_VERSION "n0.9.8" + +#define PHANTOM_MAX_MINORS 8 + +#define PHN_IRQCTL 0x4c /* irq control in caddr space */ + +#define PHB_RUNNING 1 +#define PHB_NOT_OH 2 + +static DEFINE_MUTEX(phantom_mutex); +static struct class *phantom_class; +static int phantom_major; + +struct phantom_device { + unsigned int opened; + void __iomem *caddr; + u32 __iomem *iaddr; + u32 __iomem *oaddr; + unsigned long status; + atomic_t counter; + + wait_queue_head_t wait; + struct cdev cdev; + + struct mutex open_lock; + spinlock_t regs_lock; + + /* used in NOT_OH mode */ + struct phm_regs oregs; + u32 ctl_reg; +}; + +static unsigned char phantom_devices[PHANTOM_MAX_MINORS]; + +static int phantom_status(struct phantom_device *dev, unsigned long newstat) +{ + pr_debug("phantom_status %lx %lx\n", dev->status, newstat); + + if (!(dev->status & PHB_RUNNING) && (newstat & PHB_RUNNING)) { + atomic_set(&dev->counter, 0); + iowrite32(PHN_CTL_IRQ, dev->iaddr + PHN_CONTROL); + iowrite32(0x43, dev->caddr + PHN_IRQCTL); + ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */ + } else if ((dev->status & PHB_RUNNING) && !(newstat & PHB_RUNNING)) { + iowrite32(0, dev->caddr + PHN_IRQCTL); + ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */ + } + + dev->status = newstat; + + return 0; +} + +/* + * File ops + */ + +static long phantom_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct phantom_device *dev = file->private_data; + struct phm_regs rs; + struct phm_reg r; + void __user *argp = (void __user *)arg; + 
unsigned long flags; + unsigned int i; + + switch (cmd) { + case PHN_SETREG: + case PHN_SET_REG: + if (copy_from_user(&r, argp, sizeof(r))) + return -EFAULT; + + if (r.reg > 7) + return -EINVAL; + + spin_lock_irqsave(&dev->regs_lock, flags); + if (r.reg == PHN_CONTROL && (r.value & PHN_CTL_IRQ) && + phantom_status(dev, dev->status | PHB_RUNNING)){ + spin_unlock_irqrestore(&dev->regs_lock, flags); + return -ENODEV; + } + + pr_debug("phantom: writing %x to %u\n", r.value, r.reg); + + /* preserve amp bit (don't allow it to be changed while in NOT_OH) */ + if (r.reg == PHN_CONTROL && (dev->status & PHB_NOT_OH)) { + r.value &= ~PHN_CTL_AMP; + r.value |= dev->ctl_reg & PHN_CTL_AMP; + dev->ctl_reg = r.value; + } + + iowrite32(r.value, dev->iaddr + r.reg); + ioread32(dev->iaddr); /* PCI posting */ + + if (r.reg == PHN_CONTROL && !(r.value & PHN_CTL_IRQ)) + phantom_status(dev, dev->status & ~PHB_RUNNING); + spin_unlock_irqrestore(&dev->regs_lock, flags); + break; + case PHN_SETREGS: + case PHN_SET_REGS: + if (copy_from_user(&rs, argp, sizeof(rs))) + return -EFAULT; + + pr_debug("phantom: SRS %u regs %x\n", rs.count, rs.mask); + spin_lock_irqsave(&dev->regs_lock, flags); + if (dev->status & PHB_NOT_OH) + memcpy(&dev->oregs, &rs, sizeof(rs)); + else { + u32 m = min(rs.count, 8U); + for (i = 0; i < m; i++) + if (rs.mask & BIT(i)) + iowrite32(rs.values[i], dev->oaddr + i); + ioread32(dev->iaddr); /* PCI posting */ + } + spin_unlock_irqrestore(&dev->regs_lock, flags); + break; + case PHN_GETREG: + case PHN_GET_REG: + if (copy_from_user(&r, argp, sizeof(r))) + return -EFAULT; + + if (r.reg > 7) + return -EINVAL; + + r.value = ioread32(dev->iaddr + r.reg); + + if (copy_to_user(argp, &r, sizeof(r))) + return -EFAULT; + break; + case PHN_GETREGS: + case PHN_GET_REGS: { + u32 m; + + if (copy_from_user(&rs, argp, sizeof(rs))) + return -EFAULT; + + m = min(rs.count, 8U); + + pr_debug("phantom: GRS %u regs %x\n", rs.count, rs.mask); + spin_lock_irqsave(&dev->regs_lock, flags); + for (i = 0; i < m; i++) + if (rs.mask & BIT(i)) + rs.values[i] = ioread32(dev->iaddr + i); + atomic_set(&dev->counter, 0); + spin_unlock_irqrestore(&dev->regs_lock, flags); + + if (copy_to_user(argp, &rs, sizeof(rs))) + return -EFAULT; + break; + } case PHN_NOT_OH: + spin_lock_irqsave(&dev->regs_lock, flags); + if (dev->status & PHB_RUNNING) { + printk(KERN_ERR "phantom: you need to set NOT_OH " + "before you start the device!\n"); + spin_unlock_irqrestore(&dev->regs_lock, flags); + return -EINVAL; + } + dev->status |= PHB_NOT_OH; + spin_unlock_irqrestore(&dev->regs_lock, flags); + break; + default: + return -ENOTTY; + } + + return 0; +} + +#ifdef CONFIG_COMPAT +static long phantom_compat_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + if (_IOC_NR(cmd) <= 3 && _IOC_SIZE(cmd) == sizeof(compat_uptr_t)) { + cmd &= ~(_IOC_SIZEMASK << _IOC_SIZESHIFT); + cmd |= sizeof(void *) << _IOC_SIZESHIFT; + } + return phantom_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)); +} +#else +#define phantom_compat_ioctl NULL +#endif + +static int phantom_open(struct inode *inode, struct file *file) +{ + struct phantom_device *dev = container_of(inode->i_cdev, + struct phantom_device, cdev); + + mutex_lock(&phantom_mutex); + nonseekable_open(inode, file); + + if (mutex_lock_interruptible(&dev->open_lock)) { + mutex_unlock(&phantom_mutex); + return -ERESTARTSYS; + } + + if (dev->opened) { + mutex_unlock(&dev->open_lock); + mutex_unlock(&phantom_mutex); + return -EINVAL; + } + + WARN_ON(dev->status & PHB_NOT_OH); + + file->private_data = dev; + +
atomic_set(&dev->counter, 0); + dev->opened++; + mutex_unlock(&dev->open_lock); + mutex_unlock(&phantom_mutex); + return 0; +} + +static int phantom_release(struct inode *inode, struct file *file) +{ + struct phantom_device *dev = file->private_data; + + mutex_lock(&dev->open_lock); + + dev->opened = 0; + phantom_status(dev, dev->status & ~PHB_RUNNING); + dev->status &= ~PHB_NOT_OH; + + mutex_unlock(&dev->open_lock); + + return 0; +} + +static unsigned int phantom_poll(struct file *file, poll_table *wait) +{ + struct phantom_device *dev = file->private_data; + unsigned int mask = 0; + + pr_debug("phantom_poll: %d\n", atomic_read(&dev->counter)); + poll_wait(file, &dev->wait, wait); + + if (!(dev->status & PHB_RUNNING)) + mask = POLLERR; + else if (atomic_read(&dev->counter)) + mask = POLLIN | POLLRDNORM; + + pr_debug("phantom_poll end: %x/%d\n", mask, atomic_read(&dev->counter)); + + return mask; +} + +static const struct file_operations phantom_file_ops = { + .open = phantom_open, + .release = phantom_release, + .unlocked_ioctl = phantom_ioctl, + .compat_ioctl = phantom_compat_ioctl, + .poll = phantom_poll, + .llseek = no_llseek, +}; + +static irqreturn_t phantom_isr(int irq, void *data) +{ + struct phantom_device *dev = data; + unsigned int i; + u32 ctl; + + spin_lock(&dev->regs_lock); + ctl = ioread32(dev->iaddr + PHN_CONTROL); + if (!(ctl & PHN_CTL_IRQ)) { + spin_unlock(&dev->regs_lock); + return IRQ_NONE; + } + + iowrite32(0, dev->iaddr); + iowrite32(0xc0, dev->iaddr); + + if (dev->status & PHB_NOT_OH) { + struct phm_regs *r = &dev->oregs; + u32 m = min(r->count, 8U); + + for (i = 0; i < m; i++) + if (r->mask & BIT(i)) + iowrite32(r->values[i], dev->oaddr + i); + + dev->ctl_reg ^= PHN_CTL_AMP; + iowrite32(dev->ctl_reg, dev->iaddr + PHN_CONTROL); + } + spin_unlock(&dev->regs_lock); + + ioread32(dev->iaddr); /* PCI posting */ + + atomic_inc(&dev->counter); + wake_up_interruptible(&dev->wait); + + return IRQ_HANDLED; +} + +/* + * Init and deinit driver + */ + +static unsigned int phantom_get_free(void) +{ + unsigned int i; + + for (i = 0; i < PHANTOM_MAX_MINORS; i++) + if (phantom_devices[i] == 0) + break; + + return i; +} + +static int phantom_probe(struct pci_dev *pdev, + const struct pci_device_id *pci_id) +{ + struct phantom_device *pht; + unsigned int minor; + int retval; + + retval = pci_enable_device(pdev); + if (retval) { + dev_err(&pdev->dev, "pci_enable_device failed!\n"); + goto err; + } + + minor = phantom_get_free(); + if (minor == PHANTOM_MAX_MINORS) { + dev_err(&pdev->dev, "too many devices found!\n"); + retval = -EIO; + goto err_dis; + } + + phantom_devices[minor] = 1; + + retval = pci_request_regions(pdev, "phantom"); + if (retval) { + dev_err(&pdev->dev, "pci_request_regions failed!\n"); + goto err_null; + } + + retval = -ENOMEM; + pht = kzalloc(sizeof(*pht), GFP_KERNEL); + if (pht == NULL) { + dev_err(&pdev->dev, "unable to allocate device\n"); + goto err_reg; + } + + pht->caddr = pci_iomap(pdev, 0, 0); + if (pht->caddr == NULL) { + dev_err(&pdev->dev, "can't remap conf space\n"); + goto err_fr; + } + pht->iaddr = pci_iomap(pdev, 2, 0); + if (pht->iaddr == NULL) { + dev_err(&pdev->dev, "can't remap input space\n"); + goto err_unmc; + } + pht->oaddr = pci_iomap(pdev, 3, 0); + if (pht->oaddr == NULL) { + dev_err(&pdev->dev, "can't remap output space\n"); + goto err_unmi; + } + + mutex_init(&pht->open_lock); + spin_lock_init(&pht->regs_lock); + init_waitqueue_head(&pht->wait); + cdev_init(&pht->cdev, &phantom_file_ops); + pht->cdev.owner = THIS_MODULE; + + iowrite32(0, 
pht->caddr + PHN_IRQCTL); + ioread32(pht->caddr + PHN_IRQCTL); /* PCI posting */ + retval = request_irq(pdev->irq, phantom_isr, + IRQF_SHARED, "phantom", pht); + if (retval) { + dev_err(&pdev->dev, "can't establish ISR\n"); + goto err_unmo; + } + + retval = cdev_add(&pht->cdev, MKDEV(phantom_major, minor), 1); + if (retval) { + dev_err(&pdev->dev, "chardev registration failed\n"); + goto err_irq; + } + + if (IS_ERR(device_create(phantom_class, &pdev->dev, + MKDEV(phantom_major, minor), NULL, + "phantom%u", minor))) + dev_err(&pdev->dev, "can't create device\n"); + + pci_set_drvdata(pdev, pht); + + return 0; +err_irq: + free_irq(pdev->irq, pht); +err_unmo: + pci_iounmap(pdev, pht->oaddr); +err_unmi: + pci_iounmap(pdev, pht->iaddr); +err_unmc: + pci_iounmap(pdev, pht->caddr); +err_fr: + kfree(pht); +err_reg: + pci_release_regions(pdev); +err_null: + phantom_devices[minor] = 0; +err_dis: + pci_disable_device(pdev); +err: + return retval; +} + +static void phantom_remove(struct pci_dev *pdev) +{ + struct phantom_device *pht = pci_get_drvdata(pdev); + unsigned int minor = MINOR(pht->cdev.dev); + + device_destroy(phantom_class, MKDEV(phantom_major, minor)); + + cdev_del(&pht->cdev); + + iowrite32(0, pht->caddr + PHN_IRQCTL); + ioread32(pht->caddr + PHN_IRQCTL); /* PCI posting */ + free_irq(pdev->irq, pht); + + pci_iounmap(pdev, pht->oaddr); + pci_iounmap(pdev, pht->iaddr); + pci_iounmap(pdev, pht->caddr); + + kfree(pht); + + pci_release_regions(pdev); + + phantom_devices[minor] = 0; + + pci_disable_device(pdev); +} + +#ifdef CONFIG_PM +static int phantom_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct phantom_device *dev = pci_get_drvdata(pdev); + + iowrite32(0, dev->caddr + PHN_IRQCTL); + ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */ + + synchronize_irq(pdev->irq); + + return 0; +} + +static int phantom_resume(struct pci_dev *pdev) +{ + struct phantom_device *dev = pci_get_drvdata(pdev); + + iowrite32(0, dev->caddr + PHN_IRQCTL); + + return 0; +} +#else +#define phantom_suspend NULL +#define phantom_resume NULL +#endif + +static struct pci_device_id phantom_pci_tbl[] = { + { .vendor = PCI_VENDOR_ID_PLX, .device = PCI_DEVICE_ID_PLX_9050, + .subvendor = PCI_VENDOR_ID_PLX, .subdevice = PCI_DEVICE_ID_PLX_9050, + .class = PCI_CLASS_BRIDGE_OTHER << 8, .class_mask = 0xffff00 }, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, phantom_pci_tbl); + +static struct pci_driver phantom_pci_driver = { + .name = "phantom", + .id_table = phantom_pci_tbl, + .probe = phantom_probe, + .remove = phantom_remove, + .suspend = phantom_suspend, + .resume = phantom_resume +}; + +static CLASS_ATTR_STRING(version, 0444, PHANTOM_VERSION); + +static int __init phantom_init(void) +{ + int retval; + dev_t dev; + + phantom_class = class_create(THIS_MODULE, "phantom"); + if (IS_ERR(phantom_class)) { + retval = PTR_ERR(phantom_class); + printk(KERN_ERR "phantom: can't register phantom class\n"); + goto err; + } + retval = class_create_file(phantom_class, &class_attr_version.attr); + if (retval) { + printk(KERN_ERR "phantom: can't create sysfs version file\n"); + goto err_class; + } + + retval = alloc_chrdev_region(&dev, 0, PHANTOM_MAX_MINORS, "phantom"); + if (retval) { + printk(KERN_ERR "phantom: can't register character device\n"); + goto err_attr; + } + phantom_major = MAJOR(dev); + + retval = pci_register_driver(&phantom_pci_driver); + if (retval) { + printk(KERN_ERR "phantom: can't register pci driver\n"); + goto err_unchr; + } + + printk(KERN_INFO "Phantom Linux Driver, version " PHANTOM_VERSION ", " + "init 
OK\n"); + + return 0; +err_unchr: + unregister_chrdev_region(dev, PHANTOM_MAX_MINORS); +err_attr: + class_remove_file(phantom_class, &class_attr_version.attr); +err_class: + class_destroy(phantom_class); +err: + return retval; +} + +static void __exit phantom_exit(void) +{ + pci_unregister_driver(&phantom_pci_driver); + + unregister_chrdev_region(MKDEV(phantom_major, 0), PHANTOM_MAX_MINORS); + + class_remove_file(phantom_class, &class_attr_version.attr); + class_destroy(phantom_class); + + pr_debug("phantom: module successfully removed\n"); +} + +module_init(phantom_init); +module_exit(phantom_exit); + +MODULE_AUTHOR("Jiri Slaby <jirislaby@gmail.com>"); +MODULE_DESCRIPTION("Sensable Phantom driver (PCI devices)"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(PHANTOM_VERSION); diff --git a/kernel/drivers/misc/pti.c b/kernel/drivers/misc/pti.c new file mode 100644 index 000000000..eda38cbe8 --- /dev/null +++ b/kernel/drivers/misc/pti.c @@ -0,0 +1,988 @@ +/* + * pti.c - PTI driver for cJTAG data extraction + * + * Copyright (C) Intel 2010 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * The PTI (Parallel Trace Interface) driver directs trace data routed from + * various parts in the system out through the Intel Penwell PTI port and + * out of the mobile device for analysis with a debugging tool + * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7, + * compact JTAG, standard.
+ */ + +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/console.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/tty.h> +#include <linux/tty_driver.h> +#include <linux/pci.h> +#include <linux/mutex.h> +#include <linux/miscdevice.h> +#include <linux/pti.h> +#include <linux/slab.h> +#include <linux/uaccess.h> + +#define DRIVERNAME "pti" +#define PCINAME "pciPTI" +#define TTYNAME "ttyPTI" +#define CHARNAME "pti" +#define PTITTY_MINOR_START 0 +#define PTITTY_MINOR_NUM 2 +#define MAX_APP_IDS 16 /* 128 channel ids / u8 bit size */ +#define MAX_OS_IDS 16 /* 128 channel ids / u8 bit size */ +#define MAX_MODEM_IDS 16 /* 128 channel ids / u8 bit size */ +#define MODEM_BASE_ID 71 /* modem master ID address */ +#define CONTROL_ID 72 /* control master ID address */ +#define CONSOLE_ID 73 /* console master ID address */ +#define OS_BASE_ID 74 /* base OS master ID address */ +#define APP_BASE_ID 80 /* base App master ID address */ +#define CONTROL_FRAME_LEN 32 /* PTI control frame maximum size */ +#define USER_COPY_SIZE 8192 /* 8Kb buffer for user space copy */ +#define APERTURE_14 0x3800000 /* offset to first OS write addr */ +#define APERTURE_LEN 0x400000 /* address length */ + +struct pti_tty { + struct pti_masterchannel *mc; +}; + +struct pti_dev { + struct tty_port port[PTITTY_MINOR_NUM]; + unsigned long pti_addr; + unsigned long aperture_base; + void __iomem *pti_ioaddr; + u8 ia_app[MAX_APP_IDS]; + u8 ia_os[MAX_OS_IDS]; + u8 ia_modem[MAX_MODEM_IDS]; +}; + +/* + * This protects access to ia_app, ia_os, and ia_modem, + * which keeps track of channels allocated in + * an aperture write id. + */ +static DEFINE_MUTEX(alloclock); + +static const struct pci_device_id pci_ids[] = { + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x82B)}, + {0} +}; + +static struct tty_driver *pti_tty_driver; +static struct pti_dev *drv_data; + +static unsigned int pti_console_channel; +static unsigned int pti_control_channel; + +/** + * pti_write_to_aperture()- The private write function to PTI HW. + * + * @mc: The 'aperture'. It's part of a write address that holds + * a master and channel ID. + * @buf: Data being written to the HW that will ultimately be seen + * in a debugging tool (Fido, Lauterbach). + * @len: Size of buffer. + * + * Since each aperture is specified by a unique + * master/channel ID, no two processes will be writing + * to the same aperture at the same time so no lock is required. The + * PTI-Output agent will send these out in the order that they arrived, and + * thus, it will intermix these messages. The debug tool can then later + * regroup the appropriate message segments together reconstituting each + * message. + */ +static void pti_write_to_aperture(struct pti_masterchannel *mc, + u8 *buf, + int len) +{ + int dwordcnt; + int final; + int i; + u32 ptiword; + u32 __iomem *aperture; + u8 *p = buf; + + /* + * calculate the aperture offset from the base using the master and + * channel id's. 
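+ * For example, master 80 (APP_BASE_ID) with channel 5 maps to byte + * offset (80 << 15) + (5 << 8) = 0x280000 + 0x500 = 0x280500 from + * pti_ioaddr.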
+ */ + aperture = drv_data->pti_ioaddr + (mc->master << 15) + + (mc->channel << 8); + + dwordcnt = len >> 2; + final = len - (dwordcnt << 2); /* final = trailing bytes */ + if (final == 0 && dwordcnt != 0) { /* always need a final dword */ + final += 4; + dwordcnt--; + } + + for (i = 0; i < dwordcnt; i++) { + ptiword = be32_to_cpu(*(u32 *)p); + p += 4; + iowrite32(ptiword, aperture); + } + + aperture += PTI_LASTDWORD_DTS; /* adding DTS signals that this is the EOM */ + + ptiword = 0; + for (i = 0; i < final; i++) + ptiword |= *p++ << (24-(8*i)); + + iowrite32(ptiword, aperture); + return; +} + +/** + * pti_control_frame_built_and_sent()- control frame build and send function. + * + * @mc: The master / channel structure on which the function + * built a control frame. + * @thread_name: The thread name associated with the master / channel or + * 'NULL' if using the 'current' global variable. + * + * To be able to post process the PTI contents on host side, a control frame + * is added before sending any PTI content. So the host side knows, for + * each PTI frame, the name of the thread using a dedicated master / channel. + * The thread name is retrieved from 'current' global variable if 'thread_name' + * is 'NULL', else it is retrieved from 'thread_name' parameter. + * This function builds this frame and sends it to a master ID CONTROL_ID. + * The overhead is only 32 bytes since the driver only writes to HW + * in 32 byte chunks. + */ +static void pti_control_frame_built_and_sent(struct pti_masterchannel *mc, + const char *thread_name) +{ + /* + * Since we access the comm member in current's task_struct, we only + * need to be as large as what 'comm' in that structure is. + */ + char comm[TASK_COMM_LEN]; + struct pti_masterchannel mccontrol = {.master = CONTROL_ID, + .channel = 0}; + const char *thread_name_p; + const char *control_format = "%3d %3d %s"; + u8 control_frame[CONTROL_FRAME_LEN]; + + if (!thread_name) { + if (!in_interrupt()) + get_task_comm(comm, current); + else + strncpy(comm, "Interrupt", TASK_COMM_LEN); + + /* Absolutely ensure our buffer is zero terminated. */ + comm[TASK_COMM_LEN-1] = 0; + thread_name_p = comm; + } else { + thread_name_p = thread_name; + } + + mccontrol.channel = pti_control_channel; + pti_control_channel = (pti_control_channel + 1) & 0x7f; + + snprintf(control_frame, CONTROL_FRAME_LEN, control_format, mc->master, + mc->channel, thread_name_p); + pti_write_to_aperture(&mccontrol, control_frame, strlen(control_frame)); +} + +/** + * pti_write_full_frame_to_aperture()- high level function to + * write to PTI. + * + * @mc: The 'aperture'. It's part of a write address that holds + * a master and channel ID. + * @buf: Data being written to the HW that will ultimately be seen + * in a debugging tool (Fido, Lauterbach). + * @len: Size of buffer. + * + * All threads sending data (either console, user space application, ...) + * are calling the high level function to write to PTI meaning that it is + * possible to add a control frame before sending the content. + */ +static void pti_write_full_frame_to_aperture(struct pti_masterchannel *mc, + const unsigned char *buf, + int len) +{ + pti_control_frame_built_and_sent(mc, NULL); + pti_write_to_aperture(mc, (u8 *)buf, len); +} + +/** + * get_id()- Allocate a master and channel ID. + * + * @id_array: an array of bits representing what channel + * id's are allocated for writing. + * @max_ids: The max amount of available write IDs to use. + * @base_id: The starting SW channel ID, based on the Intel + * PTI arch.
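+ * (i.e. APP_BASE_ID, OS_BASE_ID or MODEM_BASE_ID defined above).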
+ * @thread_name: The thread name associated with the master / channel or + * 'NULL' if using the 'current' global variable. + * + * Returns: + * pti_masterchannel struct with master, channel ID address + * NULL for error + * + * Each bit in the arrays ia_app and ia_os corresponds to a master and + * channel id. The bit is one if the id is taken and 0 if free. For + * every master there are 128 channel id's. + */ +static struct pti_masterchannel *get_id(u8 *id_array, + int max_ids, + int base_id, + const char *thread_name) +{ + struct pti_masterchannel *mc; + int i, j, mask; + + mc = kmalloc(sizeof(struct pti_masterchannel), GFP_KERNEL); + if (mc == NULL) + return NULL; + + /* look for a byte with a free bit */ + for (i = 0; i < max_ids; i++) + if (id_array[i] != 0xff) + break; + if (i == max_ids) { + kfree(mc); + return NULL; + } + /* find the bit in the 128 possible channel opportunities */ + mask = 0x80; + for (j = 0; j < 8; j++) { + if ((id_array[i] & mask) == 0) + break; + mask >>= 1; + } + + /* grab it */ + id_array[i] |= mask; + mc->master = base_id; + mc->channel = ((i & 0xf)<<3) + j; + /* write new master Id / channel Id allocation to channel control */ + pti_control_frame_built_and_sent(mc, thread_name); + return mc; +} + +/* + * The following three functions: + * pti_request_masterchannel(), pti_release_masterchannel() + * and pti_writedata() are an API for other kernel drivers to + * access PTI. + */ + +/** + * pti_request_masterchannel()- Kernel API function used to allocate + * a master, channel ID address + * to write to PTI HW. + * + * @type: 0- request Application master, channel aperture ID + * write address. + * 1- request OS master, channel aperture ID write + * address. + * 2- request Modem master, channel aperture ID + * write address. + * Other values, error. + * @thread_name: The thread name associated with the master / channel or + * 'NULL' if using the 'current' global variable. + * + * Returns: + * pti_masterchannel struct + * NULL for error + */ +struct pti_masterchannel *pti_request_masterchannel(u8 type, + const char *thread_name) +{ + struct pti_masterchannel *mc; + + mutex_lock(&alloclock); + + switch (type) { + + case 0: + mc = get_id(drv_data->ia_app, MAX_APP_IDS, + APP_BASE_ID, thread_name); + break; + + case 1: + mc = get_id(drv_data->ia_os, MAX_OS_IDS, + OS_BASE_ID, thread_name); + break; + + case 2: + mc = get_id(drv_data->ia_modem, MAX_MODEM_IDS, + MODEM_BASE_ID, thread_name); + break; + default: + mc = NULL; + } + + mutex_unlock(&alloclock); + return mc; +} +EXPORT_SYMBOL_GPL(pti_request_masterchannel); + +/** + * pti_release_masterchannel()- Kernel API function used to release + * a master, channel ID address + * used to write to PTI HW. + * + * @mc: master, channel aperture ID address to be released. This + * will de-allocate the structure via kfree(). + */ +void pti_release_masterchannel(struct pti_masterchannel *mc) +{ + u8 master, channel, i; + + mutex_lock(&alloclock); + + if (mc) { + master = mc->master; + channel = mc->channel; + + if (master == APP_BASE_ID) { + i = channel >> 3; + drv_data->ia_app[i] &= ~(0x80>>(channel & 0x7)); + } else if (master == OS_BASE_ID) { + i = channel >> 3; + drv_data->ia_os[i] &= ~(0x80>>(channel & 0x7)); + } else { + i = channel >> 3; + drv_data->ia_modem[i] &= ~(0x80>>(channel & 0x7)); + } + + kfree(mc); + } + + mutex_unlock(&alloclock); +} +EXPORT_SYMBOL_GPL(pti_release_masterchannel); + +/** + * pti_writedata()- Kernel API function used to write trace + * debugging data to PTI HW.
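+ * + * A minimal usage sketch for another kernel driver (illustrative only; + * the message buffer and the OS-type id of 1 are assumptions, not code + * from this file): + * + * struct pti_masterchannel *mc; + * u8 msg[] = "trace point"; + * + * mc = pti_request_masterchannel(1, NULL); + * if (mc) { + * pti_writedata(mc, msg, sizeof(msg) - 1); + * pti_release_masterchannel(mc); + * }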
+ * + * @mc: Master, channel aperture ID address to write to. + * Null value will return with no write occurring. + * @buf: Trace debugging data to write to the PTI HW. + * Null value will return with no write occurring. + * @count: Size of buf. Value of 0 or a negative number will + * return with no write occurring. + */ +void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count) +{ + /* + * since this function is exported, this is treated like an + * API function, thus, all parameters should + * be checked for validity. + */ + if ((mc != NULL) && (buf != NULL) && (count > 0)) + pti_write_to_aperture(mc, buf, count); + return; +} +EXPORT_SYMBOL_GPL(pti_writedata); + +/* + * for the tty_driver_*() basic function descriptions, see tty_driver.h. + * The header comments here cover only the PTI-related specifics. + */ + +/** + * pti_tty_driver_open()- Open an Application master, channel aperture + * ID to the PTI device via tty device. + * + * @tty: tty interface. + * @filp: filp interface passed to tty_port_open() call. + * + * Returns: + * int, 0 for success + * otherwise, fail value + * + * The main purpose of using the tty device interface is for + * each tty port to have a unique PTI write aperture. In an + * example use case, ttyPTI0 gets syslogd and an APP aperture + * ID and ttyPTI1 is where the n_tracesink ldisc hooks to route + * modem messages into PTI. Modem trace data does not have to + * go to ttyPTI1, but ttyPTI0 and ttyPTI1 do need to be distinct + * master IDs. These messages go through the PTI HW and out of + * the handheld platform and to the Fido/Lauterbach device. + */ +static int pti_tty_driver_open(struct tty_struct *tty, struct file *filp) +{ + /* + * we actually want to allocate a new channel per open, per + * system arch. HW provides plenty of channels for a single + * system task to have its own channel to write trace data. This + * also removes a locking requirement for the actual write + * procedure. + */ + return tty_port_open(tty->port, tty, filp); +} + +/** + * pti_tty_driver_close()- close tty device and release Application + * master, channel aperture ID to the PTI device via tty device. + * + * @tty: tty interface. + * @filp: filp interface passed to tty_port_close() call. + * + * The main purpose of using the tty device interface is to route + * syslog daemon messages to the PTI HW and out of the handheld platform + * and to the Fido/Lauterbach device. + */ +static void pti_tty_driver_close(struct tty_struct *tty, struct file *filp) +{ + tty_port_close(tty->port, tty, filp); +} + +/** + * pti_tty_install()- Used to set up specific master-channels + * to tty ports for organizational purposes when + * tracing is viewed from debugging tools. + * + * @driver: tty driver information. + * @tty: tty struct containing pti information.
+ * + * Returns: + * 0 for success + * otherwise, error + */ +static int pti_tty_install(struct tty_driver *driver, struct tty_struct *tty) +{ + int idx = tty->index; + struct pti_tty *pti_tty_data; + int ret = tty_standard_install(driver, tty); + + if (ret == 0) { + pti_tty_data = kmalloc(sizeof(struct pti_tty), GFP_KERNEL); + if (pti_tty_data == NULL) + return -ENOMEM; + + if (idx == PTITTY_MINOR_START) + pti_tty_data->mc = pti_request_masterchannel(0, NULL); + else + pti_tty_data->mc = pti_request_masterchannel(2, NULL); + + if (pti_tty_data->mc == NULL) { + kfree(pti_tty_data); + return -ENXIO; + } + tty->driver_data = pti_tty_data; + } + + return ret; +} + +/** + * pti_tty_cleanup()- Used to de-allocate master-channel resources + * tied to tty's of this driver. + * + * @tty: tty struct containing pti information. + */ +static void pti_tty_cleanup(struct tty_struct *tty) +{ + struct pti_tty *pti_tty_data = tty->driver_data; + if (pti_tty_data == NULL) + return; + pti_release_masterchannel(pti_tty_data->mc); + kfree(pti_tty_data); + tty->driver_data = NULL; +} + +/** + * pti_tty_driver_write()- Write trace debugging data through the tty + * interface to the PTI HW. + * + * @tty: tty struct whose driver_data holds the master, channel write ID. + * @buf: trace data to be written. + * @len: # of bytes to write. + * + * Returns: + * int, # of bytes written + * otherwise, error + */ +static int pti_tty_driver_write(struct tty_struct *tty, + const unsigned char *buf, int len) +{ + struct pti_tty *pti_tty_data = tty->driver_data; + if ((pti_tty_data != NULL) && (pti_tty_data->mc != NULL)) { + pti_write_to_aperture(pti_tty_data->mc, (u8 *)buf, len); + return len; + } + /* + * we can't write to the pti hardware if the private driver_data + * and the mc address is not there. + */ + else + return -EFAULT; +} + +/** + * pti_tty_write_room()- Always returns 2048. + * + * @tty: contains tty info of the pti driver. + */ +static int pti_tty_write_room(struct tty_struct *tty) +{ + return 2048; +} + +/** + * pti_char_open()- Open an Application master, channel aperture + * ID to the PTI device. Part of the misc device implementation. + * + * @inode: not used. + * @filp: Output- will have a masterchannel struct set containing + * the allocated application PTI aperture write address. + * + * Returns: + * int, 0 for success + * otherwise, a fail value + */ +static int pti_char_open(struct inode *inode, struct file *filp) +{ + struct pti_masterchannel *mc; + + /* + * We really do want to fail immediately if + * pti_request_masterchannel() fails, + * before assigning the value to filp->private_data. + * Slightly easier to debug if this driver needs debugging. + */ + mc = pti_request_masterchannel(0, NULL); + if (mc == NULL) + return -ENOMEM; + filp->private_data = mc; + return 0; +} + +/** + * pti_char_release()- Close a char channel to the PTI device. Part + * of the misc device implementation. + * + * @inode: Not used in this implementation. + * @filp: Contains private_data that contains the master, channel + * ID to be released by the PTI device. + * + * Returns: + * always 0 + */ +static int pti_char_release(struct inode *inode, struct file *filp) +{ + pti_release_masterchannel(filp->private_data); + filp->private_data = NULL; + return 0; +} + +/** + * pti_char_write()- Write trace debugging data through the char + * interface to the PTI HW. Part of the misc device implementation.
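+ * + * From user space this is reached through the misc device node created + * from CHARNAME (typically /dev/pti; the exact path is a udev detail, + * not something this file guarantees), e.g.: + * + * int fd = open("/dev/pti", O_WRONLY); + * + * if (fd >= 0) + * write(fd, "trace data", 10);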
+ * + * @filp: Contains private data which is used to obtain + * master, channel write ID. + * @data: trace data to be written. + * @len: # of bytes to write. + * @ppose: Not used in this function implementation. + * + * Returns: + * int, # of bytes written + * otherwise, error value + * + * Notes: From side discussions with Alan Cox and experimenting + * with PTI debug HW like Nokia's Fido box and Lauterbach + * devices, the 8192 byte write buffer used by USER_COPY_SIZE was + * deemed an appropriate size for this type of usage with + * debugging HW. + */ +static ssize_t pti_char_write(struct file *filp, const char __user *data, + size_t len, loff_t *ppose) +{ + struct pti_masterchannel *mc; + void *kbuf; + const char __user *tmp; + size_t size = USER_COPY_SIZE; + size_t n = 0; + + tmp = data; + mc = filp->private_data; + + kbuf = kmalloc(size, GFP_KERNEL); + if (kbuf == NULL) { + pr_err("%s(%d): buf allocation failed\n", + __func__, __LINE__); + return -ENOMEM; + } + + do { + if (len - n > USER_COPY_SIZE) + size = USER_COPY_SIZE; + else + size = len - n; + + if (copy_from_user(kbuf, tmp, size)) { + kfree(kbuf); + return n ? n : -EFAULT; + } + + pti_write_to_aperture(mc, kbuf, size); + n += size; + tmp += size; + + } while (len > n); + + kfree(kbuf); + return len; +} + +static const struct tty_operations pti_tty_driver_ops = { + .open = pti_tty_driver_open, + .close = pti_tty_driver_close, + .write = pti_tty_driver_write, + .write_room = pti_tty_write_room, + .install = pti_tty_install, + .cleanup = pti_tty_cleanup +}; + +static const struct file_operations pti_char_driver_ops = { + .owner = THIS_MODULE, + .write = pti_char_write, + .open = pti_char_open, + .release = pti_char_release, +}; + +static struct miscdevice pti_char_driver = { + .minor = MISC_DYNAMIC_MINOR, + .name = CHARNAME, + .fops = &pti_char_driver_ops +}; + +/** + * pti_console_write()- Write to the console that has been acquired. + * + * @c: Not used in this implementation. + * @buf: Data to be written. + * @len: Length of buf. + */ +static void pti_console_write(struct console *c, const char *buf, unsigned len) +{ + static struct pti_masterchannel mc = {.master = CONSOLE_ID, + .channel = 0}; + + mc.channel = pti_console_channel; + pti_console_channel = (pti_console_channel + 1) & 0x7f; + + pti_write_full_frame_to_aperture(&mc, buf, len); +} + +/** + * pti_console_device()- Return the driver tty structure and set the + * associated index implementation. + * + * @c: Console device of the driver. + * @index: index associated with c. + * + * Returns: + * always value of pti_tty_driver structure when this function + * is called. + */ +static struct tty_driver *pti_console_device(struct console *c, int *index) +{ + *index = c->index; + return pti_tty_driver; +} + +/** + * pti_console_setup()- Initialize console variables used by the driver. + * + * @c: Not used. + * @opts: Not used. + * + * Returns: + * always 0. + */ +static int pti_console_setup(struct console *c, char *opts) +{ + pti_console_channel = 0; + pti_control_channel = 0; + return 0; +} + +/* + * pti_console struct, used to capture OS printk()'s and shift + * out to the PTI device for debugging. This cannot be + * enabled upon boot because of the possibility of eating + * any serial console printk's (race condition discovered). + * The console should be enabled when the tty port is + * used for the first time. Since the primary purpose for + * the tty port is to hook up syslog to it, the tty port + * will be open for a really long time.
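+ * + * Start/stop is driven from the tty_port activate/shutdown hooks + * below: the first open of ttyPTI0 calls console_start(&pti_console) + * and the last close calls console_stop(&pti_console).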
+ */ +static struct console pti_console = { + .name = TTYNAME, + .write = pti_console_write, + .device = pti_console_device, + .setup = pti_console_setup, + .flags = CON_PRINTBUFFER, + .index = 0, +}; + +/** + * pti_port_activate()- Used to start/initialize any items upon + * first opening of tty_port(). + * + * @port- The tty port number of the PTI device. + * @tty- The tty struct associated with this device. + * + * Returns: + * always returns 0 + * + * Notes: The primary purpose of the PTI tty port 0 is to hook + * the syslog daemon to it; thus this port will be open for a + * very long time. + */ +static int pti_port_activate(struct tty_port *port, struct tty_struct *tty) +{ + if (port->tty->index == PTITTY_MINOR_START) + console_start(&pti_console); + return 0; +} + +/** + * pti_port_shutdown()- Used to stop/shutdown any items upon the + * last tty port close. + * + * @port- The tty port number of the PTI device. + * + * Notes: The primary purpose of the PTI tty port 0 is to hook + * the syslog daemon to it; thus this port will be open for a + * very long time. + */ +static void pti_port_shutdown(struct tty_port *port) +{ + if (port->tty->index == PTITTY_MINOR_START) + console_stop(&pti_console); +} + +static const struct tty_port_operations tty_port_ops = { + .activate = pti_port_activate, + .shutdown = pti_port_shutdown, +}; + +/* + * Note the _probe() call sets everything up and ties the char and tty + * to successfully detecting the PTI device on the pci bus. + */ + +/** + * pti_pci_probe()- Used to detect pti on the pci bus and set + * things up in the driver. + * + * @pdev- pci_dev struct values for pti. + * @ent- pci_device_id struct for pti driver. + * + * Returns: + * 0 for success + * otherwise, error + */ +static int pti_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + unsigned int a; + int retval = -EINVAL; + int pci_bar = 1; + + dev_dbg(&pdev->dev, "%s %s(%d): PTI PCI ID %04x:%04x\n", __FILE__, + __func__, __LINE__, pdev->vendor, pdev->device); + + retval = misc_register(&pti_char_driver); + if (retval) { + pr_err("%s(%d): CHAR registration failed of pti driver\n", + __func__, __LINE__); + pr_err("%s(%d): Error value returned: %d\n", + __func__, __LINE__, retval); + goto err; + } + + retval = pci_enable_device(pdev); + if (retval != 0) { + dev_err(&pdev->dev, + "%s: pci_enable_device() returned error %d\n", + __func__, retval); + goto err_unreg_misc; + } + + drv_data = kzalloc(sizeof(*drv_data), GFP_KERNEL); + if (drv_data == NULL) { + retval = -ENOMEM; + dev_err(&pdev->dev, + "%s(%d): kmalloc() returned NULL memory.\n", + __func__, __LINE__); + goto err_disable_pci; + } + drv_data->pti_addr = pci_resource_start(pdev, pci_bar); + + retval = pci_request_region(pdev, pci_bar, dev_name(&pdev->dev)); + if (retval != 0) { + dev_err(&pdev->dev, + "%s(%d): pci_request_region() returned error %d\n", + __func__, __LINE__, retval); + goto err_free_dd; + } + drv_data->aperture_base = drv_data->pti_addr+APERTURE_14; + drv_data->pti_ioaddr = + ioremap_nocache((u32)drv_data->aperture_base, + APERTURE_LEN); + if (!drv_data->pti_ioaddr) { + retval = -ENOMEM; + goto err_rel_reg; + } + + pci_set_drvdata(pdev, drv_data); + + for (a = 0; a < PTITTY_MINOR_NUM; a++) { + struct tty_port *port = &drv_data->port[a]; + tty_port_init(port); + port->ops = &tty_port_ops; + + tty_port_register_device(port, pti_tty_driver, a, &pdev->dev); + } + + register_console(&pti_console); + + return 0; +err_rel_reg: + pci_release_region(pdev, pci_bar); +err_free_dd: + kfree(drv_data); 
+err_disable_pci: + pci_disable_device(pdev); +err_unreg_misc: + misc_deregister(&pti_char_driver); +err: + return retval; +} + +/** + * pti_pci_remove()- Driver exit method to remove PTI from + * PCI bus. + * @pdev: variable containing pci info of PTI. + */ +static void pti_pci_remove(struct pci_dev *pdev) +{ + struct pti_dev *drv_data = pci_get_drvdata(pdev); + unsigned int a; + + unregister_console(&pti_console); + + for (a = 0; a < PTITTY_MINOR_NUM; a++) { + tty_unregister_device(pti_tty_driver, a); + tty_port_destroy(&drv_data->port[a]); + } + + iounmap(drv_data->pti_ioaddr); + kfree(drv_data); + pci_release_region(pdev, 1); + pci_disable_device(pdev); + + misc_deregister(&pti_char_driver); +} + +static struct pci_driver pti_pci_driver = { + .name = PCINAME, + .id_table = pci_ids, + .probe = pti_pci_probe, + .remove = pti_pci_remove, +}; + +/** + * + * pti_init()- Overall entry/init call to the pti driver. + * It starts the registration process with the kernel. + * + * Returns: + * int __init, 0 for success + * otherwise value is an error + * + */ +static int __init pti_init(void) +{ + int retval = -EINVAL; + + /* First register module as tty device */ + + pti_tty_driver = alloc_tty_driver(PTITTY_MINOR_NUM); + if (pti_tty_driver == NULL) { + pr_err("%s(%d): Memory allocation failed for ptiTTY driver\n", + __func__, __LINE__); + return -ENOMEM; + } + + pti_tty_driver->driver_name = DRIVERNAME; + pti_tty_driver->name = TTYNAME; + pti_tty_driver->major = 0; + pti_tty_driver->minor_start = PTITTY_MINOR_START; + pti_tty_driver->type = TTY_DRIVER_TYPE_SYSTEM; + pti_tty_driver->subtype = SYSTEM_TYPE_SYSCONS; + pti_tty_driver->flags = TTY_DRIVER_REAL_RAW | + TTY_DRIVER_DYNAMIC_DEV; + pti_tty_driver->init_termios = tty_std_termios; + + tty_set_operations(pti_tty_driver, &pti_tty_driver_ops); + + retval = tty_register_driver(pti_tty_driver); + if (retval) { + pr_err("%s(%d): TTY registration failed of pti driver\n", + __func__, __LINE__); + pr_err("%s(%d): Error value returned: %d\n", + __func__, __LINE__, retval); + + goto put_tty; + } + + retval = pci_register_driver(&pti_pci_driver); + if (retval) { + pr_err("%s(%d): PCI registration failed of pti driver\n", + __func__, __LINE__); + pr_err("%s(%d): Error value returned: %d\n", + __func__, __LINE__, retval); + goto unreg_tty; + } + + return 0; +unreg_tty: + tty_unregister_driver(pti_tty_driver); +put_tty: + put_tty_driver(pti_tty_driver); + pti_tty_driver = NULL; + return retval; +} + +/** + * pti_exit()- Unregisters this module as a tty and pci driver. + */ +static void __exit pti_exit(void) +{ + tty_unregister_driver(pti_tty_driver); + pci_unregister_driver(&pti_pci_driver); + put_tty_driver(pti_tty_driver); +} + +module_init(pti_init); +module_exit(pti_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ken Mills, Jay Freyensee"); +MODULE_DESCRIPTION("PTI Driver"); + diff --git a/kernel/drivers/misc/sgi-gru/Makefile b/kernel/drivers/misc/sgi-gru/Makefile new file mode 100644 index 000000000..0003a1d56 --- /dev/null +++ b/kernel/drivers/misc/sgi-gru/Makefile @@ -0,0 +1,5 @@ +ccflags-$(CONFIG_SGI_GRU_DEBUG) := -DDEBUG + +obj-$(CONFIG_SGI_GRU) := gru.o +gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o gruhandles.o grukdump.o + diff --git a/kernel/drivers/misc/sgi-gru/gru.h b/kernel/drivers/misc/sgi-gru/gru.h new file mode 100644 index 000000000..3ad76cd18 --- /dev/null +++ b/kernel/drivers/misc/sgi-gru/gru.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __GRU_H__ +#define __GRU_H__ + +/* + * GRU architectural definitions + */ +#define GRU_CACHE_LINE_BYTES 64 +#define GRU_HANDLE_STRIDE 256 +#define GRU_CB_BASE 0 +#define GRU_DS_BASE 0x20000 + +/* + * Size used to map GRU GSeg + */ +#if defined(CONFIG_IA64) +#define GRU_GSEG_PAGESIZE (256 * 1024UL) +#elif defined(CONFIG_X86_64) +#define GRU_GSEG_PAGESIZE (256 * 1024UL) /* ZZZ 2MB ??? */ +#else +#error "Unsupported architecture" +#endif + +/* + * Structure for obtaining GRU resource information + */ +struct gru_chiplet_info { + int node; + int chiplet; + int blade; + int total_dsr_bytes; + int total_cbr; + int total_user_dsr_bytes; + int total_user_cbr; + int free_user_dsr_bytes; + int free_user_cbr; +}; + +/* + * Statistics kept for each context. + */ +struct gru_gseg_statistics { + unsigned long fmm_tlbmiss; + unsigned long upm_tlbmiss; + unsigned long tlbdropin; + unsigned long context_stolen; + unsigned long reserved[10]; +}; + +/* Flags for GRU options on the gru_create_context() call */ +/* Select one of the following 4 options to specify how TLB misses are handled */ +#define GRU_OPT_MISS_DEFAULT 0x0000 /* Use default mode */ +#define GRU_OPT_MISS_USER_POLL 0x0001 /* User will poll CB for faults */ +#define GRU_OPT_MISS_FMM_INTR 0x0002 /* Send interrupt to cpu to + handle fault */ +#define GRU_OPT_MISS_FMM_POLL 0x0003 /* Use system polling thread */ +#define GRU_OPT_MISS_MASK 0x0003 /* Mask for TLB MISS option */ + + + +#endif /* __GRU_H__ */ diff --git a/kernel/drivers/misc/sgi-gru/gru_instructions.h b/kernel/drivers/misc/sgi-gru/gru_instructions.h new file mode 100644 index 000000000..04d5170ac --- /dev/null +++ b/kernel/drivers/misc/sgi-gru/gru_instructions.h @@ -0,0 +1,736 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __GRU_INSTRUCTIONS_H__ +#define __GRU_INSTRUCTIONS_H__ + +extern int gru_check_status_proc(void *cb); +extern int gru_wait_proc(void *cb); +extern void gru_wait_abort_proc(void *cb); + + + +/* + * Architecture dependent functions + */ + +#if defined(CONFIG_IA64) +#include <linux/compiler.h> +#include <asm/intrinsics.h> +#define __flush_cache(p) ia64_fc((unsigned long)p) +/* Use volatile on IA64 to ensure ordering via st4.rel */ +#define gru_ordered_store_ulong(p, v) \ + do { \ + barrier(); \ + *((volatile unsigned long *)(p)) = v; /* force st.rel */ \ + } while (0) +#elif defined(CONFIG_X86_64) +#include <asm/cacheflush.h> +#define __flush_cache(p) clflush(p) +#define gru_ordered_store_ulong(p, v) \ + do { \ + barrier(); \ + *(unsigned long *)p = v; \ + } while (0) +#else +#error "Unsupported architecture" +#endif + +/* + * Control block status and exception codes + */ +#define CBS_IDLE 0 +#define CBS_EXCEPTION 1 +#define CBS_ACTIVE 2 +#define CBS_CALL_OS 3 + +/* CB substatus bitmasks */ +#define CBSS_MSG_QUEUE_MASK 7 +#define CBSS_IMPLICIT_ABORT_ACTIVE_MASK 8 + +/* CB substatus message queue values (low 3 bits of substatus) */ +#define CBSS_NO_ERROR 0 +#define CBSS_LB_OVERFLOWED 1 +#define CBSS_QLIMIT_REACHED 2 +#define CBSS_PAGE_OVERFLOW 3 +#define CBSS_AMO_NACKED 4 +#define CBSS_PUT_NACKED 5 + +/* + * Structure used to fetch exception detail for CBs that terminate with + * CBS_EXCEPTION + */ +struct control_block_extended_exc_detail { + unsigned long cb; + int opc; + int ecause; + int exopc; + long exceptdet0; + int exceptdet1; + int cbrstate; + int cbrexecstatus; +}; + +/* + * Instruction formats + */ + +/* + * Generic instruction format. + * This definition has precise bit field definitions. + */ +struct gru_instruction_bits { + /* DW 0 - low */ + unsigned int icmd: 1; + unsigned char ima: 3; /* CB_DelRep, unmapped mode */ + unsigned char reserved0: 4; + unsigned int xtype: 3; + unsigned int iaa0: 2; + unsigned int iaa1: 2; + unsigned char reserved1: 1; + unsigned char opc: 8; /* opcode */ + unsigned char exopc: 8; /* extended opcode */ + /* DW 0 - high */ + unsigned int idef2: 22; /* TRi0 */ + unsigned char reserved2: 2; + unsigned char istatus: 2; + unsigned char isubstatus:4; + unsigned char reserved3: 1; + unsigned char tlb_fault_color: 1; + /* DW 1 */ + unsigned long idef4; /* 42 bits: TRi1, BufSize */ + /* DW 2-6 */ + unsigned long idef1; /* BAddr0 */ + unsigned long idef5; /* Nelem */ + unsigned long idef6; /* Stride, Operand1 */ + unsigned long idef3; /* BAddr1, Value, Operand2 */ + unsigned long reserved4; + /* DW 7 */ + unsigned long avalue; /* AValue */ +}; + +/* + * Generic instruction with friendlier names. This format is used + * for inline instructions. 
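+ * The op32/tri0 pair in the union below overlays the low and high + * 32-bit halves of op64, which is why __opdword() can assemble all of + * DW 0 as a single 64-bit value.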
+ */ +struct gru_instruction { + /* DW 0 */ + union { + unsigned long op64; /* icmd,xtype,iaa0,ima,opc,tri0 */ + struct { + unsigned int op32; + unsigned int tri0; + }; + }; + unsigned long tri1_bufsize; /* DW 1 */ + unsigned long baddr0; /* DW 2 */ + unsigned long nelem; /* DW 3 */ + unsigned long op1_stride; /* DW 4 */ + unsigned long op2_value_baddr1; /* DW 5 */ + unsigned long reserved0; /* DW 6 */ + unsigned long avalue; /* DW 7 */ +}; + +/* Some shifts and masks for the low 64 bits of a GRU command */ +#define GRU_CB_ICMD_SHFT 0 +#define GRU_CB_ICMD_MASK 0x1 +#define GRU_CB_XTYPE_SHFT 8 +#define GRU_CB_XTYPE_MASK 0x7 +#define GRU_CB_IAA0_SHFT 11 +#define GRU_CB_IAA0_MASK 0x3 +#define GRU_CB_IAA1_SHFT 13 +#define GRU_CB_IAA1_MASK 0x3 +#define GRU_CB_IMA_SHFT 1 +#define GRU_CB_IMA_MASK 0x3 +#define GRU_CB_OPC_SHFT 16 +#define GRU_CB_OPC_MASK 0xff +#define GRU_CB_EXOPC_SHFT 24 +#define GRU_CB_EXOPC_MASK 0xff +#define GRU_IDEF2_SHFT 32 +#define GRU_IDEF2_MASK 0x3ffff +#define GRU_ISTATUS_SHFT 56 +#define GRU_ISTATUS_MASK 0x3 + +/* GRU instruction opcodes (opc field) */ +#define OP_NOP 0x00 +#define OP_BCOPY 0x01 +#define OP_VLOAD 0x02 +#define OP_IVLOAD 0x03 +#define OP_VSTORE 0x04 +#define OP_IVSTORE 0x05 +#define OP_VSET 0x06 +#define OP_IVSET 0x07 +#define OP_MESQ 0x08 +#define OP_GAMXR 0x09 +#define OP_GAMIR 0x0a +#define OP_GAMIRR 0x0b +#define OP_GAMER 0x0c +#define OP_GAMERR 0x0d +#define OP_BSTORE 0x0e +#define OP_VFLUSH 0x0f + + +/* Extended opcodes values (exopc field) */ + +/* GAMIR - AMOs with implicit operands */ +#define EOP_IR_FETCH 0x01 /* Plain fetch of memory */ +#define EOP_IR_CLR 0x02 /* Fetch and clear */ +#define EOP_IR_INC 0x05 /* Fetch and increment */ +#define EOP_IR_DEC 0x07 /* Fetch and decrement */ +#define EOP_IR_QCHK1 0x0d /* Queue check, 64 byte msg */ +#define EOP_IR_QCHK2 0x0e /* Queue check, 128 byte msg */ + +/* GAMIRR - Registered AMOs with implicit operands */ +#define EOP_IRR_FETCH 0x01 /* Registered fetch of memory */ +#define EOP_IRR_CLR 0x02 /* Registered fetch and clear */ +#define EOP_IRR_INC 0x05 /* Registered fetch and increment */ +#define EOP_IRR_DEC 0x07 /* Registered fetch and decrement */ +#define EOP_IRR_DECZ 0x0f /* Registered fetch and decrement, update on zero*/ + +/* GAMER - AMOs with explicit operands */ +#define EOP_ER_SWAP 0x00 /* Exchange argument and memory */ +#define EOP_ER_OR 0x01 /* Logical OR with memory */ +#define EOP_ER_AND 0x02 /* Logical AND with memory */ +#define EOP_ER_XOR 0x03 /* Logical XOR with memory */ +#define EOP_ER_ADD 0x04 /* Add value to memory */ +#define EOP_ER_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/ +#define EOP_ER_CADD 0x0c /* Queue check, operand1*64 byte msg */ + +/* GAMERR - Registered AMOs with explicit operands */ +#define EOP_ERR_SWAP 0x00 /* Exchange argument and memory */ +#define EOP_ERR_OR 0x01 /* Logical OR with memory */ +#define EOP_ERR_AND 0x02 /* Logical AND with memory */ +#define EOP_ERR_XOR 0x03 /* Logical XOR with memory */ +#define EOP_ERR_ADD 0x04 /* Add value to memory */ +#define EOP_ERR_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/ +#define EOP_ERR_EPOLL 0x09 /* Poll for equality */ +#define EOP_ERR_NPOLL 0x0a /* Poll for inequality */ + +/* GAMXR - SGI Arithmetic unit */ +#define EOP_XR_CSWAP 0x0b /* Masked compare exchange */ + + +/* Transfer types (xtype field) */ +#define XTYPE_B 0x0 /* byte */ +#define XTYPE_S 0x1 /* short (2-byte) */ +#define XTYPE_W 0x2 /* word (4-byte) */ +#define XTYPE_DW 0x3 /* doubleword (8-byte) */ +#define 
XTYPE_CL 0x6 /* cacheline (64-byte) */ + + +/* Instruction access attributes (iaa0, iaa1 fields) */ +#define IAA_RAM 0x0 /* normal cached RAM access */ +#define IAA_NCRAM 0x2 /* noncoherent RAM access */ +#define IAA_MMIO 0x1 /* noncoherent memory-mapped I/O space */ +#define IAA_REGISTER 0x3 /* memory-mapped registers, etc. */ + + +/* Instruction mode attributes (ima field) */ +#define IMA_MAPPED 0x0 /* Virtual mode */ +#define IMA_CB_DELAY 0x1 /* hold read responses until status changes */ +#define IMA_UNMAPPED 0x2 /* bypass the TLBs (OS only) */ +#define IMA_INTERRUPT 0x4 /* Interrupt when instruction completes */ + +/* CBE ecause bits */ +#define CBE_CAUSE_RI (1 << 0) +#define CBE_CAUSE_INVALID_INSTRUCTION (1 << 1) +#define CBE_CAUSE_UNMAPPED_MODE_FORBIDDEN (1 << 2) +#define CBE_CAUSE_PE_CHECK_DATA_ERROR (1 << 3) +#define CBE_CAUSE_IAA_GAA_MISMATCH (1 << 4) +#define CBE_CAUSE_DATA_SEGMENT_LIMIT_EXCEPTION (1 << 5) +#define CBE_CAUSE_OS_FATAL_TLB_FAULT (1 << 6) +#define CBE_CAUSE_EXECUTION_HW_ERROR (1 << 7) +#define CBE_CAUSE_TLBHW_ERROR (1 << 8) +#define CBE_CAUSE_RA_REQUEST_TIMEOUT (1 << 9) +#define CBE_CAUSE_HA_REQUEST_TIMEOUT (1 << 10) +#define CBE_CAUSE_RA_RESPONSE_FATAL (1 << 11) +#define CBE_CAUSE_RA_RESPONSE_NON_FATAL (1 << 12) +#define CBE_CAUSE_HA_RESPONSE_FATAL (1 << 13) +#define CBE_CAUSE_HA_RESPONSE_NON_FATAL (1 << 14) +#define CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR (1 << 15) +#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 16) +#define CBE_CAUSE_RA_RESPONSE_DATA_ERROR (1 << 17) +#define CBE_CAUSE_HA_RESPONSE_DATA_ERROR (1 << 18) +#define CBE_CAUSE_FORCED_ERROR (1 << 19) + +/* CBE cbrexecstatus bits */ +#define CBR_EXS_ABORT_OCC_BIT 0 +#define CBR_EXS_INT_OCC_BIT 1 +#define CBR_EXS_PENDING_BIT 2 +#define CBR_EXS_QUEUED_BIT 3 +#define CBR_EXS_TLB_INVAL_BIT 4 +#define CBR_EXS_EXCEPTION_BIT 5 +#define CBR_EXS_CB_INT_PENDING_BIT 6 + +#define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT) +#define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT) +#define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT) +#define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT) +#define CBR_EXS_TLB_INVAL (1 << CBR_EXS_TLB_INVAL_BIT) +#define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT) +#define CBR_EXS_CB_INT_PENDING (1 << CBR_EXS_CB_INT_PENDING_BIT) + +/* + * Exceptions are retried for the following cases. If any OTHER bits are set + * in ecause, the exception is not retryable. 
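+ * In other words, an exception is worth retrying only when + * (ecause && !(ecause & ~EXCEPTION_RETRY_BITS)) holds.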
+ */ +#define EXCEPTION_RETRY_BITS (CBE_CAUSE_EXECUTION_HW_ERROR | \ + CBE_CAUSE_TLBHW_ERROR | \ + CBE_CAUSE_RA_REQUEST_TIMEOUT | \ + CBE_CAUSE_RA_RESPONSE_NON_FATAL | \ + CBE_CAUSE_HA_RESPONSE_NON_FATAL | \ + CBE_CAUSE_RA_RESPONSE_DATA_ERROR | \ + CBE_CAUSE_HA_RESPONSE_DATA_ERROR \ + ) + +/* Message queue head structure */ +union gru_mesqhead { + unsigned long val; + struct { + unsigned int head; + unsigned int limit; + }; +}; + + +/* Generate the low word of a GRU instruction */ +static inline unsigned long +__opdword(unsigned char opcode, unsigned char exopc, unsigned char xtype, + unsigned char iaa0, unsigned char iaa1, + unsigned long idef2, unsigned char ima) +{ + return (1 << GRU_CB_ICMD_SHFT) | + ((unsigned long)CBS_ACTIVE << GRU_ISTATUS_SHFT) | + (idef2<< GRU_IDEF2_SHFT) | + (iaa0 << GRU_CB_IAA0_SHFT) | + (iaa1 << GRU_CB_IAA1_SHFT) | + (ima << GRU_CB_IMA_SHFT) | + (xtype << GRU_CB_XTYPE_SHFT) | + (opcode << GRU_CB_OPC_SHFT) | + (exopc << GRU_CB_EXOPC_SHFT); +} + +/* + * Architecture specific intrinsics + */ +static inline void gru_flush_cache(void *p) +{ + __flush_cache(p); +} + +/* + * Store the lower 64 bits of the command including the "start" bit. Then + * start the instruction executing. + */ +static inline void gru_start_instruction(struct gru_instruction *ins, unsigned long op64) +{ + gru_ordered_store_ulong(ins, op64); + mb(); + gru_flush_cache(ins); +} + + +/* Convert "hints" to IMA */ +#define CB_IMA(h) ((h) | IMA_UNMAPPED) + +/* Convert data segment cache line index into TRI0 / TRI1 value */ +#define GRU_DINDEX(i) ((i) * GRU_CACHE_LINE_BYTES) + +/* Inline functions for GRU instructions. + * Note: + * - nelem and stride are in elements + * - tri0/tri1 is in bytes for the beginning of the data segment. + */ +static inline void gru_vload_phys(void *cb, unsigned long gpa, + unsigned int tri0, int iaa, unsigned long hints) +{ + struct gru_instruction *ins = (struct gru_instruction *)cb; + + ins->baddr0 = (long)gpa | ((unsigned long)iaa << 62); + ins->nelem = 1; + ins->op1_stride = 1; + gru_start_instruction(ins, __opdword(OP_VLOAD, 0, XTYPE_DW, iaa, 0, + (unsigned long)tri0, CB_IMA(hints))); +} + +static inline void gru_vstore_phys(void *cb, unsigned long gpa, + unsigned int tri0, int iaa, unsigned long hints) +{ + struct gru_instruction *ins = (struct gru_instruction *)cb; + + ins->baddr0 = (long)gpa | ((unsigned long)iaa << 62); + ins->nelem = 1; + ins->op1_stride = 1; + gru_start_instruction(ins, __opdword(OP_VSTORE, 0, XTYPE_DW, iaa, 0, + (unsigned long)tri0, CB_IMA(hints))); +} + +static inline void gru_vload(void *cb, unsigned long mem_addr, + unsigned int tri0, unsigned char xtype, unsigned long nelem, + unsigned long stride, unsigned long hints) +{ + struct gru_instruction *ins = (struct gru_instruction *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->op1_stride = stride; + gru_start_instruction(ins, __opdword(OP_VLOAD, 0, xtype, IAA_RAM, 0, + (unsigned long)tri0, CB_IMA(hints))); +} + +static inline void gru_vstore(void *cb, unsigned long mem_addr, + unsigned int tri0, unsigned char xtype, unsigned long nelem, + unsigned long stride, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->op1_stride = stride; + gru_start_instruction(ins, __opdword(OP_VSTORE, 0, xtype, IAA_RAM, 0, + tri0, CB_IMA(hints))); +} + +static inline void gru_ivload(void *cb, unsigned long mem_addr, + unsigned int tri0, unsigned int tri1, unsigned char xtype, + unsigned long nelem, unsigned long 
hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->tri1_bufsize = tri1; + gru_start_instruction(ins, __opdword(OP_IVLOAD, 0, xtype, IAA_RAM, 0, + tri0, CB_IMA(hints))); +} + +static inline void gru_ivstore(void *cb, unsigned long mem_addr, + unsigned int tri0, unsigned int tri1, + unsigned char xtype, unsigned long nelem, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->tri1_bufsize = tri1; + gru_start_instruction(ins, __opdword(OP_IVSTORE, 0, xtype, IAA_RAM, 0, + tri0, CB_IMA(hints))); +} + +static inline void gru_vset(void *cb, unsigned long mem_addr, + unsigned long value, unsigned char xtype, unsigned long nelem, + unsigned long stride, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->op2_value_baddr1 = value; + ins->nelem = nelem; + ins->op1_stride = stride; + gru_start_instruction(ins, __opdword(OP_VSET, 0, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_ivset(void *cb, unsigned long mem_addr, + unsigned int tri1, unsigned long value, unsigned char xtype, + unsigned long nelem, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->op2_value_baddr1 = value; + ins->nelem = nelem; + ins->tri1_bufsize = tri1; + gru_start_instruction(ins, __opdword(OP_IVSET, 0, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_vflush(void *cb, unsigned long mem_addr, + unsigned long nelem, unsigned char xtype, unsigned long stride, + unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->op1_stride = stride; + ins->nelem = nelem; + gru_start_instruction(ins, __opdword(OP_VFLUSH, 0, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_nop(void *cb, int hints) +{ + struct gru_instruction *ins = (void *)cb; + + gru_start_instruction(ins, __opdword(OP_NOP, 0, 0, 0, 0, 0, CB_IMA(hints))); +} + + +static inline void gru_bcopy(void *cb, const unsigned long src, + unsigned long dest, + unsigned int tri0, unsigned int xtype, unsigned long nelem, + unsigned int bufsize, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op2_value_baddr1 = (long)dest; + ins->nelem = nelem; + ins->tri1_bufsize = bufsize; + gru_start_instruction(ins, __opdword(OP_BCOPY, 0, xtype, IAA_RAM, + IAA_RAM, tri0, CB_IMA(hints))); +} + +static inline void gru_bstore(void *cb, const unsigned long src, + unsigned long dest, unsigned int tri0, unsigned int xtype, + unsigned long nelem, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op2_value_baddr1 = (long)dest; + ins->nelem = nelem; + gru_start_instruction(ins, __opdword(OP_BSTORE, 0, xtype, 0, IAA_RAM, + tri0, CB_IMA(hints))); +} + +static inline void gru_gamir(void *cb, int exopc, unsigned long src, + unsigned int xtype, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + gru_start_instruction(ins, __opdword(OP_GAMIR, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_gamirr(void *cb, int exopc, unsigned long src, + unsigned int xtype, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + gru_start_instruction(ins, __opdword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static 
inline void gru_gamer(void *cb, int exopc, unsigned long src, + unsigned int xtype, + unsigned long operand1, unsigned long operand2, + unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op1_stride = operand1; + ins->op2_value_baddr1 = operand2; + gru_start_instruction(ins, __opdword(OP_GAMER, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_gamerr(void *cb, int exopc, unsigned long src, + unsigned int xtype, unsigned long operand1, + unsigned long operand2, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op1_stride = operand1; + ins->op2_value_baddr1 = operand2; + gru_start_instruction(ins, __opdword(OP_GAMERR, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_gamxr(void *cb, unsigned long src, + unsigned int tri0, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->nelem = 4; + gru_start_instruction(ins, __opdword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW, + IAA_RAM, 0, 0, CB_IMA(hints))); +} + +static inline void gru_mesq(void *cb, unsigned long queue, + unsigned long tri0, unsigned long nelem, + unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)queue; + ins->nelem = nelem; + gru_start_instruction(ins, __opdword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0, + tri0, CB_IMA(hints))); +} + +static inline unsigned long gru_get_amo_value(void *cb) +{ + struct gru_instruction *ins = (void *)cb; + + return ins->avalue; +} + +static inline int gru_get_amo_value_head(void *cb) +{ + struct gru_instruction *ins = (void *)cb; + + return ins->avalue & 0xffffffff; +} + +static inline int gru_get_amo_value_limit(void *cb) +{ + struct gru_instruction *ins = (void *)cb; + + return ins->avalue >> 32; +} + +static inline union gru_mesqhead gru_mesq_head(int head, int limit) +{ + union gru_mesqhead mqh; + + mqh.head = head; + mqh.limit = limit; + return mqh; +} + +/* + * Get struct control_block_extended_exc_detail for CB. + */ +extern int gru_get_cb_exception_detail(void *cb, + struct control_block_extended_exc_detail *excdet); + +#define GRU_EXC_STR_SIZE 256 + + +/* + * Control block definition for checking status + */ +struct gru_control_block_status { + unsigned int icmd :1; + unsigned int ima :3; + unsigned int reserved0 :4; + unsigned int unused1 :24; + unsigned int unused2 :24; + unsigned int istatus :2; + unsigned int isubstatus :4; + unsigned int unused3 :2; +}; + +/* Get CB status */ +static inline int gru_get_cb_status(void *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + + return cbs->istatus; +} + +/* Get CB message queue substatus */ +static inline int gru_get_cb_message_queue_substatus(void *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + + return cbs->isubstatus & CBSS_MSG_QUEUE_MASK; +} + +/* Get CB substatus */ +static inline int gru_get_cb_substatus(void *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + + return cbs->isubstatus; +} + +/* + * User interface to check an instruction status. UPM and exceptions + * are handled automatically. However, this function does NOT wait + * for an active instruction to complete. 
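+ *
+ * A minimal polling sketch (illustrative only; CBS_EXCEPTION is assumed
+ * from the driver's public istatus codes, alongside the CBS_ACTIVE used
+ * in this header):
+ *
+ *	int status;
+ *
+ *	do
+ *		status = gru_check_status(cb);
+ *	while (status == CBS_ACTIVE);
+ *	if (status == CBS_EXCEPTION)
+ *		recover(cb);
+ *
+ * where recover() stands in for caller-specific error handling, e.g.
+ * built on gru_get_cb_exception_detail() below.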
+ * + */ +static inline int gru_check_status(void *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + int ret; + + ret = cbs->istatus; + if (ret != CBS_ACTIVE) + ret = gru_check_status_proc(cb); + return ret; +} + +/* + * User interface (via inline function) to wait for an instruction + * to complete. Completion status (IDLE or EXCEPTION) is returned + * to the user. Exceptions due to hardware errors are automatically + * retried before returning an exception. + * + */ +static inline int gru_wait(void *cb) +{ + return gru_wait_proc(cb); +} + +/* + * Wait for CB to complete. Aborts program if error. (Note: error does NOT + * mean TLB miss - only fatal errors such as memory parity error or user + * bugs will cause termination.) + */ +static inline void gru_wait_abort(void *cb) +{ + gru_wait_abort_proc(cb); +} + +/* + * Get a pointer to the start of a gseg + * p - Any valid pointer within the gseg + */ +static inline void *gru_get_gseg_pointer(void *p) +{ + return (void *)((unsigned long)p & ~(GRU_GSEG_PAGESIZE - 1)); +} + +/* + * Get a pointer to a control block + * gseg - GSeg address returned from gru_get_thread_gru_segment() + * index - index of desired CB + */ +static inline void *gru_get_cb_pointer(void *gseg, + int index) +{ + return gseg + GRU_CB_BASE + index * GRU_HANDLE_STRIDE; +} + +/* + * Get a pointer to a cacheline in the data segment portion of a GSeg + * gseg - GSeg address returned from gru_get_thread_gru_segment() + * index - index of desired cache line + */ +static inline void *gru_get_data_pointer(void *gseg, int index) +{ + return gseg + GRU_DS_BASE + index * GRU_CACHE_LINE_BYTES; +} + +/* + * Convert a vaddr into the tri index within the GSEG + * vaddr - virtual address within the gseg + */ +static inline int gru_get_tri(void *vaddr) +{ + return ((unsigned long)vaddr & (GRU_GSEG_PAGESIZE - 1)) - GRU_DS_BASE; +} +#endif /* __GRU_INSTRUCTIONS_H__ */ diff --git a/kernel/drivers/misc/sgi-gru/grufault.c b/kernel/drivers/misc/sgi-gru/grufault.c new file mode 100644 index 000000000..f74fc0ca2 --- /dev/null +++ b/kernel/drivers/misc/sgi-gru/grufault.c @@ -0,0 +1,903 @@ +/* + * SN Platform GRU Driver + * + * FAULT HANDLER FOR GRU DETECTED TLB MISSES + * + * This file contains code that handles TLB misses within the GRU. + * These misses are reported either via interrupts or user polling of + * the user CB. + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/spinlock.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/device.h> +#include <linux/io.h> +#include <linux/uaccess.h> +#include <linux/security.h> +#include <linux/prefetch.h> +#include <asm/pgtable.h> +#include "gru.h" +#include "grutables.h" +#include "grulib.h" +#include "gru_instructions.h" +#include <asm/uv/uv_hub.h> + +/* Return codes for vtop functions */ +#define VTOP_SUCCESS 0 +#define VTOP_INVALID -1 +#define VTOP_RETRY -2 + + +/* + * Test if a physical address is a valid GRU GSEG address + */ +static inline int is_gru_paddr(unsigned long paddr) +{ + return paddr >= gru_start_paddr && paddr < gru_end_paddr; +} + +/* + * Find the vma of a GRU segment. Caller must hold mmap_sem. + */ +struct vm_area_struct *gru_find_vma(unsigned long vaddr) +{ + struct vm_area_struct *vma; + + vma = find_vma(current->mm, vaddr); + if (vma && vma->vm_start <= vaddr && vma->vm_ops == &gru_vm_ops) + return vma; + return NULL; +} + +/* + * Find and lock the gts that contains the specified user vaddr. + * + * Returns: + * - *gts with the mmap_sem locked for read and the GTS locked. + * - NULL if vaddr invalid OR is not a valid GSEG vaddr. + */ + +static struct gru_thread_state *gru_find_lock_gts(unsigned long vaddr) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct gru_thread_state *gts = NULL; + + down_read(&mm->mmap_sem); + vma = gru_find_vma(vaddr); + if (vma) + gts = gru_find_thread_state(vma, TSID(vaddr, vma)); + if (gts) + mutex_lock(&gts->ts_ctxlock); + else + up_read(&mm->mmap_sem); + return gts; +} + +static struct gru_thread_state *gru_alloc_locked_gts(unsigned long vaddr) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct gru_thread_state *gts = ERR_PTR(-EINVAL); + + down_write(&mm->mmap_sem); + vma = gru_find_vma(vaddr); + if (!vma) + goto err; + + gts = gru_alloc_thread_state(vma, TSID(vaddr, vma)); + if (IS_ERR(gts)) + goto err; + mutex_lock(&gts->ts_ctxlock); + downgrade_write(&mm->mmap_sem); + return gts; + +err: + up_write(&mm->mmap_sem); + return gts; +} + +/* + * Unlock a GTS that was previously locked with gru_find_lock_gts(). + */ +static void gru_unlock_gts(struct gru_thread_state *gts) +{ + mutex_unlock(&gts->ts_ctxlock); + up_read(&current->mm->mmap_sem); +} + +/* + * Set a CB.istatus to active using a user virtual address. This must be done + * just prior to a TFH RESTART. The new cb.istatus is an in-cache status ONLY. + * If the line is evicted, the status may be lost. The in-cache update + * is necessary to prevent the user from seeing a stale cb.istatus that will + * change as soon as the TFH restart is complete. Races may cause an + * occasional failure to clear the cb.istatus, but that is ok. + */ +static void gru_cb_set_istatus_active(struct gru_instruction_bits *cbk) +{ + if (cbk) { + cbk->istatus = CBS_ACTIVE; + } +} + +/* + * Read & clear a TFM + * + * The GRU has an array of fault maps. A map is private to a cpu. + * Only one cpu will be accessing a cpu's fault map. + * + * This function scans the cpu-private fault map & clears all bits that + * are set. The function returns a bitmap that indicates the bits that + * were cleared. 
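+ * In essence, each word of the map is harvested with (sketch of the
+ * loop below):
+ *
+ *	k = tfm->fault_bits[i];
+ *	if (k)
+ *		k = xchg(&tfm->fault_bits[i], 0UL);
+ *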
Note that since the maps may be updated asynchronously by + * the GRU, atomic operations must be used to clear bits. + */ +static void get_clear_fault_map(struct gru_state *gru, + struct gru_tlb_fault_map *imap, + struct gru_tlb_fault_map *dmap) +{ + unsigned long i, k; + struct gru_tlb_fault_map *tfm; + + tfm = get_tfm_for_cpu(gru, gru_cpu_fault_map_id()); + prefetchw(tfm); /* Helps on hardware, required for emulator */ + for (i = 0; i < BITS_TO_LONGS(GRU_NUM_CBE); i++) { + k = tfm->fault_bits[i]; + if (k) + k = xchg(&tfm->fault_bits[i], 0UL); + imap->fault_bits[i] = k; + k = tfm->done_bits[i]; + if (k) + k = xchg(&tfm->done_bits[i], 0UL); + dmap->fault_bits[i] = k; + } + + /* + * Not functionally required but helps performance. (Required + * on emulator) + */ + gru_flush_cache(tfm); +} + +/* + * Atomic (interrupt context) & non-atomic (user context) functions to + * convert a vaddr into a physical address. The size of the page + * is returned in pageshift. + * returns: + * 0 - successful + * < 0 - error code + * 1 - (atomic only) try again in non-atomic context + */ +static int non_atomic_pte_lookup(struct vm_area_struct *vma, + unsigned long vaddr, int write, + unsigned long *paddr, int *pageshift) +{ + struct page *page; + +#ifdef CONFIG_HUGETLB_PAGE + *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT; +#else + *pageshift = PAGE_SHIFT; +#endif + if (get_user_pages + (current, current->mm, vaddr, 1, write, 0, &page, NULL) <= 0) + return -EFAULT; + *paddr = page_to_phys(page); + put_page(page); + return 0; +} + +/* + * atomic_pte_lookup + * + * Convert a user virtual address to a physical address + * Only supports Intel large pages (2MB only) on x86_64. + * ZZZ - hugepage support is incomplete + * + * NOTE: mmap_sem is already held on entry to this function. This + * guarantees existence of the page tables. + */ +static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr, + int write, unsigned long *paddr, int *pageshift) +{ + pgd_t *pgdp; + pmd_t *pmdp; + pud_t *pudp; + pte_t pte; + + pgdp = pgd_offset(vma->vm_mm, vaddr); + if (unlikely(pgd_none(*pgdp))) + goto err; + + pudp = pud_offset(pgdp, vaddr); + if (unlikely(pud_none(*pudp))) + goto err; + + pmdp = pmd_offset(pudp, vaddr); + if (unlikely(pmd_none(*pmdp))) + goto err; +#ifdef CONFIG_X86_64 + if (unlikely(pmd_large(*pmdp))) + pte = *(pte_t *) pmdp; + else +#endif + pte = *pte_offset_kernel(pmdp, vaddr); + + if (unlikely(!pte_present(pte) || + (write && (!pte_write(pte) || !pte_dirty(pte))))) + return 1; + + *paddr = pte_pfn(pte) << PAGE_SHIFT; +#ifdef CONFIG_HUGETLB_PAGE + *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT; +#else + *pageshift = PAGE_SHIFT; +#endif + return 0; + +err: + return 1; +} + +static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr, + int write, int atomic, unsigned long *gpa, int *pageshift) +{ + struct mm_struct *mm = gts->ts_mm; + struct vm_area_struct *vma; + unsigned long paddr; + int ret, ps; + + vma = find_vma(mm, vaddr); + if (!vma) + goto inval; + + /* + * Atomic lookup is faster & usually works even if called in non-atomic + * context. 
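+ *
+ * In outline, the fallback strategy below is:
+ *
+ *	if atomic_pte_lookup() fails:
+ *		atomic caller  -> return VTOP_RETRY (caller switches to UPM)
+ *		sleeping is ok -> retry via non_atomic_pte_lookup()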
+ */ + rmb(); /* Must/check ms_range_active before loading PTEs */ + ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &ps); + if (ret) { + if (atomic) + goto upm; + if (non_atomic_pte_lookup(vma, vaddr, write, &paddr, &ps)) + goto inval; + } + if (is_gru_paddr(paddr)) + goto inval; + paddr = paddr & ~((1UL << ps) - 1); + *gpa = uv_soc_phys_ram_to_gpa(paddr); + *pageshift = ps; + return VTOP_SUCCESS; + +inval: + return VTOP_INVALID; +upm: + return VTOP_RETRY; +} + + +/* + * Flush a CBE from cache. The CBE is clean in the cache. Dirty the + * CBE cacheline so that the line will be written back to home agent. + * Otherwise the line may be silently dropped. This has no impact + * except on performance. + */ +static void gru_flush_cache_cbe(struct gru_control_block_extended *cbe) +{ + if (unlikely(cbe)) { + cbe->cbrexecstatus = 0; /* make CL dirty */ + gru_flush_cache(cbe); + } +} + +/* + * Preload the TLB with entries that may be required. Currently, preloading + * is implemented only for BCOPY. Preload <tlb_preload_count> pages OR to + * the end of the bcopy transfer, whichever is smaller. + */ +static void gru_preload_tlb(struct gru_state *gru, + struct gru_thread_state *gts, int atomic, + unsigned long fault_vaddr, int asid, int write, + unsigned char tlb_preload_count, + struct gru_tlb_fault_handle *tfh, + struct gru_control_block_extended *cbe) +{ + unsigned long vaddr = 0, gpa; + int ret, pageshift; + + if (cbe->opccpy != OP_BCOPY) + return; + + if (fault_vaddr == cbe->cbe_baddr0) + vaddr = fault_vaddr + GRU_CACHE_LINE_BYTES * cbe->cbe_src_cl - 1; + else if (fault_vaddr == cbe->cbe_baddr1) + vaddr = fault_vaddr + (1 << cbe->xtypecpy) * cbe->cbe_nelemcur - 1; + + fault_vaddr &= PAGE_MASK; + vaddr &= PAGE_MASK; + vaddr = min(vaddr, fault_vaddr + tlb_preload_count * PAGE_SIZE); + + while (vaddr > fault_vaddr) { + ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift); + if (ret || tfh_write_only(tfh, gpa, GAA_RAM, vaddr, asid, write, + GRU_PAGESIZE(pageshift))) + return; + gru_dbg(grudev, + "%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, rw %d, ps %d, gpa 0x%lx\n", + atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh, + vaddr, asid, write, pageshift, gpa); + vaddr -= PAGE_SIZE; + STAT(tlb_preload_page); + } +} + +/* + * Drop a TLB entry into the GRU. The fault is described by info in a TFH. + * Input: + * cb Address of user CBR. Null if not running in user context + * Return: + * 0 = dropin, exception, or switch to UPM successful + * 1 = range invalidate active + * < 0 = error code + * + */ +static int gru_try_dropin(struct gru_state *gru, + struct gru_thread_state *gts, + struct gru_tlb_fault_handle *tfh, + struct gru_instruction_bits *cbk) +{ + struct gru_control_block_extended *cbe = NULL; + unsigned char tlb_preload_count = gts->ts_tlb_preload_count; + int pageshift = 0, asid, write, ret, atomic = !cbk, indexway; + unsigned long gpa = 0, vaddr = 0; + + /* + * NOTE: The GRU contains magic hardware that eliminates races between + * TLB invalidates and TLB dropins. If an invalidate occurs + * in the window between reading the TFH and the subsequent TLB dropin, + * the dropin is ignored. This eliminates the need for additional locks. + */ + + /* + * Prefetch the CBE if doing TLB preloading + */ + if (unlikely(tlb_preload_count)) { + cbe = gru_tfh_to_cbe(tfh); + prefetchw(cbe); + } + + /* + * Error if TFH state is IDLE or FMM mode & the user is issuing a UPM call. + * Might be a hardware race OR a stupid user. Ignore FMM because FMM + * is a transient state. 
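+ *
+ * Happy path, in outline (the labeled exits below handle the failures):
+ *
+ *	read vaddr/asid from the TFH
+ *	gru_vtop()           translate vaddr to a GRU physical address
+ *	tfh_write_restart()  drop the TLB entry in and restart the CB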
+ */ + if (tfh->status != TFHSTATUS_EXCEPTION) { + gru_flush_cache(tfh); + sync_core(); + if (tfh->status != TFHSTATUS_EXCEPTION) + goto failnoexception; + STAT(tfh_stale_on_fault); + } + if (tfh->state == TFHSTATE_IDLE) + goto failidle; + if (tfh->state == TFHSTATE_MISS_FMM && cbk) + goto failfmm; + + write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0; + vaddr = tfh->missvaddr; + asid = tfh->missasid; + indexway = tfh->indexway; + if (asid == 0) + goto failnoasid; + + rmb(); /* TFH must be cache resident before reading ms_range_active */ + + /* + * TFH is cache resident - at least briefly. Fail the dropin + * if a range invalidate is active. + */ + if (atomic_read(&gts->ts_gms->ms_range_active)) + goto failactive; + + ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift); + if (ret == VTOP_INVALID) + goto failinval; + if (ret == VTOP_RETRY) + goto failupm; + + if (!(gts->ts_sizeavail & GRU_SIZEAVAIL(pageshift))) { + gts->ts_sizeavail |= GRU_SIZEAVAIL(pageshift); + if (atomic || !gru_update_cch(gts)) { + gts->ts_force_cch_reload = 1; + goto failupm; + } + } + + if (unlikely(cbe) && pageshift == PAGE_SHIFT) { + gru_preload_tlb(gru, gts, atomic, vaddr, asid, write, tlb_preload_count, tfh, cbe); + gru_flush_cache_cbe(cbe); + } + + gru_cb_set_istatus_active(cbk); + gts->ustats.tlbdropin++; + tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write, + GRU_PAGESIZE(pageshift)); + gru_dbg(grudev, + "%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, indexway 0x%x," + " rw %d, ps %d, gpa 0x%lx\n", + atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh, vaddr, asid, + indexway, write, pageshift, gpa); + STAT(tlb_dropin); + return 0; + +failnoasid: + /* No asid (delayed unload). */ + STAT(tlb_dropin_fail_no_asid); + gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); + if (!cbk) + tfh_user_polling_mode(tfh); + else + gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); + return -EAGAIN; + +failupm: + /* Atomic failure switch CBR to UPM */ + tfh_user_polling_mode(tfh); + gru_flush_cache_cbe(cbe); + STAT(tlb_dropin_fail_upm); + gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); + return 1; + +failfmm: + /* FMM state on UPM call */ + gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); + STAT(tlb_dropin_fail_fmm); + gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state); + return 0; + +failnoexception: + /* TFH status did not show exception pending */ + gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); + if (cbk) + gru_flush_cache(cbk); + STAT(tlb_dropin_fail_no_exception); + gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n", + tfh, tfh->status, tfh->state); + return 0; + +failidle: + /* TFH state was idle - no miss pending */ + gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); + if (cbk) + gru_flush_cache(cbk); + STAT(tlb_dropin_fail_idle); + gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state); + return 0; + +failinval: + /* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */ + tfh_exception(tfh); + gru_flush_cache_cbe(cbe); + STAT(tlb_dropin_fail_invalid); + gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); + return -EFAULT; + +failactive: + /* Range invalidate active. 
 Switch to UPM iff atomic */ + if (!cbk) + tfh_user_polling_mode(tfh); + else + gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); + STAT(tlb_dropin_fail_range_active); + gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n", + tfh, vaddr); + return 1; +} + +/* + * Process an external interrupt from the GRU. This interrupt is + * caused by a TLB miss. + * Note that this is the interrupt handler that is registered with Linux + * interrupt handlers. + */ +static irqreturn_t gru_intr(int chiplet, int blade) +{ + struct gru_state *gru; + struct gru_tlb_fault_map imap, dmap; + struct gru_thread_state *gts; + struct gru_tlb_fault_handle *tfh = NULL; + struct completion *cmp; + int cbrnum, ctxnum; + + STAT(intr); + + gru = &gru_base[blade]->bs_grus[chiplet]; + if (!gru) { + dev_err(grudev, "GRU: invalid interrupt: cpu %d, chiplet %d\n", + raw_smp_processor_id(), chiplet); + return IRQ_NONE; + } + get_clear_fault_map(gru, &imap, &dmap); + gru_dbg(grudev, + "cpu %d, chiplet %d, gid %d, imap %016lx %016lx, dmap %016lx %016lx\n", + smp_processor_id(), chiplet, gru->gs_gid, + imap.fault_bits[0], imap.fault_bits[1], + dmap.fault_bits[0], dmap.fault_bits[1]); + + for_each_cbr_in_tfm(cbrnum, dmap.fault_bits) { + STAT(intr_cbr); + cmp = gru->gs_blade->bs_async_wq; + if (cmp) + complete(cmp); + gru_dbg(grudev, "gid %d, cbr_done %d, done %d\n", + gru->gs_gid, cbrnum, cmp ? cmp->done : -1); + } + + for_each_cbr_in_tfm(cbrnum, imap.fault_bits) { + STAT(intr_tfh); + tfh = get_tfh_by_index(gru, cbrnum); + prefetchw(tfh); /* Helps on hdw, required for emulator */ + + /* + * When hardware sets a bit in the faultmap, it implicitly + * locks the GRU context so that it cannot be unloaded. + * The gts cannot change until a TFH start/writestart command + * is issued. + */ + ctxnum = tfh->ctxnum; + gts = gru->gs_gts[ctxnum]; + + /* Spurious interrupts can cause this. Ignore. */ + if (!gts) { + STAT(intr_spurious); + continue; + } + + /* + * This is running in interrupt context. Trylock the mmap_sem. + * If it fails, retry the fault in user context. + */ + gts->ustats.fmm_tlbmiss++; + if (!gts->ts_force_cch_reload && + down_read_trylock(&gts->ts_mm->mmap_sem)) { + gru_try_dropin(gru, gts, tfh, NULL); + up_read(&gts->ts_mm->mmap_sem); + } else { + tfh_user_polling_mode(tfh); + STAT(intr_mm_lock_failed); + } + } + return IRQ_HANDLED; +} + +irqreturn_t gru0_intr(int irq, void *dev_id) +{ + return gru_intr(0, uv_numa_blade_id()); +} + +irqreturn_t gru1_intr(int irq, void *dev_id) +{ + return gru_intr(1, uv_numa_blade_id()); +} + +irqreturn_t gru_intr_mblade(int irq, void *dev_id) +{ + int blade; + + for_each_possible_blade(blade) { + if (uv_blade_nr_possible_cpus(blade)) + continue; + gru_intr(0, blade); + gru_intr(1, blade); + } + return IRQ_HANDLED; +} + + +static int gru_user_dropin(struct gru_thread_state *gts, + struct gru_tlb_fault_handle *tfh, + void *cb) +{ + struct gru_mm_struct *gms = gts->ts_gms; + int ret; + + gts->ustats.upm_tlbmiss++; + while (1) { + wait_event(gms->ms_wait_queue, + atomic_read(&gms->ms_range_active) == 0); + prefetchw(tfh); /* Helps on hdw, required for emulator */ + ret = gru_try_dropin(gts->ts_gru, gts, tfh, cb); + if (ret <= 0) + return ret; + STAT(call_os_wait_queue); + } +} + +/* + * This interface is called as a result of a user detecting a "call OS" bit + * in a user CB. Normally means that a TLB fault has occurred. 
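+ *
+ * Hypothetical user-library sequence that lands here (the gru_fd
+ * descriptor and the CBS_CALL_OS status code are assumptions, not part
+ * of this file):
+ *
+ *	if (gru_check_status(cb) == CBS_CALL_OS)
+ *		ioctl(gru_fd, GRU_USER_CALL_OS, (unsigned long)cb);
+ *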
+ * cb - user virtual address of the CB + */ +int gru_handle_user_call_os(unsigned long cb) +{ + struct gru_tlb_fault_handle *tfh; + struct gru_thread_state *gts; + void *cbk; + int ucbnum, cbrnum, ret = -EINVAL; + + STAT(call_os); + + /* sanity check the cb pointer */ + ucbnum = get_cb_number((void *)cb); + if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB) + return -EINVAL; + + gts = gru_find_lock_gts(cb); + if (!gts) + return -EINVAL; + gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts); + + if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) + goto exit; + + gru_check_context_placement(gts); + + /* + * CCH may contain stale data if ts_force_cch_reload is set. + */ + if (gts->ts_gru && gts->ts_force_cch_reload) { + gts->ts_force_cch_reload = 0; + gru_update_cch(gts); + } + + ret = -EAGAIN; + cbrnum = thread_cbr_number(gts, ucbnum); + if (gts->ts_gru) { + tfh = get_tfh_by_index(gts->ts_gru, cbrnum); + cbk = get_gseg_base_address_cb(gts->ts_gru->gs_gru_base_vaddr, + gts->ts_ctxnum, ucbnum); + ret = gru_user_dropin(gts, tfh, cbk); + } +exit: + gru_unlock_gts(gts); + return ret; +} + +/* + * Fetch the exception detail information for a CB that terminated with + * an exception. + */ +int gru_get_exception_detail(unsigned long arg) +{ + struct control_block_extended_exc_detail excdet; + struct gru_control_block_extended *cbe; + struct gru_thread_state *gts; + int ucbnum, cbrnum, ret; + + STAT(user_exception); + if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet))) + return -EFAULT; + + gts = gru_find_lock_gts(excdet.cb); + if (!gts) + return -EINVAL; + + gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", excdet.cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts); + ucbnum = get_cb_number((void *)excdet.cb); + if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) { + ret = -EINVAL; + } else if (gts->ts_gru) { + cbrnum = thread_cbr_number(gts, ucbnum); + cbe = get_cbe_by_index(gts->ts_gru, cbrnum); + gru_flush_cache(cbe); /* CBE not coherent */ + sync_core(); /* make sure we have current data */ + excdet.opc = cbe->opccpy; + excdet.exopc = cbe->exopccpy; + excdet.ecause = cbe->ecause; + excdet.exceptdet0 = cbe->idef1upd; + excdet.exceptdet1 = cbe->idef3upd; + excdet.cbrstate = cbe->cbrstate; + excdet.cbrexecstatus = cbe->cbrexecstatus; + gru_flush_cache_cbe(cbe); + ret = 0; + } else { + ret = -EAGAIN; + } + gru_unlock_gts(gts); + + gru_dbg(grudev, + "cb 0x%lx, op %d, exopc %d, cbrstate %d, cbrexecstatus 0x%x, ecause 0x%x, " + "exdet0 0x%lx, exdet1 0x%x\n", + excdet.cb, excdet.opc, excdet.exopc, excdet.cbrstate, excdet.cbrexecstatus, + excdet.ecause, excdet.exceptdet0, excdet.exceptdet1); + if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet))) + ret = -EFAULT; + return ret; +} + +/* + * User request to unload a context. Content is saved for possible reload. 
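+ *
+ * Sketch of the assumed userspace trigger (gru_fd is hypothetical):
+ *
+ *	struct gru_unload_context_req req = { .gseg = gseg_vaddr };
+ *
+ *	ioctl(gru_fd, GRU_USER_UNLOAD_CONTEXT, (unsigned long)&req);
+ *
+ * A zero req.gseg asks for all contexts to be unloaded (CAP_SYS_ADMIN
+ * required).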
+ */ +static int gru_unload_all_contexts(void) +{ + struct gru_thread_state *gts; + struct gru_state *gru; + int gid, ctxnum; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + foreach_gid(gid) { + gru = GID_TO_GRU(gid); + spin_lock(&gru->gs_lock); + for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) { + gts = gru->gs_gts[ctxnum]; + if (gts && mutex_trylock(&gts->ts_ctxlock)) { + spin_unlock(&gru->gs_lock); + gru_unload_context(gts, 1); + mutex_unlock(&gts->ts_ctxlock); + spin_lock(&gru->gs_lock); + } + } + spin_unlock(&gru->gs_lock); + } + return 0; +} + +int gru_user_unload_context(unsigned long arg) +{ + struct gru_thread_state *gts; + struct gru_unload_context_req req; + + STAT(user_unload_context); + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + gru_dbg(grudev, "gseg 0x%lx\n", req.gseg); + + if (!req.gseg) + return gru_unload_all_contexts(); + + gts = gru_find_lock_gts(req.gseg); + if (!gts) + return -EINVAL; + + if (gts->ts_gru) + gru_unload_context(gts, 1); + gru_unlock_gts(gts); + + return 0; +} + +/* + * User request to flush a range of virtual addresses from the GRU TLB + * (Mainly for testing). + */ +int gru_user_flush_tlb(unsigned long arg) +{ + struct gru_thread_state *gts; + struct gru_flush_tlb_req req; + struct gru_mm_struct *gms; + + STAT(user_flush_tlb); + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + gru_dbg(grudev, "gseg 0x%lx, vaddr 0x%lx, len 0x%lx\n", req.gseg, + req.vaddr, req.len); + + gts = gru_find_lock_gts(req.gseg); + if (!gts) + return -EINVAL; + + gms = gts->ts_gms; + gru_unlock_gts(gts); + gru_flush_tlb_range(gms, req.vaddr, req.len); + + return 0; +} + +/* + * Fetch GSEG statistics + */ +long gru_get_gseg_statistics(unsigned long arg) +{ + struct gru_thread_state *gts; + struct gru_get_gseg_statistics_req req; + + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + /* + * The library creates arrays of contexts for threaded programs. + * If no gts exists in the array, the context has never been used & all + * statistics are implicitly 0. + */ + gts = gru_find_lock_gts(req.gseg); + if (gts) { + memcpy(&req.stats, &gts->ustats, sizeof(gts->ustats)); + gru_unlock_gts(gts); + } else { + memset(&req.stats, 0, sizeof(gts->ustats)); + } + + if (copy_to_user((void __user *)arg, &req, sizeof(req))) + return -EFAULT; + + return 0; +} + +/* + * Register the current task as the user of the GSEG slice. + * Needed for TLB fault interrupt targeting. 
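+ *
+ * Example request (field values illustrative; the option codes are
+ * handled in the switch below, and gru_fd is an assumed descriptor for
+ * /dev/gru):
+ *
+ *	struct gru_set_context_option_req req = {
+ *		.gseg = gseg_vaddr,
+ *		.op = sco_gseg_owner,
+ *	};
+ *
+ *	ioctl(gru_fd, GRU_SET_CONTEXT_OPTION, (unsigned long)&req);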
+ */ +int gru_set_context_option(unsigned long arg) +{ + struct gru_thread_state *gts; + struct gru_set_context_option_req req; + int ret = 0; + + STAT(set_context_option); + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + gru_dbg(grudev, "op %d, gseg 0x%lx, value1 0x%lx\n", req.op, req.gseg, req.val1); + + gts = gru_find_lock_gts(req.gseg); + if (!gts) { + gts = gru_alloc_locked_gts(req.gseg); + if (IS_ERR(gts)) + return PTR_ERR(gts); + } + + switch (req.op) { + case sco_blade_chiplet: + /* Select blade/chiplet for GRU context */ + if (req.val0 < -1 || req.val0 >= GRU_CHIPLETS_PER_HUB || + req.val1 < -1 || req.val1 >= GRU_MAX_BLADES || + (req.val1 >= 0 && !gru_base[req.val1])) { + ret = -EINVAL; + } else { + gts->ts_user_blade_id = req.val1; + gts->ts_user_chiplet_id = req.val0; + gru_check_context_placement(gts); + } + break; + case sco_gseg_owner: + /* Register the current task as the GSEG owner */ + gts->ts_tgid_owner = current->tgid; + break; + case sco_cch_req_slice: + /* Set the CCH slice option */ + gts->ts_cch_req_slice = req.val1 & 3; + break; + default: + ret = -EINVAL; + } + gru_unlock_gts(gts); + + return ret; +} diff --git a/kernel/drivers/misc/sgi-gru/grufile.c b/kernel/drivers/misc/sgi-gru/grufile.c new file mode 100644 index 000000000..104a05f6b --- /dev/null +++ b/kernel/drivers/misc/sgi-gru/grufile.c @@ -0,0 +1,623 @@ +/* + * SN Platform GRU Driver + * + * FILE OPERATIONS & DRIVER INITIALIZATION + * + * This file supports the user system call for file open, close, mmap, etc. + * This also includes the driver initialization code. + * + * Copyright (c) 2008-2014 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/io.h> +#include <linux/spinlock.h> +#include <linux/device.h> +#include <linux/miscdevice.h> +#include <linux/interrupt.h> +#include <linux/proc_fs.h> +#include <linux/uaccess.h> +#ifdef CONFIG_X86_64 +#include <asm/uv/uv_irq.h> +#endif +#include <asm/uv/uv.h> +#include "gru.h" +#include "grulib.h" +#include "grutables.h" + +#include <asm/uv/uv_hub.h> +#include <asm/uv/uv_mmrs.h> + +struct gru_blade_state *gru_base[GRU_MAX_BLADES] __read_mostly; +unsigned long gru_start_paddr __read_mostly; +void *gru_start_vaddr __read_mostly; +unsigned long gru_end_paddr __read_mostly; +unsigned int gru_max_gids __read_mostly; +struct gru_stats_s gru_stats; + +/* Guaranteed user available resources on each node */ +static int max_user_cbrs, max_user_dsr_bytes; + +static struct miscdevice gru_miscdev; + +static int gru_supported(void) +{ + return is_uv_system() && + (uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE); +} + +/* + * gru_vma_close + * + * Called when unmapping a device mapping. 
 Frees all gru resources + * and tables belonging to the vma. + */ +static void gru_vma_close(struct vm_area_struct *vma) +{ + struct gru_vma_data *vdata; + struct gru_thread_state *gts; + struct list_head *entry, *next; + + if (!vma->vm_private_data) + return; + + vdata = vma->vm_private_data; + vma->vm_private_data = NULL; + gru_dbg(grudev, "vma %p, file %p, vdata %p\n", vma, vma->vm_file, + vdata); + list_for_each_safe(entry, next, &vdata->vd_head) { + gts = + list_entry(entry, struct gru_thread_state, ts_next); + list_del(&gts->ts_next); + mutex_lock(&gts->ts_ctxlock); + if (gts->ts_gru) + gru_unload_context(gts, 0); + mutex_unlock(&gts->ts_ctxlock); + gts_drop(gts); + } + kfree(vdata); + STAT(vdata_free); +} + +/* + * gru_file_mmap + * + * Called when mmapping the device. Initializes the vma with a fault handler + * and private data structure necessary to allocate, track, and free the + * underlying pages. + */ +static int gru_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) != (VM_SHARED | VM_WRITE)) + return -EPERM; + + if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) || + vma->vm_end & (GRU_GSEG_PAGESIZE - 1)) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_LOCKED | + VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_page_prot = PAGE_SHARED; + vma->vm_ops = &gru_vm_ops; + + vma->vm_private_data = gru_alloc_vma_data(vma, 0); + if (!vma->vm_private_data) + return -ENOMEM; + + gru_dbg(grudev, "file %p, vaddr 0x%lx, vma %p, vdata %p\n", + file, vma->vm_start, vma, vma->vm_private_data); + return 0; +} + +/* + * Create a new GRU context + */ +static int gru_create_new_context(unsigned long arg) +{ + struct gru_create_context_req req; + struct vm_area_struct *vma; + struct gru_vma_data *vdata; + int ret = -EINVAL; + + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + if (req.data_segment_bytes > max_user_dsr_bytes) + return -EINVAL; + if (req.control_blocks > max_user_cbrs || !req.maximum_thread_count) + return -EINVAL; + + if (!(req.options & GRU_OPT_MISS_MASK)) + req.options |= GRU_OPT_MISS_FMM_INTR; + + down_write(&current->mm->mmap_sem); + vma = gru_find_vma(req.gseg); + if (vma) { + vdata = vma->vm_private_data; + vdata->vd_user_options = req.options; + vdata->vd_dsr_au_count = + GRU_DS_BYTES_TO_AU(req.data_segment_bytes); + vdata->vd_cbr_au_count = GRU_CB_COUNT_TO_AU(req.control_blocks); + vdata->vd_tlb_preload_count = req.tlb_preload_count; + ret = 0; + } + up_write(&current->mm->mmap_sem); + + return ret; +} + +/* + * Get GRU configuration info (temp - for emulator testing) + */ +static long gru_get_config_info(unsigned long arg) +{ + struct gru_config_info info; + int nodesperblade; + + if (num_online_nodes() > 1 && + (uv_node_to_blade_id(1) == uv_node_to_blade_id(0))) + nodesperblade = 2; + else + nodesperblade = 1; + memset(&info, 0, sizeof(info)); + info.cpus = num_online_cpus(); + info.nodes = num_online_nodes(); + info.blades = info.nodes / nodesperblade; + info.chiplets = GRU_CHIPLETS_PER_BLADE * info.blades; + + if (copy_to_user((void __user *)arg, &info, sizeof(info))) + return -EFAULT; + return 0; +} + +/* + * gru_file_unlocked_ioctl + * + * Called to update file attributes via IOCTL calls. 
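+ *
+ * All userspace entry points other than open/mmap funnel through here.
+ * For example, after mmap()ing a gseg range from /dev/gru, a context is
+ * typically created with (field values illustrative, gru_fd assumed):
+ *
+ *	struct gru_create_context_req req = {
+ *		.gseg = gseg_vaddr,
+ *		.data_segment_bytes = dsr_bytes,
+ *		.control_blocks = cbr_count,
+ *		.maximum_thread_count = 1,
+ *	};
+ *
+ *	ioctl(gru_fd, GRU_CREATE_CONTEXT, (unsigned long)&req);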
+ */ +static long gru_file_unlocked_ioctl(struct file *file, unsigned int req, + unsigned long arg) +{ + int err = -EBADRQC; + + gru_dbg(grudev, "file %p, req 0x%x, 0x%lx\n", file, req, arg); + + switch (req) { + case GRU_CREATE_CONTEXT: + err = gru_create_new_context(arg); + break; + case GRU_SET_CONTEXT_OPTION: + err = gru_set_context_option(arg); + break; + case GRU_USER_GET_EXCEPTION_DETAIL: + err = gru_get_exception_detail(arg); + break; + case GRU_USER_UNLOAD_CONTEXT: + err = gru_user_unload_context(arg); + break; + case GRU_USER_FLUSH_TLB: + err = gru_user_flush_tlb(arg); + break; + case GRU_USER_CALL_OS: + err = gru_handle_user_call_os(arg); + break; + case GRU_GET_GSEG_STATISTICS: + err = gru_get_gseg_statistics(arg); + break; + case GRU_KTEST: + err = gru_ktest(arg); + break; + case GRU_GET_CONFIG_INFO: + err = gru_get_config_info(arg); + break; + case GRU_DUMP_CHIPLET_STATE: + err = gru_dump_chiplet_request(arg); + break; + } + return err; +} + +/* + * Called at init time to build tables for all GRUs that are present in the + * system. + */ +static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr, + void *vaddr, int blade_id, int chiplet_id) +{ + spin_lock_init(&gru->gs_lock); + spin_lock_init(&gru->gs_asid_lock); + gru->gs_gru_base_paddr = paddr; + gru->gs_gru_base_vaddr = vaddr; + gru->gs_gid = blade_id * GRU_CHIPLETS_PER_BLADE + chiplet_id; + gru->gs_blade = gru_base[blade_id]; + gru->gs_blade_id = blade_id; + gru->gs_chiplet_id = chiplet_id; + gru->gs_cbr_map = (GRU_CBR_AU == 64) ? ~0 : (1UL << GRU_CBR_AU) - 1; + gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1; + gru->gs_asid_limit = MAX_ASID; + gru_tgh_flush_init(gru); + if (gru->gs_gid >= gru_max_gids) + gru_max_gids = gru->gs_gid + 1; + gru_dbg(grudev, "bid %d, gid %d, vaddr %p (0x%lx)\n", + blade_id, gru->gs_gid, gru->gs_gru_base_vaddr, + gru->gs_gru_base_paddr); +} + +static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr) +{ + int pnode, nid, bid, chip; + int cbrs, dsrbytes, n; + int order = get_order(sizeof(struct gru_blade_state)); + struct page *page; + struct gru_state *gru; + unsigned long paddr; + void *vaddr; + + max_user_cbrs = GRU_NUM_CB; + max_user_dsr_bytes = GRU_NUM_DSR_BYTES; + for_each_possible_blade(bid) { + pnode = uv_blade_to_pnode(bid); + nid = uv_blade_to_memory_nid(bid);/* -1 if no memory on blade */ + page = alloc_pages_node(nid, GFP_KERNEL, order); + if (!page) + goto fail; + gru_base[bid] = page_address(page); + memset(gru_base[bid], 0, sizeof(struct gru_blade_state)); + gru_base[bid]->bs_lru_gru = &gru_base[bid]->bs_grus[0]; + spin_lock_init(&gru_base[bid]->bs_lock); + init_rwsem(&gru_base[bid]->bs_kgts_sema); + + dsrbytes = 0; + cbrs = 0; + for (gru = gru_base[bid]->bs_grus, chip = 0; + chip < GRU_CHIPLETS_PER_BLADE; + chip++, gru++) { + paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip); + vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip); + gru_init_chiplet(gru, paddr, vaddr, bid, chip); + n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE; + cbrs = max(cbrs, n); + n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES; + dsrbytes = max(dsrbytes, n); + } + max_user_cbrs = min(max_user_cbrs, cbrs); + max_user_dsr_bytes = min(max_user_dsr_bytes, dsrbytes); + } + + return 0; + +fail: + for (bid--; bid >= 0; bid--) + free_pages((unsigned long)gru_base[bid], order); + return -ENOMEM; +} + +static void gru_free_tables(void) +{ + int bid; + int order = get_order(sizeof(struct gru_state) * + GRU_CHIPLETS_PER_BLADE); + + for (bid = 0; bid < GRU_MAX_BLADES; bid++) + 
free_pages((unsigned long)gru_base[bid], order); +} + +static unsigned long gru_chiplet_cpu_to_mmr(int chiplet, int cpu, int *corep) +{ + unsigned long mmr = 0; + int core; + + /* + * We target the cores of a blade and not the hyperthreads themselves. + * There is a max of 8 cores per socket and 2 sockets per blade, + * making for a max total of 16 cores (i.e., 16 CPUs without + * hyperthreading and 32 CPUs with hyperthreading). + */ + core = uv_cpu_core_number(cpu) + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu); + if (core >= GRU_NUM_TFM || uv_cpu_ht_number(cpu)) + return 0; + + if (chiplet == 0) { + mmr = UVH_GR0_TLB_INT0_CONFIG + + core * (UVH_GR0_TLB_INT1_CONFIG - UVH_GR0_TLB_INT0_CONFIG); + } else if (chiplet == 1) { + mmr = UVH_GR1_TLB_INT0_CONFIG + + core * (UVH_GR1_TLB_INT1_CONFIG - UVH_GR1_TLB_INT0_CONFIG); + } else { + BUG(); + } + + *corep = core; + return mmr; +} + +#ifdef CONFIG_IA64 + +static int gru_irq_count[GRU_CHIPLETS_PER_BLADE]; + +static void gru_noop(struct irq_data *d) +{ +} + +static struct irq_chip gru_chip[GRU_CHIPLETS_PER_BLADE] = { + [0 ... GRU_CHIPLETS_PER_BLADE - 1] { + .irq_mask = gru_noop, + .irq_unmask = gru_noop, + .irq_ack = gru_noop + } +}; + +static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name, + irq_handler_t irq_handler, int cpu, int blade) +{ + unsigned long mmr; + int irq = IRQ_GRU + chiplet; + int ret, core; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr == 0) + return 0; + + if (gru_irq_count[chiplet] == 0) { + gru_chip[chiplet].name = irq_name; + ret = irq_set_chip(irq, &gru_chip[chiplet]); + if (ret) { + printk(KERN_ERR "%s: set_irq_chip failed, errno=%d\n", + GRU_DRIVER_ID_STR, -ret); + return ret; + } + + ret = request_irq(irq, irq_handler, 0, irq_name, NULL); + if (ret) { + printk(KERN_ERR "%s: request_irq failed, errno=%d\n", + GRU_DRIVER_ID_STR, -ret); + return ret; + } + } + gru_irq_count[chiplet]++; + + return 0; +} + +static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade) +{ + unsigned long mmr; + int core, irq = IRQ_GRU + chiplet; + + if (gru_irq_count[chiplet] == 0) + return; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr == 0) + return; + + if (--gru_irq_count[chiplet] == 0) + free_irq(irq, NULL); +} + +#elif defined CONFIG_X86_64 + +static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name, + irq_handler_t irq_handler, int cpu, int blade) +{ + unsigned long mmr; + int irq, core; + int ret; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr == 0) + return 0; + + irq = uv_setup_irq(irq_name, cpu, blade, mmr, UV_AFFINITY_CPU); + if (irq < 0) { + printk(KERN_ERR "%s: uv_setup_irq failed, errno=%d\n", + GRU_DRIVER_ID_STR, -irq); + return irq; + } + + ret = request_irq(irq, irq_handler, 0, irq_name, NULL); + if (ret) { + uv_teardown_irq(irq); + printk(KERN_ERR "%s: request_irq failed, errno=%d\n", + GRU_DRIVER_ID_STR, -ret); + return ret; + } + gru_base[blade]->bs_grus[chiplet].gs_irq[core] = irq; + return 0; +} + +static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade) +{ + int irq, core; + unsigned long mmr; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr) { + irq = gru_base[blade]->bs_grus[chiplet].gs_irq[core]; + if (irq) { + free_irq(irq, NULL); + uv_teardown_irq(irq); + } + } +} + +#endif + +static void gru_teardown_tlb_irqs(void) +{ + int blade; + int cpu; + + for_each_online_cpu(cpu) { + blade = uv_cpu_to_blade_id(cpu); + gru_chiplet_teardown_tlb_irq(0, cpu, blade); + gru_chiplet_teardown_tlb_irq(1, cpu, 
blade); + } + for_each_possible_blade(blade) { + if (uv_blade_nr_possible_cpus(blade)) + continue; + gru_chiplet_teardown_tlb_irq(0, 0, blade); + gru_chiplet_teardown_tlb_irq(1, 0, blade); + } +} + +static int gru_setup_tlb_irqs(void) +{ + int blade; + int cpu; + int ret; + + for_each_online_cpu(cpu) { + blade = uv_cpu_to_blade_id(cpu); + ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru0_intr, cpu, blade); + if (ret != 0) + goto exit1; + + ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru1_intr, cpu, blade); + if (ret != 0) + goto exit1; + } + for_each_possible_blade(blade) { + if (uv_blade_nr_possible_cpus(blade)) + continue; + ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru_intr_mblade, 0, blade); + if (ret != 0) + goto exit1; + + ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru_intr_mblade, 0, blade); + if (ret != 0) + goto exit1; + } + + return 0; + +exit1: + gru_teardown_tlb_irqs(); + return ret; +} + +/* + * gru_init + * + * Called at boot or module load time to initialize the GRUs. + */ +static int __init gru_init(void) +{ + int ret; + + if (!gru_supported()) + return 0; + +#if defined CONFIG_IA64 + gru_start_paddr = 0xd000000000UL; /* ZZZZZZZZZZZZZZZZZZZ fixme */ +#else + gru_start_paddr = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR) & + 0x7fffffffffffUL; +#endif + gru_start_vaddr = __va(gru_start_paddr); + gru_end_paddr = gru_start_paddr + GRU_MAX_BLADES * GRU_SIZE; + printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n", + gru_start_paddr, gru_end_paddr); + ret = misc_register(&gru_miscdev); + if (ret) { + printk(KERN_ERR "%s: misc_register failed\n", + GRU_DRIVER_ID_STR); + goto exit0; + } + + ret = gru_proc_init(); + if (ret) { + printk(KERN_ERR "%s: proc init failed\n", GRU_DRIVER_ID_STR); + goto exit1; + } + + ret = gru_init_tables(gru_start_paddr, gru_start_vaddr); + if (ret) { + printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR); + goto exit2; + } + + ret = gru_setup_tlb_irqs(); + if (ret != 0) + goto exit3; + + gru_kservices_init(); + + printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR, + GRU_DRIVER_VERSION_STR); + return 0; + +exit3: + gru_free_tables(); +exit2: + gru_proc_exit(); +exit1: + misc_deregister(&gru_miscdev); +exit0: + return ret; + +} + +static void __exit gru_exit(void) +{ + if (!gru_supported()) + return; + + gru_teardown_tlb_irqs(); + gru_kservices_exit(); + gru_free_tables(); + misc_deregister(&gru_miscdev); + gru_proc_exit(); +} + +static const struct file_operations gru_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = gru_file_unlocked_ioctl, + .mmap = gru_file_mmap, + .llseek = noop_llseek, +}; + +static struct miscdevice gru_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "gru", + .fops = &gru_fops, +}; + +const struct vm_operations_struct gru_vm_ops = { + .close = gru_vma_close, + .fault = gru_fault, +}; + +#ifndef MODULE +fs_initcall(gru_init); +#else +module_init(gru_init); +#endif +module_exit(gru_exit); + +module_param(gru_options, ulong, 0644); +MODULE_PARM_DESC(gru_options, "Various debug options"); + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION(GRU_DRIVER_ID_STR GRU_DRIVER_VERSION_STR); +MODULE_VERSION(GRU_DRIVER_VERSION_STR); + diff --git a/kernel/drivers/misc/sgi-gru/gruhandles.c b/kernel/drivers/misc/sgi-gru/gruhandles.c new file mode 100644 index 000000000..2f30badc6 --- /dev/null +++ b/kernel/drivers/misc/sgi-gru/gruhandles.c @@ -0,0 +1,216 @@ +/* + * GRU KERNEL MCS INSTRUCTIONS + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include "gru.h" +#include "grulib.h" +#include "grutables.h" + +/* 10 sec */ +#ifdef CONFIG_IA64 +#include <asm/processor.h> +#define GRU_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10) +#define CLKS2NSEC(c) ((c) *1000000000 / local_cpu_data->itc_freq) +#else +#include <asm/tsc.h> +#define GRU_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) +#define CLKS2NSEC(c) ((c) * 1000000 / tsc_khz) +#endif + +/* Extract the status field from a kernel handle */ +#define GET_MSEG_HANDLE_STATUS(h) (((*(unsigned long *)(h)) >> 16) & 3) + +struct mcs_op_statistic mcs_op_statistics[mcsop_last]; + +static void update_mcs_stats(enum mcs_op op, unsigned long clks) +{ + unsigned long nsec; + + nsec = CLKS2NSEC(clks); + atomic_long_inc(&mcs_op_statistics[op].count); + atomic_long_add(nsec, &mcs_op_statistics[op].total); + if (mcs_op_statistics[op].max < nsec) + mcs_op_statistics[op].max = nsec; +} + +static void start_instruction(void *h) +{ + unsigned long *w0 = h; + + wmb(); /* setting CMD/STATUS bits must be last */ + *w0 = *w0 | 0x20001; + gru_flush_cache(h); +} + +static void report_instruction_timeout(void *h) +{ + unsigned long goff = GSEGPOFF((unsigned long)h); + char *id = "???"; + + if (TYPE_IS(CCH, goff)) + id = "CCH"; + else if (TYPE_IS(TGH, goff)) + id = "TGH"; + else if (TYPE_IS(TFH, goff)) + id = "TFH"; + + panic(KERN_ALERT "GRU %p (%s) is malfunctioning\n", h, id); +} + +static int wait_instruction_complete(void *h, enum mcs_op opc) +{ + int status; + unsigned long start_time = get_cycles(); + + while (1) { + cpu_relax(); + status = GET_MSEG_HANDLE_STATUS(h); + if (status != CCHSTATUS_ACTIVE) + break; + if (GRU_OPERATION_TIMEOUT < (get_cycles() - start_time)) { + report_instruction_timeout(h); + start_time = get_cycles(); + } + } + if (gru_options & OPT_STATS) + update_mcs_stats(opc, get_cycles() - start_time); + return status; +} + +int cch_allocate(struct gru_context_configuration_handle *cch) +{ + int ret; + + cch->opc = CCHOP_ALLOCATE; + start_instruction(cch); + ret = wait_instruction_complete(cch, cchop_allocate); + + /* + * Stop speculation into the GSEG being mapped by the previous ALLOCATE. + * The GSEG memory does not exist until the ALLOCATE completes. 
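+ *
+ * (sync_core() below provides that serialization.) In sketch, the usual
+ * context life cycle driven by these wrappers is:
+ *
+ *	cch_allocate(cch);
+ *	cch_start(cch);
+ *	...context runs...
+ *	cch_interrupt(cch);	(or cch_interrupt_sync())
+ *	cch_deallocate(cch);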
+ */ + sync_core(); + return ret; +} + +int cch_start(struct gru_context_configuration_handle *cch) +{ + cch->opc = CCHOP_START; + start_instruction(cch); + return wait_instruction_complete(cch, cchop_start); +} + +int cch_interrupt(struct gru_context_configuration_handle *cch) +{ + cch->opc = CCHOP_INTERRUPT; + start_instruction(cch); + return wait_instruction_complete(cch, cchop_interrupt); +} + +int cch_deallocate(struct gru_context_configuration_handle *cch) +{ + int ret; + + cch->opc = CCHOP_DEALLOCATE; + start_instruction(cch); + ret = wait_instruction_complete(cch, cchop_deallocate); + + /* + * Stop speculation into the GSEG being unmapped by the previous + * DEALLOCATE. + */ + sync_core(); + return ret; +} + +int cch_interrupt_sync(struct gru_context_configuration_handle + *cch) +{ + cch->opc = CCHOP_INTERRUPT_SYNC; + start_instruction(cch); + return wait_instruction_complete(cch, cchop_interrupt_sync); +} + +int tgh_invalidate(struct gru_tlb_global_handle *tgh, + unsigned long vaddr, unsigned long vaddrmask, + int asid, int pagesize, int global, int n, + unsigned short ctxbitmap) +{ + tgh->vaddr = vaddr; + tgh->asid = asid; + tgh->pagesize = pagesize; + tgh->n = n; + tgh->global = global; + tgh->vaddrmask = vaddrmask; + tgh->ctxbitmap = ctxbitmap; + tgh->opc = TGHOP_TLBINV; + start_instruction(tgh); + return wait_instruction_complete(tgh, tghop_invalidate); +} + +int tfh_write_only(struct gru_tlb_fault_handle *tfh, + unsigned long paddr, int gaa, + unsigned long vaddr, int asid, int dirty, + int pagesize) +{ + tfh->fillasid = asid; + tfh->fillvaddr = vaddr; + tfh->pfn = paddr >> GRU_PADDR_SHIFT; + tfh->gaa = gaa; + tfh->dirty = dirty; + tfh->pagesize = pagesize; + tfh->opc = TFHOP_WRITE_ONLY; + start_instruction(tfh); + return wait_instruction_complete(tfh, tfhop_write_only); +} + +void tfh_write_restart(struct gru_tlb_fault_handle *tfh, + unsigned long paddr, int gaa, + unsigned long vaddr, int asid, int dirty, + int pagesize) +{ + tfh->fillasid = asid; + tfh->fillvaddr = vaddr; + tfh->pfn = paddr >> GRU_PADDR_SHIFT; + tfh->gaa = gaa; + tfh->dirty = dirty; + tfh->pagesize = pagesize; + tfh->opc = TFHOP_WRITE_RESTART; + start_instruction(tfh); +} + +void tfh_restart(struct gru_tlb_fault_handle *tfh) +{ + tfh->opc = TFHOP_RESTART; + start_instruction(tfh); +} + +void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh) +{ + tfh->opc = TFHOP_USER_POLLING_MODE; + start_instruction(tfh); +} + +void tfh_exception(struct gru_tlb_fault_handle *tfh) +{ + tfh->opc = TFHOP_EXCEPTION; + start_instruction(tfh); +} + diff --git a/kernel/drivers/misc/sgi-gru/gruhandles.h b/kernel/drivers/misc/sgi-gru/gruhandles.h new file mode 100644 index 000000000..3f998b924 --- /dev/null +++ b/kernel/drivers/misc/sgi-gru/gruhandles.h @@ -0,0 +1,531 @@ +/* + * SN Platform GRU Driver + * + * GRU HANDLE DEFINITION + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __GRUHANDLES_H__ +#define __GRUHANDLES_H__ +#include "gru_instructions.h" + +/* + * Manifest constants for GRU Memory Map + */ +#define GRU_GSEG0_BASE 0 +#define GRU_MCS_BASE (64 * 1024 * 1024) +#define GRU_SIZE (128UL * 1024 * 1024) + +/* Handle & resource counts */ +#define GRU_NUM_CB 128 +#define GRU_NUM_DSR_BYTES (32 * 1024) +#define GRU_NUM_TFM 16 +#define GRU_NUM_TGH 24 +#define GRU_NUM_CBE 128 +#define GRU_NUM_TFH 128 +#define GRU_NUM_CCH 16 + +/* Maximum resource counts that can be reserved by user programs */ +#define GRU_NUM_USER_CBR GRU_NUM_CBE +#define GRU_NUM_USER_DSR_BYTES GRU_NUM_DSR_BYTES + +/* Bytes per handle & handle stride. Code assumes all cb, tfh, cbe handles + * are the same */ +#define GRU_HANDLE_BYTES 64 +#define GRU_HANDLE_STRIDE 256 + +/* Base addresses of handles */ +#define GRU_TFM_BASE (GRU_MCS_BASE + 0x00000) +#define GRU_TGH_BASE (GRU_MCS_BASE + 0x08000) +#define GRU_CBE_BASE (GRU_MCS_BASE + 0x10000) +#define GRU_TFH_BASE (GRU_MCS_BASE + 0x18000) +#define GRU_CCH_BASE (GRU_MCS_BASE + 0x20000) + +/* User gseg constants */ +#define GRU_GSEG_STRIDE (4 * 1024 * 1024) +#define GSEG_BASE(a) ((a) & ~(GRU_GSEG_PAGESIZE - 1)) + +/* Data segment constants */ +#define GRU_DSR_AU_BYTES 1024 +#define GRU_DSR_CL (GRU_NUM_DSR_BYTES / GRU_CACHE_LINE_BYTES) +#define GRU_DSR_AU_CL (GRU_DSR_AU_BYTES / GRU_CACHE_LINE_BYTES) +#define GRU_DSR_AU (GRU_NUM_DSR_BYTES / GRU_DSR_AU_BYTES) + +/* Control block constants */ +#define GRU_CBR_AU_SIZE 2 +#define GRU_CBR_AU (GRU_NUM_CBE / GRU_CBR_AU_SIZE) + +/* Convert resource counts to the number of AU */ +#define GRU_DS_BYTES_TO_AU(n) DIV_ROUND_UP(n, GRU_DSR_AU_BYTES) +#define GRU_CB_COUNT_TO_AU(n) DIV_ROUND_UP(n, GRU_CBR_AU_SIZE) + +/* UV limits */ +#define GRU_CHIPLETS_PER_HUB 2 +#define GRU_HUBS_PER_BLADE 1 +#define GRU_CHIPLETS_PER_BLADE (GRU_HUBS_PER_BLADE * GRU_CHIPLETS_PER_HUB) + +/* User GRU Gseg offsets */ +#define GRU_CB_BASE 0 +#define GRU_CB_LIMIT (GRU_CB_BASE + GRU_HANDLE_STRIDE * GRU_NUM_CBE) +#define GRU_DS_BASE 0x20000 +#define GRU_DS_LIMIT (GRU_DS_BASE + GRU_NUM_DSR_BYTES) + +/* Convert a GRU physical address to the chiplet offset */ +#define GSEGPOFF(h) ((h) & (GRU_SIZE - 1)) + +/* Convert an arbitrary handle address to the beginning of the GRU segment */ +#define GRUBASE(h) ((void *)((unsigned long)(h) & ~(GRU_SIZE - 1))) + +/* Test a valid handle address to determine the type */ +#define TYPE_IS(hn, h) ((h) >= GRU_##hn##_BASE && (h) < \ + GRU_##hn##_BASE + GRU_NUM_##hn * GRU_HANDLE_STRIDE && \ + (((h) & (GRU_HANDLE_STRIDE - 1)) == 0)) + + +/* General addressing macros. 
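+ * Each MCS handle type lives at a fixed base plus ctxnum * GRU_HANDLE_STRIDE.
+ * A worked example (our arithmetic from the constants above, not original
+ * text): get_cch(base, 3) resolves to
+ *	base + GRU_CCH_BASE + 3 * GRU_HANDLE_STRIDE
+ *	     = base + GRU_MCS_BASE + 0x20000 + 0x300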
*/ +static inline void *get_gseg_base_address(void *base, int ctxnum) +{ + return (void *)(base + GRU_GSEG0_BASE + GRU_GSEG_STRIDE * ctxnum); +} + +static inline void *get_gseg_base_address_cb(void *base, int ctxnum, int line) +{ + return (void *)(get_gseg_base_address(base, ctxnum) + + GRU_CB_BASE + GRU_HANDLE_STRIDE * line); +} + +static inline void *get_gseg_base_address_ds(void *base, int ctxnum, int line) +{ + return (void *)(get_gseg_base_address(base, ctxnum) + GRU_DS_BASE + + GRU_CACHE_LINE_BYTES * line); +} + +static inline struct gru_tlb_fault_map *get_tfm(void *base, int ctxnum) +{ + return (struct gru_tlb_fault_map *)(base + GRU_TFM_BASE + + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline struct gru_tlb_global_handle *get_tgh(void *base, int ctxnum) +{ + return (struct gru_tlb_global_handle *)(base + GRU_TGH_BASE + + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline struct gru_control_block_extended *get_cbe(void *base, int ctxnum) +{ + return (struct gru_control_block_extended *)(base + GRU_CBE_BASE + + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline struct gru_tlb_fault_handle *get_tfh(void *base, int ctxnum) +{ + return (struct gru_tlb_fault_handle *)(base + GRU_TFH_BASE + + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline struct gru_context_configuration_handle *get_cch(void *base, + int ctxnum) +{ + return (struct gru_context_configuration_handle *)(base + + GRU_CCH_BASE + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline unsigned long get_cb_number(void *cb) +{ + return (((unsigned long)cb - GRU_CB_BASE) % GRU_GSEG_PAGESIZE) / + GRU_HANDLE_STRIDE; +} + +/* byte offset to a specific GRU chiplet. (p=pnode, c=chiplet (0 or 1)*/ +static inline unsigned long gru_chiplet_paddr(unsigned long paddr, int pnode, + int chiplet) +{ + return paddr + GRU_SIZE * (2 * pnode + chiplet); +} + +static inline void *gru_chiplet_vaddr(void *vaddr, int pnode, int chiplet) +{ + return vaddr + GRU_SIZE * (2 * pnode + chiplet); +} + +static inline struct gru_control_block_extended *gru_tfh_to_cbe( + struct gru_tlb_fault_handle *tfh) +{ + unsigned long cbe; + + cbe = (unsigned long)tfh - GRU_TFH_BASE + GRU_CBE_BASE; + return (struct gru_control_block_extended*)cbe; +} + + + + +/* + * Global TLB Fault Map + * Bitmap of outstanding TLB misses needing interrupt/polling service. + * + */ +struct gru_tlb_fault_map { + unsigned long fault_bits[BITS_TO_LONGS(GRU_NUM_CBE)]; + unsigned long fill0[2]; + unsigned long done_bits[BITS_TO_LONGS(GRU_NUM_CBE)]; + unsigned long fill1[2]; +}; + +/* + * TGH - TLB Global Handle + * Used for TLB flushing. 
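+ * A flush is requested by filling in the address/ASID fields and issuing
+ * TGHOP_TLBINV; an illustrative call, with hypothetical argument values,
+ * mirroring tgh_invalidate() in gruhandles.c:
+ *	tgh_invalidate(tgh, vaddr, ~0UL, asid, GRU_PAGESIZE(PAGE_SHIFT),
+ *		       0, GRUMAXINVAL, cch->ctxbitmap);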
+ * + */ +struct gru_tlb_global_handle { + unsigned int cmd:1; /* DW 0 */ + unsigned int delresp:1; + unsigned int opc:1; + unsigned int fill1:5; + + unsigned int fill2:8; + + unsigned int status:2; + unsigned long fill3:2; + unsigned int state:3; + unsigned long fill4:1; + + unsigned int cause:3; + unsigned long fill5:37; + + unsigned long vaddr:64; /* DW 1 */ + + unsigned int asid:24; /* DW 2 */ + unsigned int fill6:8; + + unsigned int pagesize:5; + unsigned int fill7:11; + + unsigned int global:1; + unsigned int fill8:15; + + unsigned long vaddrmask:39; /* DW 3 */ + unsigned int fill9:9; + unsigned int n:10; + unsigned int fill10:6; + + unsigned int ctxbitmap:16; /* DW4 */ + unsigned long fill11[3]; +}; + +enum gru_tgh_cmd { + TGHCMD_START +}; + +enum gru_tgh_opc { + TGHOP_TLBNOP, + TGHOP_TLBINV +}; + +enum gru_tgh_status { + TGHSTATUS_IDLE, + TGHSTATUS_EXCEPTION, + TGHSTATUS_ACTIVE +}; + +enum gru_tgh_state { + TGHSTATE_IDLE, + TGHSTATE_PE_INVAL, + TGHSTATE_INTERRUPT_INVAL, + TGHSTATE_WAITDONE, + TGHSTATE_RESTART_CTX, +}; + +enum gru_tgh_cause { + TGHCAUSE_RR_ECC, + TGHCAUSE_TLB_ECC, + TGHCAUSE_LRU_ECC, + TGHCAUSE_PS_ECC, + TGHCAUSE_MUL_ERR, + TGHCAUSE_DATA_ERR, + TGHCAUSE_SW_FORCE +}; + + +/* + * TFH - TLB Global Handle + * Used for TLB dropins into the GRU TLB. + * + */ +struct gru_tlb_fault_handle { + unsigned int cmd:1; /* DW 0 - low 32*/ + unsigned int delresp:1; + unsigned int fill0:2; + unsigned int opc:3; + unsigned int fill1:9; + + unsigned int status:2; + unsigned int fill2:2; + unsigned int state:3; + unsigned int fill3:1; + + unsigned int cause:6; + unsigned int cb_int:1; + unsigned int fill4:1; + + unsigned int indexway:12; /* DW 0 - high 32 */ + unsigned int fill5:4; + + unsigned int ctxnum:4; + unsigned int fill6:12; + + unsigned long missvaddr:64; /* DW 1 */ + + unsigned int missasid:24; /* DW 2 */ + unsigned int fill7:8; + unsigned int fillasid:24; + unsigned int dirty:1; + unsigned int gaa:2; + unsigned long fill8:5; + + unsigned long pfn:41; /* DW 3 */ + unsigned int fill9:7; + unsigned int pagesize:5; + unsigned int fill10:11; + + unsigned long fillvaddr:64; /* DW 4 */ + + unsigned long fill11[3]; +}; + +enum gru_tfh_opc { + TFHOP_NOOP, + TFHOP_RESTART, + TFHOP_WRITE_ONLY, + TFHOP_WRITE_RESTART, + TFHOP_EXCEPTION, + TFHOP_USER_POLLING_MODE = 7, +}; + +enum tfh_status { + TFHSTATUS_IDLE, + TFHSTATUS_EXCEPTION, + TFHSTATUS_ACTIVE, +}; + +enum tfh_state { + TFHSTATE_INACTIVE, + TFHSTATE_IDLE, + TFHSTATE_MISS_UPM, + TFHSTATE_MISS_FMM, + TFHSTATE_HW_ERR, + TFHSTATE_WRITE_TLB, + TFHSTATE_RESTART_CBR, +}; + +/* TFH cause bits */ +enum tfh_cause { + TFHCAUSE_NONE, + TFHCAUSE_TLB_MISS, + TFHCAUSE_TLB_MOD, + TFHCAUSE_HW_ERROR_RR, + TFHCAUSE_HW_ERROR_MAIN_ARRAY, + TFHCAUSE_HW_ERROR_VALID, + TFHCAUSE_HW_ERROR_PAGESIZE, + TFHCAUSE_INSTRUCTION_EXCEPTION, + TFHCAUSE_UNCORRECTIBLE_ERROR, +}; + +/* GAA values */ +#define GAA_RAM 0x0 +#define GAA_NCRAM 0x2 +#define GAA_MMIO 0x1 +#define GAA_REGISTER 0x3 + +/* GRU paddr shift for pfn. (NOTE: shift is NOT by actual pagesize) */ +#define GRU_PADDR_SHIFT 12 + +/* + * Context Configuration handle + * Used to allocate resources to a GSEG context. 
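+ * Typical lifecycle, as suggested by the cch_* helpers in gruhandles.c
+ * (a summary of this driver's usage, not hardware documentation):
+ * CCHOP_ALLOCATE binds CBR/DSR resources to the context, CCHOP_START
+ * makes it runnable, and CCHOP_INTERRUPT / CCHOP_DEALLOCATE quiesce and
+ * release it.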
+ * + */ +struct gru_context_configuration_handle { + unsigned int cmd:1; /* DW0 */ + unsigned int delresp:1; + unsigned int opc:3; + unsigned int unmap_enable:1; + unsigned int req_slice_set_enable:1; + unsigned int req_slice:2; + unsigned int cb_int_enable:1; + unsigned int tlb_int_enable:1; + unsigned int tfm_fault_bit_enable:1; + unsigned int tlb_int_select:4; + + unsigned int status:2; + unsigned int state:2; + unsigned int reserved2:4; + + unsigned int cause:4; + unsigned int tfm_done_bit_enable:1; + unsigned int unused:3; + + unsigned int dsr_allocation_map; + + unsigned long cbr_allocation_map; /* DW1 */ + + unsigned int asid[8]; /* DW 2 - 5 */ + unsigned short sizeavail[8]; /* DW 6 - 7 */ +} __attribute__ ((packed)); + +enum gru_cch_opc { + CCHOP_START = 1, + CCHOP_ALLOCATE, + CCHOP_INTERRUPT, + CCHOP_DEALLOCATE, + CCHOP_INTERRUPT_SYNC, +}; + +enum gru_cch_status { + CCHSTATUS_IDLE, + CCHSTATUS_EXCEPTION, + CCHSTATUS_ACTIVE, +}; + +enum gru_cch_state { + CCHSTATE_INACTIVE, + CCHSTATE_MAPPED, + CCHSTATE_ACTIVE, + CCHSTATE_INTERRUPTED, +}; + +/* CCH Exception cause */ +enum gru_cch_cause { + CCHCAUSE_REGION_REGISTER_WRITE_ERROR = 1, + CCHCAUSE_ILLEGAL_OPCODE = 2, + CCHCAUSE_INVALID_START_REQUEST = 3, + CCHCAUSE_INVALID_ALLOCATION_REQUEST = 4, + CCHCAUSE_INVALID_DEALLOCATION_REQUEST = 5, + CCHCAUSE_INVALID_INTERRUPT_REQUEST = 6, + CCHCAUSE_CCH_BUSY = 7, + CCHCAUSE_NO_CBRS_TO_ALLOCATE = 8, + CCHCAUSE_BAD_TFM_CONFIG = 9, + CCHCAUSE_CBR_RESOURCES_OVERSUBSCRIPED = 10, + CCHCAUSE_DSR_RESOURCES_OVERSUBSCRIPED = 11, + CCHCAUSE_CBR_DEALLOCATION_ERROR = 12, +}; +/* + * CBE - Control Block Extended + * Maintains internal GRU state for active CBs. + * + */ +struct gru_control_block_extended { + unsigned int reserved0:1; /* DW 0 - low */ + unsigned int imacpy:3; + unsigned int reserved1:4; + unsigned int xtypecpy:3; + unsigned int iaa0cpy:2; + unsigned int iaa1cpy:2; + unsigned int reserved2:1; + unsigned int opccpy:8; + unsigned int exopccpy:8; + + unsigned int idef2cpy:22; /* DW 0 - high */ + unsigned int reserved3:10; + + unsigned int idef4cpy:22; /* DW 1 */ + unsigned int reserved4:10; + unsigned int idef4upd:22; + unsigned int reserved5:10; + + unsigned long idef1upd:64; /* DW 2 */ + + unsigned long idef5cpy:64; /* DW 3 */ + + unsigned long idef6cpy:64; /* DW 4 */ + + unsigned long idef3upd:64; /* DW 5 */ + + unsigned long idef5upd:64; /* DW 6 */ + + unsigned int idef2upd:22; /* DW 7 */ + unsigned int reserved6:10; + + unsigned int ecause:20; + unsigned int cbrstate:4; + unsigned int cbrexecstatus:8; +}; + +/* CBE fields for active BCOPY instructions */ +#define cbe_baddr0 idef1upd +#define cbe_baddr1 idef3upd +#define cbe_src_cl idef6cpy +#define cbe_nelemcur idef5upd + +enum gru_cbr_state { + CBRSTATE_INACTIVE, + CBRSTATE_IDLE, + CBRSTATE_PE_CHECK, + CBRSTATE_QUEUED, + CBRSTATE_WAIT_RESPONSE, + CBRSTATE_INTERRUPTED, + CBRSTATE_INTERRUPTED_MISS_FMM, + CBRSTATE_BUSY_INTERRUPT_MISS_FMM, + CBRSTATE_INTERRUPTED_MISS_UPM, + CBRSTATE_BUSY_INTERRUPTED_MISS_UPM, + CBRSTATE_REQUEST_ISSUE, + CBRSTATE_BUSY_INTERRUPT, +}; + +/* CBE cbrexecstatus bits - defined in gru_instructions.h*/ +/* CBE ecause bits - defined in gru_instructions.h */ + +/* + * Convert a processor pagesize into the strange encoded pagesize used by the + * GRU. Processor pagesize is encoded as log of bytes per page. (or PAGE_SHIFT) + * pagesize log pagesize grupagesize + * 4k 12 0 + * 16k 14 1 + * 64k 16 2 + * 256k 18 3 + * 1m 20 4 + * 2m 21 5 + * 4m 22 6 + * 16m 24 7 + * 64m 26 8 + * ... 
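+ *
+ * Worked example of the encoding below (our arithmetic, using the macro
+ * that follows): a 2m page has PAGE_SHIFT 21, so
+ *	GRU_PAGESIZE(21) = ((21 + 2) >> 1) - 6 = 11 - 6 = 5
+ * which matches the table row above.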
+ */ +#define GRU_PAGESIZE(sh) ((((sh) > 20 ? (sh) + 2 : (sh)) >> 1) - 6) +#define GRU_SIZEAVAIL(sh) (1UL << GRU_PAGESIZE(sh)) + +/* minimum TLB purge count to ensure a full purge */ +#define GRUMAXINVAL 1024UL + +int cch_allocate(struct gru_context_configuration_handle *cch); +int cch_start(struct gru_context_configuration_handle *cch); +int cch_interrupt(struct gru_context_configuration_handle *cch); +int cch_deallocate(struct gru_context_configuration_handle *cch); +int cch_interrupt_sync(struct gru_context_configuration_handle *cch); +int tgh_invalidate(struct gru_tlb_global_handle *tgh, unsigned long vaddr, + unsigned long vaddrmask, int asid, int pagesize, int global, int n, + unsigned short ctxbitmap); +int tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long paddr, + int gaa, unsigned long vaddr, int asid, int dirty, int pagesize); +void tfh_write_restart(struct gru_tlb_fault_handle *tfh, unsigned long paddr, + int gaa, unsigned long vaddr, int asid, int dirty, int pagesize); +void tfh_restart(struct gru_tlb_fault_handle *tfh); +void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh); +void tfh_exception(struct gru_tlb_fault_handle *tfh); + +#endif /* __GRUHANDLES_H__ */ diff --git a/kernel/drivers/misc/sgi-gru/grukdump.c b/kernel/drivers/misc/sgi-gru/grukdump.c new file mode 100644 index 000000000..a3700a56b --- /dev/null +++ b/kernel/drivers/misc/sgi-gru/grukdump.c @@ -0,0 +1,234 @@ +/* + * SN Platform GRU Driver + * + * Dump GRU State + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/spinlock.h> +#include <linux/uaccess.h> +#include <linux/delay.h> +#include <linux/bitops.h> +#include <asm/uv/uv_hub.h> +#include "gru.h" +#include "grutables.h" +#include "gruhandles.h" +#include "grulib.h" + +#define CCH_LOCK_ATTEMPTS 10 + +static int gru_user_copy_handle(void __user **dp, void *s) +{ + if (copy_to_user(*dp, s, GRU_HANDLE_BYTES)) + return -1; + *dp += GRU_HANDLE_BYTES; + return 0; +} + +static int gru_dump_context_data(void *grubase, + struct gru_context_configuration_handle *cch, + void __user *ubuf, int ctxnum, int dsrcnt, + int flush_cbrs) +{ + void *cb, *cbe, *tfh, *gseg; + int i, scr; + + gseg = grubase + ctxnum * GRU_GSEG_STRIDE; + cb = gseg + GRU_CB_BASE; + cbe = grubase + GRU_CBE_BASE; + tfh = grubase + GRU_TFH_BASE; + + for_each_cbr_in_allocation_map(i, &cch->cbr_allocation_map, scr) { + if (flush_cbrs) + gru_flush_cache(cb); + if (gru_user_copy_handle(&ubuf, cb)) + goto fail; + if (gru_user_copy_handle(&ubuf, tfh + i * GRU_HANDLE_STRIDE)) + goto fail; + if (gru_user_copy_handle(&ubuf, cbe + i * GRU_HANDLE_STRIDE)) + goto fail; + cb += GRU_HANDLE_STRIDE; + } + if (dsrcnt) + memcpy(ubuf, gseg + GRU_DS_BASE, dsrcnt * GRU_HANDLE_STRIDE); + return 0; + +fail: + return -EFAULT; +} + +static int gru_dump_tfm(struct gru_state *gru, + void __user *ubuf, void __user *ubufend) +{ + struct gru_tlb_fault_map *tfm; + int i, ret, bytes; + + bytes = GRU_NUM_TFM * GRU_CACHE_LINE_BYTES; + if (bytes > ubufend - ubuf) + ret = -EFBIG; + + for (i = 0; i < GRU_NUM_TFM; i++) { + tfm = get_tfm(gru->gs_gru_base_vaddr, i); + if (gru_user_copy_handle(&ubuf, tfm)) + goto fail; + } + return GRU_NUM_TFM * GRU_CACHE_LINE_BYTES; + +fail: + return -EFAULT; +} + +static int gru_dump_tgh(struct gru_state *gru, + void __user *ubuf, void __user *ubufend) +{ + struct gru_tlb_global_handle *tgh; + int i, ret, bytes; + + bytes = GRU_NUM_TGH * GRU_CACHE_LINE_BYTES; + if (bytes > ubufend - ubuf) + ret = -EFBIG; + + for (i = 0; i < GRU_NUM_TGH; i++) { + tgh = get_tgh(gru->gs_gru_base_vaddr, i); + if (gru_user_copy_handle(&ubuf, tgh)) + goto fail; + } + return GRU_NUM_TGH * GRU_CACHE_LINE_BYTES; + +fail: + return -EFAULT; +} + +static int gru_dump_context(struct gru_state *gru, int ctxnum, + void __user *ubuf, void __user *ubufend, char data_opt, + char lock_cch, char flush_cbrs) +{ + struct gru_dump_context_header hdr; + struct gru_dump_context_header __user *uhdr = ubuf; + struct gru_context_configuration_handle *cch, *ubufcch; + struct gru_thread_state *gts; + int try, cch_locked, cbrcnt = 0, dsrcnt = 0, bytes = 0, ret = 0; + void *grubase; + + memset(&hdr, 0, sizeof(hdr)); + grubase = gru->gs_gru_base_vaddr; + cch = get_cch(grubase, ctxnum); + for (try = 0; try < CCH_LOCK_ATTEMPTS; try++) { + cch_locked = trylock_cch_handle(cch); + if (cch_locked) + break; + msleep(1); + } + + ubuf += sizeof(hdr); + ubufcch = ubuf; + if (gru_user_copy_handle(&ubuf, cch)) { + if (cch_locked) + unlock_cch_handle(cch); + return -EFAULT; + } + if (cch_locked) + ubufcch->delresp = 0; + bytes = sizeof(hdr) + GRU_CACHE_LINE_BYTES; + + if (cch_locked || !lock_cch) { + gts = gru->gs_gts[ctxnum]; + if (gts && gts->ts_vma) { + hdr.pid = gts->ts_tgid_owner; + hdr.vaddr = gts->ts_vma->vm_start; + } + if (cch->state != CCHSTATE_INACTIVE) { + cbrcnt 
= hweight64(cch->cbr_allocation_map) * + GRU_CBR_AU_SIZE; + dsrcnt = data_opt ? hweight32(cch->dsr_allocation_map) * + GRU_DSR_AU_CL : 0; + } + bytes += (3 * cbrcnt + dsrcnt) * GRU_CACHE_LINE_BYTES; + if (bytes > ubufend - ubuf) + ret = -EFBIG; + else + ret = gru_dump_context_data(grubase, cch, ubuf, ctxnum, + dsrcnt, flush_cbrs); + } + if (cch_locked) + unlock_cch_handle(cch); + if (ret) + return ret; + + hdr.magic = GRU_DUMP_MAGIC; + hdr.gid = gru->gs_gid; + hdr.ctxnum = ctxnum; + hdr.cbrcnt = cbrcnt; + hdr.dsrcnt = dsrcnt; + hdr.cch_locked = cch_locked; + if (copy_to_user(uhdr, &hdr, sizeof(hdr))) + return -EFAULT; + + return bytes; +} + +int gru_dump_chiplet_request(unsigned long arg) +{ + struct gru_state *gru; + struct gru_dump_chiplet_state_req req; + void __user *ubuf; + void __user *ubufend; + int ctxnum, ret, cnt = 0; + + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + /* Currently, only dump by gid is implemented */ + if (req.gid >= gru_max_gids || req.gid < 0) + return -EINVAL; + + gru = GID_TO_GRU(req.gid); + ubuf = req.buf; + ubufend = req.buf + req.buflen; + + ret = gru_dump_tfm(gru, ubuf, ubufend); + if (ret < 0) + goto fail; + ubuf += ret; + + ret = gru_dump_tgh(gru, ubuf, ubufend); + if (ret < 0) + goto fail; + ubuf += ret; + + for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) { + if (req.ctxnum == ctxnum || req.ctxnum < 0) { + ret = gru_dump_context(gru, ctxnum, ubuf, ubufend, + req.data_opt, req.lock_cch, + req.flush_cbrs); + if (ret < 0) + goto fail; + ubuf += ret; + cnt++; + } + } + + if (copy_to_user((void __user *)arg, &req, sizeof(req))) + return -EFAULT; + return cnt; + +fail: + return ret; +} diff --git a/kernel/drivers/misc/sgi-gru/grukservices.c b/kernel/drivers/misc/sgi-gru/grukservices.c new file mode 100644 index 000000000..913de07e5 --- /dev/null +++ b/kernel/drivers/misc/sgi-gru/grukservices.c @@ -0,0 +1,1162 @@ +/* + * SN Platform GRU Driver + * + * KERNEL SERVICES THAT USE THE GRU + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/spinlock.h> +#include <linux/device.h> +#include <linux/miscdevice.h> +#include <linux/proc_fs.h> +#include <linux/interrupt.h> +#include <linux/uaccess.h> +#include <linux/delay.h> +#include <linux/export.h> +#include <asm/io_apic.h> +#include "gru.h" +#include "grulib.h" +#include "grutables.h" +#include "grukservices.h" +#include "gru_instructions.h" +#include <asm/uv/uv_hub.h> + +/* + * Kernel GRU Usage + * + * The following is an interim algorithm for management of kernel GRU + * resources. This will likely be replaced when we better understand the + * kernel/user requirements. + * + * Blade percpu resources reserved for kernel use. 
These resources are
+ * reserved whenever the kernel context for the blade is loaded. Note
+ * that the kernel context is not guaranteed to be always available. It is
+ * loaded on demand & can be stolen by a user if the user demand exceeds the
+ * kernel demand. The kernel can always reload the kernel context but
+ * a SLEEP may be required!!!
+ *
+ * Async Overview:
+ *
+ * 	Each blade has one "kernel context" that owns GRU kernel resources
+ * 	located on the blade. Kernel drivers use GRU resources in this context
+ * 	for sending messages, zeroing memory, etc.
+ *
+ * 	The kernel context is dynamically loaded on demand. If it is not in
+ * 	use by the kernel, the kernel context can be unloaded & given to a user.
+ * 	The kernel context will be reloaded when needed. This may require that
+ * 	a context be stolen from a user.
+ * 		NOTE: frequent unloading/reloading of the kernel context is
+ * 		expensive. We are depending on batch schedulers, cpusets, sane
+ * 		drivers or some other mechanism to prevent the need for frequent
+ * 		stealing/reloading.
+ *
+ * 	The kernel context consists of two parts:
+ * 		- 1 CB & a few DSRs that are reserved for each cpu on the blade.
+ * 		  Each cpu has its own private resources & does not share them
+ * 		  with other cpus. These resources are used serially, i.e.,
+ * 		  locked, used & unlocked on each call to a function in
+ * 		  grukservices.
+ * 			(Now that we have dynamic loading of kernel contexts, I
+ * 			may rethink this & allow sharing between cpus....)
+ *
+ * 	- Additional resources can be reserved long term & used directly
+ * 	  by UV drivers located in the kernel. Drivers using these GRU
+ * 	  resources can use asynchronous GRU instructions that send
+ * 	  interrupts on completion.
+ * 		- these resources must be explicitly locked/unlocked
+ * 		- locked resources prevent (obviously) the kernel
+ * 		  context from being unloaded.
+ * 		- drivers using these resources directly issue their own
+ * 		  GRU instruction and must wait/check completion.
+ *
+ * 		When these resources are reserved, the caller can optionally
+ * 		associate a wait_queue with the resources and use asynchronous
+ * 		GRU instructions. When an async GRU instruction completes, the
+ * 		driver will do a wakeup on the event.
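+ *
+ * 	Illustrative async flow (a sketch assembled from the functions in
+ * 	this file; the blade id and resource counts are hypothetical):
+ * 		han = gru_reserve_async_resources(blade, 4, 0, &cmp);
+ * 		gru_lock_async_resource(han, &cb, &dsr);
+ * 		... issue GRU instructions on cb with IMA_INTERRUPT ...
+ * 		gru_wait_async_cbr(han);
+ * 		gru_unlock_async_resource(han);
+ * 		gru_release_async_resources(han);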
+ * + */ + + +#define ASYNC_HAN_TO_BID(h) ((h) - 1) +#define ASYNC_BID_TO_HAN(b) ((b) + 1) +#define ASYNC_HAN_TO_BS(h) gru_base[ASYNC_HAN_TO_BID(h)] + +#define GRU_NUM_KERNEL_CBR 1 +#define GRU_NUM_KERNEL_DSR_BYTES 256 +#define GRU_NUM_KERNEL_DSR_CL (GRU_NUM_KERNEL_DSR_BYTES / \ + GRU_CACHE_LINE_BYTES) + +/* GRU instruction attributes for all instructions */ +#define IMA IMA_CB_DELAY + +/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */ +#define __gru_cacheline_aligned__ \ + __attribute__((__aligned__(GRU_CACHE_LINE_BYTES))) + +#define MAGIC 0x1234567887654321UL + +/* Default retry count for GRU errors on kernel instructions */ +#define EXCEPTION_RETRY_LIMIT 3 + +/* Status of message queue sections */ +#define MQS_EMPTY 0 +#define MQS_FULL 1 +#define MQS_NOOP 2 + +/*----------------- RESOURCE MANAGEMENT -------------------------------------*/ +/* optimized for x86_64 */ +struct message_queue { + union gru_mesqhead head __gru_cacheline_aligned__; /* CL 0 */ + int qlines; /* DW 1 */ + long hstatus[2]; + void *next __gru_cacheline_aligned__;/* CL 1 */ + void *limit; + void *start; + void *start2; + char data ____cacheline_aligned; /* CL 2 */ +}; + +/* First word in every message - used by mesq interface */ +struct message_header { + char present; + char present2; + char lines; + char fill; +}; + +#define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h])) + +/* + * Reload the blade's kernel context into a GRU chiplet. Called holding + * the bs_kgts_sema for READ. Will steal user contexts if necessary. + */ +static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id) +{ + struct gru_state *gru; + struct gru_thread_state *kgts; + void *vaddr; + int ctxnum, ncpus; + + up_read(&bs->bs_kgts_sema); + down_write(&bs->bs_kgts_sema); + + if (!bs->bs_kgts) { + bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0); + bs->bs_kgts->ts_user_blade_id = blade_id; + } + kgts = bs->bs_kgts; + + if (!kgts->ts_gru) { + STAT(load_kernel_context); + ncpus = uv_blade_nr_possible_cpus(blade_id); + kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU( + GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs); + kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU( + GRU_NUM_KERNEL_DSR_BYTES * ncpus + + bs->bs_async_dsr_bytes); + while (!gru_assign_gru_context(kgts)) { + msleep(1); + gru_steal_context(kgts); + } + gru_load_context(kgts); + gru = bs->bs_kgts->ts_gru; + vaddr = gru->gs_gru_base_vaddr; + ctxnum = kgts->ts_ctxnum; + bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0); + bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0); + } + downgrade_write(&bs->bs_kgts_sema); +} + +/* + * Free all kernel contexts that are not currently in use. + * Returns 0 if all freed, else number of inuse context. + */ +static int gru_free_kernel_contexts(void) +{ + struct gru_blade_state *bs; + struct gru_thread_state *kgts; + int bid, ret = 0; + + for (bid = 0; bid < GRU_MAX_BLADES; bid++) { + bs = gru_base[bid]; + if (!bs) + continue; + + /* Ignore busy contexts. Don't want to block here. */ + if (down_write_trylock(&bs->bs_kgts_sema)) { + kgts = bs->bs_kgts; + if (kgts && kgts->ts_gru) + gru_unload_context(kgts, 0); + bs->bs_kgts = NULL; + up_write(&bs->bs_kgts_sema); + kfree(kgts); + } else { + ret++; + } + } + return ret; +} + +/* + * Lock & load the kernel context for the specified blade. + */ +static struct gru_blade_state *gru_lock_kernel_context(int blade_id) +{ + struct gru_blade_state *bs; + int bid; + + STAT(lock_kernel_context); +again: + bid = blade_id < 0 ? 
uv_numa_blade_id() : blade_id; + bs = gru_base[bid]; + + /* Handle the case where migration occurred while waiting for the sema */ + down_read(&bs->bs_kgts_sema); + if (blade_id < 0 && bid != uv_numa_blade_id()) { + up_read(&bs->bs_kgts_sema); + goto again; + } + if (!bs->bs_kgts || !bs->bs_kgts->ts_gru) + gru_load_kernel_context(bs, bid); + return bs; + +} + +/* + * Unlock the kernel context for the specified blade. Context is not + * unloaded but may be stolen before next use. + */ +static void gru_unlock_kernel_context(int blade_id) +{ + struct gru_blade_state *bs; + + bs = gru_base[blade_id]; + up_read(&bs->bs_kgts_sema); + STAT(unlock_kernel_context); +} + +/* + * Reserve & get pointers to the DSR/CBRs reserved for the current cpu. + * - returns with preemption disabled + */ +static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr) +{ + struct gru_blade_state *bs; + int lcpu; + + BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES); + preempt_disable(); + bs = gru_lock_kernel_context(-1); + lcpu = uv_blade_processor_id(); + *cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE; + *dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES; + return 0; +} + +/* + * Free the current cpus reserved DSR/CBR resources. + */ +static void gru_free_cpu_resources(void *cb, void *dsr) +{ + gru_unlock_kernel_context(uv_numa_blade_id()); + preempt_enable(); +} + +/* + * Reserve GRU resources to be used asynchronously. + * Note: currently supports only 1 reservation per blade. + * + * input: + * blade_id - blade on which resources should be reserved + * cbrs - number of CBRs + * dsr_bytes - number of DSR bytes needed + * output: + * handle to identify resource + * (0 = async resources already reserved) + */ +unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes, + struct completion *cmp) +{ + struct gru_blade_state *bs; + struct gru_thread_state *kgts; + int ret = 0; + + bs = gru_base[blade_id]; + + down_write(&bs->bs_kgts_sema); + + /* Verify no resources already reserved */ + if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs) + goto done; + bs->bs_async_dsr_bytes = dsr_bytes; + bs->bs_async_cbrs = cbrs; + bs->bs_async_wq = cmp; + kgts = bs->bs_kgts; + + /* Resources changed. Unload context if already loaded */ + if (kgts && kgts->ts_gru) + gru_unload_context(kgts, 0); + ret = ASYNC_BID_TO_HAN(blade_id); + +done: + up_write(&bs->bs_kgts_sema); + return ret; +} + +/* + * Release async resources previously reserved. + * + * input: + * han - handle to identify resources + */ +void gru_release_async_resources(unsigned long han) +{ + struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); + + down_write(&bs->bs_kgts_sema); + bs->bs_async_dsr_bytes = 0; + bs->bs_async_cbrs = 0; + bs->bs_async_wq = NULL; + up_write(&bs->bs_kgts_sema); +} + +/* + * Wait for async GRU instructions to complete. 
+ * + * input: + * han - handle to identify resources + */ +void gru_wait_async_cbr(unsigned long han) +{ + struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); + + wait_for_completion(bs->bs_async_wq); + mb(); +} + +/* + * Lock previous reserved async GRU resources + * + * input: + * han - handle to identify resources + * output: + * cb - pointer to first CBR + * dsr - pointer to first DSR + */ +void gru_lock_async_resource(unsigned long han, void **cb, void **dsr) +{ + struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); + int blade_id = ASYNC_HAN_TO_BID(han); + int ncpus; + + gru_lock_kernel_context(blade_id); + ncpus = uv_blade_nr_possible_cpus(blade_id); + if (cb) + *cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE; + if (dsr) + *dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES; +} + +/* + * Unlock previous reserved async GRU resources + * + * input: + * han - handle to identify resources + */ +void gru_unlock_async_resource(unsigned long han) +{ + int blade_id = ASYNC_HAN_TO_BID(han); + + gru_unlock_kernel_context(blade_id); +} + +/*----------------------------------------------------------------------*/ +int gru_get_cb_exception_detail(void *cb, + struct control_block_extended_exc_detail *excdet) +{ + struct gru_control_block_extended *cbe; + struct gru_thread_state *kgts = NULL; + unsigned long off; + int cbrnum, bid; + + /* + * Locate kgts for cb. This algorithm is SLOW but + * this function is rarely called (ie., almost never). + * Performance does not matter. + */ + for_each_possible_blade(bid) { + if (!gru_base[bid]) + break; + kgts = gru_base[bid]->bs_kgts; + if (!kgts || !kgts->ts_gru) + continue; + off = cb - kgts->ts_gru->gs_gru_base_vaddr; + if (off < GRU_SIZE) + break; + kgts = NULL; + } + BUG_ON(!kgts); + cbrnum = thread_cbr_number(kgts, get_cb_number(cb)); + cbe = get_cbe(GRUBASE(cb), cbrnum); + gru_flush_cache(cbe); /* CBE not coherent */ + sync_core(); + excdet->opc = cbe->opccpy; + excdet->exopc = cbe->exopccpy; + excdet->ecause = cbe->ecause; + excdet->exceptdet0 = cbe->idef1upd; + excdet->exceptdet1 = cbe->idef3upd; + gru_flush_cache(cbe); + return 0; +} + +char *gru_get_cb_exception_detail_str(int ret, void *cb, + char *buf, int size) +{ + struct gru_control_block_status *gen = (void *)cb; + struct control_block_extended_exc_detail excdet; + + if (ret > 0 && gen->istatus == CBS_EXCEPTION) { + gru_get_cb_exception_detail(cb, &excdet); + snprintf(buf, size, + "GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x," + "excdet0 0x%lx, excdet1 0x%x", smp_processor_id(), + gen, excdet.opc, excdet.exopc, excdet.ecause, + excdet.exceptdet0, excdet.exceptdet1); + } else { + snprintf(buf, size, "No exception"); + } + return buf; +} + +static int gru_wait_idle_or_exception(struct gru_control_block_status *gen) +{ + while (gen->istatus >= CBS_ACTIVE) { + cpu_relax(); + barrier(); + } + return gen->istatus; +} + +static int gru_retry_exception(void *cb) +{ + struct gru_control_block_status *gen = (void *)cb; + struct control_block_extended_exc_detail excdet; + int retry = EXCEPTION_RETRY_LIMIT; + + while (1) { + if (gru_wait_idle_or_exception(gen) == CBS_IDLE) + return CBS_IDLE; + if (gru_get_cb_message_queue_substatus(cb)) + return CBS_EXCEPTION; + gru_get_cb_exception_detail(cb, &excdet); + if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) || + (excdet.cbrexecstatus & CBR_EXS_ABORT_OCC)) + break; + if (retry-- == 0) + break; + gen->icmd = 1; + gru_flush_cache(gen); + } + return CBS_EXCEPTION; +} + +int gru_check_status_proc(void *cb) +{ + struct gru_control_block_status *gen = 
(void *)cb; + int ret; + + ret = gen->istatus; + if (ret == CBS_EXCEPTION) + ret = gru_retry_exception(cb); + rmb(); + return ret; + +} + +int gru_wait_proc(void *cb) +{ + struct gru_control_block_status *gen = (void *)cb; + int ret; + + ret = gru_wait_idle_or_exception(gen); + if (ret == CBS_EXCEPTION) + ret = gru_retry_exception(cb); + rmb(); + return ret; +} + +void gru_abort(int ret, void *cb, char *str) +{ + char buf[GRU_EXC_STR_SIZE]; + + panic("GRU FATAL ERROR: %s - %s\n", str, + gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf))); +} + +void gru_wait_abort_proc(void *cb) +{ + int ret; + + ret = gru_wait_proc(cb); + if (ret) + gru_abort(ret, cb, "gru_wait_abort"); +} + + +/*------------------------------ MESSAGE QUEUES -----------------------------*/ + +/* Internal status . These are NOT returned to the user. */ +#define MQIE_AGAIN -1 /* try again */ + + +/* + * Save/restore the "present" flag that is in the second line of 2-line + * messages + */ +static inline int get_present2(void *p) +{ + struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES; + return mhdr->present; +} + +static inline void restore_present2(void *p, int val) +{ + struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES; + mhdr->present = val; +} + +/* + * Create a message queue. + * qlines - message queue size in cache lines. Includes 2-line header. + */ +int gru_create_message_queue(struct gru_message_queue_desc *mqd, + void *p, unsigned int bytes, int nasid, int vector, int apicid) +{ + struct message_queue *mq = p; + unsigned int qlines; + + qlines = bytes / GRU_CACHE_LINE_BYTES - 2; + memset(mq, 0, bytes); + mq->start = &mq->data; + mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES; + mq->next = &mq->data; + mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES; + mq->qlines = qlines; + mq->hstatus[0] = 0; + mq->hstatus[1] = 1; + mq->head = gru_mesq_head(2, qlines / 2 + 1); + mqd->mq = mq; + mqd->mq_gpa = uv_gpa(mq); + mqd->qlines = qlines; + mqd->interrupt_pnode = nasid >> 1; + mqd->interrupt_vector = vector; + mqd->interrupt_apicid = apicid; + return 0; +} +EXPORT_SYMBOL_GPL(gru_create_message_queue); + +/* + * Send a NOOP message to a message queue + * Returns: + * 0 - if queue is full after the send. This is the normal case + * but various races can change this. + * -1 - if mesq sent successfully but queue not full + * >0 - unexpected error. 
MQE_xxx returned
+ */
+static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd,
+				void *mesg)
+{
+	const struct message_header noop_header = {
+					.present = MQS_NOOP, .lines = 1};
+	unsigned long m;
+	int substatus, ret;
+	struct message_header save_mhdr, *mhdr = mesg;
+
+	STAT(mesq_noop);
+	save_mhdr = *mhdr;
+	*mhdr = noop_header;
+	gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA);
+	ret = gru_wait(cb);
+
+	if (ret) {
+		substatus = gru_get_cb_message_queue_substatus(cb);
+		switch (substatus) {
+		case CBSS_NO_ERROR:
+			STAT(mesq_noop_unexpected_error);
+			ret = MQE_UNEXPECTED_CB_ERR;
+			break;
+		case CBSS_LB_OVERFLOWED:
+			STAT(mesq_noop_lb_overflow);
+			ret = MQE_CONGESTION;
+			break;
+		case CBSS_QLIMIT_REACHED:
+			STAT(mesq_noop_qlimit_reached);
+			ret = 0;
+			break;
+		case CBSS_AMO_NACKED:
+			STAT(mesq_noop_amo_nacked);
+			ret = MQE_CONGESTION;
+			break;
+		case CBSS_PUT_NACKED:
+			STAT(mesq_noop_put_nacked);
+			m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
+			gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1,
+						IMA);
+			if (gru_wait(cb) == CBS_IDLE)
+				ret = MQIE_AGAIN;
+			else
+				ret = MQE_UNEXPECTED_CB_ERR;
+			break;
+		case CBSS_PAGE_OVERFLOW:
+			STAT(mesq_noop_page_overflow);
+			/* fallthru */
+		default:
+			BUG();
+		}
+	}
+	*mhdr = save_mhdr;
+	return ret;
+}
+
+/*
+ * Handle a gru_mesq full condition.
+ */
+static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd,
+				void *mesg, int lines)
+{
+	union gru_mesqhead mqh;
+	unsigned int limit, head;
+	unsigned long avalue;
+	int half, qlines;
+
+	/* Determine if switching to first/second half of queue */
+	avalue = gru_get_amo_value(cb);
+	head = gru_get_amo_value_head(cb);
+	limit = gru_get_amo_value_limit(cb);
+
+	qlines = mqd->qlines;
+	half = (limit != qlines);
+
+	if (half)
+		mqh = gru_mesq_head(qlines / 2 + 1, qlines);
+	else
+		mqh = gru_mesq_head(2, qlines / 2 + 1);
+
+	/* Try to get lock for switching head pointer */
+	gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA);
+	if (gru_wait(cb) != CBS_IDLE)
+		goto cberr;
+	if (!gru_get_amo_value(cb)) {
+		STAT(mesq_qf_locked);
+		return MQE_QUEUE_FULL;
+	}
+
+	/* Got the lock. Send an optional NOOP message if the queue is not full. */
+	if (head != limit) {
+		if (send_noop_message(cb, mqd, mesg)) {
+			gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half),
+					XTYPE_DW, IMA);
+			if (gru_wait(cb) != CBS_IDLE)
+				goto cberr;
+			STAT(mesq_qf_noop_not_full);
+			return MQIE_AGAIN;
+		}
+		avalue++;
+	}
+
+	/* Then flip the queue head to the other half of the queue. */
+	gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue,
+							IMA);
+	if (gru_wait(cb) != CBS_IDLE)
+		goto cberr;
+
+	/* If swapping the queue head was not successful, clear the hstatus lock */
+	if (gru_get_amo_value(cb) != avalue) {
+		STAT(mesq_qf_switch_head_failed);
+		gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW,
+			IMA);
+		if (gru_wait(cb) != CBS_IDLE)
+			goto cberr;
+	}
+	return MQIE_AGAIN;
+cberr:
+	STAT(mesq_qf_unexpected_error);
+	return MQE_UNEXPECTED_CB_ERR;
+}
+
+/*
+ * Handle a PUT failure. Note: if the message was a 2-line message, one of the
+ * lines might have been successfully written. Before sending the
+ * message, "present" must be cleared in BOTH lines to prevent the receiver
+ * from prematurely seeing the full message.
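+ * The second line's present flag is saved/restored with get_present2() /
+ * restore_present2() (defined earlier in this file); gru_get_next_message()
+ * treats a 2-line message whose second line is still empty as not present.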
+ */
+static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd,
+			void *mesg, int lines)
+{
+	unsigned long m, *val = mesg, gpa, save;
+	int ret;
+
+	m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
+	if (lines == 2) {
+		gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA);
+		if (gru_wait(cb) != CBS_IDLE)
+			return MQE_UNEXPECTED_CB_ERR;
+	}
+	gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
+	if (gru_wait(cb) != CBS_IDLE)
+		return MQE_UNEXPECTED_CB_ERR;
+
+	if (!mqd->interrupt_vector)
+		return MQE_OK;
+
+	/*
+	 * Send a cross-partition interrupt to the SSI that contains the target
+	 * message queue. Normally, the interrupt is automatically delivered by
+	 * hardware but some error conditions require explicit delivery.
+	 * Use the GRU to deliver the interrupt. Otherwise partition failures
+	 * could cause unrecovered errors.
+	 */
+	gpa = uv_global_gru_mmr_address(mqd->interrupt_pnode, UVH_IPI_INT);
+	save = *val;
+	*val = uv_hub_ipi_value(mqd->interrupt_apicid, mqd->interrupt_vector,
+				dest_Fixed);
+	gru_vstore_phys(cb, gpa, gru_get_tri(mesg), IAA_REGISTER, IMA);
+	ret = gru_wait(cb);
+	*val = save;
+	if (ret != CBS_IDLE)
+		return MQE_UNEXPECTED_CB_ERR;
+	return MQE_OK;
+}
+
+/*
+ * Handle a gru_mesq failure. Some of these failures are software recoverable
+ * or retryable.
+ */
+static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
+				void *mesg, int lines)
+{
+	int substatus, ret = 0;
+
+	substatus = gru_get_cb_message_queue_substatus(cb);
+	switch (substatus) {
+	case CBSS_NO_ERROR:
+		STAT(mesq_send_unexpected_error);
+		ret = MQE_UNEXPECTED_CB_ERR;
+		break;
+	case CBSS_LB_OVERFLOWED:
+		STAT(mesq_send_lb_overflow);
+		ret = MQE_CONGESTION;
+		break;
+	case CBSS_QLIMIT_REACHED:
+		STAT(mesq_send_qlimit_reached);
+		ret = send_message_queue_full(cb, mqd, mesg, lines);
+		break;
+	case CBSS_AMO_NACKED:
+		STAT(mesq_send_amo_nacked);
+		ret = MQE_CONGESTION;
+		break;
+	case CBSS_PUT_NACKED:
+		STAT(mesq_send_put_nacked);
+		ret = send_message_put_nacked(cb, mqd, mesg, lines);
+		break;
+	case CBSS_PAGE_OVERFLOW:
+		STAT(mesq_page_overflow);
+		/* fallthru */
+	default:
+		BUG();
+	}
+	return ret;
+}
+
+/*
+ * Send a message to a message queue
+ * 	mqd	message queue descriptor
+ * 	mesg	message. Must be a vaddr within a GSEG
+ * 	bytes	message size (<= 2 CL)
+ */
+int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg,
+			  unsigned int bytes)
+{
+	struct message_header *mhdr;
+	void *cb;
+	void *dsr;
+	int istatus, clines, ret;
+
+	STAT(mesq_send);
+	BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES);
+
+	clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES);
+	if (gru_get_cpu_resources(bytes, &cb, &dsr))
+		return MQE_BUG_NO_RESOURCES;
+	memcpy(dsr, mesg, bytes);
+	mhdr = dsr;
+	mhdr->present = MQS_FULL;
+	mhdr->lines = clines;
+	if (clines == 2) {
+		mhdr->present2 = get_present2(mhdr);
+		restore_present2(mhdr, MQS_FULL);
+	}
+
+	do {
+		ret = MQE_OK;
+		gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA);
+		istatus = gru_wait(cb);
+		if (istatus != CBS_IDLE)
+			ret = send_message_failure(cb, mqd, dsr, clines);
+	} while (ret == MQIE_AGAIN);
+	gru_free_cpu_resources(cb, dsr);
+
+	if (ret)
+		STAT(mesq_send_failed);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(gru_send_message_gpa);
+
+/*
+ * Advance the receive pointer for the queue to the next message.
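+ * Illustrative receive loop (a sketch built on the two functions below;
+ * mqd is a previously created queue descriptor):
+ *	void *m;
+ *	while ((m = gru_get_next_message(mqd)) != NULL) {
+ *		... process message m ...
+ *		gru_free_message(mqd, m);
+ *	}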
+ */ +void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg) +{ + struct message_queue *mq = mqd->mq; + struct message_header *mhdr = mq->next; + void *next, *pnext; + int half = -1; + int lines = mhdr->lines; + + if (lines == 2) + restore_present2(mhdr, MQS_EMPTY); + mhdr->present = MQS_EMPTY; + + pnext = mq->next; + next = pnext + GRU_CACHE_LINE_BYTES * lines; + if (next == mq->limit) { + next = mq->start; + half = 1; + } else if (pnext < mq->start2 && next >= mq->start2) { + half = 0; + } + + if (half >= 0) + mq->hstatus[half] = 1; + mq->next = next; +} +EXPORT_SYMBOL_GPL(gru_free_message); + +/* + * Get next message from message queue. Return NULL if no message + * present. User must call next_message() to move to next message. + * rmq message queue + */ +void *gru_get_next_message(struct gru_message_queue_desc *mqd) +{ + struct message_queue *mq = mqd->mq; + struct message_header *mhdr = mq->next; + int present = mhdr->present; + + /* skip NOOP messages */ + while (present == MQS_NOOP) { + gru_free_message(mqd, mhdr); + mhdr = mq->next; + present = mhdr->present; + } + + /* Wait for both halves of 2 line messages */ + if (present == MQS_FULL && mhdr->lines == 2 && + get_present2(mhdr) == MQS_EMPTY) + present = MQS_EMPTY; + + if (!present) { + STAT(mesq_receive_none); + return NULL; + } + + if (mhdr->lines == 2) + restore_present2(mhdr, mhdr->present2); + + STAT(mesq_receive); + return mhdr; +} +EXPORT_SYMBOL_GPL(gru_get_next_message); + +/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/ + +/* + * Load a DW from a global GPA. The GPA can be a memory or MMR address. + */ +int gru_read_gpa(unsigned long *value, unsigned long gpa) +{ + void *cb; + void *dsr; + int ret, iaa; + + STAT(read_gpa); + if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr)) + return MQE_BUG_NO_RESOURCES; + iaa = gpa >> 62; + gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA); + ret = gru_wait(cb); + if (ret == CBS_IDLE) + *value = *(unsigned long *)dsr; + gru_free_cpu_resources(cb, dsr); + return ret; +} +EXPORT_SYMBOL_GPL(gru_read_gpa); + + +/* + * Copy a block of data using the GRU resources + */ +int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa, + unsigned int bytes) +{ + void *cb; + void *dsr; + int ret; + + STAT(copy_gpa); + if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr)) + return MQE_BUG_NO_RESOURCES; + gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr), + XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA); + ret = gru_wait(cb); + gru_free_cpu_resources(cb, dsr); + return ret; +} +EXPORT_SYMBOL_GPL(gru_copy_gpa); + +/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/ +/* Temp - will delete after we gain confidence in the GRU */ + +static int quicktest0(unsigned long arg) +{ + unsigned long word0; + unsigned long word1; + void *cb; + void *dsr; + unsigned long *p; + int ret = -EIO; + + if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr)) + return MQE_BUG_NO_RESOURCES; + p = dsr; + word0 = MAGIC; + word1 = 0; + + gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) { + printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id()); + goto done; + } + + if (*p != MAGIC) { + printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p); + goto done; + } + gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) { + printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", 
smp_processor_id()); + goto done; + } + + if (word0 != word1 || word1 != MAGIC) { + printk(KERN_DEBUG + "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n", + smp_processor_id(), word1, MAGIC); + goto done; + } + ret = 0; + +done: + gru_free_cpu_resources(cb, dsr); + return ret; +} + +#define ALIGNUP(p, q) ((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1))) + +static int quicktest1(unsigned long arg) +{ + struct gru_message_queue_desc mqd; + void *p, *mq; + unsigned long *dw; + int i, ret = -EIO; + char mes[GRU_CACHE_LINE_BYTES], *m; + + /* Need 1K cacheline aligned that does not cross page boundary */ + p = kmalloc(4096, 0); + if (p == NULL) + return -ENOMEM; + mq = ALIGNUP(p, 1024); + memset(mes, 0xee, sizeof(mes)); + dw = mq; + + gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0); + for (i = 0; i < 6; i++) { + mes[8] = i; + do { + ret = gru_send_message_gpa(&mqd, mes, sizeof(mes)); + } while (ret == MQE_CONGESTION); + if (ret) + break; + } + if (ret != MQE_QUEUE_FULL || i != 4) { + printk(KERN_DEBUG "GRU:%d quicktest1: unexpect status %d, i %d\n", + smp_processor_id(), ret, i); + goto done; + } + + for (i = 0; i < 6; i++) { + m = gru_get_next_message(&mqd); + if (!m || m[8] != i) + break; + gru_free_message(&mqd, m); + } + if (i != 4) { + printk(KERN_DEBUG "GRU:%d quicktest2: bad message, i %d, m %p, m8 %d\n", + smp_processor_id(), i, m, m ? m[8] : -1); + goto done; + } + ret = 0; + +done: + kfree(p); + return ret; +} + +static int quicktest2(unsigned long arg) +{ + static DECLARE_COMPLETION(cmp); + unsigned long han; + int blade_id = 0; + int numcb = 4; + int ret = 0; + unsigned long *buf; + void *cb0, *cb; + struct gru_control_block_status *gen; + int i, k, istatus, bytes; + + bytes = numcb * 4 * 8; + buf = kmalloc(bytes, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ret = -EBUSY; + han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp); + if (!han) + goto done; + + gru_lock_async_resource(han, &cb0, NULL); + memset(buf, 0xee, bytes); + for (i = 0; i < numcb; i++) + gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0, + XTYPE_DW, 4, 1, IMA_INTERRUPT); + + ret = 0; + k = numcb; + do { + gru_wait_async_cbr(han); + for (i = 0; i < numcb; i++) { + cb = cb0 + i * GRU_HANDLE_STRIDE; + istatus = gru_check_status(cb); + if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS) + break; + } + if (i == numcb) + continue; + if (istatus != CBS_IDLE) { + printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i); + ret = -EFAULT; + } else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] || + buf[4 * i + 3]) { + printk(KERN_DEBUG "GRU:%d quicktest2:cb %d, buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n", + smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]); + ret = -EIO; + } + k--; + gen = cb; + gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */ + } while (k); + BUG_ON(cmp.done); + + gru_unlock_async_resource(han); + gru_release_async_resources(han); +done: + kfree(buf); + return ret; +} + +#define BUFSIZE 200 +static int quicktest3(unsigned long arg) +{ + char buf1[BUFSIZE], buf2[BUFSIZE]; + int ret = 0; + + memset(buf2, 0, sizeof(buf2)); + memset(buf1, get_cycles() & 255, sizeof(buf1)); + gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE); + if (memcmp(buf1, buf2, BUFSIZE)) { + printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id()); + ret = -EIO; + } + return ret; +} + +/* + * Debugging only. User hook for various kernel tests + * of driver & gru. 
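+ * Reached from user space via the GRU_KTEST ioctl (see grulib.h); the low
+ * byte of 'arg' selects quicktest0..3, and 99 frees idle kernel contexts.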
+ */ +int gru_ktest(unsigned long arg) +{ + int ret = -EINVAL; + + switch (arg & 0xff) { + case 0: + ret = quicktest0(arg); + break; + case 1: + ret = quicktest1(arg); + break; + case 2: + ret = quicktest2(arg); + break; + case 3: + ret = quicktest3(arg); + break; + case 99: + ret = gru_free_kernel_contexts(); + break; + } + return ret; + +} + +int gru_kservices_init(void) +{ + return 0; +} + +void gru_kservices_exit(void) +{ + if (gru_free_kernel_contexts()) + BUG(); +} + diff --git a/kernel/drivers/misc/sgi-gru/grukservices.h b/kernel/drivers/misc/sgi-gru/grukservices.h new file mode 100644 index 000000000..02aa94d84 --- /dev/null +++ b/kernel/drivers/misc/sgi-gru/grukservices.h @@ -0,0 +1,214 @@ + +/* + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef __GRU_KSERVICES_H_ +#define __GRU_KSERVICES_H_ + + +/* + * Message queues using the GRU to send/receive messages. + * + * These function allow the user to create a message queue for + * sending/receiving 1 or 2 cacheline messages using the GRU. + * + * Processes SENDING messages will use a kernel CBR/DSR to send + * the message. This is transparent to the caller. + * + * The receiver does not use any GRU resources. + * + * The functions support: + * - single receiver + * - multiple senders + * - cross partition message + * + * Missing features ZZZ: + * - user options for dealing with timeouts, queue full, etc. + * - gru_create_message_queue() needs interrupt vector info + */ + +struct gru_message_queue_desc { + void *mq; /* message queue vaddress */ + unsigned long mq_gpa; /* global address of mq */ + int qlines; /* queue size in CL */ + int interrupt_vector; /* interrupt vector */ + int interrupt_pnode; /* pnode for interrupt */ + int interrupt_apicid; /* lapicid for interrupt */ +}; + +/* + * Initialize a user allocated chunk of memory to be used as + * a message queue. The caller must ensure that the queue is + * in contiguous physical memory and is cacheline aligned. + * + * Message queue size is the total number of bytes allocated + * to the queue including a 2 cacheline header that is used + * to manage the queue. + * + * Input: + * mqd pointer to message queue descriptor + * p pointer to user allocated mesq memory. + * bytes size of message queue in bytes + * vector interrupt vector (zero if no interrupts) + * nasid nasid of blade where interrupt is delivered + * apicid apicid of cpu for interrupt + * + * Errors: + * 0 OK + * >0 error + */ +extern int gru_create_message_queue(struct gru_message_queue_desc *mqd, + void *p, unsigned int bytes, int nasid, int vector, int apicid); + +/* + * Send a message to a message queue. + * + * Note: The message queue transport mechanism uses the first 32 + * bits of the message. Users should avoid using these bits. 
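+ * A conforming layout might look like this (hypothetical example, not part
+ * of this API; the first word is left to the transport):
+ *	struct my_msg {
+ *		u32	transport_reserved;
+ *		u32	type;
+ *		u64	payload[14];
+ *	};
+ * (4 + 4 + 112 = 120 bytes, within the 2-cacheline maximum enforced by
+ * gru_send_message_gpa()).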
+ * + * + * Input: + * mqd pointer to message queue descriptor + * mesg pointer to message. Must be 64-bit aligned + * bytes size of message in bytes + * + * Output: + * 0 message sent + * >0 Send failure - see error codes below + * + */ +extern int gru_send_message_gpa(struct gru_message_queue_desc *mqd, + void *mesg, unsigned int bytes); + +/* Status values for gru_send_message() */ +#define MQE_OK 0 /* message sent successfully */ +#define MQE_CONGESTION 1 /* temporary congestion, try again */ +#define MQE_QUEUE_FULL 2 /* queue is full */ +#define MQE_UNEXPECTED_CB_ERR 3 /* unexpected CB error */ +#define MQE_PAGE_OVERFLOW 10 /* BUG - queue overflowed a page */ +#define MQE_BUG_NO_RESOURCES 11 /* BUG - could not alloc GRU cb/dsr */ + +/* + * Advance the receive pointer for the message queue to the next message. + * Note: current API requires messages to be gotten & freed in order. Future + * API extensions may allow for out-of-order freeing. + * + * Input + * mqd pointer to message queue descriptor + * mesq message being freed + */ +extern void gru_free_message(struct gru_message_queue_desc *mqd, + void *mesq); + +/* + * Get next message from message queue. Returns pointer to + * message OR NULL if no message present. + * User must call gru_free_message() after message is processed + * in order to move the queue pointers to next message. + * + * Input + * mqd pointer to message queue descriptor + * + * Output: + * p pointer to message + * NULL no message available + */ +extern void *gru_get_next_message(struct gru_message_queue_desc *mqd); + + +/* + * Read a GRU global GPA. Source can be located in a remote partition. + * + * Input: + * value memory address where MMR value is returned + * gpa source numalink physical address of GPA + * + * Output: + * 0 OK + * >0 error + */ +int gru_read_gpa(unsigned long *value, unsigned long gpa); + + +/* + * Copy data using the GRU. Source or destination can be located in a remote + * partition. + * + * Input: + * dest_gpa destination global physical address + * src_gpa source global physical address + * bytes number of bytes to copy + * + * Output: + * 0 OK + * >0 error + */ +extern int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa, + unsigned int bytes); + +/* + * Reserve GRU resources to be used asynchronously. + * + * input: + * blade_id - blade on which resources should be reserved + * cbrs - number of CBRs + * dsr_bytes - number of DSR bytes needed + * cmp - completion structure for waiting for + * async completions + * output: + * handle to identify resource + * (0 = no resources) + */ +extern unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes, + struct completion *cmp); + +/* + * Release async resources previously reserved. + * + * input: + * han - handle to identify resources + */ +extern void gru_release_async_resources(unsigned long han); + +/* + * Wait for async GRU instructions to complete. 
+ * + * input: + * han - handle to identify resources + */ +extern void gru_wait_async_cbr(unsigned long han); + +/* + * Lock previous reserved async GRU resources + * + * input: + * han - handle to identify resources + * output: + * cb - pointer to first CBR + * dsr - pointer to first DSR + */ +extern void gru_lock_async_resource(unsigned long han, void **cb, void **dsr); + +/* + * Unlock previous reserved async GRU resources + * + * input: + * han - handle to identify resources + */ +extern void gru_unlock_async_resource(unsigned long han); + +#endif /* __GRU_KSERVICES_H_ */ diff --git a/kernel/drivers/misc/sgi-gru/grulib.h b/kernel/drivers/misc/sgi-gru/grulib.h new file mode 100644 index 000000000..e77d1b1f9 --- /dev/null +++ b/kernel/drivers/misc/sgi-gru/grulib.h @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __GRULIB_H__ +#define __GRULIB_H__ + +#define GRU_BASENAME "gru" +#define GRU_FULLNAME "/dev/gru" +#define GRU_IOCTL_NUM 'G' + +/* + * Maximum number of GRU segments that a user can have open + * ZZZ temp - set high for testing. Revisit. 
+ */
+#define GRU_MAX_OPEN_CONTEXTS		32
+
+/* Set Number of Request Blocks */
+#define GRU_CREATE_CONTEXT		_IOWR(GRU_IOCTL_NUM, 1, void *)
+
+/* Set Context Options */
+#define GRU_SET_CONTEXT_OPTION		_IOWR(GRU_IOCTL_NUM, 4, void *)
+
+/* Fetch exception detail */
+#define GRU_USER_GET_EXCEPTION_DETAIL	_IOWR(GRU_IOCTL_NUM, 6, void *)
+
+/* For user call_os handling - normally a TLB fault */
+#define GRU_USER_CALL_OS		_IOWR(GRU_IOCTL_NUM, 8, void *)
+
+/* For user unload context */
+#define GRU_USER_UNLOAD_CONTEXT		_IOWR(GRU_IOCTL_NUM, 9, void *)
+
+/* For dumping GRU chiplet state */
+#define GRU_DUMP_CHIPLET_STATE		_IOWR(GRU_IOCTL_NUM, 11, void *)
+
+/* For getting gseg statistics */
+#define GRU_GET_GSEG_STATISTICS		_IOWR(GRU_IOCTL_NUM, 12, void *)
+
+/* For user TLB flushing (primarily for tests) */
+#define GRU_USER_FLUSH_TLB		_IOWR(GRU_IOCTL_NUM, 50, void *)
+
+/* Get some config options (primarily for tests & emulator) */
+#define GRU_GET_CONFIG_INFO		_IOWR(GRU_IOCTL_NUM, 51, void *)
+
+/* Various kernel self-tests */
+#define GRU_KTEST			_IOWR(GRU_IOCTL_NUM, 52, void *)
+
+#define CONTEXT_WINDOW_BYTES(th)	(GRU_GSEG_PAGESIZE * (th))
+#define THREAD_POINTER(p, th)		(p + GRU_GSEG_PAGESIZE * (th))
+#define GSEG_START(cb)			((void *)((unsigned long)(cb) & ~(GRU_GSEG_PAGESIZE - 1)))
+
+struct gru_get_gseg_statistics_req {
+	unsigned long		gseg;
+	struct gru_gseg_statistics stats;
+};
+
+/*
+ * Structure used to pass context creation parameters to the driver
+ */
+struct gru_create_context_req {
+	unsigned long		gseg;
+	unsigned int		data_segment_bytes;
+	unsigned int		control_blocks;
+	unsigned int		maximum_thread_count;
+	unsigned int		options;
+	unsigned char		tlb_preload_count;
+};
+
+/*
+ * Structure used to pass unload context parameters to the driver
+ */
+struct gru_unload_context_req {
+	unsigned long	gseg;
+};
+
+/*
+ * Structure used to set context options
+ */
+enum {sco_gseg_owner, sco_cch_req_slice, sco_blade_chiplet};
+struct gru_set_context_option_req {
+	unsigned long	gseg;
+	int		op;
+	int		val0;
+	long		val1;
+};
+
+/*
+ * Structure used to pass TLB flush parameters to the driver
+ */
+struct gru_flush_tlb_req {
+	unsigned long	gseg;
+	unsigned long	vaddr;
+	size_t		len;
+};
+
+/*
+ * Structure used to pass chiplet state dump parameters to the driver
+ */
+enum {dcs_pid, dcs_gid};
+struct gru_dump_chiplet_state_req {
+	unsigned int	op;
+	unsigned int	gid;
+	int		ctxnum;
+	char		data_opt;
+	char		lock_cch;
+	char		flush_cbrs;
+	char		fill[10];
+	pid_t		pid;
+	void		*buf;
+	size_t		buflen;
+	/* ---- output --- */
+	unsigned int	num_contexts;
+};
+
+#define GRU_DUMP_MAGIC	0x3474ab6c
+struct gru_dump_context_header {
+	unsigned int	magic;
+	unsigned int	gid;
+	unsigned char	ctxnum;
+	unsigned char	cbrcnt;
+	unsigned char	dsrcnt;
+	pid_t		pid;
+	unsigned long	vaddr;
+	int		cch_locked;
+	unsigned long	data[0];
+};
+
+/*
+ * GRU configuration info (temp - for testing)
+ */
+struct gru_config_info {
+	int		cpus;
+	int		blades;
+	int		nodes;
+	int		chiplets;
+	int		fill[16];
+};
+
+#endif /* __GRULIB_H__ */
diff --git a/kernel/drivers/misc/sgi-gru/grumain.c b/kernel/drivers/misc/sgi-gru/grumain.c
new file mode 100644
index 000000000..ae16c8cb4
--- /dev/null
+++ b/kernel/drivers/misc/sgi-gru/grumain.c
@@ -0,0 +1,973 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *            DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/spinlock.h> +#include <linux/sched.h> +#include <linux/device.h> +#include <linux/list.h> +#include <linux/err.h> +#include <linux/prefetch.h> +#include <asm/uv/uv_hub.h> +#include "gru.h" +#include "grutables.h" +#include "gruhandles.h" + +unsigned long gru_options __read_mostly; + +static struct device_driver gru_driver = { + .name = "gru" +}; + +static struct device gru_device = { + .init_name = "", + .driver = &gru_driver, +}; + +struct device *grudev = &gru_device; + +/* + * Select a gru fault map to be used by the current cpu. Note that + * multiple cpus may be using the same map. + * ZZZ should be inline but did not work on emulator + */ +int gru_cpu_fault_map_id(void) +{ +#ifdef CONFIG_IA64 + return uv_blade_processor_id() % GRU_NUM_TFM; +#else + int cpu = smp_processor_id(); + int id, core; + + core = uv_cpu_core_number(cpu); + id = core + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu); + return id; +#endif +} + +/*--------- ASID Management ------------------------------------------- + * + * Initially, assign asids sequentially from MIN_ASID .. MAX_ASID. + * Once MAX is reached, flush the TLB & start over. However, + * some asids may still be in use. There won't be many (percentage wise) still + * in use. Search active contexts & determine the value of the first + * asid in use ("x"s below). Set "limit" to this value. + * This defines a block of assignable asids. + * + * When "limit" is reached, search forward from limit+1 and determine the + * next block of assignable asids. + * + * Repeat until MAX_ASID is reached, then start over again. + * + * Each time MAX_ASID is reached, increment the asid generation. Since + * the search for in-use asids only checks contexts with GRUs currently + * assigned, asids in some contexts will be missed. Prior to loading + * a context, the asid generation of the GTS asid is rechecked. If it + * doesn't match the current generation, a new asid will be assigned. + * + * 0---------------x------------x---------------------x----| + * ^-next ^-limit ^-MAX_ASID + * + * All asid manipulation & context loading/unloading is protected by the + * gs_lock. + */ + +/* Hit the asid limit. 
Start over */ +static int gru_wrap_asid(struct gru_state *gru) +{ + gru_dbg(grudev, "gid %d\n", gru->gs_gid); + STAT(asid_wrap); + gru->gs_asid_gen++; + return MIN_ASID; +} + +/* Find the next chunk of unused asids */ +static int gru_reset_asid_limit(struct gru_state *gru, int asid) +{ + int i, gid, inuse_asid, limit; + + gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid); + STAT(asid_next); + limit = MAX_ASID; + if (asid >= limit) + asid = gru_wrap_asid(gru); + gru_flush_all_tlb(gru); + gid = gru->gs_gid; +again: + for (i = 0; i < GRU_NUM_CCH; i++) { + if (!gru->gs_gts[i] || is_kernel_context(gru->gs_gts[i])) + continue; + inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid; + gru_dbg(grudev, "gid %d, gts %p, gms %p, inuse 0x%x, cxt %d\n", + gru->gs_gid, gru->gs_gts[i], gru->gs_gts[i]->ts_gms, + inuse_asid, i); + if (inuse_asid == asid) { + asid += ASID_INC; + if (asid >= limit) { + /* + * empty range: reset the range limit and + * start over + */ + limit = MAX_ASID; + if (asid >= MAX_ASID) + asid = gru_wrap_asid(gru); + goto again; + } + } + + if ((inuse_asid > asid) && (inuse_asid < limit)) + limit = inuse_asid; + } + gru->gs_asid_limit = limit; + gru->gs_asid = asid; + gru_dbg(grudev, "gid %d, new asid 0x%x, new_limit 0x%x\n", gru->gs_gid, + asid, limit); + return asid; +} + +/* Assign a new ASID to a thread context. */ +static int gru_assign_asid(struct gru_state *gru) +{ + int asid; + + gru->gs_asid += ASID_INC; + asid = gru->gs_asid; + if (asid >= gru->gs_asid_limit) + asid = gru_reset_asid_limit(gru, asid); + + gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid); + return asid; +} + +/* + * Clear n bits in a word. Return a word indicating the bits that were cleared. + * Optionally, build an array of chars that contain the bit numbers allocated. + */ +static unsigned long reserve_resources(unsigned long *p, int n, int mmax, + char *idx) +{ + unsigned long bits = 0; + int i; + + while (n--) { + i = find_first_bit(p, mmax); + if (i == mmax) + BUG(); + __clear_bit(i, p); + __set_bit(i, &bits); + if (idx) + *idx++ = i; + } + return bits; +} + +unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count, + char *cbmap) +{ + return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU, + cbmap); +} + +unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count, + char *dsmap) +{ + return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU, + dsmap); +} + +static void reserve_gru_resources(struct gru_state *gru, + struct gru_thread_state *gts) +{ + gru->gs_active_contexts++; + gts->ts_cbr_map = + gru_reserve_cb_resources(gru, gts->ts_cbr_au_count, + gts->ts_cbr_idx); + gts->ts_dsr_map = + gru_reserve_ds_resources(gru, gts->ts_dsr_au_count, NULL); +} + +static void free_gru_resources(struct gru_state *gru, + struct gru_thread_state *gts) +{ + gru->gs_active_contexts--; + gru->gs_cbr_map |= gts->ts_cbr_map; + gru->gs_dsr_map |= gts->ts_dsr_map; +} + +/* + * Check if a GRU has sufficient free resources to satisfy an allocation + * request. Note: GRU locks may or may not be held when this is called. If + * not held, recheck after acquiring the appropriate locks. 
+ *
+ * Returns 1 if sufficient resources, 0 if not
+ */
+static int check_gru_resources(struct gru_state *gru, int cbr_au_count,
+			       int dsr_au_count, int max_active_contexts)
+{
+	return hweight64(gru->gs_cbr_map) >= cbr_au_count
+		&& hweight64(gru->gs_dsr_map) >= dsr_au_count
+		&& gru->gs_active_contexts < max_active_contexts;
+}
+
+/*
+ * TLB management requires tracking all GRU chiplets that have loaded a GSEG
+ * context.
+ */
+static int gru_load_mm_tracker(struct gru_state *gru,
+					struct gru_thread_state *gts)
+{
+	struct gru_mm_struct *gms = gts->ts_gms;
+	struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid];
+	unsigned short ctxbitmap = (1 << gts->ts_ctxnum);
+	int asid;
+
+	spin_lock(&gms->ms_asid_lock);
+	asid = asids->mt_asid;
+
+	spin_lock(&gru->gs_asid_lock);
+	if (asid == 0 || (asids->mt_ctxbitmap == 0 && asids->mt_asid_gen !=
+			  gru->gs_asid_gen)) {
+		asid = gru_assign_asid(gru);
+		asids->mt_asid = asid;
+		asids->mt_asid_gen = gru->gs_asid_gen;
+		STAT(asid_new);
+	} else {
+		STAT(asid_reuse);
+	}
+	spin_unlock(&gru->gs_asid_lock);
+
+	BUG_ON(asids->mt_ctxbitmap & ctxbitmap);
+	asids->mt_ctxbitmap |= ctxbitmap;
+	if (!test_bit(gru->gs_gid, gms->ms_asidmap))
+		__set_bit(gru->gs_gid, gms->ms_asidmap);
+	spin_unlock(&gms->ms_asid_lock);
+
+	gru_dbg(grudev,
+		"gid %d, gts %p, gms %p, ctxnum %d, asid 0x%x, asidmap 0x%lx\n",
+		gru->gs_gid, gts, gms, gts->ts_ctxnum, asid,
+		gms->ms_asidmap[0]);
+	return asid;
+}
+
+static void gru_unload_mm_tracker(struct gru_state *gru,
+					struct gru_thread_state *gts)
+{
+	struct gru_mm_struct *gms = gts->ts_gms;
+	struct gru_mm_tracker *asids;
+	unsigned short ctxbitmap;
+
+	asids = &gms->ms_asids[gru->gs_gid];
+	ctxbitmap = (1 << gts->ts_ctxnum);
+	spin_lock(&gms->ms_asid_lock);
+	spin_lock(&gru->gs_asid_lock);
+	BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap);
+	asids->mt_ctxbitmap ^= ctxbitmap;
+	gru_dbg(grudev, "gid %d, gts %p, gms %p, ctxnum %d, asidmap 0x%lx\n",
+		gru->gs_gid, gts, gms, gts->ts_ctxnum, gms->ms_asidmap[0]);
+	spin_unlock(&gru->gs_asid_lock);
+	spin_unlock(&gms->ms_asid_lock);
+}
+
+/*
+ * Decrement the reference count on a GTS structure. Free the structure
+ * if the reference count goes to zero.
+ */
+void gts_drop(struct gru_thread_state *gts)
+{
+	if (gts && atomic_dec_return(&gts->ts_refcnt) == 0) {
+		if (gts->ts_gms)
+			gru_drop_mmu_notifier(gts->ts_gms);
+		kfree(gts);
+		STAT(gts_free);
+	}
+}
+
+/*
+ * Locate the GTS structure for the current thread.
+ */
+static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data
+						*vdata, int tsid)
+{
+	struct gru_thread_state *gts;
+
+	list_for_each_entry(gts, &vdata->vd_head, ts_next)
+	    if (gts->ts_tsid == tsid)
+		return gts;
+	return NULL;
+}
+
+/*
+ * Allocate a thread state structure.
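+ *
+ * The save area for the context's CBRs and DSRs is allocated together
+ * with the header, addressed via ts_gdata[].  A sketch of the size
+ * computation the code below performs (macro names are from grutables.h):
+ *
+ *	bytes = DSR_BYTES(dsr_au_count) + CBR_BYTES(cbr_au_count)
+ *		+ sizeof(struct gru_thread_state);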
+ */
+struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
+		int cbr_au_count, int dsr_au_count,
+		unsigned char tlb_preload_count, int options, int tsid)
+{
+	struct gru_thread_state *gts;
+	struct gru_mm_struct *gms;
+	int bytes;
+
+	bytes = DSR_BYTES(dsr_au_count) + CBR_BYTES(cbr_au_count);
+	bytes += sizeof(struct gru_thread_state);
+	gts = kmalloc(bytes, GFP_KERNEL);
+	if (!gts)
+		return ERR_PTR(-ENOMEM);
+
+	STAT(gts_alloc);
+	memset(gts, 0, sizeof(struct gru_thread_state)); /* zero out header */
+	atomic_set(&gts->ts_refcnt, 1);
+	mutex_init(&gts->ts_ctxlock);
+	gts->ts_cbr_au_count = cbr_au_count;
+	gts->ts_dsr_au_count = dsr_au_count;
+	gts->ts_tlb_preload_count = tlb_preload_count;
+	gts->ts_user_options = options;
+	gts->ts_user_blade_id = -1;
+	gts->ts_user_chiplet_id = -1;
+	gts->ts_tsid = tsid;
+	gts->ts_ctxnum = NULLCTX;
+	gts->ts_tlb_int_select = -1;
+	gts->ts_cch_req_slice = -1;
+	gts->ts_sizeavail = GRU_SIZEAVAIL(PAGE_SHIFT);
+	if (vma) {
+		gts->ts_mm = current->mm;
+		gts->ts_vma = vma;
+		gms = gru_register_mmu_notifier();
+		if (IS_ERR(gms))
+			goto err;
+		gts->ts_gms = gms;
+	}
+
+	gru_dbg(grudev, "alloc gts %p\n", gts);
+	return gts;
+
+err:
+	gts_drop(gts);
+	return ERR_CAST(gms);
+}
+
+/*
+ * Allocate a vma private data structure.
+ */
+struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid)
+{
+	struct gru_vma_data *vdata = NULL;
+
+	vdata = kmalloc(sizeof(*vdata), GFP_KERNEL);
+	if (!vdata)
+		return NULL;
+
+	STAT(vdata_alloc);
+	INIT_LIST_HEAD(&vdata->vd_head);
+	spin_lock_init(&vdata->vd_lock);
+	gru_dbg(grudev, "alloc vdata %p\n", vdata);
+	return vdata;
+}
+
+/*
+ * Find the thread state structure for the current thread.
+ */
+struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma,
+					int tsid)
+{
+	struct gru_vma_data *vdata = vma->vm_private_data;
+	struct gru_thread_state *gts;
+
+	spin_lock(&vdata->vd_lock);
+	gts = gru_find_current_gts_nolock(vdata, tsid);
+	spin_unlock(&vdata->vd_lock);
+	gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
+	return gts;
+}
+
+/*
+ * Allocate a new thread state for a GSEG. Note that another thread may
+ * race to create a gts for the same GSEG.
+ */
+struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma,
+					int tsid)
+{
+	struct gru_vma_data *vdata = vma->vm_private_data;
+	struct gru_thread_state *gts, *ngts;
+
+	gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count,
+			    vdata->vd_dsr_au_count,
+			    vdata->vd_tlb_preload_count,
+			    vdata->vd_user_options, tsid);
+	if (IS_ERR(gts))
+		return gts;
+
+	spin_lock(&vdata->vd_lock);
+	ngts = gru_find_current_gts_nolock(vdata, tsid);
+	if (ngts) {
+		gts_drop(gts);
+		gts = ngts;
+		STAT(gts_double_allocate);
+	} else {
+		list_add(&gts->ts_next, &vdata->vd_head);
+	}
+	spin_unlock(&vdata->vd_lock);
+	gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
+	return gts;
+}
+
+/*
+ * Free the GRU context assigned to the thread state.
+ */
+static void gru_free_gru_context(struct gru_thread_state *gts)
+{
+	struct gru_state *gru;
+
+	gru = gts->ts_gru;
+	gru_dbg(grudev, "gts %p, gid %d\n", gts, gru->gs_gid);
+
+	spin_lock(&gru->gs_lock);
+	gru->gs_gts[gts->ts_ctxnum] = NULL;
+	free_gru_resources(gru, gts);
+	BUG_ON(test_bit(gts->ts_ctxnum, &gru->gs_context_map) == 0);
+	__clear_bit(gts->ts_ctxnum, &gru->gs_context_map);
+	gts->ts_ctxnum = NULLCTX;
+	gts->ts_gru = NULL;
+	gts->ts_blade = -1;
+	spin_unlock(&gru->gs_lock);
+
+	gts_drop(gts);
+	STAT(free_context);
+}
+
+/*
+ * Prefetching cachelines helps hardware performance.
+ * (Strictly a performance enhancement. Not functionally required). + */ +static void prefetch_data(void *p, int num, int stride) +{ + while (num-- > 0) { + prefetchw(p); + p += stride; + } +} + +static inline long gru_copy_handle(void *d, void *s) +{ + memcpy(d, s, GRU_HANDLE_BYTES); + return GRU_HANDLE_BYTES; +} + +static void gru_prefetch_context(void *gseg, void *cb, void *cbe, + unsigned long cbrmap, unsigned long length) +{ + int i, scr; + + prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES, + GRU_CACHE_LINE_BYTES); + + for_each_cbr_in_allocation_map(i, &cbrmap, scr) { + prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES); + prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1, + GRU_CACHE_LINE_BYTES); + cb += GRU_HANDLE_STRIDE; + } +} + +static void gru_load_context_data(void *save, void *grubase, int ctxnum, + unsigned long cbrmap, unsigned long dsrmap, + int data_valid) +{ + void *gseg, *cb, *cbe; + unsigned long length; + int i, scr; + + gseg = grubase + ctxnum * GRU_GSEG_STRIDE; + cb = gseg + GRU_CB_BASE; + cbe = grubase + GRU_CBE_BASE; + length = hweight64(dsrmap) * GRU_DSR_AU_BYTES; + gru_prefetch_context(gseg, cb, cbe, cbrmap, length); + + for_each_cbr_in_allocation_map(i, &cbrmap, scr) { + if (data_valid) { + save += gru_copy_handle(cb, save); + save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, + save); + } else { + memset(cb, 0, GRU_CACHE_LINE_BYTES); + memset(cbe + i * GRU_HANDLE_STRIDE, 0, + GRU_CACHE_LINE_BYTES); + } + /* Flush CBE to hide race in context restart */ + mb(); + gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE); + cb += GRU_HANDLE_STRIDE; + } + + if (data_valid) + memcpy(gseg + GRU_DS_BASE, save, length); + else + memset(gseg + GRU_DS_BASE, 0, length); +} + +static void gru_unload_context_data(void *save, void *grubase, int ctxnum, + unsigned long cbrmap, unsigned long dsrmap) +{ + void *gseg, *cb, *cbe; + unsigned long length; + int i, scr; + + gseg = grubase + ctxnum * GRU_GSEG_STRIDE; + cb = gseg + GRU_CB_BASE; + cbe = grubase + GRU_CBE_BASE; + length = hweight64(dsrmap) * GRU_DSR_AU_BYTES; + + /* CBEs may not be coherent. Flush them from cache */ + for_each_cbr_in_allocation_map(i, &cbrmap, scr) + gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE); + mb(); /* Let the CL flush complete */ + + gru_prefetch_context(gseg, cb, cbe, cbrmap, length); + + for_each_cbr_in_allocation_map(i, &cbrmap, scr) { + save += gru_copy_handle(save, cb); + save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE); + cb += GRU_HANDLE_STRIDE; + } + memcpy(save, gseg + GRU_DS_BASE, length); +} + +void gru_unload_context(struct gru_thread_state *gts, int savestate) +{ + struct gru_state *gru = gts->ts_gru; + struct gru_context_configuration_handle *cch; + int ctxnum = gts->ts_ctxnum; + + if (!is_kernel_context(gts)) + zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE); + cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); + + gru_dbg(grudev, "gts %p, cbrmap 0x%lx, dsrmap 0x%lx\n", + gts, gts->ts_cbr_map, gts->ts_dsr_map); + lock_cch_handle(cch); + if (cch_interrupt_sync(cch)) + BUG(); + + if (!is_kernel_context(gts)) + gru_unload_mm_tracker(gru, gts); + if (savestate) { + gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, + ctxnum, gts->ts_cbr_map, + gts->ts_dsr_map); + gts->ts_data_valid = 1; + } + + if (cch_deallocate(cch)) + BUG(); + unlock_cch_handle(cch); + + gru_free_gru_context(gts); +} + +/* + * Load a GRU context by copying it from the thread data structure in memory + * to the GRU. 
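+ *
+ * The 8 cch->asid[] entries programmed below correspond to the 8 virtual
+ * address regions (see ASID_INC and GRUREGION() in grutables.h), so a
+ * user address is translated under GRUASID(asid, addr), i.e.:
+ *
+ *	cch->asid[GRUREGION(addr)] == base_asid + GRUREGION(addr)
+ *
+ * (a sketch inferred from the macros; "base_asid" is illustrative naming,
+ * not taken from the original comments).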
+ */
+void gru_load_context(struct gru_thread_state *gts)
+{
+	struct gru_state *gru = gts->ts_gru;
+	struct gru_context_configuration_handle *cch;
+	int i, err, asid, ctxnum = gts->ts_ctxnum;
+
+	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
+	lock_cch_handle(cch);
+	cch->tfm_fault_bit_enable =
+	    (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
+	     || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
+	cch->tlb_int_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
+	if (cch->tlb_int_enable) {
+		gts->ts_tlb_int_select = gru_cpu_fault_map_id();
+		cch->tlb_int_select = gts->ts_tlb_int_select;
+	}
+	if (gts->ts_cch_req_slice >= 0) {
+		cch->req_slice_set_enable = 1;
+		cch->req_slice = gts->ts_cch_req_slice;
+	} else {
+		cch->req_slice_set_enable = 0;
+	}
+	cch->tfm_done_bit_enable = 0;
+	cch->dsr_allocation_map = gts->ts_dsr_map;
+	cch->cbr_allocation_map = gts->ts_cbr_map;
+
+	if (is_kernel_context(gts)) {
+		cch->unmap_enable = 1;
+		cch->tfm_done_bit_enable = 1;
+		cch->cb_int_enable = 1;
+		cch->tlb_int_select = 0;	/* For now, ints go to cpu 0 */
+	} else {
+		cch->unmap_enable = 0;
+		cch->tfm_done_bit_enable = 0;
+		cch->cb_int_enable = 0;
+		asid = gru_load_mm_tracker(gru, gts);
+		for (i = 0; i < 8; i++) {
+			cch->asid[i] = asid + i;
+			cch->sizeavail[i] = gts->ts_sizeavail;
+		}
+	}
+
+	err = cch_allocate(cch);
+	if (err) {
+		gru_dbg(grudev,
+			"err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n",
+			err, cch, gts, gts->ts_cbr_map, gts->ts_dsr_map);
+		BUG();
+	}
+
+	gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum,
+			gts->ts_cbr_map, gts->ts_dsr_map, gts->ts_data_valid);
+
+	if (cch_start(cch))
+		BUG();
+	unlock_cch_handle(cch);
+
+	gru_dbg(grudev, "gid %d, gts %p, cbrmap 0x%lx, dsrmap 0x%lx, tie %d, tis %d\n",
+		gts->ts_gru->gs_gid, gts, gts->ts_cbr_map, gts->ts_dsr_map,
+		(gts->ts_user_options == GRU_OPT_MISS_FMM_INTR), gts->ts_tlb_int_select);
+}
+
+/*
+ * Update fields in an active CCH:
+ *	- retarget interrupts on local blade
+ *	- update sizeavail mask
+ */
+int gru_update_cch(struct gru_thread_state *gts)
+{
+	struct gru_context_configuration_handle *cch;
+	struct gru_state *gru = gts->ts_gru;
+	int i, ctxnum = gts->ts_ctxnum, ret = 0;
+
+	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
+
+	lock_cch_handle(cch);
+	if (cch->state == CCHSTATE_ACTIVE) {
+		if (gru->gs_gts[gts->ts_ctxnum] != gts)
+			goto exit;
+		if (cch_interrupt(cch))
+			BUG();
+		for (i = 0; i < 8; i++)
+			cch->sizeavail[i] = gts->ts_sizeavail;
+		gts->ts_tlb_int_select = gru_cpu_fault_map_id();
+		cch->tlb_int_select = gru_cpu_fault_map_id();
+		cch->tfm_fault_bit_enable =
+		    (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
+		     || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
+		if (cch_start(cch))
+			BUG();
+		ret = 1;
+	}
+exit:
+	unlock_cch_handle(cch);
+	return ret;
+}
+
+/*
+ * Update CCH tlb interrupt select. Required when all the following is true:
+ *  - task's GRU context is loaded into a GRU
+ *  - task is using interrupt notification for TLB faults
+ *  - task has migrated to a different cpu on the same blade where
+ *    it was previously running.
+ */
+static int gru_retarget_intr(struct gru_thread_state *gts)
+{
+	if (gts->ts_tlb_int_select < 0
+	    || gts->ts_tlb_int_select == gru_cpu_fault_map_id())
+		return 0;
+
+	gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select,
+		gru_cpu_fault_map_id());
+	return gru_update_cch(gts);
+}
+
+/*
+ * Check if a GRU context is allowed to use a specific chiplet. By default
+ * a context is assigned to any blade-local chiplet. However, users can
+ * override this.
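+ * For example, a context could be pinned to a chiplet from user space
+ * with the GRU_SET_CONTEXT_OPTION ioctl (a sketch; gru_fd, gseg_vaddr,
+ * blade and chiplet are illustrative names, and the val0/val1 encoding
+ * is assumed rather than taken from this file):
+ *
+ *	struct gru_set_context_option_req req = {
+ *		.gseg = gseg_vaddr,
+ *		.op   = sco_blade_chiplet,
+ *		.val0 = chiplet,	-- chiplet id, or -1 for any
+ *		.val1 = blade,		-- blade id
+ *	};
+ *	ioctl(gru_fd, GRU_SET_CONTEXT_OPTION, &req);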
+ * Returns 1 if assignment allowed, 0 otherwise
+ */
+static int gru_check_chiplet_assignment(struct gru_state *gru,
+					struct gru_thread_state *gts)
+{
+	int blade_id;
+	int chiplet_id;
+
+	blade_id = gts->ts_user_blade_id;
+	if (blade_id < 0)
+		blade_id = uv_numa_blade_id();
+
+	chiplet_id = gts->ts_user_chiplet_id;
+	return gru->gs_blade_id == blade_id &&
+		(chiplet_id < 0 || chiplet_id == gru->gs_chiplet_id);
+}
+
+/*
+ * Unload the gru context if it is not assigned to the correct blade or
+ * chiplet. Misassignment can occur if the process migrates to a different
+ * blade or if the user changes the selected blade/chiplet.
+ */
+void gru_check_context_placement(struct gru_thread_state *gts)
+{
+	struct gru_state *gru;
+
+	/*
+	 * If the current task is the context owner, verify that the
+	 * context is correctly placed. This test is skipped for non-owner
+	 * references. Pthread apps use non-owner references to the CBRs.
+	 */
+	gru = gts->ts_gru;
+	if (!gru || gts->ts_tgid_owner != current->tgid)
+		return;
+
+	if (!gru_check_chiplet_assignment(gru, gts)) {
+		STAT(check_context_unload);
+		gru_unload_context(gts, 1);
+	} else if (gru_retarget_intr(gts)) {
+		STAT(check_context_retarget_intr);
+	}
+}
+
+
+/*
+ * Insufficient GRU resources available on the local blade. Steal a context from
+ * a process. This is a hack until a _real_ resource scheduler is written....
+ */
+#define next_ctxnum(n)	((n) <  GRU_NUM_CCH - 2 ? (n) + 1 : 0)
+#define next_gru(b, g)	(((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ?  \
+				 ((g)+1) : &(b)->bs_grus[0])
+
+static int is_gts_stealable(struct gru_thread_state *gts,
+		struct gru_blade_state *bs)
+{
+	if (is_kernel_context(gts))
+		return down_write_trylock(&bs->bs_kgts_sema);
+	else
+		return mutex_trylock(&gts->ts_ctxlock);
+}
+
+static void gts_stolen(struct gru_thread_state *gts,
+		struct gru_blade_state *bs)
+{
+	if (is_kernel_context(gts)) {
+		up_write(&bs->bs_kgts_sema);
+		STAT(steal_kernel_context);
+	} else {
+		mutex_unlock(&gts->ts_ctxlock);
+		STAT(steal_user_context);
+	}
+}
+
+void gru_steal_context(struct gru_thread_state *gts)
+{
+	struct gru_blade_state *blade;
+	struct gru_state *gru, *gru0;
+	struct gru_thread_state *ngts = NULL;
+	int ctxnum, ctxnum0, flag = 0, cbr, dsr;
+	int blade_id;
+
+	blade_id = gts->ts_user_blade_id;
+	if (blade_id < 0)
+		blade_id = uv_numa_blade_id();
+	cbr = gts->ts_cbr_au_count;
+	dsr = gts->ts_dsr_au_count;
+
+	blade = gru_base[blade_id];
+	spin_lock(&blade->bs_lock);
+
+	ctxnum = next_ctxnum(blade->bs_lru_ctxnum);
+	gru = blade->bs_lru_gru;
+	if (ctxnum == 0)
+		gru = next_gru(blade, gru);
+	blade->bs_lru_gru = gru;
+	blade->bs_lru_ctxnum = ctxnum;
+	ctxnum0 = ctxnum;
+	gru0 = gru;
+	while (1) {
+		if (gru_check_chiplet_assignment(gru, gts)) {
+			if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH))
+				break;
+			spin_lock(&gru->gs_lock);
+			for (; ctxnum < GRU_NUM_CCH; ctxnum++) {
+				if (flag && gru == gru0 && ctxnum == ctxnum0)
+					break;
+				ngts = gru->gs_gts[ctxnum];
+				/*
+				 * We are grabbing locks out of order, so trylock is
+				 * needed. GTSs are usually not locked, so the odds of
+				 * success are high. If trylock fails, try to steal a
+				 * different GSEG.
+				 */
+				if (ngts && is_gts_stealable(ngts, blade))
+					break;
+				ngts = NULL;
+			}
+			spin_unlock(&gru->gs_lock);
+			if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0))
+				break;
+		}
+		if (flag && gru == gru0)
+			break;
+		flag = 1;
+		ctxnum = 0;
+		gru = next_gru(blade, gru);
+	}
+	spin_unlock(&blade->bs_lock);
+
+	if (ngts) {
+		gts->ustats.context_stolen++;
+		ngts->ts_steal_jiffies = jiffies;
+		gru_unload_context(ngts, is_kernel_context(ngts) ? 0 : 1);
+		gts_stolen(ngts, blade);
+	} else {
+		STAT(steal_context_failed);
+	}
+	gru_dbg(grudev,
+		"stole gid %d, ctxnum %d from gts %p. Need cb %d, ds %d;"
+		" avail cb %ld, ds %ld\n",
+		gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map),
+		hweight64(gru->gs_dsr_map));
+}
+
+/*
+ * Assign a gru context.
+ */
+static int gru_assign_context_number(struct gru_state *gru)
+{
+	int ctxnum;
+
+	ctxnum = find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);
+	__set_bit(ctxnum, &gru->gs_context_map);
+	return ctxnum;
+}
+
+/*
+ * Scan the GRUs on the local blade & assign a GRU context.
+ */
+struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
+{
+	struct gru_state *gru, *grux;
+	int i, max_active_contexts;
+	int blade_id = gts->ts_user_blade_id;
+
+	if (blade_id < 0)
+		blade_id = uv_numa_blade_id();
+again:
+	gru = NULL;
+	max_active_contexts = GRU_NUM_CCH;
+	for_each_gru_on_blade(grux, blade_id, i) {
+		if (!gru_check_chiplet_assignment(grux, gts))
+			continue;
+		if (check_gru_resources(grux, gts->ts_cbr_au_count,
+					gts->ts_dsr_au_count,
+					max_active_contexts)) {
+			gru = grux;
+			max_active_contexts = grux->gs_active_contexts;
+			if (max_active_contexts == 0)
+				break;
+		}
+	}
+
+	if (gru) {
+		spin_lock(&gru->gs_lock);
+		if (!check_gru_resources(gru, gts->ts_cbr_au_count,
+					 gts->ts_dsr_au_count, GRU_NUM_CCH)) {
+			spin_unlock(&gru->gs_lock);
+			goto again;
+		}
+		reserve_gru_resources(gru, gts);
+		gts->ts_gru = gru;
+		gts->ts_blade = gru->gs_blade_id;
+		gts->ts_ctxnum = gru_assign_context_number(gru);
+		atomic_inc(&gts->ts_refcnt);
+		gru->gs_gts[gts->ts_ctxnum] = gts;
+		spin_unlock(&gru->gs_lock);
+
+		STAT(assign_context);
+		gru_dbg(grudev,
+			"gseg %p, gts %p, gid %d, ctx %d, cbr %d, dsr %d\n",
+			gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts,
+			gts->ts_gru->gs_gid, gts->ts_ctxnum,
+			gts->ts_cbr_au_count, gts->ts_dsr_au_count);
+	} else {
+		gru_dbg(grudev, "failed to allocate a GTS %s\n", "");
+		STAT(assign_context_failed);
+	}
+
+	return gru;
+}
+
+/*
+ * gru_nopage
+ *
+ * Map the user's GRU segment
+ *
+ * 	Note: gru segments are always mmapped on GRU_GSEG_PAGESIZE boundaries.
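+ *
+ *	In outline, the fault handler below does (a summary of the code
+ *	that follows, not additional behavior):
+ *
+ *	gts = gru_find_thread_state(vma, TSID(vaddr, vma));
+ *	gru_check_context_placement(gts);
+ *	if (!gts->ts_gru)			-- not loaded on any chiplet
+ *		gru_assign_gru_context(gts) + gru_load_context(gts),
+ *		retrying via gru_steal_context() if no resources;
+ *	remap_pfn_range(...)			-- map gseg at fault address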
+ */
+int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct gru_thread_state *gts;
+	unsigned long paddr, vaddr;
+
+	vaddr = (unsigned long)vmf->virtual_address;
+	gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
+		vma, vaddr, GSEG_BASE(vaddr));
+	STAT(nopfn);
+
+	/* The following check ensures vaddr is a valid address in the VMA */
+	gts = gru_find_thread_state(vma, TSID(vaddr, vma));
+	if (!gts)
+		return VM_FAULT_SIGBUS;
+
+again:
+	mutex_lock(&gts->ts_ctxlock);
+	preempt_disable();
+
+	gru_check_context_placement(gts);
+
+	if (!gts->ts_gru) {
+		STAT(load_user_context);
+		if (!gru_assign_gru_context(gts)) {
+			preempt_enable();
+			mutex_unlock(&gts->ts_ctxlock);
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule_timeout(GRU_ASSIGN_DELAY);  /* true hack ZZZ */
+			if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies)
+				gru_steal_context(gts);
+			goto again;
+		}
+		gru_load_context(gts);
+		paddr = gseg_physical_address(gts->ts_gru, gts->ts_ctxnum);
+		remap_pfn_range(vma, vaddr & ~(GRU_GSEG_PAGESIZE - 1),
+				paddr >> PAGE_SHIFT, GRU_GSEG_PAGESIZE,
+				vma->vm_page_prot);
+	}
+
+	preempt_enable();
+	mutex_unlock(&gts->ts_ctxlock);
+
+	return VM_FAULT_NOPAGE;
+}
+
diff --git a/kernel/drivers/misc/sgi-gru/gruprocfs.c b/kernel/drivers/misc/sgi-gru/gruprocfs.c
new file mode 100644
index 000000000..4f7635922
--- /dev/null
+++ b/kernel/drivers/misc/sgi-gru/gruprocfs.c
@@ -0,0 +1,377 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *              PROC INTERFACES
+ *
+ * This file supports the /proc interfaces for the GRU driver
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/proc_fs.h> +#include <linux/device.h> +#include <linux/seq_file.h> +#include <linux/uaccess.h> +#include "gru.h" +#include "grulib.h" +#include "grutables.h" + +#define printstat(s, f) printstat_val(s, &gru_stats.f, #f) + +static void printstat_val(struct seq_file *s, atomic_long_t *v, char *id) +{ + unsigned long val = atomic_long_read(v); + + seq_printf(s, "%16lu %s\n", val, id); +} + +static int statistics_show(struct seq_file *s, void *p) +{ + printstat(s, vdata_alloc); + printstat(s, vdata_free); + printstat(s, gts_alloc); + printstat(s, gts_free); + printstat(s, gms_alloc); + printstat(s, gms_free); + printstat(s, gts_double_allocate); + printstat(s, assign_context); + printstat(s, assign_context_failed); + printstat(s, free_context); + printstat(s, load_user_context); + printstat(s, load_kernel_context); + printstat(s, lock_kernel_context); + printstat(s, unlock_kernel_context); + printstat(s, steal_user_context); + printstat(s, steal_kernel_context); + printstat(s, steal_context_failed); + printstat(s, nopfn); + printstat(s, asid_new); + printstat(s, asid_next); + printstat(s, asid_wrap); + printstat(s, asid_reuse); + printstat(s, intr); + printstat(s, intr_cbr); + printstat(s, intr_tfh); + printstat(s, intr_spurious); + printstat(s, intr_mm_lock_failed); + printstat(s, call_os); + printstat(s, call_os_wait_queue); + printstat(s, user_flush_tlb); + printstat(s, user_unload_context); + printstat(s, user_exception); + printstat(s, set_context_option); + printstat(s, check_context_retarget_intr); + printstat(s, check_context_unload); + printstat(s, tlb_dropin); + printstat(s, tlb_preload_page); + printstat(s, tlb_dropin_fail_no_asid); + printstat(s, tlb_dropin_fail_upm); + printstat(s, tlb_dropin_fail_invalid); + printstat(s, tlb_dropin_fail_range_active); + printstat(s, tlb_dropin_fail_idle); + printstat(s, tlb_dropin_fail_fmm); + printstat(s, tlb_dropin_fail_no_exception); + printstat(s, tfh_stale_on_fault); + printstat(s, mmu_invalidate_range); + printstat(s, mmu_invalidate_page); + printstat(s, flush_tlb); + printstat(s, flush_tlb_gru); + printstat(s, flush_tlb_gru_tgh); + printstat(s, flush_tlb_gru_zero_asid); + printstat(s, copy_gpa); + printstat(s, read_gpa); + printstat(s, mesq_receive); + printstat(s, mesq_receive_none); + printstat(s, mesq_send); + printstat(s, mesq_send_failed); + printstat(s, mesq_noop); + printstat(s, mesq_send_unexpected_error); + printstat(s, mesq_send_lb_overflow); + printstat(s, mesq_send_qlimit_reached); + printstat(s, mesq_send_amo_nacked); + printstat(s, mesq_send_put_nacked); + printstat(s, mesq_qf_locked); + printstat(s, mesq_qf_noop_not_full); + printstat(s, mesq_qf_switch_head_failed); + printstat(s, mesq_qf_unexpected_error); + printstat(s, mesq_noop_unexpected_error); + printstat(s, mesq_noop_lb_overflow); + printstat(s, mesq_noop_qlimit_reached); + printstat(s, mesq_noop_amo_nacked); + printstat(s, mesq_noop_put_nacked); + printstat(s, mesq_noop_page_overflow); + return 0; +} + +static ssize_t statistics_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *data) +{ + memset(&gru_stats, 0, sizeof(gru_stats)); + return count; +} + +static int mcs_statistics_show(struct seq_file *s, void *p) +{ + int op; + unsigned long total, count, max; + static char *id[] = {"cch_allocate", 
"cch_start", "cch_interrupt", + "cch_interrupt_sync", "cch_deallocate", "tfh_write_only", + "tfh_write_restart", "tgh_invalidate"}; + + seq_printf(s, "%-20s%12s%12s%12s\n", "#id", "count", "aver-clks", "max-clks"); + for (op = 0; op < mcsop_last; op++) { + count = atomic_long_read(&mcs_op_statistics[op].count); + total = atomic_long_read(&mcs_op_statistics[op].total); + max = mcs_op_statistics[op].max; + seq_printf(s, "%-20s%12ld%12ld%12ld\n", id[op], count, + count ? total / count : 0, max); + } + return 0; +} + +static ssize_t mcs_statistics_write(struct file *file, + const char __user *userbuf, size_t count, loff_t *data) +{ + memset(mcs_op_statistics, 0, sizeof(mcs_op_statistics)); + return count; +} + +static int options_show(struct seq_file *s, void *p) +{ + seq_printf(s, "#bitmask: 1=trace, 2=statistics\n"); + seq_printf(s, "0x%lx\n", gru_options); + return 0; +} + +static ssize_t options_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *data) +{ + int ret; + + ret = kstrtoul_from_user(userbuf, count, 0, &gru_options); + if (ret) + return ret; + + return count; +} + +static int cch_seq_show(struct seq_file *file, void *data) +{ + long gid = *(long *)data; + int i; + struct gru_state *gru = GID_TO_GRU(gid); + struct gru_thread_state *ts; + const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" }; + + if (gid == 0) + seq_printf(file, "#%5s%5s%6s%7s%9s%6s%8s%8s\n", "gid", "bid", + "ctx#", "asid", "pid", "cbrs", "dsbytes", "mode"); + if (gru) + for (i = 0; i < GRU_NUM_CCH; i++) { + ts = gru->gs_gts[i]; + if (!ts) + continue; + seq_printf(file, " %5d%5d%6d%7d%9d%6d%8d%8s\n", + gru->gs_gid, gru->gs_blade_id, i, + is_kernel_context(ts) ? 0 : ts->ts_gms->ms_asids[gid].mt_asid, + is_kernel_context(ts) ? 0 : ts->ts_tgid_owner, + ts->ts_cbr_au_count * GRU_CBR_AU_SIZE, + ts->ts_cbr_au_count * GRU_DSR_AU_BYTES, + mode[ts->ts_user_options & + GRU_OPT_MISS_MASK]); + } + + return 0; +} + +static int gru_seq_show(struct seq_file *file, void *data) +{ + long gid = *(long *)data, ctxfree, cbrfree, dsrfree; + struct gru_state *gru = GID_TO_GRU(gid); + + if (gid == 0) { + seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "gid", "nid", + "ctx", "cbr", "dsr", "ctx", "cbr", "dsr"); + seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "", "", "busy", + "busy", "busy", "free", "free", "free"); + } + if (gru) { + ctxfree = GRU_NUM_CCH - gru->gs_active_contexts; + cbrfree = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE; + dsrfree = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES; + seq_printf(file, " %5d%5d%7ld%6ld%6ld%8ld%6ld%6ld\n", + gru->gs_gid, gru->gs_blade_id, GRU_NUM_CCH - ctxfree, + GRU_NUM_CBE - cbrfree, GRU_NUM_DSR_BYTES - dsrfree, + ctxfree, cbrfree, dsrfree); + } + + return 0; +} + +static void seq_stop(struct seq_file *file, void *data) +{ +} + +static void *seq_start(struct seq_file *file, loff_t *gid) +{ + if (*gid < gru_max_gids) + return gid; + return NULL; +} + +static void *seq_next(struct seq_file *file, void *data, loff_t *gid) +{ + (*gid)++; + if (*gid < gru_max_gids) + return gid; + return NULL; +} + +static const struct seq_operations cch_seq_ops = { + .start = seq_start, + .next = seq_next, + .stop = seq_stop, + .show = cch_seq_show +}; + +static const struct seq_operations gru_seq_ops = { + .start = seq_start, + .next = seq_next, + .stop = seq_stop, + .show = gru_seq_show +}; + +static int statistics_open(struct inode *inode, struct file *file) +{ + return single_open(file, statistics_show, NULL); +} + +static int mcs_statistics_open(struct inode *inode, struct file 
*file) +{ + return single_open(file, mcs_statistics_show, NULL); +} + +static int options_open(struct inode *inode, struct file *file) +{ + return single_open(file, options_show, NULL); +} + +static int cch_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &cch_seq_ops); +} + +static int gru_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &gru_seq_ops); +} + +/* *INDENT-OFF* */ +static const struct file_operations statistics_fops = { + .open = statistics_open, + .read = seq_read, + .write = statistics_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations mcs_statistics_fops = { + .open = mcs_statistics_open, + .read = seq_read, + .write = mcs_statistics_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations options_fops = { + .open = options_open, + .read = seq_read, + .write = options_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations cch_fops = { + .open = cch_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +static const struct file_operations gru_fops = { + .open = gru_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static struct proc_entry { + char *name; + umode_t mode; + const struct file_operations *fops; + struct proc_dir_entry *entry; +} proc_files[] = { + {"statistics", 0644, &statistics_fops}, + {"mcs_statistics", 0644, &mcs_statistics_fops}, + {"debug_options", 0644, &options_fops}, + {"cch_status", 0444, &cch_fops}, + {"gru_status", 0444, &gru_fops}, + {NULL} +}; +/* *INDENT-ON* */ + +static struct proc_dir_entry *proc_gru __read_mostly; + +static int create_proc_file(struct proc_entry *p) +{ + p->entry = proc_create(p->name, p->mode, proc_gru, p->fops); + if (!p->entry) + return -1; + return 0; +} + +static void delete_proc_files(void) +{ + struct proc_entry *p; + + if (proc_gru) { + for (p = proc_files; p->name; p++) + if (p->entry) + remove_proc_entry(p->name, proc_gru); + proc_remove(proc_gru); + } +} + +int gru_proc_init(void) +{ + struct proc_entry *p; + + proc_gru = proc_mkdir("sgi_uv/gru", NULL); + + for (p = proc_files; p->name; p++) + if (create_proc_file(p)) + goto err; + return 0; + +err: + delete_proc_files(); + return -1; +} + +void gru_proc_exit(void) +{ + delete_proc_files(); +} diff --git a/kernel/drivers/misc/sgi-gru/grutables.h b/kernel/drivers/misc/sgi-gru/grutables.h new file mode 100644 index 000000000..5c3ce2459 --- /dev/null +++ b/kernel/drivers/misc/sgi-gru/grutables.h @@ -0,0 +1,678 @@ +/* + * SN Platform GRU Driver + * + * GRU DRIVER TABLES, MACROS, externs, etc + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __GRUTABLES_H__
+#define __GRUTABLES_H__
+
+/*
+ * GRU Chiplet:
+ *   The GRU is a user-addressable memory accelerator. It provides
+ *   several forms of load, store, memset, bcopy instructions. In addition, it
+ *   contains special instructions for AMOs, sending messages to message
+ *   queues, etc.
+ *
+ *   The GRU is an integral part of the node controller. It connects
+ *   directly to the cpu socket. In its current implementation, there are 2
+ *   GRU chiplets in the node controller on each blade (~node).
+ *
+ *   The entire GRU memory space is fully coherent and cacheable by the cpus.
+ *
+ *   Each GRU chiplet has a physical memory map that looks like the following:
+ *
+ *   	+-----------------+
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	+-----------------+
+ *   	|  system control |
+ *   	+-----------------+        _______ +-------------+
+ *   	|/////////////////|       /        |             |
+ *   	|/////////////////|      /         |             |
+ *   	|/////////////////|     /          | instructions|
+ *   	|/////////////////|    /           |             |
+ *   	|/////////////////|   /            |             |
+ *   	|/////////////////|  /             |-------------|
+ *   	|/////////////////| /              |             |
+ *   	+-----------------+                |             |
+ *   	|   context 15    |                |  data       |
+ *   	+-----------------+                |             |
+ *   	|    ......       | \              |             |
+ *   	+-----------------+  \____________ +-------------+
+ *   	|   context 1     |
+ *   	+-----------------+
+ *   	|   context 0     |
+ *   	+-----------------+
+ *
+ *   Each of the "contexts" is a chunk of memory that can be mmaped into user
+ *   space. The context consists of 2 parts:
+ *
+ *  	- an instruction space that can be directly accessed by the user
+ *  	  to issue GRU instructions and to check instruction status.
+ *
+ *  	- a data area that acts as normal RAM.
+ *
+ *   User instructions contain virtual addresses of data to be accessed by the
+ *   GRU. The GRU contains a TLB that is used to convert these user virtual
+ *   addresses to physical addresses.
+ *
+ *   The "system control" area of the GRU chiplet is used by the kernel driver
+ *   to manage user contexts and to perform functions such as TLB dropin and
+ *   purging.
+ *
+ *   One context may be reserved for the kernel and used for cross-partition
+ *   communication. The GRU will also be used to asynchronously zero out
+ *   large blocks of memory (not currently implemented).
+ *
+ *
+ * 	Tables:
+ *
+ * 	VDATA-VMA Data		- Holds a few parameters. Head of linked list of
+ *				  GTS tables for threads using the GSEG
+ * 	GTS - Gru Thread State  - contains info for managing a GSEG context. A
+ *				  GTS is allocated for each thread accessing a
+ *				  GSEG.
+ *     	GTD - GRU Thread Data   - contains shadow copy of GRU data when GSEG is
+ *				  not loaded into a GRU
+ *	GMS - GRU Memory Struct - Used to manage TLB shootdowns. Tracks GRUs
+ *				  where a GSEG has been loaded. Similar to
+ *				  an mm_struct but for GRU.
+ *
+ *	GS  - GRU State		- Used to manage the state of a GRU chiplet
+ *	BS  - Blade State	- Used to manage state of all GRU chiplets
+ *				  on a blade
+ *
+ *
+ *  Normal task tables for a task using the GRU.
+ * - 2 threads in process + * - 2 GSEGs open in process + * - GSEG1 is being used by both threads + * - GSEG2 is used only by thread 2 + * + * task -->| + * task ---+---> mm ->------ (notifier) -------+-> gms + * | | + * |--> vma -> vdata ---> gts--->| GSEG1 (thread1) + * | | | + * | +-> gts--->| GSEG1 (thread2) + * | | + * |--> vma -> vdata ---> gts--->| GSEG2 (thread2) + * . + * . + * + * GSEGs are marked DONTCOPY on fork + * + * At open + * file.private_data -> NULL + * + * At mmap, + * vma -> vdata + * + * After gseg reference + * vma -> vdata ->gts + * + * After fork + * parent + * vma -> vdata -> gts + * child + * (vma is not copied) + * + */ + +#include <linux/rmap.h> +#include <linux/interrupt.h> +#include <linux/mutex.h> +#include <linux/wait.h> +#include <linux/mmu_notifier.h> +#include "gru.h" +#include "grulib.h" +#include "gruhandles.h" + +extern struct gru_stats_s gru_stats; +extern struct gru_blade_state *gru_base[]; +extern unsigned long gru_start_paddr, gru_end_paddr; +extern void *gru_start_vaddr; +extern unsigned int gru_max_gids; + +#define GRU_MAX_BLADES MAX_NUMNODES +#define GRU_MAX_GRUS (GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE) + +#define GRU_DRIVER_ID_STR "SGI GRU Device Driver" +#define GRU_DRIVER_VERSION_STR "0.85" + +/* + * GRU statistics. + */ +struct gru_stats_s { + atomic_long_t vdata_alloc; + atomic_long_t vdata_free; + atomic_long_t gts_alloc; + atomic_long_t gts_free; + atomic_long_t gms_alloc; + atomic_long_t gms_free; + atomic_long_t gts_double_allocate; + atomic_long_t assign_context; + atomic_long_t assign_context_failed; + atomic_long_t free_context; + atomic_long_t load_user_context; + atomic_long_t load_kernel_context; + atomic_long_t lock_kernel_context; + atomic_long_t unlock_kernel_context; + atomic_long_t steal_user_context; + atomic_long_t steal_kernel_context; + atomic_long_t steal_context_failed; + atomic_long_t nopfn; + atomic_long_t asid_new; + atomic_long_t asid_next; + atomic_long_t asid_wrap; + atomic_long_t asid_reuse; + atomic_long_t intr; + atomic_long_t intr_cbr; + atomic_long_t intr_tfh; + atomic_long_t intr_spurious; + atomic_long_t intr_mm_lock_failed; + atomic_long_t call_os; + atomic_long_t call_os_wait_queue; + atomic_long_t user_flush_tlb; + atomic_long_t user_unload_context; + atomic_long_t user_exception; + atomic_long_t set_context_option; + atomic_long_t check_context_retarget_intr; + atomic_long_t check_context_unload; + atomic_long_t tlb_dropin; + atomic_long_t tlb_preload_page; + atomic_long_t tlb_dropin_fail_no_asid; + atomic_long_t tlb_dropin_fail_upm; + atomic_long_t tlb_dropin_fail_invalid; + atomic_long_t tlb_dropin_fail_range_active; + atomic_long_t tlb_dropin_fail_idle; + atomic_long_t tlb_dropin_fail_fmm; + atomic_long_t tlb_dropin_fail_no_exception; + atomic_long_t tfh_stale_on_fault; + atomic_long_t mmu_invalidate_range; + atomic_long_t mmu_invalidate_page; + atomic_long_t flush_tlb; + atomic_long_t flush_tlb_gru; + atomic_long_t flush_tlb_gru_tgh; + atomic_long_t flush_tlb_gru_zero_asid; + + atomic_long_t copy_gpa; + atomic_long_t read_gpa; + + atomic_long_t mesq_receive; + atomic_long_t mesq_receive_none; + atomic_long_t mesq_send; + atomic_long_t mesq_send_failed; + atomic_long_t mesq_noop; + atomic_long_t mesq_send_unexpected_error; + atomic_long_t mesq_send_lb_overflow; + atomic_long_t mesq_send_qlimit_reached; + atomic_long_t mesq_send_amo_nacked; + atomic_long_t mesq_send_put_nacked; + atomic_long_t mesq_page_overflow; + atomic_long_t mesq_qf_locked; + atomic_long_t mesq_qf_noop_not_full; + 
atomic_long_t mesq_qf_switch_head_failed;
+	atomic_long_t mesq_qf_unexpected_error;
+	atomic_long_t mesq_noop_unexpected_error;
+	atomic_long_t mesq_noop_lb_overflow;
+	atomic_long_t mesq_noop_qlimit_reached;
+	atomic_long_t mesq_noop_amo_nacked;
+	atomic_long_t mesq_noop_put_nacked;
+	atomic_long_t mesq_noop_page_overflow;
+
+};
+
+enum mcs_op {cchop_allocate, cchop_start, cchop_interrupt, cchop_interrupt_sync,
+	cchop_deallocate, tfhop_write_only, tfhop_write_restart,
+	tghop_invalidate, mcsop_last};
+
+struct mcs_op_statistic {
+	atomic_long_t	count;
+	atomic_long_t	total;
+	unsigned long	max;
+};
+
+extern struct mcs_op_statistic mcs_op_statistics[mcsop_last];
+
+#define OPT_DPRINT	1
+#define OPT_STATS	2
+
+
+#define IRQ_GRU		110	/* Starting IRQ number for interrupts */
+
+/* Delay in jiffies between attempts to assign a GRU context */
+#define GRU_ASSIGN_DELAY	((HZ * 20) / 1000)
+
+/*
+ * If a process has its context stolen, min delay in jiffies before trying to
+ * steal a context from another process.
+ */
+#define GRU_STEAL_DELAY		((HZ * 200) / 1000)
+
+#define STAT(id)	do {						\
+				if (gru_options & OPT_STATS)		\
+					atomic_long_inc(&gru_stats.id);	\
+			} while (0)
+
+#ifdef CONFIG_SGI_GRU_DEBUG
+#define gru_dbg(dev, fmt, x...)						\
+	do {								\
+		if (gru_options & OPT_DPRINT)				\
+			printk(KERN_DEBUG "GRU:%d %s: " fmt, smp_processor_id(), __func__, x);\
+	} while (0)
+#else
+#define gru_dbg(x...)
+#endif
+
+/*-----------------------------------------------------------------------------
+ * ASID management
+ */
+#define MAX_ASID	0xfffff0
+#define MIN_ASID	8
+#define ASID_INC	8	/* number of regions */
+
+/* Generate a GRU asid value from a GRU base asid & a virtual address. */
+#define VADDR_HI_BIT		64
+#define GRUREGION(addr)		((addr) >> (VADDR_HI_BIT - 3) & 3)
+#define GRUASID(asid, addr)	((asid) + GRUREGION(addr))
+
+/*------------------------------------------------------------------------------
+ *  File & VMS Tables
+ */
+
+struct gru_state;
+
+/*
+ * This structure is pointed to from the mmstruct via the notifier pointer.
+ * There is one of these per address space.
+ */
+struct gru_mm_tracker {				/* pack to reduce size */
+	unsigned int		mt_asid_gen:24;	/* ASID wrap count */
+	unsigned int		mt_asid:24;	/* current base ASID for gru */
+	unsigned short		mt_ctxbitmap:16;/* bitmap of contexts using
+						   asid */
+} __attribute__ ((packed));
+
+struct gru_mm_struct {
+	struct mmu_notifier	ms_notifier;
+	atomic_t		ms_refcnt;
+	spinlock_t		ms_asid_lock;	/* protects ASID assignment */
+	atomic_t		ms_range_active;/* num range_invals active */
+	char			ms_released;
+	wait_queue_head_t	ms_wait_queue;
+	DECLARE_BITMAP(ms_asidmap, GRU_MAX_GRUS);
+	struct gru_mm_tracker	ms_asids[GRU_MAX_GRUS];
+};
+
+/*
+ * One of these structures is allocated when a GSEG is mmaped. The
+ * structure is pointed to by the vma->vm_private_data field in the vma struct.
+ */
+struct gru_vma_data {
+	spinlock_t		vd_lock;	/* Serialize access to vma */
+	struct list_head	vd_head;	/* head of linked list of gts */
+	long			vd_user_options;/* misc user option flags */
+	int			vd_cbr_au_count;
+	int			vd_dsr_au_count;
+	unsigned char		vd_tlb_preload_count;
+};
+
+/*
+ * One of these is allocated for each thread accessing a mmaped GRU. A linked
+ * list of these structures is hung off the struct gru_vma_data in the mm_struct.
+ */
+struct gru_thread_state {
+	struct list_head	ts_next;	/* list - head at vma-private */
+	struct mutex		ts_ctxlock;	/* load/unload CTX lock */
+	struct mm_struct	*ts_mm;		/* mm currently mapped to
+						   context */
+	struct vm_area_struct	*ts_vma;	/* vma of GRU context */
+	struct gru_state	*ts_gru;	/* GRU where the context is
+						   loaded */
+	struct gru_mm_struct	*ts_gms;	/* asid & ioproc struct */
+	unsigned char		ts_tlb_preload_count; /* TLB preload pages */
+	unsigned long		ts_cbr_map;	/* map of allocated CBRs */
+	unsigned long		ts_dsr_map;	/* map of allocated DATA
+						   resources */
+	unsigned long		ts_steal_jiffies;/* jiffies when context last
+						    stolen */
+	long			ts_user_options;/* misc user option flags */
+	pid_t			ts_tgid_owner;	/* task that is using the
+						   context - for migration */
+	short			ts_user_blade_id;/* user selected blade */
+	char			ts_user_chiplet_id;/* user selected chiplet */
+	unsigned short		ts_sizeavail;	/* Pagesizes in use */
+	int			ts_tsid;	/* thread that owns the
+						   structure */
+	int			ts_tlb_int_select;/* target cpu if interrupts
+						     enabled */
+	int			ts_ctxnum;	/* context number where the
+						   context is loaded */
+	atomic_t		ts_refcnt;	/* reference count GTS */
+	unsigned char		ts_dsr_au_count;/* Number of DSR resources
+						   required for context */
+	unsigned char		ts_cbr_au_count;/* Number of CBR resources
+						   required for context */
+	char			ts_cch_req_slice;/* CCH packet slice */
+	char			ts_blade;	/* If >= 0, migrate context if
+						   ref from different blade */
+	char			ts_force_cch_reload;
+	char			ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each
+							  allocated CB */
+	int			ts_data_valid;	/* Indicates if ts_gdata has
+						   valid data */
+	struct gru_gseg_statistics ustats;	/* User statistics */
+	unsigned long		ts_gdata[0];	/* save area for GRU data (CB,
+						   DS, CBE) */
+};
+
+/*
+ * Threaded programs actually allocate an array of GSEGs when a context is
+ * created. Each thread uses a separate GSEG. TSID is the index into the GSEG
+ * array.
+ */
+#define TSID(a, v)		(((a) - (v)->vm_start) / GRU_GSEG_PAGESIZE)
+#define UGRUADDR(gts)		((gts)->ts_vma->vm_start +		\
+					(gts)->ts_tsid * GRU_GSEG_PAGESIZE)
+
+#define NULLCTX			(-1)	/* if context not loaded into GRU */
+
+/*-----------------------------------------------------------------------------
+ *  GRU State Tables
+ */
+
+/*
+ * One of these exists for each GRU chiplet.
+ */
+struct gru_state {
+	struct gru_blade_state	*gs_blade;		/* GRU state for entire
+							   blade */
+	unsigned long		gs_gru_base_paddr;	/* Physical address of
+							   gru segments (64) */
+	void			*gs_gru_base_vaddr;	/* Virtual address of
+							   gru segments (64) */
+	unsigned short		gs_gid;			/* unique GRU number */
+	unsigned short		gs_blade_id;		/* blade of GRU */
+	unsigned char		gs_chiplet_id;		/* blade chiplet of GRU */
+	unsigned char		gs_tgh_local_shift;	/* used to pick TGH for
+							   local flush */
+	unsigned char		gs_tgh_first_remote;	/* starting TGH# for
+							   remote flush */
+	spinlock_t		gs_asid_lock;		/* lock used for
+							   assigning asids */
+	spinlock_t		gs_lock;		/* lock used for
+							   assigning contexts */
+
+	/* -- the following are protected by the gs_asid_lock spinlock ---- */
+	unsigned int		gs_asid;		/* Next available ASID */
+	unsigned int		gs_asid_limit;		/* Limit of available
+							   ASIDs */
+	unsigned int		gs_asid_gen;		/* asid generation.
							   Inc on wrap */
+
+	/* --- the following fields are protected by the gs_lock spinlock --- */
+	unsigned long		gs_context_map;		/* bitmap to manage
+							   contexts in use */
+	unsigned long		gs_cbr_map;		/* bitmap to manage CB
+							   resources */
+	unsigned long		gs_dsr_map;		/* bitmap used to manage
+							   DATA resources */
+	unsigned int		gs_reserved_cbrs;	/* Number of kernel-
+							   reserved cbrs */
+	unsigned int		gs_reserved_dsr_bytes;	/* Bytes of kernel-
+							   reserved dsrs */
+	unsigned short		gs_active_contexts;	/* number of contexts
+							   in use */
+	struct gru_thread_state	*gs_gts[GRU_NUM_CCH];	/* GTS currently using
+							   the context */
+	int			gs_irq[GRU_NUM_TFM];	/* Interrupt irqs */
+};
+
+/*
+ * This structure contains the GRU state for all the GRUs on a blade.
+ */
+struct gru_blade_state {
+	void		*kernel_cb;			/* First kernel
+							   reserved cb */
+	void		*kernel_dsr;			/* First kernel
+							   reserved DSR */
+	struct rw_semaphore	bs_kgts_sema;		/* lock for kgts */
+	struct gru_thread_state *bs_kgts;		/* GTS for kernel use */
+
+	/* ---- the following are used for managing kernel async GRU CBRs --- */
+	int		bs_async_dsr_bytes;		/* DSRs for async */
+	int		bs_async_cbrs;			/* CBRs AU for async */
+	struct completion *bs_async_wq;
+
+	/* ---- the following are protected by the bs_lock spinlock ---- */
+	spinlock_t	bs_lock;			/* lock used for
+							   stealing contexts */
+	int		bs_lru_ctxnum;			/* STEAL - last context
+							   stolen */
+	struct gru_state *bs_lru_gru;			/* STEAL - last gru
+							   stolen */
+
+	struct gru_state bs_grus[GRU_CHIPLETS_PER_BLADE];
+};
+
+/*-----------------------------------------------------------------------------
+ * Address Primitives
+ */
+#define get_tfm_for_cpu(g, c)						\
+	((struct gru_tlb_fault_map *)get_tfm((g)->gs_gru_base_vaddr, (c)))
+#define get_tfh_by_index(g, i)						\
+	((struct gru_tlb_fault_handle *)get_tfh((g)->gs_gru_base_vaddr, (i)))
+#define get_tgh_by_index(g, i)						\
+	((struct gru_tlb_global_handle *)get_tgh((g)->gs_gru_base_vaddr, (i)))
+#define get_cbe_by_index(g, i)						\
+	((struct gru_control_block_extended *)get_cbe((g)->gs_gru_base_vaddr,\
+			(i)))
+
+/*-----------------------------------------------------------------------------
+ * Useful Macros
+ */
+
+/* Given a blade# & chiplet#, get a pointer to the GRU */
+#define get_gru(b, c)		(&gru_base[b]->bs_grus[c])
+
+/* Number of bytes to save/restore when unloading/loading GRU contexts */
+#define DSR_BYTES(dsr)		((dsr) * GRU_DSR_AU_BYTES)
+#define CBR_BYTES(cbr)		((cbr) * GRU_HANDLE_BYTES * GRU_CBR_AU_SIZE * 2)
+
+/* Convert a user CB number to the actual CBRNUM */
+#define thread_cbr_number(gts, n) ((gts)->ts_cbr_idx[(n) / GRU_CBR_AU_SIZE] \
+				  * GRU_CBR_AU_SIZE + (n) % GRU_CBR_AU_SIZE)
+
+/* Convert a gid to a pointer to the GRU */
+#define GID_TO_GRU(gid)							\
+	(gru_base[(gid) / GRU_CHIPLETS_PER_BLADE] ?			\
+		(&gru_base[(gid) / GRU_CHIPLETS_PER_BLADE]->		\
+			bs_grus[(gid) % GRU_CHIPLETS_PER_BLADE]) :	\
+	 NULL)
+
+/* Scan all active GRUs in a GRU bitmap */
+#define for_each_gru_in_bitmap(gid, map)				\
+	for_each_set_bit((gid), (map), GRU_MAX_GRUS)
+
+/* Scan all active GRUs on a specific blade */
+#define for_each_gru_on_blade(gru, nid, i)				\
+	for ((gru) = gru_base[nid]->bs_grus, (i) = 0;			\
+	     (i) < GRU_CHIPLETS_PER_BLADE;				\
+	     (i)++, (gru)++)
+
+/* Scan all GRUs */
+#define foreach_gid(gid)						\
+	for ((gid) = 0; (gid) < gru_max_gids; (gid)++)
+
+/* Scan all active GTSs on a gru. Note: must hold gs_lock to use this macro. */
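+/*
+ * Illustrative use of for_each_gts_on_gru() (a hypothetical caller, not
+ * part of the original header; inspect() is a made-up name):
+ *
+ *	spin_lock(&gru->gs_lock);
+ *	for_each_gts_on_gru(gts, gru, ctxnum)
+ *		inspect(gts, ctxnum);
+ *	spin_unlock(&gru->gs_lock);
+ */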
*/ +#define for_each_gts_on_gru(gts, gru, ctxnum) \ + for ((ctxnum) = 0; (ctxnum) < GRU_NUM_CCH; (ctxnum)++) \ + if (((gts) = (gru)->gs_gts[ctxnum])) + +/* Scan each CBR whose bit is set in a TFM (or copy of) */ +#define for_each_cbr_in_tfm(i, map) \ + for_each_set_bit((i), (map), GRU_NUM_CBE) + +/* Scan each CBR in a CBR bitmap. Note: multiple CBRs in an allocation unit */ +#define for_each_cbr_in_allocation_map(i, map, k) \ + for_each_set_bit((k), (map), GRU_CBR_AU) \ + for ((i) = (k)*GRU_CBR_AU_SIZE; \ + (i) < ((k) + 1) * GRU_CBR_AU_SIZE; (i)++) + +/* Scan each DSR in a DSR bitmap. Note: multiple DSRs in an allocation unit */ +#define for_each_dsr_in_allocation_map(i, map, k) \ + for_each_set_bit((k), (const unsigned long *)(map), GRU_DSR_AU) \ + for ((i) = (k) * GRU_DSR_AU_CL; \ + (i) < ((k) + 1) * GRU_DSR_AU_CL; (i)++) + +#define gseg_physical_address(gru, ctxnum) \ + ((gru)->gs_gru_base_paddr + ctxnum * GRU_GSEG_STRIDE) +#define gseg_virtual_address(gru, ctxnum) \ + ((gru)->gs_gru_base_vaddr + ctxnum * GRU_GSEG_STRIDE) + +/*----------------------------------------------------------------------------- + * Lock / Unlock GRU handles + * Use the "delresp" bit in the handle as a "lock" bit. + */ + +/* Lock hierarchy checking enabled only in emulator */ + +/* 0 = lock failed, 1 = locked */ +static inline int __trylock_handle(void *h) +{ + return !test_and_set_bit(1, h); +} + +static inline void __lock_handle(void *h) +{ + while (test_and_set_bit(1, h)) + cpu_relax(); +} + +static inline void __unlock_handle(void *h) +{ + clear_bit(1, h); +} + +static inline int trylock_cch_handle(struct gru_context_configuration_handle *cch) +{ + return __trylock_handle(cch); +} + +static inline void lock_cch_handle(struct gru_context_configuration_handle *cch) +{ + __lock_handle(cch); +} + +static inline void unlock_cch_handle(struct gru_context_configuration_handle + *cch) +{ + __unlock_handle(cch); +} + +static inline void lock_tgh_handle(struct gru_tlb_global_handle *tgh) +{ + __lock_handle(tgh); +} + +static inline void unlock_tgh_handle(struct gru_tlb_global_handle *tgh) +{ + __unlock_handle(tgh); +} + +static inline int is_kernel_context(struct gru_thread_state *gts) +{ + return !gts->ts_mm; +} + +/* + * The following are for Nehelem-EX. A more general scheme is needed for + * future processors. 
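+ *
+ * Worked decode of the macros below, using a made-up (illustrative only)
+ * value cpu_physical_id(p) = 0x14 (0b010100):
+ *	uv_cpu_socket_number(p) = (0x14 >> 5) & 1 = 0
+ *	uv_cpu_ht_number(p)     = 0x14 & 1 = 0
+ *	uv_cpu_core_number(p)   = ((0x14 >> 2) & 4) | ((0x14 >> 1) & 3)
+ *	                        = 4 | 2 = 6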
+ */
+#define UV_MAX_INT_CORES		8
+#define uv_cpu_socket_number(p)	((cpu_physical_id(p) >> 5) & 1)
+#define uv_cpu_ht_number(p)		(cpu_physical_id(p) & 1)
+#define uv_cpu_core_number(p)	(((cpu_physical_id(p) >> 2) & 4) |	\
+					((cpu_physical_id(p) >> 1) & 3))
+/*-----------------------------------------------------------------------------
+ * Function prototypes & externs
+ */
+struct gru_unload_context_req;
+
+extern const struct vm_operations_struct gru_vm_ops;
+extern struct device *grudev;
+
+extern struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma,
+				int tsid);
+extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct
+				*vma, int tsid);
+extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct
+				*vma, int tsid);
+extern struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts);
+extern void gru_load_context(struct gru_thread_state *gts);
+extern void gru_steal_context(struct gru_thread_state *gts);
+extern void gru_unload_context(struct gru_thread_state *gts, int savestate);
+extern int gru_update_cch(struct gru_thread_state *gts);
+extern void gts_drop(struct gru_thread_state *gts);
+extern void gru_tgh_flush_init(struct gru_state *gru);
+extern int gru_kservices_init(void);
+extern void gru_kservices_exit(void);
+extern irqreturn_t gru0_intr(int irq, void *dev_id);
+extern irqreturn_t gru1_intr(int irq, void *dev_id);
+extern irqreturn_t gru_intr_mblade(int irq, void *dev_id);
+extern int gru_dump_chiplet_request(unsigned long arg);
+extern long gru_get_gseg_statistics(unsigned long arg);
+extern int gru_handle_user_call_os(unsigned long address);
+extern int gru_user_flush_tlb(unsigned long arg);
+extern int gru_user_unload_context(unsigned long arg);
+extern int gru_get_exception_detail(unsigned long arg);
+extern int gru_set_context_option(unsigned long address);
+extern void gru_check_context_placement(struct gru_thread_state *gts);
+extern int gru_cpu_fault_map_id(void);
+extern struct vm_area_struct *gru_find_vma(unsigned long vaddr);
+extern void gru_flush_all_tlb(struct gru_state *gru);
+extern int gru_proc_init(void);
+extern void gru_proc_exit(void);
+
+extern struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
+		int cbr_au_count, int dsr_au_count,
+		unsigned char tlb_preload_count, int options, int tsid);
+extern unsigned long gru_reserve_cb_resources(struct gru_state *gru,
+		int cbr_au_count, char *cbmap);
+extern unsigned long gru_reserve_ds_resources(struct gru_state *gru,
+		int dsr_au_count, char *dsmap);
+extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf);
+extern struct gru_mm_struct *gru_register_mmu_notifier(void);
+extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms);
+
+extern int gru_ktest(unsigned long arg);
+extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
+				unsigned long len);
+
+extern unsigned long gru_options;
+
+#endif /* __GRUTABLES_H__ */
diff --git a/kernel/drivers/misc/sgi-gru/grutlbpurge.c b/kernel/drivers/misc/sgi-gru/grutlbpurge.c
new file mode 100644
index 000000000..2129274ef
--- /dev/null
+++ b/kernel/drivers/misc/sgi-gru/grutlbpurge.c
@@ -0,0 +1,377 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *		MMUOPS callbacks  + TLB flushing
+ *
+ * This file handles mmu notifier callbacks from the core kernel. The callbacks
+ * are used to update the TLB in the GRU as a result of changes in the
+ * state of a process address space. This file also handles TLB invalidates
+ * from the GRU driver.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/hugetlb.h>
+#include <linux/delay.h>
+#include <linux/timex.h>
+#include <linux/srcu.h>
+#include <asm/processor.h>
+#include "gru.h"
+#include "grutables.h"
+#include <asm/uv/uv_hub.h>
+
+#define gru_random()	get_cycles()
+
+/* ---------------------------------- TLB Invalidation functions --------
+ * get_tgh_handle
+ *
+ * Find a TGH to use for issuing a TLB invalidate. For GRUs that are on the
+ * local blade, use a fixed TGH that is a function of the blade-local cpu
+ * number. Normally, this TGH is private to the cpu & no contention occurs for
+ * the TGH. For offblade GRUs, select a random TGH in the range above the
+ * private TGHs. A spinlock is required to access this TGH & the lock must be
+ * released when the invalidate completes. This sucks, but it is the best we
+ * can do.
+ *
+ * Note that the spinlock is IN the TGH handle so locking does not involve
+ * additional cache lines.
+ *
+ */
+static inline int get_off_blade_tgh(struct gru_state *gru)
+{
+	int n;
+
+	n = GRU_NUM_TGH - gru->gs_tgh_first_remote;
+	n = gru_random() % n;
+	n += gru->gs_tgh_first_remote;
+	return n;
+}
+
+static inline int get_on_blade_tgh(struct gru_state *gru)
+{
+	return uv_blade_processor_id() >> gru->gs_tgh_local_shift;
+}
+
+static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state
+							 *gru)
+{
+	struct gru_tlb_global_handle *tgh;
+	int n;
+
+	preempt_disable();
+	if (uv_numa_blade_id() == gru->gs_blade_id)
+		n = get_on_blade_tgh(gru);
+	else
+		n = get_off_blade_tgh(gru);
+	tgh = get_tgh_by_index(gru, n);
+	lock_tgh_handle(tgh);
+
+	return tgh;
+}
+
+static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
+{
+	unlock_tgh_handle(tgh);
+	preempt_enable();
+}
+
+/*
+ * gru_flush_tlb_range
+ *
+ * General purpose TLB invalidation function. This function scans every GRU in
+ * the ENTIRE system (partition) looking for GRUs where the specified MM has
+ * been accessed by the GRU. For each GRU found, the TLB must be invalidated OR
+ * the ASID invalidated. Invalidating an ASID causes a new ASID to be assigned
+ * on the next fault. This effectively flushes the ENTIRE TLB for the MM at the
+ * cost of (possibly) a large number of future TLBmisses.
+ *
+ * The current algorithm is optimized based on the following (somewhat true)
+ * assumptions:
+ *	- GRU contexts are not loaded into a GRU unless a reference is made to
+ *	  the data segment or control block (this is true, not an assumption).
+ *	  If a DS/CB is referenced, the user will also issue instructions that
+ *	  cause TLBmisses. It is not necessary to optimize for the case where
+ *	  contexts are loaded but no instructions cause TLB misses. (I know
+ *	  this will happen but I'm not optimizing for it).
+ *	- GRU instructions to invalidate TLB entries are SLOOOOWWW - normally
+ *	  a few usec but in unusual cases, it could be longer. Avoid if
+ *	  possible.
+ *	- intrablade process migration between cpus is not frequent but is
+ *	  common.
+ *	- a GRU context is not typically migrated to a different GRU on the
+ *	  blade because of intrablade migration
+ *	- interblade migration is rare. Processes migrate their GRU context to
+ *	  the new blade.
+ *	- if interblade migration occurs, migration back to the original blade
+ *	  is very very rare (i.e., no optimization for this case)
+ *	- most GRU instructions operate on a subset of the user REGIONS. Code
+ *	  & shared library regions are not likely targets of GRU instructions.
+ *
+ * To help improve the efficiency of TLB invalidation, the GMS data
+ * structure is maintained for EACH address space (MM struct). The GMS is
+ * also the structure that contains the pointer to the mmu callout
+ * functions. This structure is linked to the mm_struct for the address space
+ * using the mmu "register" function. The mmu interfaces are used to
+ * provide the callbacks for TLB invalidation. The GMS contains:
+ *
+ *	- asid[maxgrus] array. ASIDs are assigned to a GRU when a context is
+ *	  loaded into the GRU.
+ *	- asidmap[maxgrus]. bitmap to make it easier to find non-zero asids in
+ *	  the above array
+ *	- ctxbitmap[maxgrus]. Indicates the contexts that are currently active
+ *	  in the GRU for the address space. This bitmap must be passed to the
+ *	  GRU to do an invalidate.
+ *
+ * The current algorithm for invalidating TLBs is:
+ *	- scan the asidmap for GRUs where the context has been loaded, i.e.,
+ *	  asid is non-zero.
+ *	- for each gru found:
+ *		- if the ctxtmap is non-zero, there are active contexts in the
+ *		  GRU. TLB invalidate instructions must be issued to the GRU.
+ *		- if the ctxtmap is zero, no context is active. Set the ASID to
+ *		  zero to force a full TLB invalidation.
This is fast but will + * cause a lot of TLB misses if the context is reloaded onto the + * GRU + * + */ + +void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start, + unsigned long len) +{ + struct gru_state *gru; + struct gru_mm_tracker *asids; + struct gru_tlb_global_handle *tgh; + unsigned long num; + int grupagesize, pagesize, pageshift, gid, asid; + + /* ZZZ TODO - handle huge pages */ + pageshift = PAGE_SHIFT; + pagesize = (1UL << pageshift); + grupagesize = GRU_PAGESIZE(pageshift); + num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL); + + STAT(flush_tlb); + gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms, + start, len, gms->ms_asidmap[0]); + + spin_lock(&gms->ms_asid_lock); + for_each_gru_in_bitmap(gid, gms->ms_asidmap) { + STAT(flush_tlb_gru); + gru = GID_TO_GRU(gid); + asids = gms->ms_asids + gid; + asid = asids->mt_asid; + if (asids->mt_ctxbitmap && asid) { + STAT(flush_tlb_gru_tgh); + asid = GRUASID(asid, start); + gru_dbg(grudev, + " FLUSH gruid %d, asid 0x%x, vaddr 0x%lx, vamask 0x%x, num %ld, cbmap 0x%x\n", + gid, asid, start, grupagesize, num, asids->mt_ctxbitmap); + tgh = get_lock_tgh_handle(gru); + tgh_invalidate(tgh, start, ~0, asid, grupagesize, 0, + num - 1, asids->mt_ctxbitmap); + get_unlock_tgh_handle(tgh); + } else { + STAT(flush_tlb_gru_zero_asid); + asids->mt_asid = 0; + __clear_bit(gru->gs_gid, gms->ms_asidmap); + gru_dbg(grudev, + " CLEARASID gruid %d, asid 0x%x, cbtmap 0x%x, asidmap 0x%lx\n", + gid, asid, asids->mt_ctxbitmap, + gms->ms_asidmap[0]); + } + } + spin_unlock(&gms->ms_asid_lock); +} + +/* + * Flush the entire TLB on a chiplet. + */ +void gru_flush_all_tlb(struct gru_state *gru) +{ + struct gru_tlb_global_handle *tgh; + + gru_dbg(grudev, "gid %d\n", gru->gs_gid); + tgh = get_lock_tgh_handle(gru); + tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0xffff); + get_unlock_tgh_handle(tgh); +} + +/* + * MMUOPS notifier callout functions + */ +static void gru_invalidate_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, + ms_notifier); + + STAT(mmu_invalidate_range); + atomic_inc(&gms->ms_range_active); + gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms, + start, end, atomic_read(&gms->ms_range_active)); + gru_flush_tlb_range(gms, start, end - start); +} + +static void gru_invalidate_range_end(struct mmu_notifier *mn, + struct mm_struct *mm, unsigned long start, + unsigned long end) +{ + struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, + ms_notifier); + + /* ..._and_test() provides needed barrier */ + (void)atomic_dec_and_test(&gms->ms_range_active); + + wake_up_all(&gms->ms_wait_queue); + gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end); +} + +static void gru_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm, + unsigned long address) +{ + struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, + ms_notifier); + + STAT(mmu_invalidate_page); + gru_flush_tlb_range(gms, address, PAGE_SIZE); + gru_dbg(grudev, "gms %p, address 0x%lx\n", gms, address); +} + +static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm) +{ + struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, + ms_notifier); + + gms->ms_released = 1; + gru_dbg(grudev, "gms %p\n", gms); +} + + +static const struct mmu_notifier_ops gru_mmuops = { + .invalidate_page = gru_invalidate_page, + .invalidate_range_start = 
gru_invalidate_range_start,
+	.invalidate_range_end	= gru_invalidate_range_end,
+	.release		= gru_release,
+};
+
+/* Move this to the basic mmu_notifier file. But for now... */
+static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm,
+			const struct mmu_notifier_ops *ops)
+{
+	struct mmu_notifier *mn, *gru_mn = NULL;
+
+	if (mm->mmu_notifier_mm) {
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list,
+					 hlist)
+			if (mn->ops == ops) {
+				gru_mn = mn;
+				break;
+			}
+		rcu_read_unlock();
+	}
+	return gru_mn;
+}
+
+struct gru_mm_struct *gru_register_mmu_notifier(void)
+{
+	struct gru_mm_struct *gms;
+	struct mmu_notifier *mn;
+	int err;
+
+	mn = mmu_find_ops(current->mm, &gru_mmuops);
+	if (mn) {
+		gms = container_of(mn, struct gru_mm_struct, ms_notifier);
+		atomic_inc(&gms->ms_refcnt);
+	} else {
+		gms = kzalloc(sizeof(*gms), GFP_KERNEL);
+		if (gms) {
+			STAT(gms_alloc);
+			spin_lock_init(&gms->ms_asid_lock);
+			gms->ms_notifier.ops = &gru_mmuops;
+			atomic_set(&gms->ms_refcnt, 1);
+			init_waitqueue_head(&gms->ms_wait_queue);
+			err = __mmu_notifier_register(&gms->ms_notifier,
+						      current->mm);
+			if (err)
+				goto error;
+		}
+	}
+	gru_dbg(grudev, "gms %p, refcnt %d\n", gms,
+		atomic_read(&gms->ms_refcnt));
+	return gms;
+error:
+	kfree(gms);
+	return ERR_PTR(err);
+}
+
+void gru_drop_mmu_notifier(struct gru_mm_struct *gms)
+{
+	gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms,
+		atomic_read(&gms->ms_refcnt), gms->ms_released);
+	if (atomic_dec_return(&gms->ms_refcnt) == 0) {
+		if (!gms->ms_released)
+			mmu_notifier_unregister(&gms->ms_notifier, current->mm);
+		kfree(gms);
+		STAT(gms_free);
+	}
+}
+
+/*
+ * Setup TGH parameters. There are:
+ * 	- 24 TGH handles per GRU chiplet
+ * 	- a portion (MAX_LOCAL_TGH) of the handles are reserved for
+ * 	  use by blade-local cpus
+ * 	- the rest are used by off-blade cpus. This usage is
+ * 	  less frequent than blade-local usage.
+ *
+ * For now, use 16 handles for local flushes, 8 for remote flushes. If the blade
+ * has less than or equal to 16 cpus, each cpu has a unique handle that it can
+ * use. (For example, with 28 possible cpus: n = 32, shift = 1, each pair of
+ * cpus shares one of TGHs 0-13, and remote flushes start at TGH 14.)
+ */
+#define MAX_LOCAL_TGH	16
+
+void gru_tgh_flush_init(struct gru_state *gru)
+{
+	int cpus, shift = 0, n;
+
+	cpus = uv_blade_nr_possible_cpus(gru->gs_blade_id);
+
+	/* n = cpus rounded up to next power of 2 */
+	if (cpus) {
+		n = 1 << fls(cpus - 1);
+
+		/*
+		 * shift count for converting local cpu# to TGH index
+		 *	0 if cpus <= MAX_LOCAL_TGH,
+		 *	1 if cpus <= 2*MAX_LOCAL_TGH,
+		 *	etc
+		 */
+		shift = max(0, fls(n - 1) - fls(MAX_LOCAL_TGH - 1));
+	}
+	gru->gs_tgh_local_shift = shift;
+
+	/* first starting TGH index to use for remote purges */
+	gru->gs_tgh_first_remote = (cpus + (1 << shift) - 1) >> shift;
+
+}
diff --git a/kernel/drivers/misc/sgi-xp/Makefile b/kernel/drivers/misc/sgi-xp/Makefile
new file mode 100644
index 000000000..4fc40d8e1
--- /dev/null
+++ b/kernel/drivers/misc/sgi-xp/Makefile
@@ -0,0 +1,19 @@
+#
+# Makefile for SGI's XP devices.
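+#
+# (xp.o is always built when CONFIG_SGI_XP is enabled; the sn2 and uv
+# platform backends below are linked in according to the architecture
+# options.)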
+# + +obj-$(CONFIG_SGI_XP) += xp.o +xp-y := xp_main.o +xp-$(CONFIG_IA64_SGI_SN2) += xp_sn2.o xp_nofault.o +xp-$(CONFIG_IA64_GENERIC) += xp_sn2.o xp_nofault.o +xp-$(CONFIG_IA64_SGI_UV) += xp_uv.o +xp-$(CONFIG_X86_64) += xp_uv.o + +obj-$(CONFIG_SGI_XP) += xpc.o +xpc-y := xpc_main.o xpc_channel.o xpc_partition.o +xpc-$(CONFIG_IA64_SGI_SN2) += xpc_sn2.o +xpc-$(CONFIG_IA64_GENERIC) += xpc_sn2.o +xpc-$(CONFIG_IA64_SGI_UV) += xpc_uv.o +xpc-$(CONFIG_X86_64) += xpc_uv.o + +obj-$(CONFIG_SGI_XP) += xpnet.o diff --git a/kernel/drivers/misc/sgi-xp/xp.h b/kernel/drivers/misc/sgi-xp/xp.h new file mode 100644 index 000000000..c862cd458 --- /dev/null +++ b/kernel/drivers/misc/sgi-xp/xp.h @@ -0,0 +1,358 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2004-2008 Silicon Graphics, Inc. All rights reserved. + */ + +/* + * External Cross Partition (XP) structures and defines. + */ + +#ifndef _DRIVERS_MISC_SGIXP_XP_H +#define _DRIVERS_MISC_SGIXP_XP_H + +#include <linux/mutex.h> + +#if defined CONFIG_X86_UV || defined CONFIG_IA64_SGI_UV +#include <asm/uv/uv.h> +#define is_uv() is_uv_system() +#endif + +#ifndef is_uv +#define is_uv() 0 +#endif + +#if defined CONFIG_IA64 +#include <asm/sn/arch.h> /* defines is_shub1() and is_shub2() */ +#define is_shub() ia64_platform_is("sn2") +#endif + +#ifndef is_shub1 +#define is_shub1() 0 +#endif + +#ifndef is_shub2 +#define is_shub2() 0 +#endif + +#ifndef is_shub +#define is_shub() 0 +#endif + +#ifdef USE_DBUG_ON +#define DBUG_ON(condition) BUG_ON(condition) +#else +#define DBUG_ON(condition) +#endif + +/* + * Define the maximum number of partitions the system can possibly support. + * It is based on the maximum number of hardware partitionable regions. The + * term 'region' in this context refers to the minimum number of nodes that + * can comprise an access protection grouping. The access protection is in + * regards to memory, IPI and IOI. + * + * The maximum number of hardware partitionable regions is equal to the + * maximum number of nodes in the entire system divided by the minimum number + * of nodes that comprise an access protection grouping. + */ +#define XP_MAX_NPARTITIONS_SN2 64 +#define XP_MAX_NPARTITIONS_UV 256 + +/* + * XPC establishes channel connections between the local partition and any + * other partition that is currently up. Over these channels, kernel-level + * `users' can communicate with their counterparts on the other partitions. + * + * If the need for additional channels arises, one can simply increase + * XPC_MAX_NCHANNELS accordingly. If the day should come where that number + * exceeds the absolute MAXIMUM number of channels possible (eight), then one + * will need to make changes to the XPC code to accommodate for this. + * + * The absolute maximum number of channels possible is limited to eight for + * performance reasons on sn2 hardware. The internal cross partition structures + * require sixteen bytes per channel, and eight allows all of this + * interface-shared info to fit in one 128-byte cacheline. + */ +#define XPC_MEM_CHANNEL 0 /* memory channel number */ +#define XPC_NET_CHANNEL 1 /* network channel number */ + +#define XPC_MAX_NCHANNELS 2 /* max #of channels allowed */ + +#if XPC_MAX_NCHANNELS > 8 +#error XPC_MAX_NCHANNELS exceeds absolute MAXIMUM possible. 
+#endif
+
+/*
+ * The macro XPC_MSG_SIZE() is provided for users who want to fit as many
+ * msg entries as possible in a given memory size (e.g. a memory page).
+ * (For example, on uv, XPC_MSG_SIZE(48) = ALIGN(16 + 48, 64) = 64.)
+ */
+#define XPC_MSG_MAX_SIZE	128
+#define XPC_MSG_HDR_MAX_SIZE	16
+#define XPC_MSG_PAYLOAD_MAX_SIZE (XPC_MSG_MAX_SIZE - XPC_MSG_HDR_MAX_SIZE)
+
+#define XPC_MSG_SIZE(_payload_size) \
+				ALIGN(XPC_MSG_HDR_MAX_SIZE + (_payload_size), \
+				      is_uv() ? 64 : 128)
+
+
+/*
+ * Define the return values and values passed to user's callout functions.
+ * (It is important to add new value codes at the end just preceding
+ * xpUnknownReason, which must have the highest numerical value.)
+ */
+enum xp_retval {
+	xpSuccess = 0,
+
+	xpNotConnected,		/*  1: channel is not connected */
+	xpConnected,		/*  2: channel connected (opened) */
+	xpRETIRED1,		/*  3: (formerly xpDisconnected) */
+
+	xpMsgReceived,		/*  4: message received */
+	xpMsgDelivered,		/*  5: message delivered and acknowledged */
+
+	xpRETIRED2,		/*  6: (formerly xpTransferFailed) */
+
+	xpNoWait,		/*  7: operation would require wait */
+	xpRetry,		/*  8: retry operation */
+	xpTimeout,		/*  9: timeout in xpc_allocate_msg_wait() */
+	xpInterrupted,		/* 10: interrupted wait */
+
+	xpUnequalMsgSizes,	/* 11: message size disparity between sides */
+	xpInvalidAddress,	/* 12: invalid address */
+
+	xpNoMemory,		/* 13: no memory available for XPC structures */
+	xpLackOfResources,	/* 14: insufficient resources for operation */
+	xpUnregistered,		/* 15: channel is not registered */
+	xpAlreadyRegistered,	/* 16: channel is already registered */
+
+	xpPartitionDown,	/* 17: remote partition is down */
+	xpNotLoaded,		/* 18: XPC module is not loaded */
+	xpUnloading,		/* 19: this side is unloading XPC module */
+
+	xpBadMagic,		/* 20: XPC MAGIC string not found */
+
+	xpReactivating,		/* 21: remote partition was reactivated */
+
+	xpUnregistering,	/* 22: this side is unregistering channel */
+	xpOtherUnregistering,	/* 23: other side is unregistering channel */
+
+	xpCloneKThread,		/* 24: cloning kernel thread */
+	xpCloneKThreadFailed,	/* 25: cloning kernel thread failed */
+
+	xpNoHeartbeat,		/* 26: remote partition has no heartbeat */
+
+	xpPioReadError,		/* 27: PIO read error */
+	xpPhysAddrRegFailed,	/* 28: registration of phys addr range failed */
+
+	xpRETIRED3,		/* 29: (formerly xpBteDirectoryError) */
+	xpRETIRED4,		/* 30: (formerly xpBtePoisonError) */
+	xpRETIRED5,		/* 31: (formerly xpBteWriteError) */
+	xpRETIRED6,		/* 32: (formerly xpBteAccessError) */
+	xpRETIRED7,		/* 33: (formerly xpBtePWriteError) */
+	xpRETIRED8,		/* 34: (formerly xpBtePReadError) */
+	xpRETIRED9,		/* 35: (formerly xpBteTimeOutError) */
+	xpRETIRED10,		/* 36: (formerly xpBteXtalkError) */
+	xpRETIRED11,		/* 37: (formerly xpBteNotAvailable) */
+	xpRETIRED12,		/* 38: (formerly xpBteUnmappedError) */
+
+	xpBadVersion,		/* 39: bad version number */
+	xpVarsNotSet,		/* 40: the XPC variables are not set up */
+	xpNoRsvdPageAddr,	/* 41: unable to get rsvd page's phys addr */
+	xpInvalidPartid,	/* 42: invalid partition ID */
+	xpLocalPartid,		/* 43: local partition ID */
+
+	xpOtherGoingDown,	/* 44: other side going down, reason unknown */
+	xpSystemGoingDown,	/* 45: system is going down, reason unknown */
+	xpSystemHalt,		/* 46: system is being halted */
+	xpSystemReboot,		/* 47: system is being rebooted */
+	xpSystemPoweroff,	/* 48: system is being powered off */
+
+	xpDisconnecting,	/* 49: channel disconnecting (closing) */
+
+	xpOpenCloseError,	/* 50: channel open/close protocol error */
+
+	xpDisconnected,		/* 51: channel disconnected (closed) */
+
+	xpBteCopyError,		/* 52: bte_copy() returned error */
+	xpSalError,		/* 53: sn SAL error */
+	xpRsvdPageNotSet,	/* 54: the reserved page is not set up */
+	xpPayloadTooBig,	/* 55: payload too large for message slot */
+
+	xpUnsupported,		/* 56: unsupported functionality or resource */
+	xpNeedMoreInfo,		/* 57: more info is needed by SAL */
+
+	xpGruCopyError,		/* 58: gru_copy_gru() returned error */
+	xpGruSendMqError,	/* 59: gru send message queue related error */
+
+	xpBadChannelNumber,	/* 60: invalid channel number */
+	xpBadMsgType,		/* 61: invalid message type */
+	xpBiosError,		/* 62: BIOS error */
+
+	xpUnknownReason		/* 63: unknown reason - must be last in enum */
+};
+
+/*
+ * Define the callout function type used by XPC to update the user on
+ * connection activity and state changes via the user function registered
+ * by xpc_connect().
+ *
+ * Arguments:
+ *
+ *	reason - reason code.
+ *	partid - partition ID associated with condition.
+ *	ch_number - channel # associated with condition.
+ *	data - pointer to optional data.
+ *	key - pointer to optional user-defined value provided as the "key"
+ *	      argument to xpc_connect().
+ *
+ * A reason code of xpConnected indicates that a connection has been
+ * established to the specified partition on the specified channel. The data
+ * argument indicates the max number of entries allowed in the message queue.
+ *
+ * A reason code of xpMsgReceived indicates that an XPC message arrived from
+ * the specified partition on the specified channel. The data argument
+ * specifies the address of the message's payload. The user must call
+ * xpc_received() when finished with the payload.
+ *
+ * All other reason codes indicate failure. The data argument is NULL.
+ * When a failure reason code is received, one can assume that the channel
+ * is not connected.
+ */
+typedef void (*xpc_channel_func) (enum xp_retval reason, short partid,
+				  int ch_number, void *data, void *key);
+
+/*
+ * Define the callout function type used by XPC to notify the user of
+ * messages received and delivered via the user function registered by
+ * xpc_send_notify().
+ *
+ * Arguments:
+ *
+ *	reason - reason code.
+ *	partid - partition ID associated with condition.
+ *	ch_number - channel # associated with condition.
+ *	key - pointer to optional user-defined value provided as the "key"
+ *	      argument to xpc_send_notify().
+ *
+ * A reason code of xpMsgDelivered indicates that the message was delivered
+ * to the intended recipient and that they have acknowledged its receipt by
+ * calling xpc_received().
+ *
+ * All other reason codes indicate failure.
+ *
+ * NOTE: The user defined function must be callable by an interrupt handler
+ *       and thus cannot block.
+ */
+typedef void (*xpc_notify_func) (enum xp_retval reason, short partid,
+				 int ch_number, void *key);
+
+/*
+ * The following is a registration entry. There is a global array of these,
+ * one per channel. It is used to record the connection registration made
+ * by the users of XPC. As long as a registration entry exists, for any
+ * partition that comes up, XPC will attempt to establish a connection on
+ * that channel. Notification that a connection has been made will occur via
+ * the xpc_channel_func function.
+ *
+ * The 'func' field points to the function to call when asynchronous
+ * notification is required for such events as: a connection established/lost,
+ * or an incoming message received, or an error condition encountered.
A + * non-NULL 'func' field indicates that there is an active registration for + * the channel. + */ +struct xpc_registration { + struct mutex mutex; + xpc_channel_func func; /* function to call */ + void *key; /* pointer to user's key */ + u16 nentries; /* #of msg entries in local msg queue */ + u16 entry_size; /* message queue's message entry size */ + u32 assigned_limit; /* limit on #of assigned kthreads */ + u32 idle_limit; /* limit on #of idle kthreads */ +} ____cacheline_aligned; + +#define XPC_CHANNEL_REGISTERED(_c) (xpc_registrations[_c].func != NULL) + +/* the following are valid xpc_send() or xpc_send_notify() flags */ +#define XPC_WAIT 0 /* wait flag */ +#define XPC_NOWAIT 1 /* no wait flag */ + +struct xpc_interface { + void (*connect) (int); + void (*disconnect) (int); + enum xp_retval (*send) (short, int, u32, void *, u16); + enum xp_retval (*send_notify) (short, int, u32, void *, u16, + xpc_notify_func, void *); + void (*received) (short, int, void *); + enum xp_retval (*partid_to_nasids) (short, void *); +}; + +extern struct xpc_interface xpc_interface; + +extern void xpc_set_interface(void (*)(int), + void (*)(int), + enum xp_retval (*)(short, int, u32, void *, u16), + enum xp_retval (*)(short, int, u32, void *, u16, + xpc_notify_func, void *), + void (*)(short, int, void *), + enum xp_retval (*)(short, void *)); +extern void xpc_clear_interface(void); + +extern enum xp_retval xpc_connect(int, xpc_channel_func, void *, u16, + u16, u32, u32); +extern void xpc_disconnect(int); + +static inline enum xp_retval +xpc_send(short partid, int ch_number, u32 flags, void *payload, + u16 payload_size) +{ + return xpc_interface.send(partid, ch_number, flags, payload, + payload_size); +} + +static inline enum xp_retval +xpc_send_notify(short partid, int ch_number, u32 flags, void *payload, + u16 payload_size, xpc_notify_func func, void *key) +{ + return xpc_interface.send_notify(partid, ch_number, flags, payload, + payload_size, func, key); +} + +static inline void +xpc_received(short partid, int ch_number, void *payload) +{ + return xpc_interface.received(partid, ch_number, payload); +} + +static inline enum xp_retval +xpc_partid_to_nasids(short partid, void *nasids) +{ + return xpc_interface.partid_to_nasids(partid, nasids); +} + +extern short xp_max_npartitions; +extern short xp_partition_id; +extern u8 xp_region_size; + +extern unsigned long (*xp_pa) (void *); +extern unsigned long (*xp_socket_pa) (unsigned long); +extern enum xp_retval (*xp_remote_memcpy) (unsigned long, const unsigned long, + size_t); +extern int (*xp_cpu_to_nasid) (int); +extern enum xp_retval (*xp_expand_memprotect) (unsigned long, unsigned long); +extern enum xp_retval (*xp_restrict_memprotect) (unsigned long, unsigned long); + +extern u64 xp_nofault_PIOR_target; +extern int xp_nofault_PIOR(void *); +extern int xp_error_PIOR(void); + +extern struct device *xp; +extern enum xp_retval xp_init_sn2(void); +extern enum xp_retval xp_init_uv(void); +extern void xp_exit_sn2(void); +extern void xp_exit_uv(void); + +#endif /* _DRIVERS_MISC_SGIXP_XP_H */ diff --git a/kernel/drivers/misc/sgi-xp/xp_main.c b/kernel/drivers/misc/sgi-xp/xp_main.c new file mode 100644 index 000000000..01be66d02 --- /dev/null +++ b/kernel/drivers/misc/sgi-xp/xp_main.c @@ -0,0 +1,286 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved. 
+ */ + +/* + * Cross Partition (XP) base. + * + * XP provides a base from which its users can interact + * with XPC, yet not be dependent on XPC. + * + */ + +#include <linux/module.h> +#include <linux/device.h> +#include "xp.h" + +/* define the XP debug device structures to be used with dev_dbg() et al */ + +struct device_driver xp_dbg_name = { + .name = "xp" +}; + +struct device xp_dbg_subname = { + .init_name = "", /* set to "" */ + .driver = &xp_dbg_name +}; + +struct device *xp = &xp_dbg_subname; + +/* max #of partitions possible */ +short xp_max_npartitions; +EXPORT_SYMBOL_GPL(xp_max_npartitions); + +short xp_partition_id; +EXPORT_SYMBOL_GPL(xp_partition_id); + +u8 xp_region_size; +EXPORT_SYMBOL_GPL(xp_region_size); + +unsigned long (*xp_pa) (void *addr); +EXPORT_SYMBOL_GPL(xp_pa); + +unsigned long (*xp_socket_pa) (unsigned long gpa); +EXPORT_SYMBOL_GPL(xp_socket_pa); + +enum xp_retval (*xp_remote_memcpy) (unsigned long dst_gpa, + const unsigned long src_gpa, size_t len); +EXPORT_SYMBOL_GPL(xp_remote_memcpy); + +int (*xp_cpu_to_nasid) (int cpuid); +EXPORT_SYMBOL_GPL(xp_cpu_to_nasid); + +enum xp_retval (*xp_expand_memprotect) (unsigned long phys_addr, + unsigned long size); +EXPORT_SYMBOL_GPL(xp_expand_memprotect); +enum xp_retval (*xp_restrict_memprotect) (unsigned long phys_addr, + unsigned long size); +EXPORT_SYMBOL_GPL(xp_restrict_memprotect); + +/* + * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level + * users of XPC. + */ +struct xpc_registration xpc_registrations[XPC_MAX_NCHANNELS]; +EXPORT_SYMBOL_GPL(xpc_registrations); + +/* + * Initialize the XPC interface to indicate that XPC isn't loaded. + */ +static enum xp_retval +xpc_notloaded(void) +{ + return xpNotLoaded; +} + +struct xpc_interface xpc_interface = { + (void (*)(int))xpc_notloaded, + (void (*)(int))xpc_notloaded, + (enum xp_retval(*)(short, int, u32, void *, u16))xpc_notloaded, + (enum xp_retval(*)(short, int, u32, void *, u16, xpc_notify_func, + void *))xpc_notloaded, + (void (*)(short, int, void *))xpc_notloaded, + (enum xp_retval(*)(short, void *))xpc_notloaded +}; +EXPORT_SYMBOL_GPL(xpc_interface); + +/* + * XPC calls this when it (the XPC module) has been loaded. + */ +void +xpc_set_interface(void (*connect) (int), + void (*disconnect) (int), + enum xp_retval (*send) (short, int, u32, void *, u16), + enum xp_retval (*send_notify) (short, int, u32, void *, u16, + xpc_notify_func, void *), + void (*received) (short, int, void *), + enum xp_retval (*partid_to_nasids) (short, void *)) +{ + xpc_interface.connect = connect; + xpc_interface.disconnect = disconnect; + xpc_interface.send = send; + xpc_interface.send_notify = send_notify; + xpc_interface.received = received; + xpc_interface.partid_to_nasids = partid_to_nasids; +} +EXPORT_SYMBOL_GPL(xpc_set_interface); + +/* + * XPC calls this when it (the XPC module) is being unloaded. 
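+ *
+ * (Hypothetical pairing sketch; xpc_main.c is expected to register its
+ * xpc_initiate_* entry points on load and clear them on unload:
+ *
+ *	xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect,
+ *			  xpc_initiate_send, xpc_initiate_send_notify,
+ *			  xpc_initiate_received, xpc_initiate_partid_to_nasids);
+ *	...
+ *	xpc_clear_interface();
+ * )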
+ */
+void
+xpc_clear_interface(void)
+{
+	xpc_interface.connect = (void (*)(int))xpc_notloaded;
+	xpc_interface.disconnect = (void (*)(int))xpc_notloaded;
+	xpc_interface.send = (enum xp_retval(*)(short, int, u32, void *, u16))
+	    xpc_notloaded;
+	xpc_interface.send_notify = (enum xp_retval(*)(short, int, u32, void *,
+						       u16, xpc_notify_func,
+						       void *))xpc_notloaded;
+	xpc_interface.received = (void (*)(short, int, void *))
+	    xpc_notloaded;
+	xpc_interface.partid_to_nasids = (enum xp_retval(*)(short, void *))
+	    xpc_notloaded;
+}
+EXPORT_SYMBOL_GPL(xpc_clear_interface);
+
+/*
+ * Register for automatic establishment of a channel connection whenever
+ * a partition comes up.
+ *
+ * Arguments:
+ *
+ *	ch_number - channel # to register for connection.
+ *	func - function to call for asynchronous notification of channel
+ *	       state changes (i.e., connection, disconnection, error) and
+ *	       the arrival of incoming messages.
+ *	key - pointer to optional user-defined value that gets passed back
+ *	      to the user on any callouts made to func.
+ *	payload_size - size in bytes of the XPC message's payload area which
+ *		       contains a user-defined message. The user should make
+ *		       this large enough to hold their largest message.
+ *	nentries - max #of XPC message entries a message queue can contain.
+ *		   The actual number, which is determined when a connection
+ *		   is established and may be less than requested, will be
+ *		   passed to the user via the xpConnected callout.
+ *	assigned_limit - max number of kthreads allowed to be processing
+ *			 messages (per connection) at any given instant.
+ *	idle_limit - max number of kthreads allowed to be idle at any given
+ *		     instant.
+ */
+enum xp_retval
+xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size,
+	    u16 nentries, u32 assigned_limit, u32 idle_limit)
+{
+	struct xpc_registration *registration;
+
+	DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
+	DBUG_ON(payload_size == 0 || nentries == 0);
+	DBUG_ON(func == NULL);
+	DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit);
+
+	if (XPC_MSG_SIZE(payload_size) > XPC_MSG_MAX_SIZE)
+		return xpPayloadTooBig;
+
+	registration = &xpc_registrations[ch_number];
+
+	if (mutex_lock_interruptible(&registration->mutex) != 0)
+		return xpInterrupted;
+
+	/* if XPC_CHANNEL_REGISTERED(ch_number) */
+	if (registration->func != NULL) {
+		mutex_unlock(&registration->mutex);
+		return xpAlreadyRegistered;
+	}
+
+	/* register the channel for connection */
+	registration->entry_size = XPC_MSG_SIZE(payload_size);
+	registration->nentries = nentries;
+	registration->assigned_limit = assigned_limit;
+	registration->idle_limit = idle_limit;
+	registration->key = key;
+	registration->func = func;
+
+	mutex_unlock(&registration->mutex);
+
+	xpc_interface.connect(ch_number);
+
+	return xpSuccess;
+}
+EXPORT_SYMBOL_GPL(xpc_connect);
+
+/*
+ * Remove the registration for automatic connection of the specified channel
+ * when a partition comes up.
+ *
+ * Before returning, xpc_disconnect() will wait until all connections on the
+ * specified channel have been closed/torn down. So the caller can be assured
+ * that they will not be receiving any more callouts from XPC to their
+ * function registered via xpc_connect().
+ *
+ * Arguments:
+ *
+ *	ch_number - channel # to unregister.
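+ *
+ * A minimal usage sketch of the pair (hypothetical caller; values are
+ * illustrative only):
+ *
+ *	ret = xpc_connect(XPC_NET_CHANNEL, my_channel_func, NULL,
+ *			  XPC_MSG_PAYLOAD_MAX_SIZE, 128, 4, 2);
+ *	...
+ *	xpc_disconnect(XPC_NET_CHANNEL);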
+ */
+void
+xpc_disconnect(int ch_number)
+{
+	struct xpc_registration *registration;
+
+	DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
+
+	registration = &xpc_registrations[ch_number];
+
+	/*
+	 * We've decided not to make this a down_interruptible(), since we
+	 * figured XPC's users will just turn around and call xpc_disconnect()
+	 * again anyways, so we might as well wait, if need be.
+	 */
+	mutex_lock(&registration->mutex);
+
+	/* if !XPC_CHANNEL_REGISTERED(ch_number) */
+	if (registration->func == NULL) {
+		mutex_unlock(&registration->mutex);
+		return;
+	}
+
+	/* remove the connection registration for the specified channel */
+	registration->func = NULL;
+	registration->key = NULL;
+	registration->nentries = 0;
+	registration->entry_size = 0;
+	registration->assigned_limit = 0;
+	registration->idle_limit = 0;
+
+	xpc_interface.disconnect(ch_number);
+
+	mutex_unlock(&registration->mutex);
+
+	return;
+}
+EXPORT_SYMBOL_GPL(xpc_disconnect);
+
+int __init
+xp_init(void)
+{
+	enum xp_retval ret;
+	int ch_number;
+
+	/* initialize the connection registration mutex */
+	for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++)
+		mutex_init(&xpc_registrations[ch_number].mutex);
+
+	if (is_shub())
+		ret = xp_init_sn2();
+	else if (is_uv())
+		ret = xp_init_uv();
+	else
+		ret = 0;
+
+	if (ret != xpSuccess)
+		return ret;
+
+	return 0;
+}
+
+module_init(xp_init);
+
+void __exit
+xp_exit(void)
+{
+	if (is_shub())
+		xp_exit_sn2();
+	else if (is_uv())
+		xp_exit_uv();
+}
+
+module_exit(xp_exit);
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION("Cross Partition (XP) base");
+MODULE_LICENSE("GPL");
diff --git a/kernel/drivers/misc/sgi-xp/xp_nofault.S b/kernel/drivers/misc/sgi-xp/xp_nofault.S
new file mode 100644
index 000000000..e38d43319
--- /dev/null
+++ b/kernel/drivers/misc/sgi-xp/xp_nofault.S
@@ -0,0 +1,35 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2008 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * The xp_nofault_PIOR function takes a pointer to a remote PIO register
+ * and attempts to load and consume a value from it. This function
+ * will be registered as a nofault code block. In the event that the
+ * PIO read fails, the MCA handler will force the error to look
+ * corrected and vector to the xp_error_PIOR which will return an error.
+ *
+ * The definition of "consumption" and the time it takes for an MCA
+ * to surface is processor implementation specific. This code
+ * is sufficient on Itanium through the Montvale processor family.
+ * It may need to be adjusted for future processor implementations.
+ *
+ *	extern int xp_nofault_PIOR(void *remote_register);
+ */
+
+	.global xp_nofault_PIOR
+xp_nofault_PIOR:
+	mov	r8=r0			// Stage a success return value
+	ld8.acq	r9=[r32];;		// PIO Read the specified register
+	adds	r9=1,r9;;		// Add to force consumption
+	srlz.i;;			// Allow time for MCA to surface
+	br.ret.sptk.many b0;;		// Return success
+
+	.global xp_error_PIOR
+xp_error_PIOR:
+	mov	r8=1			// Return value of 1
+	br.ret.sptk.many b0;;		// Return failure
diff --git a/kernel/drivers/misc/sgi-xp/xp_sn2.c b/kernel/drivers/misc/sgi-xp/xp_sn2.c
new file mode 100644
index 000000000..d8e463f87
--- /dev/null
+++ b/kernel/drivers/misc/sgi-xp/xp_sn2.c
@@ -0,0 +1,190 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.
See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition (XP) sn2-based functions. + * + * Architecture specific implementation of common functions. + */ + +#include <linux/module.h> +#include <linux/device.h> +#include <asm/sn/bte.h> +#include <asm/sn/sn_sal.h> +#include "xp.h" + +/* + * The export of xp_nofault_PIOR needs to happen here since it is defined + * in drivers/misc/sgi-xp/xp_nofault.S. The target of the nofault read is + * defined here. + */ +EXPORT_SYMBOL_GPL(xp_nofault_PIOR); + +u64 xp_nofault_PIOR_target; +EXPORT_SYMBOL_GPL(xp_nofault_PIOR_target); + +/* + * Register a nofault code region which performs a cross-partition PIO read. + * If the PIO read times out, the MCA handler will consume the error and + * return to a kernel-provided instruction to indicate an error. This PIO read + * exists because it is guaranteed to timeout if the destination is down + * (amo operations do not timeout on at least some CPUs on Shubs <= v1.2, + * which unfortunately we have to work around). + */ +static enum xp_retval +xp_register_nofault_code_sn2(void) +{ + int ret; + u64 func_addr; + u64 err_func_addr; + + func_addr = *(u64 *)xp_nofault_PIOR; + err_func_addr = *(u64 *)xp_error_PIOR; + ret = sn_register_nofault_code(func_addr, err_func_addr, err_func_addr, + 1, 1); + if (ret != 0) { + dev_err(xp, "can't register nofault code, error=%d\n", ret); + return xpSalError; + } + /* + * Setup the nofault PIO read target. (There is no special reason why + * SH_IPI_ACCESS was selected.) + */ + if (is_shub1()) + xp_nofault_PIOR_target = SH1_IPI_ACCESS; + else if (is_shub2()) + xp_nofault_PIOR_target = SH2_IPI_ACCESS0; + + return xpSuccess; +} + +static void +xp_unregister_nofault_code_sn2(void) +{ + u64 func_addr = *(u64 *)xp_nofault_PIOR; + u64 err_func_addr = *(u64 *)xp_error_PIOR; + + /* unregister the PIO read nofault code region */ + (void)sn_register_nofault_code(func_addr, err_func_addr, + err_func_addr, 1, 0); +} + +/* + * Convert a virtual memory address to a physical memory address. + */ +static unsigned long +xp_pa_sn2(void *addr) +{ + return __pa(addr); +} + +/* + * Convert a global physical to a socket physical address. + */ +static unsigned long +xp_socket_pa_sn2(unsigned long gpa) +{ + return gpa; +} + +/* + * Wrapper for bte_copy(). + * + * dst_pa - physical address of the destination of the transfer. + * src_pa - physical address of the source of the transfer. + * len - number of bytes to transfer from source to destination. + * + * Note: xp_remote_memcpy_sn2() should never be called while holding a spinlock. 
+ */
+static enum xp_retval
+xp_remote_memcpy_sn2(unsigned long dst_pa, const unsigned long src_pa,
+		     size_t len)
+{
+	bte_result_t ret;
+
+	ret = bte_copy(src_pa, dst_pa, len, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
+	if (ret == BTE_SUCCESS)
+		return xpSuccess;
+
+	if (is_shub2()) {
+		dev_err(xp, "bte_copy() on shub2 failed, error=0x%x dst_pa="
+			"0x%016lx src_pa=0x%016lx len=%ld\n", ret, dst_pa,
+			src_pa, len);
+	} else {
+		dev_err(xp, "bte_copy() failed, error=%d dst_pa=0x%016lx "
+			"src_pa=0x%016lx len=%ld\n", ret, dst_pa, src_pa, len);
+	}
+
+	return xpBteCopyError;
+}
+
+static int
+xp_cpu_to_nasid_sn2(int cpuid)
+{
+	return cpuid_to_nasid(cpuid);
+}
+
+static enum xp_retval
+xp_expand_memprotect_sn2(unsigned long phys_addr, unsigned long size)
+{
+	u64 nasid_array = 0;
+	int ret;
+
+	ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_1,
+				   &nasid_array);
+	if (ret != 0) {
+		dev_err(xp, "sn_change_memprotect(,, "
+			"SN_MEMPROT_ACCESS_CLASS_1,) failed ret=%d\n", ret);
+		return xpSalError;
+	}
+	return xpSuccess;
+}
+
+static enum xp_retval
+xp_restrict_memprotect_sn2(unsigned long phys_addr, unsigned long size)
+{
+	u64 nasid_array = 0;
+	int ret;
+
+	ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_0,
+				   &nasid_array);
+	if (ret != 0) {
+		dev_err(xp, "sn_change_memprotect(,, "
+			"SN_MEMPROT_ACCESS_CLASS_0,) failed ret=%d\n", ret);
+		return xpSalError;
+	}
+	return xpSuccess;
+}
+
+enum xp_retval
+xp_init_sn2(void)
+{
+	BUG_ON(!is_shub());
+
+	xp_max_npartitions = XP_MAX_NPARTITIONS_SN2;
+	xp_partition_id = sn_partition_id;
+	xp_region_size = sn_region_size;
+
+	xp_pa = xp_pa_sn2;
+	xp_socket_pa = xp_socket_pa_sn2;
+	xp_remote_memcpy = xp_remote_memcpy_sn2;
+	xp_cpu_to_nasid = xp_cpu_to_nasid_sn2;
+	xp_expand_memprotect = xp_expand_memprotect_sn2;
+	xp_restrict_memprotect = xp_restrict_memprotect_sn2;
+
+	return xp_register_nofault_code_sn2();
+}
+
+void
+xp_exit_sn2(void)
+{
+	BUG_ON(!is_shub());
+
+	xp_unregister_nofault_code_sn2();
+}
+
diff --git a/kernel/drivers/misc/sgi-xp/xp_uv.c b/kernel/drivers/misc/sgi-xp/xp_uv.c
new file mode 100644
index 000000000..a0d093274
--- /dev/null
+++ b/kernel/drivers/misc/sgi-xp/xp_uv.c
@@ -0,0 +1,171 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * Cross Partition (XP) uv-based functions.
+ *
+ *	Architecture specific implementation of common functions.
+ *
+ */
+
+#include <linux/device.h>
+#include <asm/uv/uv_hub.h>
+#if defined CONFIG_X86_64
+#include <asm/uv/bios.h>
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+#include <asm/sn/sn_sal.h>
+#endif
+#include "../sgi-gru/grukservices.h"
+#include "xp.h"
+
+/*
+ * Convert a virtual memory address to a physical memory address.
+ */
+static unsigned long
+xp_pa_uv(void *addr)
+{
+	return uv_gpa(addr);
+}
+
+/*
+ * Convert a global physical to socket physical address.
+ */ +static unsigned long +xp_socket_pa_uv(unsigned long gpa) +{ + return uv_gpa_to_soc_phys_ram(gpa); +} + +static enum xp_retval +xp_remote_mmr_read(unsigned long dst_gpa, const unsigned long src_gpa, + size_t len) +{ + int ret; + unsigned long *dst_va = __va(uv_gpa_to_soc_phys_ram(dst_gpa)); + + BUG_ON(!uv_gpa_in_mmr_space(src_gpa)); + BUG_ON(len != 8); + + ret = gru_read_gpa(dst_va, src_gpa); + if (ret == 0) + return xpSuccess; + + dev_err(xp, "gru_read_gpa() failed, dst_gpa=0x%016lx src_gpa=0x%016lx " + "len=%ld\n", dst_gpa, src_gpa, len); + return xpGruCopyError; +} + + +static enum xp_retval +xp_remote_memcpy_uv(unsigned long dst_gpa, const unsigned long src_gpa, + size_t len) +{ + int ret; + + if (uv_gpa_in_mmr_space(src_gpa)) + return xp_remote_mmr_read(dst_gpa, src_gpa, len); + + ret = gru_copy_gpa(dst_gpa, src_gpa, len); + if (ret == 0) + return xpSuccess; + + dev_err(xp, "gru_copy_gpa() failed, dst_gpa=0x%016lx src_gpa=0x%016lx " + "len=%ld\n", dst_gpa, src_gpa, len); + return xpGruCopyError; +} + +static int +xp_cpu_to_nasid_uv(int cpuid) +{ + /* ??? Is this same as sn2 nasid in mach/part bitmaps set up by SAL? */ + return UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpuid)); +} + +static enum xp_retval +xp_expand_memprotect_uv(unsigned long phys_addr, unsigned long size) +{ + int ret; + +#if defined CONFIG_X86_64 + ret = uv_bios_change_memprotect(phys_addr, size, UV_MEMPROT_ALLOW_RW); + if (ret != BIOS_STATUS_SUCCESS) { + dev_err(xp, "uv_bios_change_memprotect(,, " + "UV_MEMPROT_ALLOW_RW) failed, ret=%d\n", ret); + return xpBiosError; + } + +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV + u64 nasid_array; + + ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_1, + &nasid_array); + if (ret != 0) { + dev_err(xp, "sn_change_memprotect(,, " + "SN_MEMPROT_ACCESS_CLASS_1,) failed ret=%d\n", ret); + return xpSalError; + } +#else + #error not a supported configuration +#endif + return xpSuccess; +} + +static enum xp_retval +xp_restrict_memprotect_uv(unsigned long phys_addr, unsigned long size) +{ + int ret; + +#if defined CONFIG_X86_64 + ret = uv_bios_change_memprotect(phys_addr, size, + UV_MEMPROT_RESTRICT_ACCESS); + if (ret != BIOS_STATUS_SUCCESS) { + dev_err(xp, "uv_bios_change_memprotect(,, " + "UV_MEMPROT_RESTRICT_ACCESS) failed, ret=%d\n", ret); + return xpBiosError; + } + +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV + u64 nasid_array; + + ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_0, + &nasid_array); + if (ret != 0) { + dev_err(xp, "sn_change_memprotect(,, " + "SN_MEMPROT_ACCESS_CLASS_0,) failed ret=%d\n", ret); + return xpSalError; + } +#else + #error not a supported configuration +#endif + return xpSuccess; +} + +enum xp_retval +xp_init_uv(void) +{ + BUG_ON(!is_uv()); + + xp_max_npartitions = XP_MAX_NPARTITIONS_UV; + xp_partition_id = sn_partition_id; + xp_region_size = sn_region_size; + + xp_pa = xp_pa_uv; + xp_socket_pa = xp_socket_pa_uv; + xp_remote_memcpy = xp_remote_memcpy_uv; + xp_cpu_to_nasid = xp_cpu_to_nasid_uv; + xp_expand_memprotect = xp_expand_memprotect_uv; + xp_restrict_memprotect = xp_restrict_memprotect_uv; + + return xpSuccess; +} + +void +xp_exit_uv(void) +{ + BUG_ON(!is_uv()); +} diff --git a/kernel/drivers/misc/sgi-xp/xpc.h b/kernel/drivers/misc/sgi-xp/xpc.h new file mode 100644 index 000000000..b94d5f767 --- /dev/null +++ b/kernel/drivers/misc/sgi-xp/xpc.h @@ -0,0 +1,1004 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. 
See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) structures and macros. + */ + +#ifndef _DRIVERS_MISC_SGIXP_XPC_H +#define _DRIVERS_MISC_SGIXP_XPC_H + +#include <linux/wait.h> +#include <linux/completion.h> +#include <linux/timer.h> +#include <linux/sched.h> +#include "xp.h" + +/* + * XPC Version numbers consist of a major and minor number. XPC can always + * talk to versions with same major #, and never talk to versions with a + * different major #. + */ +#define _XPC_VERSION(_maj, _min) (((_maj) << 4) | ((_min) & 0xf)) +#define XPC_VERSION_MAJOR(_v) ((_v) >> 4) +#define XPC_VERSION_MINOR(_v) ((_v) & 0xf) + +/* define frequency of the heartbeat and frequency how often it's checked */ +#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */ +#define XPC_HB_CHECK_DEFAULT_INTERVAL 20 /* check HB every x secs */ + +/* define the process name of HB checker and the CPU it is pinned to */ +#define XPC_HB_CHECK_THREAD_NAME "xpc_hb" +#define XPC_HB_CHECK_CPU 0 + +/* define the process name of the discovery thread */ +#define XPC_DISCOVERY_THREAD_NAME "xpc_discovery" + +/* + * the reserved page + * + * SAL reserves one page of memory per partition for XPC. Though a full page + * in length (16384 bytes), its starting address is not page aligned, but it + * is cacheline aligned. The reserved page consists of the following: + * + * reserved page header + * + * The first two 64-byte cachelines of the reserved page contain the + * header (struct xpc_rsvd_page). Before SAL initialization has completed, + * SAL has set up the following fields of the reserved page header: + * SAL_signature, SAL_version, SAL_partid, and SAL_nasids_size. The + * other fields are set up by XPC. (xpc_rsvd_page points to the local + * partition's reserved page.) + * + * part_nasids mask + * mach_nasids mask + * + * SAL also sets up two bitmaps (or masks), one that reflects the actual + * nasids in this partition (part_nasids), and the other that reflects + * the actual nasids in the entire machine (mach_nasids). We're only + * interested in the even numbered nasids (which contain the processors + * and/or memory), so we only need half as many bits to represent the + * nasids. When mapping nasid to bit in a mask (or bit to nasid) be sure + * to either divide or multiply by 2. The part_nasids mask is located + * starting at the first cacheline following the reserved page header. The + * mach_nasids mask follows right after the part_nasids mask. The size in + * bytes of each mask is reflected by the reserved page header field + * 'SAL_nasids_size'. (Local partition's mask pointers are xpc_part_nasids + * and xpc_mach_nasids.) + * + * vars (ia64-sn2 only) + * vars part (ia64-sn2 only) + * + * Immediately following the mach_nasids mask are the XPC variables + * required by other partitions. First are those that are generic to all + * partitions (vars), followed on the next available cacheline by those + * which are partition specific (vars part). These are setup by XPC. + * (Local partition's vars pointers are xpc_vars and xpc_vars_part.) + * + * Note: Until 'ts_jiffies' is set non-zero, the partition XPC code has not been + * initialized. 
+ */ +struct xpc_rsvd_page { + u64 SAL_signature; /* SAL: unique signature */ + u64 SAL_version; /* SAL: version */ + short SAL_partid; /* SAL: partition ID */ + short max_npartitions; /* value of XPC_MAX_PARTITIONS */ + u8 version; + u8 pad1[3]; /* align to next u64 in 1st 64-byte cacheline */ + unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */ + union { + struct { + unsigned long vars_pa; /* phys addr */ + } sn2; + struct { + unsigned long heartbeat_gpa; /* phys addr */ + unsigned long activate_gru_mq_desc_gpa; /* phys addr */ + } uv; + } sn; + u64 pad2[9]; /* align to last u64 in 2nd 64-byte cacheline */ + u64 SAL_nasids_size; /* SAL: size of each nasid mask in bytes */ +}; + +#define XPC_RP_VERSION _XPC_VERSION(3, 0) /* version 3.0 of the reserved page */ + +/* + * Define the structures by which XPC variables can be exported to other + * partitions. (There are two: struct xpc_vars and struct xpc_vars_part) + */ + +/* + * The following structure describes the partition generic variables + * needed by other partitions in order to properly initialize. + * + * struct xpc_vars version number also applies to struct xpc_vars_part. + * Changes to either structure and/or related functionality should be + * reflected by incrementing either the major or minor version numbers + * of struct xpc_vars. + */ +struct xpc_vars_sn2 { + u8 version; + u64 heartbeat; + DECLARE_BITMAP(heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2); + u64 heartbeat_offline; /* if 0, heartbeat should be changing */ + int activate_IRQ_nasid; + int activate_IRQ_phys_cpuid; + unsigned long vars_part_pa; + unsigned long amos_page_pa;/* paddr of page of amos from MSPEC driver */ + struct amo *amos_page; /* vaddr of page of amos from MSPEC driver */ +}; + +#define XPC_V_VERSION _XPC_VERSION(3, 1) /* version 3.1 of the cross vars */ + +/* + * The following structure describes the per partition specific variables. + * + * An array of these structures, one per partition, will be defined. As a + * partition becomes active XPC will copy the array entry corresponding to + * itself from that partition. It is desirable that the size of this structure + * evenly divides into a 128-byte cacheline, such that none of the entries in + * this array crosses a 128-byte cacheline boundary. As it is now, each entry + * occupies 64-bytes. + */ +struct xpc_vars_part_sn2 { + u64 magic; + + unsigned long openclose_args_pa; /* phys addr of open and close args */ + unsigned long GPs_pa; /* physical address of Get/Put values */ + + unsigned long chctl_amo_pa; /* physical address of chctl flags' amo */ + + int notify_IRQ_nasid; /* nasid of where to send notify IRQs */ + int notify_IRQ_phys_cpuid; /* CPUID of where to send notify IRQs */ + + u8 nchannels; /* #of defined channels supported */ + + u8 reserved[23]; /* pad to a full 64 bytes */ +}; + +/* + * The vars_part MAGIC numbers play a part in the first contact protocol. + * + * MAGIC1 indicates that the per partition specific variables for a remote + * partition have been initialized by this partition. + * + * MAGIC2 indicates that this partition has pulled the remote partititions + * per partition variables that pertain to this partition. 
+ */
+#define XPC_VP_MAGIC1_SN2 0x0053524156435058L /* 'XPCVARS\0'L (little endian) */
+#define XPC_VP_MAGIC2_SN2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */
+
+/* the reserved page sizes and offsets */
+
+#define XPC_RP_HEADER_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))
+#define XPC_RP_VARS_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_vars_sn2))
+
+#define XPC_RP_PART_NASIDS(_rp) ((unsigned long *)((u8 *)(_rp) + \
+ XPC_RP_HEADER_SIZE))
+#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + \
+ xpc_nasid_mask_nlongs)
+#define XPC_RP_VARS(_rp) ((struct xpc_vars_sn2 *) \
+ (XPC_RP_MACH_NASIDS(_rp) + \
+ xpc_nasid_mask_nlongs))
+
+
+/*
+ * The following structure describes the partition's heartbeat info which
+ * will be periodically read by other partitions to determine whether this
+ * XPC is still 'alive'.
+ */
+struct xpc_heartbeat_uv {
+ unsigned long value;
+ unsigned long offline; /* if 0, heartbeat should be changing */
+};
+
+/*
+ * Info pertinent to a GRU message queue using a watch list for irq generation.
+ */
+struct xpc_gru_mq_uv {
+ void *address; /* address of GRU message queue */
+ unsigned int order; /* size of GRU message queue as a power of 2 */
+ int irq; /* irq raised when message is received in mq */
+ int mmr_blade; /* blade where watchlist was allocated from */
+ unsigned long mmr_offset; /* offset of irq mmr located on mmr_blade */
+ unsigned long mmr_value; /* value of irq mmr located on mmr_blade */
+ int watchlist_num; /* number of watchlist allocated by BIOS */
+ void *gru_mq_desc; /* opaque structure used by the GRU driver */
+};
+
+/*
+ * The activate_mq is used to send/receive GRU messages that affect XPC's
+ * partition active state and channel state. This is uv only.
+ */
+struct xpc_activate_mq_msghdr_uv {
+ unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */
+ short partid; /* sender's partid */
+ u8 act_state; /* sender's act_state at time msg sent */
+ u8 type; /* message's type */
+ unsigned long rp_ts_jiffies; /* timestamp of sender's rp setup by XPC */
+};
+
+/* activate_mq defined message types */
+#define XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV 0
+
+#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV 1
+#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV 2
+
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV 3
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV 4
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV 5
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV 6
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV 7
+
+#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV 8
+#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV 9
+
+struct xpc_activate_mq_msg_uv {
+ struct xpc_activate_mq_msghdr_uv hdr;
+};
+
+struct xpc_activate_mq_msg_activate_req_uv {
+ struct xpc_activate_mq_msghdr_uv hdr;
+ unsigned long rp_gpa;
+ unsigned long heartbeat_gpa;
+ unsigned long activate_gru_mq_desc_gpa;
+};
+
+struct xpc_activate_mq_msg_deactivate_req_uv {
+ struct xpc_activate_mq_msghdr_uv hdr;
+ enum xp_retval reason;
+};
+
+struct xpc_activate_mq_msg_chctl_closerequest_uv {
+ struct xpc_activate_mq_msghdr_uv hdr;
+ short ch_number;
+ enum xp_retval reason;
+};
+
+struct xpc_activate_mq_msg_chctl_closereply_uv {
+ struct xpc_activate_mq_msghdr_uv hdr;
+ short ch_number;
+};
+
+struct xpc_activate_mq_msg_chctl_openrequest_uv {
+ struct xpc_activate_mq_msghdr_uv hdr;
+ short ch_number;
+ short entry_size; /* size of notify_mq's GRU messages */
+ short local_nentries; /* ??? Is this needed? What is?
*/ +}; + +struct xpc_activate_mq_msg_chctl_openreply_uv { + struct xpc_activate_mq_msghdr_uv hdr; + short ch_number; + short remote_nentries; /* ??? Is this needed? What is? */ + short local_nentries; /* ??? Is this needed? What is? */ + unsigned long notify_gru_mq_desc_gpa; +}; + +struct xpc_activate_mq_msg_chctl_opencomplete_uv { + struct xpc_activate_mq_msghdr_uv hdr; + short ch_number; +}; + +/* + * Functions registered by add_timer() or called by kernel_thread() only + * allow for a single 64-bit argument. The following macros can be used to + * pack and unpack two (32-bit, 16-bit or 8-bit) arguments into or out from + * the passed argument. + */ +#define XPC_PACK_ARGS(_arg1, _arg2) \ + ((((u64)_arg1) & 0xffffffff) | \ + ((((u64)_arg2) & 0xffffffff) << 32)) + +#define XPC_UNPACK_ARG1(_args) (((u64)_args) & 0xffffffff) +#define XPC_UNPACK_ARG2(_args) ((((u64)_args) >> 32) & 0xffffffff) + +/* + * Define a Get/Put value pair (pointers) used with a message queue. + */ +struct xpc_gp_sn2 { + s64 get; /* Get value */ + s64 put; /* Put value */ +}; + +#define XPC_GP_SIZE \ + L1_CACHE_ALIGN(sizeof(struct xpc_gp_sn2) * XPC_MAX_NCHANNELS) + +/* + * Define a structure that contains arguments associated with opening and + * closing a channel. + */ +struct xpc_openclose_args { + u16 reason; /* reason why channel is closing */ + u16 entry_size; /* sizeof each message entry */ + u16 remote_nentries; /* #of message entries in remote msg queue */ + u16 local_nentries; /* #of message entries in local msg queue */ + unsigned long local_msgqueue_pa; /* phys addr of local message queue */ +}; + +#define XPC_OPENCLOSE_ARGS_SIZE \ + L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * \ + XPC_MAX_NCHANNELS) + + +/* + * Structures to define a fifo singly-linked list. + */ + +struct xpc_fifo_entry_uv { + struct xpc_fifo_entry_uv *next; +}; + +struct xpc_fifo_head_uv { + struct xpc_fifo_entry_uv *first; + struct xpc_fifo_entry_uv *last; + spinlock_t lock; + int n_entries; +}; + +/* + * Define a sn2 styled message. + * + * A user-defined message resides in the payload area. The max size of the + * payload is defined by the user via xpc_connect(). + * + * The size of a message entry (within a message queue) must be a 128-byte + * cacheline sized multiple in order to facilitate the BTE transfer of messages + * from one message queue to another. + */ +struct xpc_msg_sn2 { + u8 flags; /* FOR XPC INTERNAL USE ONLY */ + u8 reserved[7]; /* FOR XPC INTERNAL USE ONLY */ + s64 number; /* FOR XPC INTERNAL USE ONLY */ + + u64 payload; /* user defined portion of message */ +}; + +/* struct xpc_msg_sn2 flags */ + +#define XPC_M_SN2_DONE 0x01 /* msg has been received/consumed */ +#define XPC_M_SN2_READY 0x02 /* msg is ready to be sent */ +#define XPC_M_SN2_INTERRUPT 0x04 /* send interrupt when msg consumed */ + +/* + * The format of a uv XPC notify_mq GRU message is as follows: + * + * A user-defined message resides in the payload area. The max size of the + * payload is defined by the user via xpc_connect(). + * + * The size of a message (payload and header) sent via the GRU must be either 1 + * or 2 GRU_CACHE_LINE_BYTES in length. 
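+ *
+ * For example, assuming GRU_CACHE_LINE_BYTES is 64, a two cacheline
+ * message leaves 128 - sizeof(struct xpc_notify_mq_msghdr_uv) bytes
+ * for the user's payload.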
+ */ + +struct xpc_notify_mq_msghdr_uv { + union { + unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */ + struct xpc_fifo_entry_uv next; /* FOR XPC INTERNAL USE ONLY */ + } u; + short partid; /* FOR XPC INTERNAL USE ONLY */ + u8 ch_number; /* FOR XPC INTERNAL USE ONLY */ + u8 size; /* FOR XPC INTERNAL USE ONLY */ + unsigned int msg_slot_number; /* FOR XPC INTERNAL USE ONLY */ +}; + +struct xpc_notify_mq_msg_uv { + struct xpc_notify_mq_msghdr_uv hdr; + unsigned long payload; +}; + +/* + * Define sn2's notify entry. + * + * This is used to notify a message's sender that their message was received + * and consumed by the intended recipient. + */ +struct xpc_notify_sn2 { + u8 type; /* type of notification */ + + /* the following two fields are only used if type == XPC_N_CALL */ + xpc_notify_func func; /* user's notify function */ + void *key; /* pointer to user's key */ +}; + +/* struct xpc_notify_sn2 type of notification */ + +#define XPC_N_CALL 0x01 /* notify function provided by user */ + +/* + * Define uv's version of the notify entry. It additionally is used to allocate + * a msg slot on the remote partition into which is copied a sent message. + */ +struct xpc_send_msg_slot_uv { + struct xpc_fifo_entry_uv next; + unsigned int msg_slot_number; + xpc_notify_func func; /* user's notify function */ + void *key; /* pointer to user's key */ +}; + +/* + * Define the structure that manages all the stuff required by a channel. In + * particular, they are used to manage the messages sent across the channel. + * + * This structure is private to a partition, and is NOT shared across the + * partition boundary. + * + * There is an array of these structures for each remote partition. It is + * allocated at the time a partition becomes active. The array contains one + * of these structures for each potential channel connection to that partition. + */ + +/* + * The following is sn2 only. + * + * Each channel structure manages two message queues (circular buffers). + * They are allocated at the time a channel connection is made. One of + * these message queues (local_msgqueue) holds the locally created messages + * that are destined for the remote partition. The other of these message + * queues (remote_msgqueue) is a locally cached copy of the remote partition's + * own local_msgqueue. + * + * The following is a description of the Get/Put pointers used to manage these + * two message queues. Consider the local_msgqueue to be on one partition + * and the remote_msgqueue to be its cached copy on another partition. A + * description of what each of the lettered areas contains is included. + * + * + * local_msgqueue remote_msgqueue + * + * |/////////| |/////////| + * w_remote_GP.get --> +---------+ |/////////| + * | F | |/////////| + * remote_GP.get --> +---------+ +---------+ <-- local_GP->get + * | | | | + * | | | E | + * | | | | + * | | +---------+ <-- w_local_GP.get + * | B | |/////////| + * | | |////D////| + * | | |/////////| + * | | +---------+ <-- w_remote_GP.put + * | | |////C////| + * local_GP->put --> +---------+ +---------+ <-- remote_GP.put + * | | |/////////| + * | A | |/////////| + * | | |/////////| + * w_local_GP.put --> +---------+ |/////////| + * |/////////| |/////////| + * + * + * ( remote_GP.[get|put] are cached copies of the remote + * partition's local_GP->[get|put], and thus their values can + * lag behind their counterparts on the remote partition. ) + * + * + * A - Messages that have been allocated, but have not yet been sent to the + * remote partition. 
+ * + * B - Messages that have been sent, but have not yet been acknowledged by the + * remote partition as having been received. + * + * C - Area that needs to be prepared for the copying of sent messages, by + * the clearing of the message flags of any previously received messages. + * + * D - Area into which sent messages are to be copied from the remote + * partition's local_msgqueue and then delivered to their intended + * recipients. [ To allow for a multi-message copy, another pointer + * (next_msg_to_pull) has been added to keep track of the next message + * number needing to be copied (pulled). It chases after w_remote_GP.put. + * Any messages lying between w_local_GP.get and next_msg_to_pull have + * been copied and are ready to be delivered. ] + * + * E - Messages that have been copied and delivered, but have not yet been + * acknowledged by the recipient as having been received. + * + * F - Messages that have been acknowledged, but XPC has not yet notified the + * sender that the message was received by its intended recipient. + * This is also an area that needs to be prepared for the allocating of + * new messages, by the clearing of the message flags of the acknowledged + * messages. + */ + +struct xpc_channel_sn2 { + struct xpc_openclose_args *local_openclose_args; /* args passed on */ + /* opening or closing of channel */ + + void *local_msgqueue_base; /* base address of kmalloc'd space */ + struct xpc_msg_sn2 *local_msgqueue; /* local message queue */ + void *remote_msgqueue_base; /* base address of kmalloc'd space */ + struct xpc_msg_sn2 *remote_msgqueue; /* cached copy of remote */ + /* partition's local message queue */ + unsigned long remote_msgqueue_pa; /* phys addr of remote partition's */ + /* local message queue */ + + struct xpc_notify_sn2 *notify_queue;/* notify queue for messages sent */ + + /* various flavors of local and remote Get/Put values */ + + struct xpc_gp_sn2 *local_GP; /* local Get/Put values */ + struct xpc_gp_sn2 remote_GP; /* remote Get/Put values */ + struct xpc_gp_sn2 w_local_GP; /* working local Get/Put values */ + struct xpc_gp_sn2 w_remote_GP; /* working remote Get/Put values */ + s64 next_msg_to_pull; /* Put value of next msg to pull */ + + struct mutex msg_to_pull_mutex; /* next msg to pull serialization */ +}; + +struct xpc_channel_uv { + void *cached_notify_gru_mq_desc; /* remote partition's notify mq's */ + /* gru mq descriptor */ + + struct xpc_send_msg_slot_uv *send_msg_slots; + void *recv_msg_slots; /* each slot will hold a xpc_notify_mq_msg_uv */ + /* structure plus the user's payload */ + + struct xpc_fifo_head_uv msg_slot_free_list; + struct xpc_fifo_head_uv recv_msg_list; /* deliverable payloads */ +}; + +struct xpc_channel { + short partid; /* ID of remote partition connected */ + spinlock_t lock; /* lock for updating this structure */ + unsigned int flags; /* general flags */ + + enum xp_retval reason; /* reason why channel is disconnect'g */ + int reason_line; /* line# disconnect initiated from */ + + u16 number; /* channel # */ + + u16 entry_size; /* sizeof each msg entry */ + u16 local_nentries; /* #of msg entries in local msg queue */ + u16 remote_nentries; /* #of msg entries in remote msg queue */ + + atomic_t references; /* #of external references to queues */ + + atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */ + wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */ + + u8 delayed_chctl_flags; /* chctl flags received, but delayed */ + /* action until channel disconnected */ + + atomic_t 
n_to_notify; /* #of msg senders to notify */ + + xpc_channel_func func; /* user's channel function */ + void *key; /* pointer to user's key */ + + struct completion wdisconnect_wait; /* wait for channel disconnect */ + + /* kthread management related fields */ + + atomic_t kthreads_assigned; /* #of kthreads assigned to channel */ + u32 kthreads_assigned_limit; /* limit on #of kthreads assigned */ + atomic_t kthreads_idle; /* #of kthreads idle waiting for work */ + u32 kthreads_idle_limit; /* limit on #of kthreads idle */ + atomic_t kthreads_active; /* #of kthreads actively working */ + + wait_queue_head_t idle_wq; /* idle kthread wait queue */ + + union { + struct xpc_channel_sn2 sn2; + struct xpc_channel_uv uv; + } sn; + +} ____cacheline_aligned; + +/* struct xpc_channel flags */ + +#define XPC_C_WASCONNECTED 0x00000001 /* channel was connected */ + +#define XPC_C_ROPENCOMPLETE 0x00000002 /* remote open channel complete */ +#define XPC_C_OPENCOMPLETE 0x00000004 /* local open channel complete */ +#define XPC_C_ROPENREPLY 0x00000008 /* remote open channel reply */ +#define XPC_C_OPENREPLY 0x00000010 /* local open channel reply */ +#define XPC_C_ROPENREQUEST 0x00000020 /* remote open channel request */ +#define XPC_C_OPENREQUEST 0x00000040 /* local open channel request */ + +#define XPC_C_SETUP 0x00000080 /* channel's msgqueues are alloc'd */ +#define XPC_C_CONNECTEDCALLOUT 0x00000100 /* connected callout initiated */ +#define XPC_C_CONNECTEDCALLOUT_MADE \ + 0x00000200 /* connected callout completed */ +#define XPC_C_CONNECTED 0x00000400 /* local channel is connected */ +#define XPC_C_CONNECTING 0x00000800 /* channel is being connected */ + +#define XPC_C_RCLOSEREPLY 0x00001000 /* remote close channel reply */ +#define XPC_C_CLOSEREPLY 0x00002000 /* local close channel reply */ +#define XPC_C_RCLOSEREQUEST 0x00004000 /* remote close channel request */ +#define XPC_C_CLOSEREQUEST 0x00008000 /* local close channel request */ + +#define XPC_C_DISCONNECTED 0x00010000 /* channel is disconnected */ +#define XPC_C_DISCONNECTING 0x00020000 /* channel is being disconnected */ +#define XPC_C_DISCONNECTINGCALLOUT \ + 0x00040000 /* disconnecting callout initiated */ +#define XPC_C_DISCONNECTINGCALLOUT_MADE \ + 0x00080000 /* disconnecting callout completed */ +#define XPC_C_WDISCONNECT 0x00100000 /* waiting for channel disconnect */ + +/* + * The channel control flags (chctl) union consists of a 64-bit variable which + * is divided up into eight bytes, ordered from right to left. Byte zero + * pertains to channel 0, byte one to channel 1, and so on. Each channel's byte + * can have one or more of the chctl flags set in it. 
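+ *
+ * For example (illustrative, little endian): setting
+ * XPC_CHCTL_OPENREQUEST (0x04) for channel 2 is equivalent to OR'ing
+ * 0x04 << (2 * 8) == 0x40000 into all_flags.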
+ */
+
+union xpc_channel_ctl_flags {
+ u64 all_flags;
+ u8 flags[XPC_MAX_NCHANNELS];
+};
+
+/* chctl flags */
+#define XPC_CHCTL_CLOSEREQUEST 0x01
+#define XPC_CHCTL_CLOSEREPLY 0x02
+#define XPC_CHCTL_OPENREQUEST 0x04
+#define XPC_CHCTL_OPENREPLY 0x08
+#define XPC_CHCTL_OPENCOMPLETE 0x10
+#define XPC_CHCTL_MSGREQUEST 0x20
+
+#define XPC_OPENCLOSE_CHCTL_FLAGS \
+ (XPC_CHCTL_CLOSEREQUEST | XPC_CHCTL_CLOSEREPLY | \
+ XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY | \
+ XPC_CHCTL_OPENCOMPLETE)
+#define XPC_MSG_CHCTL_FLAGS XPC_CHCTL_MSGREQUEST
+
+static inline int
+xpc_any_openclose_chctl_flags_set(union xpc_channel_ctl_flags *chctl)
+{
+ int ch_number;
+
+ for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) {
+ if (chctl->flags[ch_number] & XPC_OPENCLOSE_CHCTL_FLAGS)
+ return 1;
+ }
+ return 0;
+}
+
+static inline int
+xpc_any_msg_chctl_flags_set(union xpc_channel_ctl_flags *chctl)
+{
+ int ch_number;
+
+ for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) {
+ if (chctl->flags[ch_number] & XPC_MSG_CHCTL_FLAGS)
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Manage channels on a partition basis. There is one of these structures
+ * for each partition (a partition will never utilize the structure that
+ * represents itself).
+ */
+
+struct xpc_partition_sn2 {
+ unsigned long remote_amos_page_pa; /* paddr of partition's amos page */
+ int activate_IRQ_nasid; /* active partition's act/deact nasid */
+ int activate_IRQ_phys_cpuid; /* active part's act/deact phys cpuid */
+
+ unsigned long remote_vars_pa; /* phys addr of partition's vars */
+ unsigned long remote_vars_part_pa; /* paddr of partition's vars part */
+ u8 remote_vars_version; /* version# of partition's vars */
+
+ void *local_GPs_base; /* base address of kmalloc'd space */
+ struct xpc_gp_sn2 *local_GPs; /* local Get/Put values */
+ void *remote_GPs_base; /* base address of kmalloc'd space */
+ struct xpc_gp_sn2 *remote_GPs; /* copy of remote partition's local */
+ /* Get/Put values */
+ unsigned long remote_GPs_pa; /* phys addr of remote partition's local */
+ /* Get/Put values */
+
+ void *local_openclose_args_base; /* base address of kmalloc'd space */
+ struct xpc_openclose_args *local_openclose_args; /* local's args */
+ unsigned long remote_openclose_args_pa; /* phys addr of remote's args */
+
+ int notify_IRQ_nasid; /* nasid of where to send notify IRQs */
+ int notify_IRQ_phys_cpuid; /* CPUID of where to send notify IRQs */
+ char notify_IRQ_owner[8]; /* notify IRQ's owner's name */
+
+ struct amo *remote_chctl_amo_va; /* addr of remote chctl flags' amo */
+ struct amo *local_chctl_amo_va; /* address of chctl flags' amo */
+
+ struct timer_list dropped_notify_IRQ_timer; /* dropped IRQ timer */
+};
+
+struct xpc_partition_uv {
+ unsigned long heartbeat_gpa; /* phys addr of partition's heartbeat */
+ struct xpc_heartbeat_uv cached_heartbeat; /* cached copy of */
+ /* partition's heartbeat */
+ unsigned long activate_gru_mq_desc_gpa; /* phys addr of partition's */
+ /* activate mq's gru mq */
+ /* descriptor */
+ void *cached_activate_gru_mq_desc; /* cached copy of partition's */
+ /* activate mq's gru mq descriptor */
+ struct mutex cached_activate_gru_mq_desc_mutex;
+ spinlock_t flags_lock; /* protect updating of flags */
+ unsigned int flags; /* general flags */
+ u8 remote_act_state; /* remote partition's act_state */
+ u8 act_state_req; /* act_state request from remote partition */
+ enum xp_retval reason; /* reason for deactivate act_state request */
+};
+
+/* struct xpc_partition_uv flags */
+
+#define XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV 0x00000001
+#define XPC_P_ENGAGED_UV 0x00000002
+
+/* struct xpc_partition_uv act_state change requests */
+
+#define XPC_P_ASR_ACTIVATE_UV 0x01
+#define XPC_P_ASR_REACTIVATE_UV 0x02
+#define XPC_P_ASR_DEACTIVATE_UV 0x03
+
+struct xpc_partition {
+
+ /* XPC HB infrastructure */
+
+ u8 remote_rp_version; /* version# of partition's rsvd pg */
+ unsigned long remote_rp_ts_jiffies; /* timestamp when rsvd pg setup */
+ unsigned long remote_rp_pa; /* phys addr of partition's rsvd pg */
+ u64 last_heartbeat; /* HB at last read */
+ u32 activate_IRQ_rcvd; /* IRQs since activation */
+ spinlock_t act_lock; /* protect updating of act_state */
+ u8 act_state; /* from XPC HB viewpoint */
+ enum xp_retval reason; /* reason partition is deactivating */
+ int reason_line; /* line# deactivation initiated from */
+
+ unsigned long disengage_timeout; /* timeout in jiffies */
+ struct timer_list disengage_timer;
+
+ /* XPC infrastructure referencing and teardown control */
+
+ u8 setup_state; /* infrastructure setup state */
+ wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */
+ atomic_t references; /* #of references to infrastructure */
+
+ u8 nchannels; /* #of defined channels supported */
+ atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */
+ atomic_t nchannels_engaged; /* #of channels engaged with remote part */
+ struct xpc_channel *channels; /* array of channel structures */
+
+ /* fields used for managing channel availability and activity */
+
+ union xpc_channel_ctl_flags chctl; /* chctl flags yet to be processed */
+ spinlock_t chctl_lock; /* chctl flags lock */
+
+ void *remote_openclose_args_base; /* base address of kmalloc'd space */
+ struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */
+ /* args */
+
+ /* channel manager related fields */
+
+ atomic_t channel_mgr_requests; /* #of requests to activate chan mgr */
+ wait_queue_head_t channel_mgr_wq; /* channel mgr's wait queue */
+
+ union {
+ struct xpc_partition_sn2 sn2;
+ struct xpc_partition_uv uv;
+ } sn;
+
+} ____cacheline_aligned;
+
+struct xpc_arch_operations {
+ int (*setup_partitions) (void);
+ void (*teardown_partitions) (void);
+ void (*process_activate_IRQ_rcvd) (void);
+ enum xp_retval (*get_partition_rsvd_page_pa)
+ (void *, u64 *, unsigned long *, size_t *);
+ int (*setup_rsvd_page) (struct xpc_rsvd_page *);
+
+ void (*allow_hb) (short);
+ void (*disallow_hb) (short);
+ void (*disallow_all_hbs) (void);
+ void (*increment_heartbeat) (void);
+ void (*offline_heartbeat) (void);
+ void (*online_heartbeat) (void);
+ void (*heartbeat_init) (void);
+ void (*heartbeat_exit) (void);
+ enum xp_retval (*get_remote_heartbeat) (struct xpc_partition *);
+
+ void (*request_partition_activation) (struct xpc_rsvd_page *,
+ unsigned long, int);
+ void (*request_partition_reactivation) (struct xpc_partition *);
+ void (*request_partition_deactivation) (struct xpc_partition *);
+ void (*cancel_partition_deactivation_request) (struct xpc_partition *);
+ enum xp_retval (*setup_ch_structures) (struct xpc_partition *);
+ void (*teardown_ch_structures) (struct xpc_partition *);
+
+ enum xp_retval (*make_first_contact) (struct xpc_partition *);
+
+ u64 (*get_chctl_all_flags) (struct xpc_partition *);
+ void (*send_chctl_closerequest) (struct xpc_channel *, unsigned long *);
+ void (*send_chctl_closereply) (struct xpc_channel *, unsigned long *);
+ void (*send_chctl_openrequest) (struct xpc_channel *, unsigned long *);
+ void (*send_chctl_openreply) (struct xpc_channel *, unsigned long *);
+ void (*send_chctl_opencomplete) (struct xpc_channel *, unsigned long *);
+ void (*process_msg_chctl_flags) (struct xpc_partition *, int);
+
+ enum xp_retval (*save_remote_msgqueue_pa) (struct xpc_channel *,
+ unsigned long);
+
+ enum xp_retval (*setup_msg_structures) (struct xpc_channel *);
+ void (*teardown_msg_structures) (struct xpc_channel *);
+
+ void (*indicate_partition_engaged) (struct xpc_partition *);
+ void (*indicate_partition_disengaged) (struct xpc_partition *);
+ void (*assume_partition_disengaged) (short);
+ int (*partition_engaged) (short);
+ int (*any_partition_engaged) (void);
+
+ int (*n_of_deliverable_payloads) (struct xpc_channel *);
+ enum xp_retval (*send_payload) (struct xpc_channel *, u32, void *,
+ u16, u8, xpc_notify_func, void *);
+ void *(*get_deliverable_payload) (struct xpc_channel *);
+ void (*received_payload) (struct xpc_channel *, void *);
+ void (*notify_senders_of_disconnect) (struct xpc_channel *);
+};
+
+/* struct xpc_partition act_state values (for XPC HB) */
+
+#define XPC_P_AS_INACTIVE 0x00 /* partition is not active */
+#define XPC_P_AS_ACTIVATION_REQ 0x01 /* created thread to activate */
+#define XPC_P_AS_ACTIVATING 0x02 /* activation thread started */
+#define XPC_P_AS_ACTIVE 0x03 /* xpc_partition_up() was called */
+#define XPC_P_AS_DEACTIVATING 0x04 /* partition deactivation initiated */
+
+#define XPC_DEACTIVATE_PARTITION(_p, _reason) \
+ xpc_deactivate_partition(__LINE__, (_p), (_reason))
+
+/* struct xpc_partition setup_state values */
+
+#define XPC_P_SS_UNSET 0x00 /* infrastructure was never set up */
+#define XPC_P_SS_SETUP 0x01 /* infrastructure is set up */
+#define XPC_P_SS_WTEARDOWN 0x02 /* waiting to tear down infrastructure */
+#define XPC_P_SS_TORNDOWN 0x03 /* infrastructure is torn down */
+
+/*
+ * struct xpc_partition_sn2's dropped notify IRQ timer is set to wait the
+ * following interval before checking for dropped notify IRQs. These can
+ * occur whenever an IRQ's associated amo write doesn't complete until
+ * after the IRQ was received.
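+ *
+ * (The interval below is expressed in jiffies; (0.25 * HZ) is a quarter
+ * of a second regardless of the kernel's HZ setting.)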
+ */ +#define XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL (0.25 * HZ) + +/* number of seconds to wait for other partitions to disengage */ +#define XPC_DISENGAGE_DEFAULT_TIMELIMIT 90 + +/* interval in seconds to print 'waiting deactivation' messages */ +#define XPC_DEACTIVATE_PRINTMSG_INTERVAL 10 + +#define XPC_PARTID(_p) ((short)((_p) - &xpc_partitions[0])) + +/* found in xp_main.c */ +extern struct xpc_registration xpc_registrations[]; + +/* found in xpc_main.c */ +extern struct device *xpc_part; +extern struct device *xpc_chan; +extern struct xpc_arch_operations xpc_arch_ops; +extern int xpc_disengage_timelimit; +extern int xpc_disengage_timedout; +extern int xpc_activate_IRQ_rcvd; +extern spinlock_t xpc_activate_IRQ_rcvd_lock; +extern wait_queue_head_t xpc_activate_IRQ_wq; +extern void *xpc_kzalloc_cacheline_aligned(size_t, gfp_t, void **); +extern void xpc_activate_partition(struct xpc_partition *); +extern void xpc_activate_kthreads(struct xpc_channel *, int); +extern void xpc_create_kthreads(struct xpc_channel *, int, int); +extern void xpc_disconnect_wait(int); + +/* found in xpc_sn2.c */ +extern int xpc_init_sn2(void); +extern void xpc_exit_sn2(void); + +/* found in xpc_uv.c */ +extern int xpc_init_uv(void); +extern void xpc_exit_uv(void); + +/* found in xpc_partition.c */ +extern int xpc_exiting; +extern int xpc_nasid_mask_nlongs; +extern struct xpc_rsvd_page *xpc_rsvd_page; +extern unsigned long *xpc_mach_nasids; +extern struct xpc_partition *xpc_partitions; +extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **); +extern int xpc_setup_rsvd_page(void); +extern void xpc_teardown_rsvd_page(void); +extern int xpc_identify_activate_IRQ_sender(void); +extern int xpc_partition_disengaged(struct xpc_partition *); +extern enum xp_retval xpc_mark_partition_active(struct xpc_partition *); +extern void xpc_mark_partition_inactive(struct xpc_partition *); +extern void xpc_discovery(void); +extern enum xp_retval xpc_get_remote_rp(int, unsigned long *, + struct xpc_rsvd_page *, + unsigned long *); +extern void xpc_deactivate_partition(const int, struct xpc_partition *, + enum xp_retval); +extern enum xp_retval xpc_initiate_partid_to_nasids(short, void *); + +/* found in xpc_channel.c */ +extern void xpc_initiate_connect(int); +extern void xpc_initiate_disconnect(int); +extern enum xp_retval xpc_allocate_msg_wait(struct xpc_channel *); +extern enum xp_retval xpc_initiate_send(short, int, u32, void *, u16); +extern enum xp_retval xpc_initiate_send_notify(short, int, u32, void *, u16, + xpc_notify_func, void *); +extern void xpc_initiate_received(short, int, void *); +extern void xpc_process_sent_chctl_flags(struct xpc_partition *); +extern void xpc_connected_callout(struct xpc_channel *); +extern void xpc_deliver_payload(struct xpc_channel *); +extern void xpc_disconnect_channel(const int, struct xpc_channel *, + enum xp_retval, unsigned long *); +extern void xpc_disconnect_callout(struct xpc_channel *, enum xp_retval); +extern void xpc_partition_going_down(struct xpc_partition *, enum xp_retval); + +static inline void +xpc_wakeup_channel_mgr(struct xpc_partition *part) +{ + if (atomic_inc_return(&part->channel_mgr_requests) == 1) + wake_up(&part->channel_mgr_wq); +} + +/* + * These next two inlines are used to keep us from tearing down a channel's + * msg queues while a thread may be referencing them. 
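+ *
+ * Typical usage (illustrative):
+ *
+ *	xpc_msgqueue_ref(ch);
+ *	... safe to reference the channel's msg queues here ...
+ *	xpc_msgqueue_deref(ch);
+ *
+ * The deref that drops the count to zero wakes the channel manager so
+ * any pending teardown can proceed.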
+ */ +static inline void +xpc_msgqueue_ref(struct xpc_channel *ch) +{ + atomic_inc(&ch->references); +} + +static inline void +xpc_msgqueue_deref(struct xpc_channel *ch) +{ + s32 refs = atomic_dec_return(&ch->references); + + DBUG_ON(refs < 0); + if (refs == 0) + xpc_wakeup_channel_mgr(&xpc_partitions[ch->partid]); +} + +#define XPC_DISCONNECT_CHANNEL(_ch, _reason, _irqflgs) \ + xpc_disconnect_channel(__LINE__, _ch, _reason, _irqflgs) + +/* + * These two inlines are used to keep us from tearing down a partition's + * setup infrastructure while a thread may be referencing it. + */ +static inline void +xpc_part_deref(struct xpc_partition *part) +{ + s32 refs = atomic_dec_return(&part->references); + + DBUG_ON(refs < 0); + if (refs == 0 && part->setup_state == XPC_P_SS_WTEARDOWN) + wake_up(&part->teardown_wq); +} + +static inline int +xpc_part_ref(struct xpc_partition *part) +{ + int setup; + + atomic_inc(&part->references); + setup = (part->setup_state == XPC_P_SS_SETUP); + if (!setup) + xpc_part_deref(part); + + return setup; +} + +/* + * The following macro is to be used for the setting of the reason and + * reason_line fields in both the struct xpc_channel and struct xpc_partition + * structures. + */ +#define XPC_SET_REASON(_p, _reason, _line) \ + { \ + (_p)->reason = _reason; \ + (_p)->reason_line = _line; \ + } + +#endif /* _DRIVERS_MISC_SGIXP_XPC_H */ diff --git a/kernel/drivers/misc/sgi-xp/xpc_channel.c b/kernel/drivers/misc/sgi-xp/xpc_channel.c new file mode 100644 index 000000000..128d5615c --- /dev/null +++ b/kernel/drivers/misc/sgi-xp/xpc_channel.c @@ -0,0 +1,1014 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) channel support. + * + * This is the part of XPC that manages the channels and + * sends/receives messages across them to/from other partitions. + * + */ + +#include <linux/device.h> +#include "xpc.h" + +/* + * Process a connect message from a remote partition. + * + * Note: xpc_process_connect() is expecting to be called with the + * spin_lock_irqsave held and will leave it locked upon return. 
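+ *
+ * i.e. callers follow the pattern (illustrative):
+ *
+ *	spin_lock_irqsave(&ch->lock, irq_flags);
+ *	xpc_process_connect(ch, &irq_flags);
+ *	spin_unlock_irqrestore(&ch->lock, irq_flags);
+ *
+ * The lock may be dropped and reacquired internally, which is why
+ * irq_flags is passed by pointer rather than by value.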
+ */ +static void +xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags) +{ + enum xp_retval ret; + + DBUG_ON(!spin_is_locked(&ch->lock)); + + if (!(ch->flags & XPC_C_OPENREQUEST) || + !(ch->flags & XPC_C_ROPENREQUEST)) { + /* nothing more to do for now */ + return; + } + DBUG_ON(!(ch->flags & XPC_C_CONNECTING)); + + if (!(ch->flags & XPC_C_SETUP)) { + spin_unlock_irqrestore(&ch->lock, *irq_flags); + ret = xpc_arch_ops.setup_msg_structures(ch); + spin_lock_irqsave(&ch->lock, *irq_flags); + + if (ret != xpSuccess) + XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags); + else + ch->flags |= XPC_C_SETUP; + + if (ch->flags & XPC_C_DISCONNECTING) + return; + } + + if (!(ch->flags & XPC_C_OPENREPLY)) { + ch->flags |= XPC_C_OPENREPLY; + xpc_arch_ops.send_chctl_openreply(ch, irq_flags); + } + + if (!(ch->flags & XPC_C_ROPENREPLY)) + return; + + if (!(ch->flags & XPC_C_OPENCOMPLETE)) { + ch->flags |= (XPC_C_OPENCOMPLETE | XPC_C_CONNECTED); + xpc_arch_ops.send_chctl_opencomplete(ch, irq_flags); + } + + if (!(ch->flags & XPC_C_ROPENCOMPLETE)) + return; + + dev_info(xpc_chan, "channel %d to partition %d connected\n", + ch->number, ch->partid); + + ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP); /* clear all else */ +} + +/* + * spin_lock_irqsave() is expected to be held on entry. + */ +static void +xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + u32 channel_was_connected = (ch->flags & XPC_C_WASCONNECTED); + + DBUG_ON(!spin_is_locked(&ch->lock)); + + if (!(ch->flags & XPC_C_DISCONNECTING)) + return; + + DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); + + /* make sure all activity has settled down first */ + + if (atomic_read(&ch->kthreads_assigned) > 0 || + atomic_read(&ch->references) > 0) { + return; + } + DBUG_ON((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && + !(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE)); + + if (part->act_state == XPC_P_AS_DEACTIVATING) { + /* can't proceed until the other side disengages from us */ + if (xpc_arch_ops.partition_engaged(ch->partid)) + return; + + } else { + + /* as long as the other side is up do the full protocol */ + + if (!(ch->flags & XPC_C_RCLOSEREQUEST)) + return; + + if (!(ch->flags & XPC_C_CLOSEREPLY)) { + ch->flags |= XPC_C_CLOSEREPLY; + xpc_arch_ops.send_chctl_closereply(ch, irq_flags); + } + + if (!(ch->flags & XPC_C_RCLOSEREPLY)) + return; + } + + /* wake those waiting for notify completion */ + if (atomic_read(&ch->n_to_notify) > 0) { + /* we do callout while holding ch->lock, callout can't block */ + xpc_arch_ops.notify_senders_of_disconnect(ch); + } + + /* both sides are disconnected now */ + + if (ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE) { + spin_unlock_irqrestore(&ch->lock, *irq_flags); + xpc_disconnect_callout(ch, xpDisconnected); + spin_lock_irqsave(&ch->lock, *irq_flags); + } + + DBUG_ON(atomic_read(&ch->n_to_notify) != 0); + + /* it's now safe to free the channel's message queues */ + xpc_arch_ops.teardown_msg_structures(ch); + + ch->func = NULL; + ch->key = NULL; + ch->entry_size = 0; + ch->local_nentries = 0; + ch->remote_nentries = 0; + ch->kthreads_assigned_limit = 0; + ch->kthreads_idle_limit = 0; + + /* + * Mark the channel disconnected and clear all other flags, including + * XPC_C_SETUP (because of call to + * xpc_arch_ops.teardown_msg_structures()) but not including + * XPC_C_WDISCONNECT (if it was set). 
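+ *
+ * e.g. if flags were (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT) on
+ * entry, the assignment below leaves exactly
+ * (XPC_C_DISCONNECTED | XPC_C_WDISCONNECT).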
+ */ + ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT)); + + atomic_dec(&part->nchannels_active); + + if (channel_was_connected) { + dev_info(xpc_chan, "channel %d to partition %d disconnected, " + "reason=%d\n", ch->number, ch->partid, ch->reason); + } + + if (ch->flags & XPC_C_WDISCONNECT) { + /* we won't lose the CPU since we're holding ch->lock */ + complete(&ch->wdisconnect_wait); + } else if (ch->delayed_chctl_flags) { + if (part->act_state != XPC_P_AS_DEACTIVATING) { + /* time to take action on any delayed chctl flags */ + spin_lock(&part->chctl_lock); + part->chctl.flags[ch->number] |= + ch->delayed_chctl_flags; + spin_unlock(&part->chctl_lock); + } + ch->delayed_chctl_flags = 0; + } +} + +/* + * Process a change in the channel's remote connection state. + */ +static void +xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, + u8 chctl_flags) +{ + unsigned long irq_flags; + struct xpc_openclose_args *args = + &part->remote_openclose_args[ch_number]; + struct xpc_channel *ch = &part->channels[ch_number]; + enum xp_retval reason; + enum xp_retval ret; + int create_kthread = 0; + + spin_lock_irqsave(&ch->lock, irq_flags); + +again: + + if ((ch->flags & XPC_C_DISCONNECTED) && + (ch->flags & XPC_C_WDISCONNECT)) { + /* + * Delay processing chctl flags until thread waiting disconnect + * has had a chance to see that the channel is disconnected. + */ + ch->delayed_chctl_flags |= chctl_flags; + goto out; + } + + if (chctl_flags & XPC_CHCTL_CLOSEREQUEST) { + + dev_dbg(xpc_chan, "XPC_CHCTL_CLOSEREQUEST (reason=%d) received " + "from partid=%d, channel=%d\n", args->reason, + ch->partid, ch->number); + + /* + * If RCLOSEREQUEST is set, we're probably waiting for + * RCLOSEREPLY. We should find it and a ROPENREQUEST packed + * with this RCLOSEREQUEST in the chctl_flags. 
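+ *
+ * e.g. chctl_flags may arrive as (XPC_CHCTL_CLOSEREQUEST |
+ * XPC_CHCTL_CLOSEREPLY | XPC_CHCTL_OPENREQUEST) == 0x07 when the
+ * remote side closed the channel and immediately reopened it.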
+ */ + + if (ch->flags & XPC_C_RCLOSEREQUEST) { + DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING)); + DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); + DBUG_ON(!(ch->flags & XPC_C_CLOSEREPLY)); + DBUG_ON(ch->flags & XPC_C_RCLOSEREPLY); + + DBUG_ON(!(chctl_flags & XPC_CHCTL_CLOSEREPLY)); + chctl_flags &= ~XPC_CHCTL_CLOSEREPLY; + ch->flags |= XPC_C_RCLOSEREPLY; + + /* both sides have finished disconnecting */ + xpc_process_disconnect(ch, &irq_flags); + DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED)); + goto again; + } + + if (ch->flags & XPC_C_DISCONNECTED) { + if (!(chctl_flags & XPC_CHCTL_OPENREQUEST)) { + if (part->chctl.flags[ch_number] & + XPC_CHCTL_OPENREQUEST) { + + DBUG_ON(ch->delayed_chctl_flags != 0); + spin_lock(&part->chctl_lock); + part->chctl.flags[ch_number] |= + XPC_CHCTL_CLOSEREQUEST; + spin_unlock(&part->chctl_lock); + } + goto out; + } + + XPC_SET_REASON(ch, 0, 0); + ch->flags &= ~XPC_C_DISCONNECTED; + + atomic_inc(&part->nchannels_active); + ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST); + } + + chctl_flags &= ~(XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY | + XPC_CHCTL_OPENCOMPLETE); + + /* + * The meaningful CLOSEREQUEST connection state fields are: + * reason = reason connection is to be closed + */ + + ch->flags |= XPC_C_RCLOSEREQUEST; + + if (!(ch->flags & XPC_C_DISCONNECTING)) { + reason = args->reason; + if (reason <= xpSuccess || reason > xpUnknownReason) + reason = xpUnknownReason; + else if (reason == xpUnregistering) + reason = xpOtherUnregistering; + + XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags); + + DBUG_ON(chctl_flags & XPC_CHCTL_CLOSEREPLY); + goto out; + } + + xpc_process_disconnect(ch, &irq_flags); + } + + if (chctl_flags & XPC_CHCTL_CLOSEREPLY) { + + dev_dbg(xpc_chan, "XPC_CHCTL_CLOSEREPLY received from partid=" + "%d, channel=%d\n", ch->partid, ch->number); + + if (ch->flags & XPC_C_DISCONNECTED) { + DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING); + goto out; + } + + DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); + + if (!(ch->flags & XPC_C_RCLOSEREQUEST)) { + if (part->chctl.flags[ch_number] & + XPC_CHCTL_CLOSEREQUEST) { + + DBUG_ON(ch->delayed_chctl_flags != 0); + spin_lock(&part->chctl_lock); + part->chctl.flags[ch_number] |= + XPC_CHCTL_CLOSEREPLY; + spin_unlock(&part->chctl_lock); + } + goto out; + } + + ch->flags |= XPC_C_RCLOSEREPLY; + + if (ch->flags & XPC_C_CLOSEREPLY) { + /* both sides have finished disconnecting */ + xpc_process_disconnect(ch, &irq_flags); + } + } + + if (chctl_flags & XPC_CHCTL_OPENREQUEST) { + + dev_dbg(xpc_chan, "XPC_CHCTL_OPENREQUEST (entry_size=%d, " + "local_nentries=%d) received from partid=%d, " + "channel=%d\n", args->entry_size, args->local_nentries, + ch->partid, ch->number); + + if (part->act_state == XPC_P_AS_DEACTIVATING || + (ch->flags & XPC_C_ROPENREQUEST)) { + goto out; + } + + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) { + ch->delayed_chctl_flags |= XPC_CHCTL_OPENREQUEST; + goto out; + } + DBUG_ON(!(ch->flags & (XPC_C_DISCONNECTED | + XPC_C_OPENREQUEST))); + DBUG_ON(ch->flags & (XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY | + XPC_C_OPENREPLY | XPC_C_CONNECTED)); + + /* + * The meaningful OPENREQUEST connection state fields are: + * entry_size = size of channel's messages in bytes + * local_nentries = remote partition's local_nentries + */ + if (args->entry_size == 0 || args->local_nentries == 0) { + /* assume OPENREQUEST was delayed by mistake */ + goto out; + } + + ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING); + ch->remote_nentries = args->local_nentries; + + if (ch->flags & XPC_C_OPENREQUEST) 
{ + if (args->entry_size != ch->entry_size) { + XPC_DISCONNECT_CHANNEL(ch, xpUnequalMsgSizes, + &irq_flags); + goto out; + } + } else { + ch->entry_size = args->entry_size; + + XPC_SET_REASON(ch, 0, 0); + ch->flags &= ~XPC_C_DISCONNECTED; + + atomic_inc(&part->nchannels_active); + } + + xpc_process_connect(ch, &irq_flags); + } + + if (chctl_flags & XPC_CHCTL_OPENREPLY) { + + dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY (local_msgqueue_pa=" + "0x%lx, local_nentries=%d, remote_nentries=%d) " + "received from partid=%d, channel=%d\n", + args->local_msgqueue_pa, args->local_nentries, + args->remote_nentries, ch->partid, ch->number); + + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) + goto out; + + if (!(ch->flags & XPC_C_OPENREQUEST)) { + XPC_DISCONNECT_CHANNEL(ch, xpOpenCloseError, + &irq_flags); + goto out; + } + + DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST)); + DBUG_ON(ch->flags & XPC_C_CONNECTED); + + /* + * The meaningful OPENREPLY connection state fields are: + * local_msgqueue_pa = physical address of remote + * partition's local_msgqueue + * local_nentries = remote partition's local_nentries + * remote_nentries = remote partition's remote_nentries + */ + DBUG_ON(args->local_msgqueue_pa == 0); + DBUG_ON(args->local_nentries == 0); + DBUG_ON(args->remote_nentries == 0); + + ret = xpc_arch_ops.save_remote_msgqueue_pa(ch, + args->local_msgqueue_pa); + if (ret != xpSuccess) { + XPC_DISCONNECT_CHANNEL(ch, ret, &irq_flags); + goto out; + } + ch->flags |= XPC_C_ROPENREPLY; + + if (args->local_nentries < ch->remote_nentries) { + dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new " + "remote_nentries=%d, old remote_nentries=%d, " + "partid=%d, channel=%d\n", + args->local_nentries, ch->remote_nentries, + ch->partid, ch->number); + + ch->remote_nentries = args->local_nentries; + } + if (args->remote_nentries < ch->local_nentries) { + dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new " + "local_nentries=%d, old local_nentries=%d, " + "partid=%d, channel=%d\n", + args->remote_nentries, ch->local_nentries, + ch->partid, ch->number); + + ch->local_nentries = args->remote_nentries; + } + + xpc_process_connect(ch, &irq_flags); + } + + if (chctl_flags & XPC_CHCTL_OPENCOMPLETE) { + + dev_dbg(xpc_chan, "XPC_CHCTL_OPENCOMPLETE received from " + "partid=%d, channel=%d\n", ch->partid, ch->number); + + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) + goto out; + + if (!(ch->flags & XPC_C_OPENREQUEST) || + !(ch->flags & XPC_C_OPENREPLY)) { + XPC_DISCONNECT_CHANNEL(ch, xpOpenCloseError, + &irq_flags); + goto out; + } + + DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST)); + DBUG_ON(!(ch->flags & XPC_C_ROPENREPLY)); + DBUG_ON(!(ch->flags & XPC_C_CONNECTED)); + + ch->flags |= XPC_C_ROPENCOMPLETE; + + xpc_process_connect(ch, &irq_flags); + create_kthread = 1; + } + +out: + spin_unlock_irqrestore(&ch->lock, irq_flags); + + if (create_kthread) + xpc_create_kthreads(ch, 1, 0); +} + +/* + * Attempt to establish a channel connection to a remote partition. 
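+ *
+ * Returns xpSuccess once the local open request has been sent, xpRetry
+ * if the registration mutex could not be taken without blocking (the
+ * caller simply retries on a later pass), xpUnregistered if nothing is
+ * registered on this channel number, xpUnequalMsgSizes if the two sides
+ * disagree on the message entry size, or the channel's disconnect
+ * reason if a disconnect is already in progress.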
+ */ +static enum xp_retval +xpc_connect_channel(struct xpc_channel *ch) +{ + unsigned long irq_flags; + struct xpc_registration *registration = &xpc_registrations[ch->number]; + + if (mutex_trylock(®istration->mutex) == 0) + return xpRetry; + + if (!XPC_CHANNEL_REGISTERED(ch->number)) { + mutex_unlock(®istration->mutex); + return xpUnregistered; + } + + spin_lock_irqsave(&ch->lock, irq_flags); + + DBUG_ON(ch->flags & XPC_C_CONNECTED); + DBUG_ON(ch->flags & XPC_C_OPENREQUEST); + + if (ch->flags & XPC_C_DISCONNECTING) { + spin_unlock_irqrestore(&ch->lock, irq_flags); + mutex_unlock(®istration->mutex); + return ch->reason; + } + + /* add info from the channel connect registration to the channel */ + + ch->kthreads_assigned_limit = registration->assigned_limit; + ch->kthreads_idle_limit = registration->idle_limit; + DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0); + DBUG_ON(atomic_read(&ch->kthreads_idle) != 0); + DBUG_ON(atomic_read(&ch->kthreads_active) != 0); + + ch->func = registration->func; + DBUG_ON(registration->func == NULL); + ch->key = registration->key; + + ch->local_nentries = registration->nentries; + + if (ch->flags & XPC_C_ROPENREQUEST) { + if (registration->entry_size != ch->entry_size) { + /* the local and remote sides aren't the same */ + + /* + * Because XPC_DISCONNECT_CHANNEL() can block we're + * forced to up the registration sema before we unlock + * the channel lock. But that's okay here because we're + * done with the part that required the registration + * sema. XPC_DISCONNECT_CHANNEL() requires that the + * channel lock be locked and will unlock and relock + * the channel lock as needed. + */ + mutex_unlock(®istration->mutex); + XPC_DISCONNECT_CHANNEL(ch, xpUnequalMsgSizes, + &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpUnequalMsgSizes; + } + } else { + ch->entry_size = registration->entry_size; + + XPC_SET_REASON(ch, 0, 0); + ch->flags &= ~XPC_C_DISCONNECTED; + + atomic_inc(&xpc_partitions[ch->partid].nchannels_active); + } + + mutex_unlock(®istration->mutex); + + /* initiate the connection */ + + ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING); + xpc_arch_ops.send_chctl_openrequest(ch, &irq_flags); + + xpc_process_connect(ch, &irq_flags); + + spin_unlock_irqrestore(&ch->lock, irq_flags); + + return xpSuccess; +} + +void +xpc_process_sent_chctl_flags(struct xpc_partition *part) +{ + unsigned long irq_flags; + union xpc_channel_ctl_flags chctl; + struct xpc_channel *ch; + int ch_number; + u32 ch_flags; + + chctl.all_flags = xpc_arch_ops.get_chctl_all_flags(part); + + /* + * Initiate channel connections for registered channels. + * + * For each connected channel that has pending messages activate idle + * kthreads and/or create new kthreads as needed. + */ + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch = &part->channels[ch_number]; + + /* + * Process any open or close related chctl flags, and then deal + * with connecting or disconnecting the channel as required. 
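+ *
+ * Note the ordering of a single pass (illustrative): openclose flags
+ * first, then the disconnect check, then a connect attempt if the
+ * channel is neither connected nor opening, and finally any
+ * message-related flags for connected channels.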
+ */ + + if (chctl.flags[ch_number] & XPC_OPENCLOSE_CHCTL_FLAGS) { + xpc_process_openclose_chctl_flags(part, ch_number, + chctl.flags[ch_number]); + } + + ch_flags = ch->flags; /* need an atomic snapshot of flags */ + + if (ch_flags & XPC_C_DISCONNECTING) { + spin_lock_irqsave(&ch->lock, irq_flags); + xpc_process_disconnect(ch, &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + continue; + } + + if (part->act_state == XPC_P_AS_DEACTIVATING) + continue; + + if (!(ch_flags & XPC_C_CONNECTED)) { + if (!(ch_flags & XPC_C_OPENREQUEST)) { + DBUG_ON(ch_flags & XPC_C_SETUP); + (void)xpc_connect_channel(ch); + } + continue; + } + + /* + * Process any message related chctl flags, this may involve + * the activation of kthreads to deliver any pending messages + * sent from the other partition. + */ + + if (chctl.flags[ch_number] & XPC_MSG_CHCTL_FLAGS) + xpc_arch_ops.process_msg_chctl_flags(part, ch_number); + } +} + +/* + * XPC's heartbeat code calls this function to inform XPC that a partition is + * going down. XPC responds by tearing down the XPartition Communication + * infrastructure used for the just downed partition. + * + * XPC's heartbeat code will never call this function and xpc_partition_up() + * at the same time. Nor will it ever make multiple calls to either function + * at the same time. + */ +void +xpc_partition_going_down(struct xpc_partition *part, enum xp_retval reason) +{ + unsigned long irq_flags; + int ch_number; + struct xpc_channel *ch; + + dev_dbg(xpc_chan, "deactivating partition %d, reason=%d\n", + XPC_PARTID(part), reason); + + if (!xpc_part_ref(part)) { + /* infrastructure for this partition isn't currently set up */ + return; + } + + /* disconnect channels associated with the partition going down */ + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch = &part->channels[ch_number]; + + xpc_msgqueue_ref(ch); + spin_lock_irqsave(&ch->lock, irq_flags); + + XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags); + + spin_unlock_irqrestore(&ch->lock, irq_flags); + xpc_msgqueue_deref(ch); + } + + xpc_wakeup_channel_mgr(part); + + xpc_part_deref(part); +} + +/* + * Called by XP at the time of channel connection registration to cause + * XPC to establish connections to all currently active partitions. + */ +void +xpc_initiate_connect(int ch_number) +{ + short partid; + struct xpc_partition *part; + struct xpc_channel *ch; + + DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS); + + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + if (xpc_part_ref(part)) { + ch = &part->channels[ch_number]; + + /* + * Initiate the establishment of a connection on the + * newly registered channel to the remote partition. + */ + xpc_wakeup_channel_mgr(part); + xpc_part_deref(part); + } + } +} + +void +xpc_connected_callout(struct xpc_channel *ch) +{ + /* let the registerer know that a connection has been established */ + + if (ch->func != NULL) { + dev_dbg(xpc_chan, "ch->func() called, reason=xpConnected, " + "partid=%d, channel=%d\n", ch->partid, ch->number); + + ch->func(xpConnected, ch->partid, ch->number, + (void *)(u64)ch->local_nentries, ch->key); + + dev_dbg(xpc_chan, "ch->func() returned, reason=xpConnected, " + "partid=%d, channel=%d\n", ch->partid, ch->number); + } +} + +/* + * Called by XP at the time of channel connection unregistration to cause + * XPC to teardown all current connections for the specified channel. 
+ *
+ * Before returning xpc_initiate_disconnect() will wait until all connections
+ * on the specified channel have been closed/torn down. So the caller can be
+ * assured that they will not be receiving any more callouts from XPC to the
+ * function they registered via xpc_connect().
+ *
+ * Arguments:
+ *
+ * ch_number - channel # to unregister.
+ */
+void
+xpc_initiate_disconnect(int ch_number)
+{
+ unsigned long irq_flags;
+ short partid;
+ struct xpc_partition *part;
+ struct xpc_channel *ch;
+
+ DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
+
+ /* initiate the channel disconnect for every active partition */
+ for (partid = 0; partid < xp_max_npartitions; partid++) {
+ part = &xpc_partitions[partid];
+
+ if (xpc_part_ref(part)) {
+ ch = &part->channels[ch_number];
+ xpc_msgqueue_ref(ch);
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+
+ if (!(ch->flags & XPC_C_DISCONNECTED)) {
+ ch->flags |= XPC_C_WDISCONNECT;
+
+ XPC_DISCONNECT_CHANNEL(ch, xpUnregistering,
+ &irq_flags);
+ }
+
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+ xpc_msgqueue_deref(ch);
+ xpc_part_deref(part);
+ }
+ }
+
+ xpc_disconnect_wait(ch_number);
+}
+
+/*
+ * To disconnect a channel, and reflect it back to all who may be waiting.
+ *
+ * An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by
+ * xpc_process_disconnect(), and if set, XPC_C_WDISCONNECT is cleared by
+ * xpc_disconnect_wait().
+ *
+ * THE CHANNEL IS TO BE LOCKED BY THE CALLER AND WILL REMAIN LOCKED UPON RETURN.
+ */
+void
+xpc_disconnect_channel(const int line, struct xpc_channel *ch,
+ enum xp_retval reason, unsigned long *irq_flags)
+{
+ u32 channel_was_connected = (ch->flags & XPC_C_CONNECTED);
+
+ DBUG_ON(!spin_is_locked(&ch->lock));
+
+ if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED))
+ return;
+
+ DBUG_ON(!(ch->flags & (XPC_C_CONNECTING | XPC_C_CONNECTED)));
+
+ dev_dbg(xpc_chan, "reason=%d, line=%d, partid=%d, channel=%d\n",
+ reason, line, ch->partid, ch->number);
+
+ XPC_SET_REASON(ch, reason, line);
+
+ ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING);
+ /* some of these may not have been set */
+ ch->flags &= ~(XPC_C_OPENREQUEST | XPC_C_OPENREPLY |
+ XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
+ XPC_C_CONNECTING | XPC_C_CONNECTED);
+
+ xpc_arch_ops.send_chctl_closerequest(ch, irq_flags);
+
+ if (channel_was_connected)
+ ch->flags |= XPC_C_WASCONNECTED;
+
+ spin_unlock_irqrestore(&ch->lock, *irq_flags);
+
+ /* wake all idle kthreads so they can exit */
+ if (atomic_read(&ch->kthreads_idle) > 0) {
+ wake_up_all(&ch->idle_wq);
+
+ } else if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+ !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
+ /* start a kthread that will do the xpDisconnecting callout */
+ xpc_create_kthreads(ch, 1, 1);
+ }
+
+ /* wake those waiting to allocate an entry from the local msg queue */
+ if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
+ wake_up(&ch->msg_allocate_wq);
+
+ spin_lock_irqsave(&ch->lock, *irq_flags);
+}
+
+void
+xpc_disconnect_callout(struct xpc_channel *ch, enum xp_retval reason)
+{
+ /*
+ * Let the channel's registerer know that the channel is being
+ * disconnected. We don't want to do this if the registerer was never
+ * informed of a connection being made.
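+ *
+ * (Callers guarantee the latter: the disconnecting callout is only
+ * initiated when XPC_C_CONNECTEDCALLOUT_MADE was set; see
+ * xpc_disconnect_channel().)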
+ */ + + if (ch->func != NULL) { + dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, " + "channel=%d\n", reason, ch->partid, ch->number); + + ch->func(reason, ch->partid, ch->number, NULL, ch->key); + + dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, " + "channel=%d\n", reason, ch->partid, ch->number); + } +} + +/* + * Wait for a message entry to become available for the specified channel, + * but don't wait any longer than 1 jiffy. + */ +enum xp_retval +xpc_allocate_msg_wait(struct xpc_channel *ch) +{ + enum xp_retval ret; + DEFINE_WAIT(wait); + + if (ch->flags & XPC_C_DISCONNECTING) { + DBUG_ON(ch->reason == xpInterrupted); + return ch->reason; + } + + atomic_inc(&ch->n_on_msg_allocate_wq); + prepare_to_wait(&ch->msg_allocate_wq, &wait, TASK_INTERRUPTIBLE); + ret = schedule_timeout(1); + finish_wait(&ch->msg_allocate_wq, &wait); + atomic_dec(&ch->n_on_msg_allocate_wq); + + if (ch->flags & XPC_C_DISCONNECTING) { + ret = ch->reason; + DBUG_ON(ch->reason == xpInterrupted); + } else if (ret == 0) { + ret = xpTimeout; + } else { + ret = xpInterrupted; + } + + return ret; +} + +/* + * Send a message that contains the user's payload on the specified channel + * connected to the specified partition. + * + * NOTE that this routine can sleep waiting for a message entry to become + * available. To not sleep, pass in the XPC_NOWAIT flag. + * + * Once sent, this routine will not wait for the message to be received, nor + * will notification be given when it does happen. + * + * Arguments: + * + * partid - ID of partition to which the channel is connected. + * ch_number - channel # to send message on. + * flags - see xp.h for valid flags. + * payload - pointer to the payload which is to be sent. + * payload_size - size of the payload in bytes. + */ +enum xp_retval +xpc_initiate_send(short partid, int ch_number, u32 flags, void *payload, + u16 payload_size) +{ + struct xpc_partition *part = &xpc_partitions[partid]; + enum xp_retval ret = xpUnknownReason; + + dev_dbg(xpc_chan, "payload=0x%p, partid=%d, channel=%d\n", payload, + partid, ch_number); + + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); + DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); + DBUG_ON(payload == NULL); + + if (xpc_part_ref(part)) { + ret = xpc_arch_ops.send_payload(&part->channels[ch_number], + flags, payload, payload_size, 0, NULL, NULL); + xpc_part_deref(part); + } + + return ret; +} + +/* + * Send a message that contains the user's payload on the specified channel + * connected to the specified partition. + * + * NOTE that this routine can sleep waiting for a message entry to become + * available. To not sleep, pass in the XPC_NOWAIT flag. + * + * This routine will not wait for the message to be sent or received. + * + * Once the remote end of the channel has received the message, the function + * passed as an argument to xpc_initiate_send_notify() will be called. This + * allows the sender to free up or re-use any buffers referenced by the + * message, but does NOT mean the message has been processed at the remote + * end by a receiver. + * + * If this routine returns an error, the caller's function will NOT be called. + * + * Arguments: + * + * partid - ID of partition to which the channel is connected. + * ch_number - channel # to send message on. + * flags - see xp.h for valid flags. + * payload - pointer to the payload which is to be sent. + * payload_size - size of the payload in bytes. + * func - function to call with asynchronous notification of message + * receipt. 
THIS FUNCTION MUST BE NON-BLOCKING. + * key - user-defined key to be passed to the function when it's called. + */ +enum xp_retval +xpc_initiate_send_notify(short partid, int ch_number, u32 flags, void *payload, + u16 payload_size, xpc_notify_func func, void *key) +{ + struct xpc_partition *part = &xpc_partitions[partid]; + enum xp_retval ret = xpUnknownReason; + + dev_dbg(xpc_chan, "payload=0x%p, partid=%d, channel=%d\n", payload, + partid, ch_number); + + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); + DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); + DBUG_ON(payload == NULL); + DBUG_ON(func == NULL); + + if (xpc_part_ref(part)) { + ret = xpc_arch_ops.send_payload(&part->channels[ch_number], + flags, payload, payload_size, XPC_N_CALL, func, key); + xpc_part_deref(part); + } + return ret; +} + +/* + * Deliver a message's payload to its intended recipient. + */ +void +xpc_deliver_payload(struct xpc_channel *ch) +{ + void *payload; + + payload = xpc_arch_ops.get_deliverable_payload(ch); + if (payload != NULL) { + + /* + * This ref is taken to protect the payload itself from being + * freed before the user is finished with it, which the user + * indicates by calling xpc_initiate_received(). + */ + xpc_msgqueue_ref(ch); + + atomic_inc(&ch->kthreads_active); + + if (ch->func != NULL) { + dev_dbg(xpc_chan, "ch->func() called, payload=0x%p " + "partid=%d channel=%d\n", payload, ch->partid, + ch->number); + + /* deliver the message to its intended recipient */ + ch->func(xpMsgReceived, ch->partid, ch->number, payload, + ch->key); + + dev_dbg(xpc_chan, "ch->func() returned, payload=0x%p " + "partid=%d channel=%d\n", payload, ch->partid, + ch->number); + } + + atomic_dec(&ch->kthreads_active); + } +} + +/* + * Acknowledge receipt of a delivered message's payload. + * + * This function, although called by users, does not call xpc_part_ref() to + * ensure that the partition infrastructure is in place. It relies on the + * fact that we called xpc_msgqueue_ref() in xpc_deliver_payload(). + * + * Arguments: + * + * partid - ID of partition to which the channel is connected. + * ch_number - channel # message received on. + * payload - pointer to the payload area allocated via + * xpc_initiate_send() or xpc_initiate_send_notify(). + */ +void +xpc_initiate_received(short partid, int ch_number, void *payload) +{ + struct xpc_partition *part = &xpc_partitions[partid]; + struct xpc_channel *ch; + + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); + DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); + + ch = &part->channels[ch_number]; + xpc_arch_ops.received_payload(ch, payload); + + /* the call to xpc_msgqueue_ref() was done by xpc_deliver_payload() */ + xpc_msgqueue_deref(ch); +} diff --git a/kernel/drivers/misc/sgi-xp/xpc_main.c b/kernel/drivers/misc/sgi-xp/xpc_main.c new file mode 100644 index 000000000..7f327121e --- /dev/null +++ b/kernel/drivers/misc/sgi-xp/xpc_main.c @@ -0,0 +1,1374 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) support - standard version. + * + * XPC provides a message passing capability that crosses partition + * boundaries. This module is made up of two parts: + * + * partition This part detects the presence/absence of other + * partitions. 
It provides a heartbeat and monitors + * the heartbeats of other partitions. + * + * channel This part manages the channels and sends/receives + * messages across them to/from other partitions. + * + * There are a couple of additional functions residing in XP, which + * provide an interface to XPC for its users. + * + * + * Caveats: + * + * . Currently on sn2, we have no way to determine which nasid an IRQ + * came from. Thus, xpc_send_IRQ_sn2() does a remote amo write + * followed by an IPI. The amo indicates where data is to be pulled + * from, so after the IPI arrives, the remote partition checks the amo + * word. The IPI can actually arrive before the amo however, so other + * code must periodically check for this case. Also, remote amo + * operations do not reliably time out. Thus we do a remote PIO read + * solely to know whether the remote partition is down and whether we + * should stop sending IPIs to it. This remote PIO read operation is + * set up in a special nofault region so SAL knows to ignore (and + * cleanup) any errors due to the remote amo write, PIO read, and/or + * PIO write operations. + * + * If/when new hardware solves this IPI problem, we should abandon + * the current approach. + * + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/sysctl.h> +#include <linux/device.h> +#include <linux/delay.h> +#include <linux/reboot.h> +#include <linux/kdebug.h> +#include <linux/kthread.h> +#include "xpc.h" + +#ifdef CONFIG_X86_64 +#include <asm/traps.h> +#endif + +/* define two XPC debug device structures to be used with dev_dbg() et al */ + +struct device_driver xpc_dbg_name = { + .name = "xpc" +}; + +struct device xpc_part_dbg_subname = { + .init_name = "", /* set to "part" at xpc_init() time */ + .driver = &xpc_dbg_name +}; + +struct device xpc_chan_dbg_subname = { + .init_name = "", /* set to "chan" at xpc_init() time */ + .driver = &xpc_dbg_name +}; + +struct device *xpc_part = &xpc_part_dbg_subname; +struct device *xpc_chan = &xpc_chan_dbg_subname; + +static int xpc_kdebug_ignore; + +/* systune related variables for /proc/sys directories */ + +static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL; +static int xpc_hb_min_interval = 1; +static int xpc_hb_max_interval = 10; + +static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL; +static int xpc_hb_check_min_interval = 10; +static int xpc_hb_check_max_interval = 120; + +int xpc_disengage_timelimit = XPC_DISENGAGE_DEFAULT_TIMELIMIT; +static int xpc_disengage_min_timelimit; /* = 0 */ +static int xpc_disengage_max_timelimit = 120; + +static struct ctl_table xpc_sys_xpc_hb_dir[] = { + { + .procname = "hb_interval", + .data = &xpc_hb_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xpc_hb_min_interval, + .extra2 = &xpc_hb_max_interval}, + { + .procname = "hb_check_interval", + .data = &xpc_hb_check_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xpc_hb_check_min_interval, + .extra2 = &xpc_hb_check_max_interval}, + {} +}; +static struct ctl_table xpc_sys_xpc_dir[] = { + { + .procname = "hb", + .mode = 0555, + .child = xpc_sys_xpc_hb_dir}, + { + .procname = "disengage_timelimit", + .data = &xpc_disengage_timelimit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xpc_disengage_min_timelimit, + .extra2 = &xpc_disengage_max_timelimit}, + {} +}; +static struct ctl_table xpc_sys_dir[] = { + { + .procname = "xpc", + .mode = 0555, + .child = 
xpc_sys_xpc_dir}, + {} +}; +static struct ctl_table_header *xpc_sysctl; + +/* non-zero if any remote partition disengage was timed out */ +int xpc_disengage_timedout; + +/* #of activate IRQs received and not yet processed */ +int xpc_activate_IRQ_rcvd; +DEFINE_SPINLOCK(xpc_activate_IRQ_rcvd_lock); + +/* IRQ handler notifies this wait queue on receipt of an IRQ */ +DECLARE_WAIT_QUEUE_HEAD(xpc_activate_IRQ_wq); + +static unsigned long xpc_hb_check_timeout; +static struct timer_list xpc_hb_timer; + +/* notification that the xpc_hb_checker thread has exited */ +static DECLARE_COMPLETION(xpc_hb_checker_exited); + +/* notification that the xpc_discovery thread has exited */ +static DECLARE_COMPLETION(xpc_discovery_exited); + +static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *); + +static int xpc_system_reboot(struct notifier_block *, unsigned long, void *); +static struct notifier_block xpc_reboot_notifier = { + .notifier_call = xpc_system_reboot, +}; + +static int xpc_system_die(struct notifier_block *, unsigned long, void *); +static struct notifier_block xpc_die_notifier = { + .notifier_call = xpc_system_die, +}; + +struct xpc_arch_operations xpc_arch_ops; + +/* + * Timer function to enforce the timelimit on the partition disengage. + */ +static void +xpc_timeout_partition_disengage(unsigned long data) +{ + struct xpc_partition *part = (struct xpc_partition *)data; + + DBUG_ON(time_is_after_jiffies(part->disengage_timeout)); + + (void)xpc_partition_disengaged(part); + + DBUG_ON(part->disengage_timeout != 0); + DBUG_ON(xpc_arch_ops.partition_engaged(XPC_PARTID(part))); +} + +/* + * Timer to produce the heartbeat. The timer structures function is + * already set when this is initially called. A tunable is used to + * specify when the next timeout should occur. + */ +static void +xpc_hb_beater(unsigned long dummy) +{ + xpc_arch_ops.increment_heartbeat(); + + if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) + wake_up_interruptible(&xpc_activate_IRQ_wq); + + xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ); + add_timer(&xpc_hb_timer); +} + +static void +xpc_start_hb_beater(void) +{ + xpc_arch_ops.heartbeat_init(); + init_timer(&xpc_hb_timer); + xpc_hb_timer.function = xpc_hb_beater; + xpc_hb_beater(0); +} + +static void +xpc_stop_hb_beater(void) +{ + del_timer_sync(&xpc_hb_timer); + xpc_arch_ops.heartbeat_exit(); +} + +/* + * At periodic intervals, scan through all active partitions and ensure + * their heartbeat is still active. If not, the partition is deactivated. + */ +static void +xpc_check_remote_hb(void) +{ + struct xpc_partition *part; + short partid; + enum xp_retval ret; + + for (partid = 0; partid < xp_max_npartitions; partid++) { + + if (xpc_exiting) + break; + + if (partid == xp_partition_id) + continue; + + part = &xpc_partitions[partid]; + + if (part->act_state == XPC_P_AS_INACTIVE || + part->act_state == XPC_P_AS_DEACTIVATING) { + continue; + } + + ret = xpc_arch_ops.get_remote_heartbeat(part); + if (ret != xpSuccess) + XPC_DEACTIVATE_PARTITION(part, ret); + } +} + +/* + * This thread is responsible for nearly all of the partition + * activation/deactivation. 
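+ *
+ * Note the sysctl ranges declared above: the local heartbeat is
+ * incremented every 1..10 seconds, while remote heartbeats are only
+ * checked every 10..120 seconds, so the sysctl ranges keep the check
+ * interval at least as long as the beat interval and a live partition
+ * should always have incremented its heartbeat between two successive
+ * checks of it.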
+ */ +static int +xpc_hb_checker(void *ignore) +{ + int force_IRQ = 0; + + /* this thread was marked active by xpc_hb_init() */ + + set_cpus_allowed_ptr(current, cpumask_of(XPC_HB_CHECK_CPU)); + + /* set our heartbeating to other partitions into motion */ + xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ); + xpc_start_hb_beater(); + + while (!xpc_exiting) { + + dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have " + "been received\n", + (int)(xpc_hb_check_timeout - jiffies), + xpc_activate_IRQ_rcvd); + + /* checking of remote heartbeats is skewed by IRQ handling */ + if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) { + xpc_hb_check_timeout = jiffies + + (xpc_hb_check_interval * HZ); + + dev_dbg(xpc_part, "checking remote heartbeats\n"); + xpc_check_remote_hb(); + + /* + * On sn2 we need to periodically recheck to ensure no + * IRQ/amo pairs have been missed. + */ + if (is_shub()) + force_IRQ = 1; + } + + /* check for outstanding IRQs */ + if (xpc_activate_IRQ_rcvd > 0 || force_IRQ != 0) { + force_IRQ = 0; + dev_dbg(xpc_part, "processing activate IRQs " + "received\n"); + xpc_arch_ops.process_activate_IRQ_rcvd(); + } + + /* wait for IRQ or timeout */ + (void)wait_event_interruptible(xpc_activate_IRQ_wq, + (time_is_before_eq_jiffies( + xpc_hb_check_timeout) || + xpc_activate_IRQ_rcvd > 0 || + xpc_exiting)); + } + + xpc_stop_hb_beater(); + + dev_dbg(xpc_part, "heartbeat checker is exiting\n"); + + /* mark this thread as having exited */ + complete(&xpc_hb_checker_exited); + return 0; +} + +/* + * This thread will attempt to discover other partitions to activate + * based on info provided by SAL. This new thread is short lived and + * will exit once discovery is complete. + */ +static int +xpc_initiate_discovery(void *ignore) +{ + xpc_discovery(); + + dev_dbg(xpc_part, "discovery thread is exiting\n"); + + /* mark this thread as having exited */ + complete(&xpc_discovery_exited); + return 0; +} + +/* + * The first kthread assigned to a newly activated partition is the one + * created by XPC HB with which it calls xpc_activating(). XPC hangs on to + * that kthread until the partition is brought down, at which time that kthread + * returns back to XPC HB. (The return of that kthread will signify to XPC HB + * that XPC has dismantled all communication infrastructure for the associated + * partition.) This kthread becomes the channel manager for that partition. + * + * Each active partition has a channel manager, who, besides connecting and + * disconnecting channels, will ensure that each of the partition's connected + * channels has the required number of assigned kthreads to get the work done. + */ +static void +xpc_channel_mgr(struct xpc_partition *part) +{ + while (part->act_state != XPC_P_AS_DEACTIVATING || + atomic_read(&part->nchannels_active) > 0 || + !xpc_partition_disengaged(part)) { + + xpc_process_sent_chctl_flags(part); + + /* + * Wait until we've been requested to activate kthreads or + * all of the channel's message queues have been torn down or + * a signal is pending. + * + * The channel_mgr_requests is set to 1 after being awakened, + * This is done to prevent the channel mgr from making one pass + * through the loop for each request, since he will + * be servicing all the requests in one pass. The reason it's + * set to 1 instead of 0 is so that other kthreads will know + * that the channel mgr is running and won't bother trying to + * wake him up. 
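+		 *
+		 * For reference, the wake-side counterpart of this convention
+		 * is xpc_wakeup_channel_mgr() (declared in xpc.h); in sketch
+		 * form, only the waker that moves the count off of zero needs
+		 * to issue a wake_up:
+		 *
+		 *	if (atomic_inc_return(&part->channel_mgr_requests) == 1)
+		 *		wake_up(&part->channel_mgr_wq);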
+ */ + atomic_dec(&part->channel_mgr_requests); + (void)wait_event_interruptible(part->channel_mgr_wq, + (atomic_read(&part->channel_mgr_requests) > 0 || + part->chctl.all_flags != 0 || + (part->act_state == XPC_P_AS_DEACTIVATING && + atomic_read(&part->nchannels_active) == 0 && + xpc_partition_disengaged(part)))); + atomic_set(&part->channel_mgr_requests, 1); + } +} + +/* + * Guarantee that the kzalloc'd memory is cacheline aligned. + */ +void * +xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) +{ + /* see if kzalloc will give us cachline aligned memory by default */ + *base = kzalloc(size, flags); + if (*base == NULL) + return NULL; + + if ((u64)*base == L1_CACHE_ALIGN((u64)*base)) + return *base; + + kfree(*base); + + /* nope, we'll have to do it ourselves */ + *base = kzalloc(size + L1_CACHE_BYTES, flags); + if (*base == NULL) + return NULL; + + return (void *)L1_CACHE_ALIGN((u64)*base); +} + +/* + * Setup the channel structures necessary to support XPartition Communication + * between the specified remote partition and the local one. + */ +static enum xp_retval +xpc_setup_ch_structures(struct xpc_partition *part) +{ + enum xp_retval ret; + int ch_number; + struct xpc_channel *ch; + short partid = XPC_PARTID(part); + + /* + * Allocate all of the channel structures as a contiguous chunk of + * memory. + */ + DBUG_ON(part->channels != NULL); + part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_MAX_NCHANNELS, + GFP_KERNEL); + if (part->channels == NULL) { + dev_err(xpc_chan, "can't get memory for channels\n"); + return xpNoMemory; + } + + /* allocate the remote open and close args */ + + part->remote_openclose_args = + xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, + GFP_KERNEL, &part-> + remote_openclose_args_base); + if (part->remote_openclose_args == NULL) { + dev_err(xpc_chan, "can't get memory for remote connect args\n"); + ret = xpNoMemory; + goto out_1; + } + + part->chctl.all_flags = 0; + spin_lock_init(&part->chctl_lock); + + atomic_set(&part->channel_mgr_requests, 1); + init_waitqueue_head(&part->channel_mgr_wq); + + part->nchannels = XPC_MAX_NCHANNELS; + + atomic_set(&part->nchannels_active, 0); + atomic_set(&part->nchannels_engaged, 0); + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch = &part->channels[ch_number]; + + ch->partid = partid; + ch->number = ch_number; + ch->flags = XPC_C_DISCONNECTED; + + atomic_set(&ch->kthreads_assigned, 0); + atomic_set(&ch->kthreads_idle, 0); + atomic_set(&ch->kthreads_active, 0); + + atomic_set(&ch->references, 0); + atomic_set(&ch->n_to_notify, 0); + + spin_lock_init(&ch->lock); + init_completion(&ch->wdisconnect_wait); + + atomic_set(&ch->n_on_msg_allocate_wq, 0); + init_waitqueue_head(&ch->msg_allocate_wq); + init_waitqueue_head(&ch->idle_wq); + } + + ret = xpc_arch_ops.setup_ch_structures(part); + if (ret != xpSuccess) + goto out_2; + + /* + * With the setting of the partition setup_state to XPC_P_SS_SETUP, + * we're declaring that this partition is ready to go. + */ + part->setup_state = XPC_P_SS_SETUP; + + return xpSuccess; + + /* setup of ch structures failed */ +out_2: + kfree(part->remote_openclose_args_base); + part->remote_openclose_args = NULL; +out_1: + kfree(part->channels); + part->channels = NULL; + return ret; +} + +/* + * Teardown the channel structures necessary to support XPartition Communication + * between the specified remote partition and the local one. 
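+ *
+ * This is the inverse of xpc_setup_ch_structures() above; between them
+ * part->setup_state moves through its complete lifecycle:
+ *
+ *	XPC_P_SS_UNSET -> XPC_P_SS_SETUP -> XPC_P_SS_WTEARDOWN
+ *		-> XPC_P_SS_TORNDOWN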
+ */ +static void +xpc_teardown_ch_structures(struct xpc_partition *part) +{ + DBUG_ON(atomic_read(&part->nchannels_engaged) != 0); + DBUG_ON(atomic_read(&part->nchannels_active) != 0); + + /* + * Make this partition inaccessible to local processes by marking it + * as no longer setup. Then wait before proceeding with the teardown + * until all existing references cease. + */ + DBUG_ON(part->setup_state != XPC_P_SS_SETUP); + part->setup_state = XPC_P_SS_WTEARDOWN; + + wait_event(part->teardown_wq, (atomic_read(&part->references) == 0)); + + /* now we can begin tearing down the infrastructure */ + + xpc_arch_ops.teardown_ch_structures(part); + + kfree(part->remote_openclose_args_base); + part->remote_openclose_args = NULL; + kfree(part->channels); + part->channels = NULL; + + part->setup_state = XPC_P_SS_TORNDOWN; +} + +/* + * When XPC HB determines that a partition has come up, it will create a new + * kthread and that kthread will call this function to attempt to set up the + * basic infrastructure used for Cross Partition Communication with the newly + * upped partition. + * + * The kthread that was created by XPC HB and which setup the XPC + * infrastructure will remain assigned to the partition becoming the channel + * manager for that partition until the partition is deactivating, at which + * time the kthread will teardown the XPC infrastructure and then exit. + */ +static int +xpc_activating(void *__partid) +{ + short partid = (u64)__partid; + struct xpc_partition *part = &xpc_partitions[partid]; + unsigned long irq_flags; + + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); + + spin_lock_irqsave(&part->act_lock, irq_flags); + + if (part->act_state == XPC_P_AS_DEACTIVATING) { + part->act_state = XPC_P_AS_INACTIVE; + spin_unlock_irqrestore(&part->act_lock, irq_flags); + part->remote_rp_pa = 0; + return 0; + } + + /* indicate the thread is activating */ + DBUG_ON(part->act_state != XPC_P_AS_ACTIVATION_REQ); + part->act_state = XPC_P_AS_ACTIVATING; + + XPC_SET_REASON(part, 0, 0); + spin_unlock_irqrestore(&part->act_lock, irq_flags); + + dev_dbg(xpc_part, "activating partition %d\n", partid); + + xpc_arch_ops.allow_hb(partid); + + if (xpc_setup_ch_structures(part) == xpSuccess) { + (void)xpc_part_ref(part); /* this will always succeed */ + + if (xpc_arch_ops.make_first_contact(part) == xpSuccess) { + xpc_mark_partition_active(part); + xpc_channel_mgr(part); + /* won't return until partition is deactivating */ + } + + xpc_part_deref(part); + xpc_teardown_ch_structures(part); + } + + xpc_arch_ops.disallow_hb(partid); + xpc_mark_partition_inactive(part); + + if (part->reason == xpReactivating) { + /* interrupting ourselves results in activating partition */ + xpc_arch_ops.request_partition_reactivation(part); + } + + return 0; +} + +void +xpc_activate_partition(struct xpc_partition *part) +{ + short partid = XPC_PARTID(part); + unsigned long irq_flags; + struct task_struct *kthread; + + spin_lock_irqsave(&part->act_lock, irq_flags); + + DBUG_ON(part->act_state != XPC_P_AS_INACTIVE); + + part->act_state = XPC_P_AS_ACTIVATION_REQ; + XPC_SET_REASON(part, xpCloneKThread, __LINE__); + + spin_unlock_irqrestore(&part->act_lock, irq_flags); + + kthread = kthread_run(xpc_activating, (void *)((u64)partid), "xpc%02d", + partid); + if (IS_ERR(kthread)) { + spin_lock_irqsave(&part->act_lock, irq_flags); + part->act_state = XPC_P_AS_INACTIVE; + XPC_SET_REASON(part, xpCloneKThreadFailed, __LINE__); + spin_unlock_irqrestore(&part->act_lock, irq_flags); + } +} + +void +xpc_activate_kthreads(struct 
xpc_channel *ch, int needed) +{ + int idle = atomic_read(&ch->kthreads_idle); + int assigned = atomic_read(&ch->kthreads_assigned); + int wakeup; + + DBUG_ON(needed <= 0); + + if (idle > 0) { + wakeup = (needed > idle) ? idle : needed; + needed -= wakeup; + + dev_dbg(xpc_chan, "wakeup %d idle kthreads, partid=%d, " + "channel=%d\n", wakeup, ch->partid, ch->number); + + /* only wakeup the requested number of kthreads */ + wake_up_nr(&ch->idle_wq, wakeup); + } + + if (needed <= 0) + return; + + if (needed + assigned > ch->kthreads_assigned_limit) { + needed = ch->kthreads_assigned_limit - assigned; + if (needed <= 0) + return; + } + + dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n", + needed, ch->partid, ch->number); + + xpc_create_kthreads(ch, needed, 0); +} + +/* + * This function is where XPC's kthreads wait for messages to deliver. + */ +static void +xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch) +{ + int (*n_of_deliverable_payloads) (struct xpc_channel *) = + xpc_arch_ops.n_of_deliverable_payloads; + + do { + /* deliver messages to their intended recipients */ + + while (n_of_deliverable_payloads(ch) > 0 && + !(ch->flags & XPC_C_DISCONNECTING)) { + xpc_deliver_payload(ch); + } + + if (atomic_inc_return(&ch->kthreads_idle) > + ch->kthreads_idle_limit) { + /* too many idle kthreads on this channel */ + atomic_dec(&ch->kthreads_idle); + break; + } + + dev_dbg(xpc_chan, "idle kthread calling " + "wait_event_interruptible_exclusive()\n"); + + (void)wait_event_interruptible_exclusive(ch->idle_wq, + (n_of_deliverable_payloads(ch) > 0 || + (ch->flags & XPC_C_DISCONNECTING))); + + atomic_dec(&ch->kthreads_idle); + + } while (!(ch->flags & XPC_C_DISCONNECTING)); +} + +static int +xpc_kthread_start(void *args) +{ + short partid = XPC_UNPACK_ARG1(args); + u16 ch_number = XPC_UNPACK_ARG2(args); + struct xpc_partition *part = &xpc_partitions[partid]; + struct xpc_channel *ch; + int n_needed; + unsigned long irq_flags; + int (*n_of_deliverable_payloads) (struct xpc_channel *) = + xpc_arch_ops.n_of_deliverable_payloads; + + dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n", + partid, ch_number); + + ch = &part->channels[ch_number]; + + if (!(ch->flags & XPC_C_DISCONNECTING)) { + + /* let registerer know that connection has been established */ + + spin_lock_irqsave(&ch->lock, irq_flags); + if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) { + ch->flags |= XPC_C_CONNECTEDCALLOUT; + spin_unlock_irqrestore(&ch->lock, irq_flags); + + xpc_connected_callout(ch); + + spin_lock_irqsave(&ch->lock, irq_flags); + ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE; + spin_unlock_irqrestore(&ch->lock, irq_flags); + + /* + * It is possible that while the callout was being + * made that the remote partition sent some messages. + * If that is the case, we may need to activate + * additional kthreads to help deliver them. We only + * need one less than total #of messages to deliver. 
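+			 * (This kthread will itself deliver one of those
+			 * payloads once it drops into xpc_kthread_waitmsgs()
+			 * below, hence the minus one.)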
+ */ + n_needed = n_of_deliverable_payloads(ch) - 1; + if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING)) + xpc_activate_kthreads(ch, n_needed); + + } else { + spin_unlock_irqrestore(&ch->lock, irq_flags); + } + + xpc_kthread_waitmsgs(part, ch); + } + + /* let registerer know that connection is disconnecting */ + + spin_lock_irqsave(&ch->lock, irq_flags); + if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && + !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) { + ch->flags |= XPC_C_DISCONNECTINGCALLOUT; + spin_unlock_irqrestore(&ch->lock, irq_flags); + + xpc_disconnect_callout(ch, xpDisconnecting); + + spin_lock_irqsave(&ch->lock, irq_flags); + ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE; + } + spin_unlock_irqrestore(&ch->lock, irq_flags); + + if (atomic_dec_return(&ch->kthreads_assigned) == 0 && + atomic_dec_return(&part->nchannels_engaged) == 0) { + xpc_arch_ops.indicate_partition_disengaged(part); + } + + xpc_msgqueue_deref(ch); + + dev_dbg(xpc_chan, "kthread exiting, partid=%d, channel=%d\n", + partid, ch_number); + + xpc_part_deref(part); + return 0; +} + +/* + * For each partition that XPC has established communications with, there is + * a minimum of one kernel thread assigned to perform any operation that + * may potentially sleep or block (basically the callouts to the asynchronous + * functions registered via xpc_connect()). + * + * Additional kthreads are created and destroyed by XPC as the workload + * demands. + * + * A kthread is assigned to one of the active channels that exists for a given + * partition. + */ +void +xpc_create_kthreads(struct xpc_channel *ch, int needed, + int ignore_disconnecting) +{ + unsigned long irq_flags; + u64 args = XPC_PACK_ARGS(ch->partid, ch->number); + struct xpc_partition *part = &xpc_partitions[ch->partid]; + struct task_struct *kthread; + void (*indicate_partition_disengaged) (struct xpc_partition *) = + xpc_arch_ops.indicate_partition_disengaged; + + while (needed-- > 0) { + + /* + * The following is done on behalf of the newly created + * kthread. That kthread is responsible for doing the + * counterpart to the following before it exits. + */ + if (ignore_disconnecting) { + if (!atomic_inc_not_zero(&ch->kthreads_assigned)) { + /* kthreads assigned had gone to zero */ + BUG_ON(!(ch->flags & + XPC_C_DISCONNECTINGCALLOUT_MADE)); + break; + } + + } else if (ch->flags & XPC_C_DISCONNECTING) { + break; + + } else if (atomic_inc_return(&ch->kthreads_assigned) == 1 && + atomic_inc_return(&part->nchannels_engaged) == 1) { + xpc_arch_ops.indicate_partition_engaged(part); + } + (void)xpc_part_ref(part); + xpc_msgqueue_ref(ch); + + kthread = kthread_run(xpc_kthread_start, (void *)args, + "xpc%02dc%d", ch->partid, ch->number); + if (IS_ERR(kthread)) { + /* the fork failed */ + + /* + * NOTE: if (ignore_disconnecting && + * !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) is true, + * then we'll deadlock if all other kthreads assigned + * to this channel are blocked in the channel's + * registerer, because the only thing that will unblock + * them is the xpDisconnecting callout that this + * failed kthread_run() would have made. + */ + + if (atomic_dec_return(&ch->kthreads_assigned) == 0 && + atomic_dec_return(&part->nchannels_engaged) == 0) { + indicate_partition_disengaged(part); + } + xpc_msgqueue_deref(ch); + xpc_part_deref(part); + + if (atomic_read(&ch->kthreads_assigned) < + ch->kthreads_idle_limit) { + /* + * Flag this as an error only if we have an + * insufficient #of kthreads for the channel + * to function. 
+ */ + spin_lock_irqsave(&ch->lock, irq_flags); + XPC_DISCONNECT_CHANNEL(ch, xpLackOfResources, + &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + } + break; + } + } +} + +void +xpc_disconnect_wait(int ch_number) +{ + unsigned long irq_flags; + short partid; + struct xpc_partition *part; + struct xpc_channel *ch; + int wakeup_channel_mgr; + + /* now wait for all callouts to the caller's function to cease */ + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + if (!xpc_part_ref(part)) + continue; + + ch = &part->channels[ch_number]; + + if (!(ch->flags & XPC_C_WDISCONNECT)) { + xpc_part_deref(part); + continue; + } + + wait_for_completion(&ch->wdisconnect_wait); + + spin_lock_irqsave(&ch->lock, irq_flags); + DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED)); + wakeup_channel_mgr = 0; + + if (ch->delayed_chctl_flags) { + if (part->act_state != XPC_P_AS_DEACTIVATING) { + spin_lock(&part->chctl_lock); + part->chctl.flags[ch->number] |= + ch->delayed_chctl_flags; + spin_unlock(&part->chctl_lock); + wakeup_channel_mgr = 1; + } + ch->delayed_chctl_flags = 0; + } + + ch->flags &= ~XPC_C_WDISCONNECT; + spin_unlock_irqrestore(&ch->lock, irq_flags); + + if (wakeup_channel_mgr) + xpc_wakeup_channel_mgr(part); + + xpc_part_deref(part); + } +} + +static int +xpc_setup_partitions(void) +{ + short partid; + struct xpc_partition *part; + + xpc_partitions = kzalloc(sizeof(struct xpc_partition) * + xp_max_npartitions, GFP_KERNEL); + if (xpc_partitions == NULL) { + dev_err(xpc_part, "can't get memory for partition structure\n"); + return -ENOMEM; + } + + /* + * The first few fields of each entry of xpc_partitions[] need to + * be initialized now so that calls to xpc_connect() and + * xpc_disconnect() can be made prior to the activation of any remote + * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE + * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING + * PARTITION HAS BEEN ACTIVATED. + */ + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part)); + + part->activate_IRQ_rcvd = 0; + spin_lock_init(&part->act_lock); + part->act_state = XPC_P_AS_INACTIVE; + XPC_SET_REASON(part, 0, 0); + + init_timer(&part->disengage_timer); + part->disengage_timer.function = + xpc_timeout_partition_disengage; + part->disengage_timer.data = (unsigned long)part; + + part->setup_state = XPC_P_SS_UNSET; + init_waitqueue_head(&part->teardown_wq); + atomic_set(&part->references, 0); + } + + return xpc_arch_ops.setup_partitions(); +} + +static void +xpc_teardown_partitions(void) +{ + xpc_arch_ops.teardown_partitions(); + kfree(xpc_partitions); +} + +static void +xpc_do_exit(enum xp_retval reason) +{ + short partid; + int active_part_count, printed_waiting_msg = 0; + struct xpc_partition *part; + unsigned long printmsg_time, disengage_timeout = 0; + + /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */ + DBUG_ON(xpc_exiting == 1); + + /* + * Let the heartbeat checker thread and the discovery thread + * (if one is running) know that they should exit. Also wake up + * the heartbeat checker thread in case it's sleeping. 
+ */ + xpc_exiting = 1; + wake_up_interruptible(&xpc_activate_IRQ_wq); + + /* wait for the discovery thread to exit */ + wait_for_completion(&xpc_discovery_exited); + + /* wait for the heartbeat checker thread to exit */ + wait_for_completion(&xpc_hb_checker_exited); + + /* sleep for a 1/3 of a second or so */ + (void)msleep_interruptible(300); + + /* wait for all partitions to become inactive */ + + printmsg_time = jiffies + (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ); + xpc_disengage_timedout = 0; + + do { + active_part_count = 0; + + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + if (xpc_partition_disengaged(part) && + part->act_state == XPC_P_AS_INACTIVE) { + continue; + } + + active_part_count++; + + XPC_DEACTIVATE_PARTITION(part, reason); + + if (part->disengage_timeout > disengage_timeout) + disengage_timeout = part->disengage_timeout; + } + + if (xpc_arch_ops.any_partition_engaged()) { + if (time_is_before_jiffies(printmsg_time)) { + dev_info(xpc_part, "waiting for remote " + "partitions to deactivate, timeout in " + "%ld seconds\n", (disengage_timeout - + jiffies) / HZ); + printmsg_time = jiffies + + (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ); + printed_waiting_msg = 1; + } + + } else if (active_part_count > 0) { + if (printed_waiting_msg) { + dev_info(xpc_part, "waiting for local partition" + " to deactivate\n"); + printed_waiting_msg = 0; + } + + } else { + if (!xpc_disengage_timedout) { + dev_info(xpc_part, "all partitions have " + "deactivated\n"); + } + break; + } + + /* sleep for a 1/3 of a second or so */ + (void)msleep_interruptible(300); + + } while (1); + + DBUG_ON(xpc_arch_ops.any_partition_engaged()); + + xpc_teardown_rsvd_page(); + + if (reason == xpUnloading) { + (void)unregister_die_notifier(&xpc_die_notifier); + (void)unregister_reboot_notifier(&xpc_reboot_notifier); + } + + /* clear the interface to XPC's functions */ + xpc_clear_interface(); + + if (xpc_sysctl) + unregister_sysctl_table(xpc_sysctl); + + xpc_teardown_partitions(); + + if (is_shub()) + xpc_exit_sn2(); + else if (is_uv()) + xpc_exit_uv(); +} + +/* + * This function is called when the system is being rebooted. + */ +static int +xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused) +{ + enum xp_retval reason; + + switch (event) { + case SYS_RESTART: + reason = xpSystemReboot; + break; + case SYS_HALT: + reason = xpSystemHalt; + break; + case SYS_POWER_OFF: + reason = xpSystemPoweroff; + break; + default: + reason = xpSystemGoingDown; + } + + xpc_do_exit(reason); + return NOTIFY_DONE; +} + +/* Used to only allow one cpu to complete disconnect */ +static unsigned int xpc_die_disconnecting; + +/* + * Notify other partitions to deactivate from us by first disengaging from all + * references to our memory. 
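+ *
+ * The polling loop below sleeps roughly 200 microseconds per pass, or
+ * about 5000 passes per second, which is where the "* 1000 * 5"
+ * scaling of the second-based tunables comes from:
+ *
+ *	passes = seconds * (1000000 usec / 200 usec) = seconds * 1000 * 5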
+ */ +static void +xpc_die_deactivate(void) +{ + struct xpc_partition *part; + short partid; + int any_engaged; + long keep_waiting; + long wait_to_print; + + if (cmpxchg(&xpc_die_disconnecting, 0, 1)) + return; + + /* keep xpc_hb_checker thread from doing anything (just in case) */ + xpc_exiting = 1; + + xpc_arch_ops.disallow_all_hbs(); /*indicate we're deactivated */ + + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + if (xpc_arch_ops.partition_engaged(partid) || + part->act_state != XPC_P_AS_INACTIVE) { + xpc_arch_ops.request_partition_deactivation(part); + xpc_arch_ops.indicate_partition_disengaged(part); + } + } + + /* + * Though we requested that all other partitions deactivate from us, + * we only wait until they've all disengaged or we've reached the + * defined timelimit. + * + * Given that one iteration through the following while-loop takes + * approximately 200 microseconds, calculate the #of loops to take + * before bailing and the #of loops before printing a waiting message. + */ + keep_waiting = xpc_disengage_timelimit * 1000 * 5; + wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * 1000 * 5; + + while (1) { + any_engaged = xpc_arch_ops.any_partition_engaged(); + if (!any_engaged) { + dev_info(xpc_part, "all partitions have deactivated\n"); + break; + } + + if (!keep_waiting--) { + for (partid = 0; partid < xp_max_npartitions; + partid++) { + if (xpc_arch_ops.partition_engaged(partid)) { + dev_info(xpc_part, "deactivate from " + "remote partition %d timed " + "out\n", partid); + } + } + break; + } + + if (!wait_to_print--) { + dev_info(xpc_part, "waiting for remote partitions to " + "deactivate, timeout in %ld seconds\n", + keep_waiting / (1000 * 5)); + wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * + 1000 * 5; + } + + udelay(200); + } +} + +/* + * This function is called when the system is being restarted or halted due + * to some sort of system failure. If this is the case we need to notify the + * other partitions to disengage from all references to our memory. + * This function can also be called when our heartbeater could be offlined + * for a time. In this case we need to notify other partitions to not worry + * about the lack of a heartbeat. + */ +static int +xpc_system_die(struct notifier_block *nb, unsigned long event, void *_die_args) +{ +#ifdef CONFIG_IA64 /* !!! temporary kludge */ + switch (event) { + case DIE_MACHINE_RESTART: + case DIE_MACHINE_HALT: + xpc_die_deactivate(); + break; + + case DIE_KDEBUG_ENTER: + /* Should lack of heartbeat be ignored by other partitions? */ + if (!xpc_kdebug_ignore) + break; + + /* fall through */ + case DIE_MCA_MONARCH_ENTER: + case DIE_INIT_MONARCH_ENTER: + xpc_arch_ops.offline_heartbeat(); + break; + + case DIE_KDEBUG_LEAVE: + /* Is lack of heartbeat being ignored by other partitions? 
*/ + if (!xpc_kdebug_ignore) + break; + + /* fall through */ + case DIE_MCA_MONARCH_LEAVE: + case DIE_INIT_MONARCH_LEAVE: + xpc_arch_ops.online_heartbeat(); + break; + } +#else + struct die_args *die_args = _die_args; + + switch (event) { + case DIE_TRAP: + if (die_args->trapnr == X86_TRAP_DF) + xpc_die_deactivate(); + + if (((die_args->trapnr == X86_TRAP_MF) || + (die_args->trapnr == X86_TRAP_XF)) && + !user_mode(die_args->regs)) + xpc_die_deactivate(); + + break; + case DIE_INT3: + case DIE_DEBUG: + break; + case DIE_OOPS: + case DIE_GPF: + default: + xpc_die_deactivate(); + } +#endif + + return NOTIFY_DONE; +} + +int __init +xpc_init(void) +{ + int ret; + struct task_struct *kthread; + + dev_set_name(xpc_part, "part"); + dev_set_name(xpc_chan, "chan"); + + if (is_shub()) { + /* + * The ia64-sn2 architecture supports at most 64 partitions. + * And the inability to unregister remote amos restricts us + * further to only support exactly 64 partitions on this + * architecture, no less. + */ + if (xp_max_npartitions != 64) { + dev_err(xpc_part, "max #of partitions not set to 64\n"); + ret = -EINVAL; + } else { + ret = xpc_init_sn2(); + } + + } else if (is_uv()) { + ret = xpc_init_uv(); + + } else { + ret = -ENODEV; + } + + if (ret != 0) + return ret; + + ret = xpc_setup_partitions(); + if (ret != 0) { + dev_err(xpc_part, "can't get memory for partition structure\n"); + goto out_1; + } + + xpc_sysctl = register_sysctl_table(xpc_sys_dir); + + /* + * Fill the partition reserved page with the information needed by + * other partitions to discover we are alive and establish initial + * communications. + */ + ret = xpc_setup_rsvd_page(); + if (ret != 0) { + dev_err(xpc_part, "can't setup our reserved page\n"); + goto out_2; + } + + /* add ourselves to the reboot_notifier_list */ + ret = register_reboot_notifier(&xpc_reboot_notifier); + if (ret != 0) + dev_warn(xpc_part, "can't register reboot notifier\n"); + + /* add ourselves to the die_notifier list */ + ret = register_die_notifier(&xpc_die_notifier); + if (ret != 0) + dev_warn(xpc_part, "can't register die notifier\n"); + + /* + * The real work-horse behind xpc. This processes incoming + * interrupts and monitors remote heartbeats. + */ + kthread = kthread_run(xpc_hb_checker, NULL, XPC_HB_CHECK_THREAD_NAME); + if (IS_ERR(kthread)) { + dev_err(xpc_part, "failed while forking hb check thread\n"); + ret = -EBUSY; + goto out_3; + } + + /* + * Startup a thread that will attempt to discover other partitions to + * activate based on info provided by SAL. This new thread is short + * lived and will exit once discovery is complete. 
+ */ + kthread = kthread_run(xpc_initiate_discovery, NULL, + XPC_DISCOVERY_THREAD_NAME); + if (IS_ERR(kthread)) { + dev_err(xpc_part, "failed while forking discovery thread\n"); + + /* mark this new thread as a non-starter */ + complete(&xpc_discovery_exited); + + xpc_do_exit(xpUnloading); + return -EBUSY; + } + + /* set the interface to point at XPC's functions */ + xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect, + xpc_initiate_send, xpc_initiate_send_notify, + xpc_initiate_received, xpc_initiate_partid_to_nasids); + + return 0; + + /* initialization was not successful */ +out_3: + xpc_teardown_rsvd_page(); + + (void)unregister_die_notifier(&xpc_die_notifier); + (void)unregister_reboot_notifier(&xpc_reboot_notifier); +out_2: + if (xpc_sysctl) + unregister_sysctl_table(xpc_sysctl); + + xpc_teardown_partitions(); +out_1: + if (is_shub()) + xpc_exit_sn2(); + else if (is_uv()) + xpc_exit_uv(); + return ret; +} + +module_init(xpc_init); + +void __exit +xpc_exit(void) +{ + xpc_do_exit(xpUnloading); +} + +module_exit(xpc_exit); + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_DESCRIPTION("Cross Partition Communication (XPC) support"); +MODULE_LICENSE("GPL"); + +module_param(xpc_hb_interval, int, 0); +MODULE_PARM_DESC(xpc_hb_interval, "Number of seconds between " + "heartbeat increments."); + +module_param(xpc_hb_check_interval, int, 0); +MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between " + "heartbeat checks."); + +module_param(xpc_disengage_timelimit, int, 0); +MODULE_PARM_DESC(xpc_disengage_timelimit, "Number of seconds to wait " + "for disengage to complete."); + +module_param(xpc_kdebug_ignore, int, 0); +MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by " + "other partitions when dropping into kdebug."); diff --git a/kernel/drivers/misc/sgi-xp/xpc_partition.c b/kernel/drivers/misc/sgi-xp/xpc_partition.c new file mode 100644 index 000000000..6956f7e7d --- /dev/null +++ b/kernel/drivers/misc/sgi-xp/xpc_partition.c @@ -0,0 +1,541 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) partition support. + * + * This is the part of XPC that detects the presence/absence of + * other partitions. It provides a heartbeat and monitors the + * heartbeats of other partitions. + * + */ + +#include <linux/device.h> +#include <linux/hardirq.h> +#include <linux/slab.h> +#include "xpc.h" +#include <asm/uv/uv_hub.h> + +/* XPC is exiting flag */ +int xpc_exiting; + +/* this partition's reserved page pointers */ +struct xpc_rsvd_page *xpc_rsvd_page; +static unsigned long *xpc_part_nasids; +unsigned long *xpc_mach_nasids; + +static int xpc_nasid_mask_nbytes; /* #of bytes in nasid mask */ +int xpc_nasid_mask_nlongs; /* #of longs in nasid mask */ + +struct xpc_partition *xpc_partitions; + +/* + * Guarantee that the kmalloc'd memory is cacheline aligned. 
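+ *
+ * Callers must keep the raw pointer returned through @base and pass
+ * that, not the aligned pointer, to kfree(). A minimal usage sketch
+ * (buf and buf_base being the caller's own locals):
+ *
+ *	void *buf_base;
+ *	void *buf = xpc_kmalloc_cacheline_aligned(len, GFP_KERNEL,
+ *						  &buf_base);
+ *	if (buf == NULL)
+ *		return xpNoMemory;
+ *	...
+ *	kfree(buf_base);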
+ */
+void *
+xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
+{
+	/* see if kmalloc will give us cacheline aligned memory by default */
+	*base = kmalloc(size, flags);
+	if (*base == NULL)
+		return NULL;
+
+	if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
+		return *base;
+
+	kfree(*base);
+
+	/* nope, we'll have to do it ourselves */
+	*base = kmalloc(size + L1_CACHE_BYTES, flags);
+	if (*base == NULL)
+		return NULL;
+
+	return (void *)L1_CACHE_ALIGN((u64)*base);
+}
+
+/*
+ * Given a nasid, get the physical address of the partition's reserved page
+ * for that nasid. This function returns 0 on any error.
+ */
+static unsigned long
+xpc_get_rsvd_page_pa(int nasid)
+{
+	enum xp_retval ret;
+	u64 cookie = 0;
+	unsigned long rp_pa = nasid;	/* seed with nasid */
+	size_t len = 0;
+	size_t buf_len = 0;
+	void *buf = buf;	/* self-assignment quiets a bogus "uninitialized" warning */
+	void *buf_base = NULL;
+	enum xp_retval (*get_partition_rsvd_page_pa)
+		(void *, u64 *, unsigned long *, size_t *) =
+		xpc_arch_ops.get_partition_rsvd_page_pa;
+
+	while (1) {
+
+		/* !!! rp_pa will need to be _gpa on UV.
+		 * ??? So do we save it into the architecture specific parts
+		 * ??? of the xpc_partition structure? Do we rename this
+		 * ??? function or have two versions? Rename rp_pa for UV to
+		 * ??? rp_gpa?
+		 */
+		ret = get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, &len);
+
+		dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, "
+			"address=0x%016lx, len=0x%016lx\n", ret,
+			(unsigned long)cookie, rp_pa, len);
+
+		if (ret != xpNeedMoreInfo)
+			break;
+
+		/* !!! L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */
+		if (is_shub())
+			len = L1_CACHE_ALIGN(len);
+
+		if (len > buf_len) {
+			if (buf_base != NULL)
+				kfree(buf_base);
+			buf_len = L1_CACHE_ALIGN(len);
+			buf = xpc_kmalloc_cacheline_aligned(buf_len, GFP_KERNEL,
+							    &buf_base);
+			if (buf_base == NULL) {
+				dev_err(xpc_part, "unable to kmalloc "
+					"len=0x%016lx\n", buf_len);
+				ret = xpNoMemory;
+				break;
+			}
+		}
+
+		ret = xp_remote_memcpy(xp_pa(buf), rp_pa, len);
+		if (ret != xpSuccess) {
+			dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret);
+			break;
+		}
+	}
+
+	kfree(buf_base);
+
+	if (ret != xpSuccess)
+		rp_pa = 0;
+
+	dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
+	return rp_pa;
+}
+
+/*
+ * Fill the partition reserved page with the information needed by
+ * other partitions to discover we are alive and establish initial
+ * communications.
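+ *
+ * A reserved-page timestamp (ts_jiffies) of zero means "not
+ * initialized"; it is what xpc_teardown_rsvd_page() stores on the way
+ * down and what remote partitions test for in xpc_get_remote_rp(),
+ * which is why the code below guarantees the new timestamp is nonzero
+ * and different from the previous one.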
+ */ +int +xpc_setup_rsvd_page(void) +{ + int ret; + struct xpc_rsvd_page *rp; + unsigned long rp_pa; + unsigned long new_ts_jiffies; + + /* get the local reserved page's address */ + + preempt_disable(); + rp_pa = xpc_get_rsvd_page_pa(xp_cpu_to_nasid(smp_processor_id())); + preempt_enable(); + if (rp_pa == 0) { + dev_err(xpc_part, "SAL failed to locate the reserved page\n"); + return -ESRCH; + } + rp = (struct xpc_rsvd_page *)__va(xp_socket_pa(rp_pa)); + + if (rp->SAL_version < 3) { + /* SAL_versions < 3 had a SAL_partid defined as a u8 */ + rp->SAL_partid &= 0xff; + } + BUG_ON(rp->SAL_partid != xp_partition_id); + + if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) { + dev_err(xpc_part, "the reserved page's partid of %d is outside " + "supported range (< 0 || >= %d)\n", rp->SAL_partid, + xp_max_npartitions); + return -EINVAL; + } + + rp->version = XPC_RP_VERSION; + rp->max_npartitions = xp_max_npartitions; + + /* establish the actual sizes of the nasid masks */ + if (rp->SAL_version == 1) { + /* SAL_version 1 didn't set the nasids_size field */ + rp->SAL_nasids_size = 128; + } + xpc_nasid_mask_nbytes = rp->SAL_nasids_size; + xpc_nasid_mask_nlongs = BITS_TO_LONGS(rp->SAL_nasids_size * + BITS_PER_BYTE); + + /* setup the pointers to the various items in the reserved page */ + xpc_part_nasids = XPC_RP_PART_NASIDS(rp); + xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp); + + ret = xpc_arch_ops.setup_rsvd_page(rp); + if (ret != 0) + return ret; + + /* + * Set timestamp of when reserved page was setup by XPC. + * This signifies to the remote partition that our reserved + * page is initialized. + */ + new_ts_jiffies = jiffies; + if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies) + new_ts_jiffies++; + rp->ts_jiffies = new_ts_jiffies; + + xpc_rsvd_page = rp; + return 0; +} + +void +xpc_teardown_rsvd_page(void) +{ + /* a zero timestamp indicates our rsvd page is not initialized */ + xpc_rsvd_page->ts_jiffies = 0; +} + +/* + * Get a copy of a portion of the remote partition's rsvd page. + * + * remote_rp points to a buffer that is cacheline aligned for BTE copies and + * is large enough to contain a copy of their reserved page header and + * part_nasids mask. 
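+ *
+ * A caller typically sizes and allocates that buffer the way
+ * xpc_discovery() below does (sketch):
+ *
+ *	remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
+ *						  xpc_nasid_mask_nbytes,
+ *						  GFP_KERNEL, &remote_rp_base);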
+ */ +enum xp_retval +xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids, + struct xpc_rsvd_page *remote_rp, unsigned long *remote_rp_pa) +{ + int l; + enum xp_retval ret; + + /* get the reserved page's physical address */ + + *remote_rp_pa = xpc_get_rsvd_page_pa(nasid); + if (*remote_rp_pa == 0) + return xpNoRsvdPageAddr; + + /* pull over the reserved page header and part_nasids mask */ + ret = xp_remote_memcpy(xp_pa(remote_rp), *remote_rp_pa, + XPC_RP_HEADER_SIZE + xpc_nasid_mask_nbytes); + if (ret != xpSuccess) + return ret; + + if (discovered_nasids != NULL) { + unsigned long *remote_part_nasids = + XPC_RP_PART_NASIDS(remote_rp); + + for (l = 0; l < xpc_nasid_mask_nlongs; l++) + discovered_nasids[l] |= remote_part_nasids[l]; + } + + /* zero timestamp indicates the reserved page has not been setup */ + if (remote_rp->ts_jiffies == 0) + return xpRsvdPageNotSet; + + if (XPC_VERSION_MAJOR(remote_rp->version) != + XPC_VERSION_MAJOR(XPC_RP_VERSION)) { + return xpBadVersion; + } + + /* check that both remote and local partids are valid for each side */ + if (remote_rp->SAL_partid < 0 || + remote_rp->SAL_partid >= xp_max_npartitions || + remote_rp->max_npartitions <= xp_partition_id) { + return xpInvalidPartid; + } + + if (remote_rp->SAL_partid == xp_partition_id) + return xpLocalPartid; + + return xpSuccess; +} + +/* + * See if the other side has responded to a partition deactivate request + * from us. Though we requested the remote partition to deactivate with regard + * to us, we really only need to wait for the other side to disengage from us. + */ +int +xpc_partition_disengaged(struct xpc_partition *part) +{ + short partid = XPC_PARTID(part); + int disengaged; + + disengaged = !xpc_arch_ops.partition_engaged(partid); + if (part->disengage_timeout) { + if (!disengaged) { + if (time_is_after_jiffies(part->disengage_timeout)) { + /* timelimit hasn't been reached yet */ + return 0; + } + + /* + * Other side hasn't responded to our deactivate + * request in a timely fashion, so assume it's dead. + */ + + dev_info(xpc_part, "deactivate request to remote " + "partition %d timed out\n", partid); + xpc_disengage_timedout = 1; + xpc_arch_ops.assume_partition_disengaged(partid); + disengaged = 1; + } + part->disengage_timeout = 0; + + /* cancel the timer function, provided it's not us */ + if (!in_interrupt()) + del_singleshot_timer_sync(&part->disengage_timer); + + DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING && + part->act_state != XPC_P_AS_INACTIVE); + if (part->act_state != XPC_P_AS_INACTIVE) + xpc_wakeup_channel_mgr(part); + + xpc_arch_ops.cancel_partition_deactivation_request(part); + } + return disengaged; +} + +/* + * Mark specified partition as active. + */ +enum xp_retval +xpc_mark_partition_active(struct xpc_partition *part) +{ + unsigned long irq_flags; + enum xp_retval ret; + + dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part)); + + spin_lock_irqsave(&part->act_lock, irq_flags); + if (part->act_state == XPC_P_AS_ACTIVATING) { + part->act_state = XPC_P_AS_ACTIVE; + ret = xpSuccess; + } else { + DBUG_ON(part->reason == xpSuccess); + ret = part->reason; + } + spin_unlock_irqrestore(&part->act_lock, irq_flags); + + return ret; +} + +/* + * Start the process of deactivating the specified partition. 
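+ *
+ * Callers normally go through the XPC_DEACTIVATE_PARTITION() macro used
+ * throughout this file, which records the call site; its xpc.h
+ * definition is, in sketch form:
+ *
+ *	#define XPC_DEACTIVATE_PARTITION(_p, _reason) \
+ *		xpc_deactivate_partition(__LINE__, (_p), (_reason))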
+ */ +void +xpc_deactivate_partition(const int line, struct xpc_partition *part, + enum xp_retval reason) +{ + unsigned long irq_flags; + + spin_lock_irqsave(&part->act_lock, irq_flags); + + if (part->act_state == XPC_P_AS_INACTIVE) { + XPC_SET_REASON(part, reason, line); + spin_unlock_irqrestore(&part->act_lock, irq_flags); + if (reason == xpReactivating) { + /* we interrupt ourselves to reactivate partition */ + xpc_arch_ops.request_partition_reactivation(part); + } + return; + } + if (part->act_state == XPC_P_AS_DEACTIVATING) { + if ((part->reason == xpUnloading && reason != xpUnloading) || + reason == xpReactivating) { + XPC_SET_REASON(part, reason, line); + } + spin_unlock_irqrestore(&part->act_lock, irq_flags); + return; + } + + part->act_state = XPC_P_AS_DEACTIVATING; + XPC_SET_REASON(part, reason, line); + + spin_unlock_irqrestore(&part->act_lock, irq_flags); + + /* ask remote partition to deactivate with regard to us */ + xpc_arch_ops.request_partition_deactivation(part); + + /* set a timelimit on the disengage phase of the deactivation request */ + part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ); + part->disengage_timer.expires = part->disengage_timeout; + add_timer(&part->disengage_timer); + + dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", + XPC_PARTID(part), reason); + + xpc_partition_going_down(part, reason); +} + +/* + * Mark specified partition as inactive. + */ +void +xpc_mark_partition_inactive(struct xpc_partition *part) +{ + unsigned long irq_flags; + + dev_dbg(xpc_part, "setting partition %d to INACTIVE\n", + XPC_PARTID(part)); + + spin_lock_irqsave(&part->act_lock, irq_flags); + part->act_state = XPC_P_AS_INACTIVE; + spin_unlock_irqrestore(&part->act_lock, irq_flags); + part->remote_rp_pa = 0; +} + +/* + * SAL has provided a partition and machine mask. The partition mask + * contains a bit for each even nasid in our partition. The machine + * mask contains a bit for each even nasid in the entire machine. + * + * Using those two bit arrays, we can determine which nasids are + * known in the machine. Each should also have a reserved page + * initialized if they are available for partitioning. + */ +void +xpc_discovery(void) +{ + void *remote_rp_base; + struct xpc_rsvd_page *remote_rp; + unsigned long remote_rp_pa; + int region; + int region_size; + int max_regions; + int nasid; + struct xpc_rsvd_page *rp; + unsigned long *discovered_nasids; + enum xp_retval ret; + + remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE + + xpc_nasid_mask_nbytes, + GFP_KERNEL, &remote_rp_base); + if (remote_rp == NULL) + return; + + discovered_nasids = kzalloc(sizeof(long) * xpc_nasid_mask_nlongs, + GFP_KERNEL); + if (discovered_nasids == NULL) { + kfree(remote_rp_base); + return; + } + + rp = (struct xpc_rsvd_page *)xpc_rsvd_page; + + /* + * The term 'region' in this context refers to the minimum number of + * nodes that can comprise an access protection grouping. The access + * protection is in regards to memory, IOI and IPI. 
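+	 *
+	 * Only the even nasids are C-brick nasids, so the scan below steps
+	 * by two: region r covers nasids r * region_size * 2 up to, but not
+	 * including, (r + 1) * region_size * 2. With region_size 16, for
+	 * example, region 3 scans the even nasids 96, 98, ..., 126.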
+ */ + region_size = xp_region_size; + + if (is_uv()) + max_regions = 256; + else { + max_regions = 64; + + switch (region_size) { + case 128: + max_regions *= 2; + case 64: + max_regions *= 2; + case 32: + max_regions *= 2; + region_size = 16; + DBUG_ON(!is_shub2()); + } + } + + for (region = 0; region < max_regions; region++) { + + if (xpc_exiting) + break; + + dev_dbg(xpc_part, "searching region %d\n", region); + + for (nasid = (region * region_size * 2); + nasid < ((region + 1) * region_size * 2); nasid += 2) { + + if (xpc_exiting) + break; + + dev_dbg(xpc_part, "checking nasid %d\n", nasid); + + if (test_bit(nasid / 2, xpc_part_nasids)) { + dev_dbg(xpc_part, "PROM indicates Nasid %d is " + "part of the local partition; skipping " + "region\n", nasid); + break; + } + + if (!(test_bit(nasid / 2, xpc_mach_nasids))) { + dev_dbg(xpc_part, "PROM indicates Nasid %d was " + "not on Numa-Link network at reset\n", + nasid); + continue; + } + + if (test_bit(nasid / 2, discovered_nasids)) { + dev_dbg(xpc_part, "Nasid %d is part of a " + "partition which was previously " + "discovered\n", nasid); + continue; + } + + /* pull over the rsvd page header & part_nasids mask */ + + ret = xpc_get_remote_rp(nasid, discovered_nasids, + remote_rp, &remote_rp_pa); + if (ret != xpSuccess) { + dev_dbg(xpc_part, "unable to get reserved page " + "from nasid %d, reason=%d\n", nasid, + ret); + + if (ret == xpLocalPartid) + break; + + continue; + } + + xpc_arch_ops.request_partition_activation(remote_rp, + remote_rp_pa, nasid); + } + } + + kfree(discovered_nasids); + kfree(remote_rp_base); +} + +/* + * Given a partid, get the nasids owned by that partition from the + * remote partition's reserved page. + */ +enum xp_retval +xpc_initiate_partid_to_nasids(short partid, void *nasid_mask) +{ + struct xpc_partition *part; + unsigned long part_nasid_pa; + + part = &xpc_partitions[partid]; + if (part->remote_rp_pa == 0) + return xpPartitionDown; + + memset(nasid_mask, 0, xpc_nasid_mask_nbytes); + + part_nasid_pa = (unsigned long)XPC_RP_PART_NASIDS(part->remote_rp_pa); + + return xp_remote_memcpy(xp_pa(nasid_mask), part_nasid_pa, + xpc_nasid_mask_nbytes); +} diff --git a/kernel/drivers/misc/sgi-xp/xpc_sn2.c b/kernel/drivers/misc/sgi-xp/xpc_sn2.c new file mode 100644 index 000000000..7d71c04fc --- /dev/null +++ b/kernel/drivers/misc/sgi-xp/xpc_sn2.c @@ -0,0 +1,2462 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) sn2-based functions. + * + * Architecture specific implementation of common functions. + * + */ + +#include <linux/delay.h> +#include <linux/slab.h> +#include <asm/uncached.h> +#include <asm/sn/mspec.h> +#include <asm/sn/sn_sal.h> +#include "xpc.h" + +/* + * Define the number of u64s required to represent all the C-brick nasids + * as a bitmap. The cross-partition kernel modules deal only with + * C-brick nasids, thus the need for bitmaps which don't account for + * odd-numbered (non C-brick) nasids. + */ +#define XPC_MAX_PHYSNODES_SN2 (MAX_NUMALINK_NODES / 2) +#define XP_NASID_MASK_BYTES_SN2 ((XPC_MAX_PHYSNODES_SN2 + 7) / 8) +#define XP_NASID_MASK_WORDS_SN2 ((XPC_MAX_PHYSNODES_SN2 + 63) / 64) + +/* + * Memory for XPC's amo variables is allocated by the MSPEC driver. These + * pages are located in the lowest granule. 
The lowest granule uses 4k pages + * for cached references and an alternate TLB handler to never provide a + * cacheable mapping for the entire region. This will prevent speculative + * reading of cached copies of our lines from being issued which will cause + * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64 + * amo variables (based on XP_MAX_NPARTITIONS_SN2) to identify the senders of + * NOTIFY IRQs, 128 amo variables (based on XP_NASID_MASK_WORDS_SN2) to identify + * the senders of ACTIVATE IRQs, 1 amo variable to identify which remote + * partitions (i.e., XPCs) consider themselves currently engaged with the + * local XPC and 1 amo variable to request partition deactivation. + */ +#define XPC_NOTIFY_IRQ_AMOS_SN2 0 +#define XPC_ACTIVATE_IRQ_AMOS_SN2 (XPC_NOTIFY_IRQ_AMOS_SN2 + \ + XP_MAX_NPARTITIONS_SN2) +#define XPC_ENGAGED_PARTITIONS_AMO_SN2 (XPC_ACTIVATE_IRQ_AMOS_SN2 + \ + XP_NASID_MASK_WORDS_SN2) +#define XPC_DEACTIVATE_REQUEST_AMO_SN2 (XPC_ENGAGED_PARTITIONS_AMO_SN2 + 1) + +/* + * Buffer used to store a local copy of portions of a remote partition's + * reserved page (either its header and part_nasids mask, or its vars). + */ +static void *xpc_remote_copy_buffer_base_sn2; +static char *xpc_remote_copy_buffer_sn2; + +static struct xpc_vars_sn2 *xpc_vars_sn2; +static struct xpc_vars_part_sn2 *xpc_vars_part_sn2; + +static int +xpc_setup_partitions_sn2(void) +{ + /* nothing needs to be done */ + return 0; +} + +static void +xpc_teardown_partitions_sn2(void) +{ + /* nothing needs to be done */ +} + +/* SH_IPI_ACCESS shub register value on startup */ +static u64 xpc_sh1_IPI_access_sn2; +static u64 xpc_sh2_IPI_access0_sn2; +static u64 xpc_sh2_IPI_access1_sn2; +static u64 xpc_sh2_IPI_access2_sn2; +static u64 xpc_sh2_IPI_access3_sn2; + +/* + * Change protections to allow IPI operations. + */ +static void +xpc_allow_IPI_ops_sn2(void) +{ + int node; + int nasid; + + /* !!! The following should get moved into SAL. */ + if (is_shub2()) { + xpc_sh2_IPI_access0_sn2 = + (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0)); + xpc_sh2_IPI_access1_sn2 = + (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1)); + xpc_sh2_IPI_access2_sn2 = + (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2)); + xpc_sh2_IPI_access3_sn2 = + (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3)); + + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), + -1UL); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), + -1UL); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), + -1UL); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), + -1UL); + } + } else { + xpc_sh1_IPI_access_sn2 = + (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS)); + + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), + -1UL); + } + } +} + +/* + * Restrict protections to disallow IPI operations. + */ +static void +xpc_disallow_IPI_ops_sn2(void) +{ + int node; + int nasid; + + /* !!! The following should get moved into SAL. 
*/ + if (is_shub2()) { + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), + xpc_sh2_IPI_access0_sn2); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), + xpc_sh2_IPI_access1_sn2); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), + xpc_sh2_IPI_access2_sn2); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), + xpc_sh2_IPI_access3_sn2); + } + } else { + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), + xpc_sh1_IPI_access_sn2); + } + } +} + +/* + * The following set of functions are used for the sending and receiving of + * IRQs (also known as IPIs). There are two flavors of IRQs, one that is + * associated with partition activity (SGI_XPC_ACTIVATE) and the other that + * is associated with channel activity (SGI_XPC_NOTIFY). + */ + +static u64 +xpc_receive_IRQ_amo_sn2(struct amo *amo) +{ + return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_CLEAR); +} + +static enum xp_retval +xpc_send_IRQ_sn2(struct amo *amo, u64 flag, int nasid, int phys_cpuid, + int vector) +{ + int ret = 0; + unsigned long irq_flags; + + local_irq_save(irq_flags); + + FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, flag); + sn_send_IPI_phys(nasid, phys_cpuid, vector, 0); + + /* + * We must always use the nofault function regardless of whether we + * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we + * didn't, we'd never know that the other partition is down and would + * keep sending IRQs and amos to it until the heartbeat times out. + */ + ret = xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->variable), + xp_nofault_PIOR_target)); + + local_irq_restore(irq_flags); + + return (ret == 0) ? xpSuccess : xpPioReadError; +} + +static struct amo * +xpc_init_IRQ_amo_sn2(int index) +{ + struct amo *amo = xpc_vars_sn2->amos_page + index; + + (void)xpc_receive_IRQ_amo_sn2(amo); /* clear amo variable */ + return amo; +} + +/* + * Functions associated with SGI_XPC_ACTIVATE IRQ. + */ + +/* + * Notify the heartbeat check thread that an activate IRQ has been received. + */ +static irqreturn_t +xpc_handle_activate_IRQ_sn2(int irq, void *dev_id) +{ + unsigned long irq_flags; + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + xpc_activate_IRQ_rcvd++; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + wake_up_interruptible(&xpc_activate_IRQ_wq); + return IRQ_HANDLED; +} + +/* + * Flag the appropriate amo variable and send an IRQ to the specified node. 
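+ *
+ * The sender identifies itself by nasid: amo word BIT_WORD(from_nasid / 2)
+ * gets bit BIT_MASK(from_nasid / 2) set, which the receiving partition
+ * reads back after the IPI arrives to learn where to pull data from.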
+ */ +static void +xpc_send_activate_IRQ_sn2(unsigned long amos_page_pa, int from_nasid, + int to_nasid, int to_phys_cpuid) +{ + struct amo *amos = (struct amo *)__va(amos_page_pa + + (XPC_ACTIVATE_IRQ_AMOS_SN2 * + sizeof(struct amo))); + + (void)xpc_send_IRQ_sn2(&amos[BIT_WORD(from_nasid / 2)], + BIT_MASK(from_nasid / 2), to_nasid, + to_phys_cpuid, SGI_XPC_ACTIVATE); +} + +static void +xpc_send_local_activate_IRQ_sn2(int from_nasid) +{ + unsigned long irq_flags; + struct amo *amos = (struct amo *)__va(xpc_vars_sn2->amos_page_pa + + (XPC_ACTIVATE_IRQ_AMOS_SN2 * + sizeof(struct amo))); + + /* fake the sending and receipt of an activate IRQ from remote nasid */ + FETCHOP_STORE_OP(TO_AMO((u64)&amos[BIT_WORD(from_nasid / 2)].variable), + FETCHOP_OR, BIT_MASK(from_nasid / 2)); + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + xpc_activate_IRQ_rcvd++; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + wake_up_interruptible(&xpc_activate_IRQ_wq); +} + +/* + * Functions associated with SGI_XPC_NOTIFY IRQ. + */ + +/* + * Check to see if any chctl flags were sent from the specified partition. + */ +static void +xpc_check_for_sent_chctl_flags_sn2(struct xpc_partition *part) +{ + union xpc_channel_ctl_flags chctl; + unsigned long irq_flags; + + chctl.all_flags = xpc_receive_IRQ_amo_sn2(part->sn.sn2. + local_chctl_amo_va); + if (chctl.all_flags == 0) + return; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.all_flags |= chctl.all_flags; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + dev_dbg(xpc_chan, "received notify IRQ from partid=%d, chctl.all_flags=" + "0x%llx\n", XPC_PARTID(part), chctl.all_flags); + + xpc_wakeup_channel_mgr(part); +} + +/* + * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified + * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more + * than one partition, we use an amo structure per partition to indicate + * whether a partition has sent an IRQ or not. If it has, then wake up the + * associated kthread to handle it. + * + * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IRQs sent by XPC + * running on other partitions. + * + * Noteworthy Arguments: + * + * irq - Interrupt ReQuest number. NOT USED. + * + * dev_id - partid of IRQ's potential sender. + */ +static irqreturn_t +xpc_handle_notify_IRQ_sn2(int irq, void *dev_id) +{ + short partid = (short)(u64)dev_id; + struct xpc_partition *part = &xpc_partitions[partid]; + + DBUG_ON(partid < 0 || partid >= XP_MAX_NPARTITIONS_SN2); + + if (xpc_part_ref(part)) { + xpc_check_for_sent_chctl_flags_sn2(part); + + xpc_part_deref(part); + } + return IRQ_HANDLED; +} + +/* + * Check to see if xpc_handle_notify_IRQ_sn2() dropped any IRQs on the floor + * because the write to their associated amo variable completed after the IRQ + * was received. + */ +static void +xpc_check_for_dropped_notify_IRQ_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + + if (xpc_part_ref(part)) { + xpc_check_for_sent_chctl_flags_sn2(part); + + part_sn2->dropped_notify_IRQ_timer.expires = jiffies + + XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL; + add_timer(&part_sn2->dropped_notify_IRQ_timer); + xpc_part_deref(part); + } +} + +/* + * Send a notify IRQ to the remote partition that is associated with the + * specified channel. 
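+ *
+ * The channel's flag is OR'd into the remote partition's chctl amo and an
+ * IRQ is raised; a failed (nofault) PIO read afterwards means the remote
+ * side is unreachable, in which case the partition is deactivated.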
+ */ +static void +xpc_send_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag, + char *chctl_flag_string, unsigned long *irq_flags) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + union xpc_channel_ctl_flags chctl = { 0 }; + enum xp_retval ret; + + if (likely(part->act_state != XPC_P_AS_DEACTIVATING)) { + chctl.flags[ch->number] = chctl_flag; + ret = xpc_send_IRQ_sn2(part_sn2->remote_chctl_amo_va, + chctl.all_flags, + part_sn2->notify_IRQ_nasid, + part_sn2->notify_IRQ_phys_cpuid, + SGI_XPC_NOTIFY); + dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n", + chctl_flag_string, ch->partid, ch->number, ret); + if (unlikely(ret != xpSuccess)) { + if (irq_flags != NULL) + spin_unlock_irqrestore(&ch->lock, *irq_flags); + XPC_DEACTIVATE_PARTITION(part, ret); + if (irq_flags != NULL) + spin_lock_irqsave(&ch->lock, *irq_flags); + } + } +} + +#define XPC_SEND_NOTIFY_IRQ_SN2(_ch, _ipi_f, _irq_f) \ + xpc_send_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f, _irq_f) + +/* + * Make it look like the remote partition, which is associated with the + * specified channel, sent us a notify IRQ. This faked IRQ will be handled + * by xpc_check_for_dropped_notify_IRQ_sn2(). + */ +static void +xpc_send_local_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag, + char *chctl_flag_string) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + union xpc_channel_ctl_flags chctl = { 0 }; + + chctl.flags[ch->number] = chctl_flag; + FETCHOP_STORE_OP(TO_AMO((u64)&part->sn.sn2.local_chctl_amo_va-> + variable), FETCHOP_OR, chctl.all_flags); + dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n", + chctl_flag_string, ch->partid, ch->number); +} + +#define XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(_ch, _ipi_f) \ + xpc_send_local_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f) + +static void +xpc_send_chctl_closerequest_sn2(struct xpc_channel *ch, + unsigned long *irq_flags) +{ + struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args; + + args->reason = ch->reason; + XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREQUEST, irq_flags); +} + +static void +xpc_send_chctl_closereply_sn2(struct xpc_channel *ch, unsigned long *irq_flags) +{ + XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREPLY, irq_flags); +} + +static void +xpc_send_chctl_openrequest_sn2(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args; + + args->entry_size = ch->entry_size; + args->local_nentries = ch->local_nentries; + XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREQUEST, irq_flags); +} + +static void +xpc_send_chctl_openreply_sn2(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args; + + args->remote_nentries = ch->remote_nentries; + args->local_nentries = ch->local_nentries; + args->local_msgqueue_pa = xp_pa(ch->sn.sn2.local_msgqueue); + XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREPLY, irq_flags); +} + +static void +xpc_send_chctl_opencomplete_sn2(struct xpc_channel *ch, + unsigned long *irq_flags) +{ + XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENCOMPLETE, irq_flags); +} + +static void +xpc_send_chctl_msgrequest_sn2(struct xpc_channel *ch) +{ + XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST, NULL); +} + +static void +xpc_send_chctl_local_msgrequest_sn2(struct xpc_channel *ch) +{ + XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST); +} + +static enum xp_retval +xpc_save_remote_msgqueue_pa_sn2(struct xpc_channel *ch, + unsigned long msgqueue_pa) +{ + 
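+	/*
+	 * Cache the remote message queue's physical address; it is read
+	 * back later by xpc_pull_remote_msg_sn2() when pulling messages
+	 * across partitions.
+	 */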
ch->sn.sn2.remote_msgqueue_pa = msgqueue_pa; + return xpSuccess; +} + +/* + * This next set of functions are used to keep track of when a partition is + * potentially engaged in accessing memory belonging to another partition. + */ + +static void +xpc_indicate_partition_engaged_sn2(struct xpc_partition *part) +{ + unsigned long irq_flags; + struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa + + (XPC_ENGAGED_PARTITIONS_AMO_SN2 * + sizeof(struct amo))); + + local_irq_save(irq_flags); + + /* set bit corresponding to our partid in remote partition's amo */ + FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, + BIT(sn_partition_id)); + + /* + * We must always use the nofault function regardless of whether we + * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we + * didn't, we'd never know that the other partition is down and would + * keep sending IRQs and amos to it until the heartbeat times out. + */ + (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> + variable), + xp_nofault_PIOR_target)); + + local_irq_restore(irq_flags); +} + +static void +xpc_indicate_partition_disengaged_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + unsigned long irq_flags; + struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa + + (XPC_ENGAGED_PARTITIONS_AMO_SN2 * + sizeof(struct amo))); + + local_irq_save(irq_flags); + + /* clear bit corresponding to our partid in remote partition's amo */ + FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND, + ~BIT(sn_partition_id)); + + /* + * We must always use the nofault function regardless of whether we + * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we + * didn't, we'd never know that the other partition is down and would + * keep sending IRQs and amos to it until the heartbeat times out. + */ + (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> + variable), + xp_nofault_PIOR_target)); + + local_irq_restore(irq_flags); + + /* + * Send activate IRQ to get other side to see that we've cleared our + * bit in their engaged partitions amo. + */ + xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa, + cnodeid_to_nasid(0), + part_sn2->activate_IRQ_nasid, + part_sn2->activate_IRQ_phys_cpuid); +} + +static void +xpc_assume_partition_disengaged_sn2(short partid) +{ + struct amo *amo = xpc_vars_sn2->amos_page + + XPC_ENGAGED_PARTITIONS_AMO_SN2; + + /* clear bit(s) based on partid mask in our partition's amo */ + FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND, + ~BIT(partid)); +} + +static int +xpc_partition_engaged_sn2(short partid) +{ + struct amo *amo = xpc_vars_sn2->amos_page + + XPC_ENGAGED_PARTITIONS_AMO_SN2; + + /* our partition's amo variable ANDed with partid mask */ + return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) & + BIT(partid)) != 0; +} + +static int +xpc_any_partition_engaged_sn2(void) +{ + struct amo *amo = xpc_vars_sn2->amos_page + + XPC_ENGAGED_PARTITIONS_AMO_SN2; + + /* our partition's amo variable */ + return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) != 0; +} + +/* original protection values for each node */ +static u64 xpc_prot_vec_sn2[MAX_NUMNODES]; + +/* + * Change protections to allow amo operations on non-Shub 1.1 systems. + */ +static enum xp_retval +xpc_allow_amo_ops_sn2(struct amo *amos_page) +{ + enum xp_retval ret = xpSuccess; + + /* + * On SHUB 1.1, we cannot call sn_change_memprotect() since the BIST + * collides with memory operations. 
On those systems we call + * xpc_allow_amo_ops_shub_wars_1_1_sn2() instead. + */ + if (!enable_shub_wars_1_1()) + ret = xp_expand_memprotect(ia64_tpa((u64)amos_page), PAGE_SIZE); + + return ret; +} + +/* + * Change protections to allow amo operations on Shub 1.1 systems. + */ +static void +xpc_allow_amo_ops_shub_wars_1_1_sn2(void) +{ + int node; + int nasid; + + if (!enable_shub_wars_1_1()) + return; + + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + /* save current protection values */ + xpc_prot_vec_sn2[node] = + (u64)HUB_L((u64 *)GLOBAL_MMR_ADDR(nasid, + SH1_MD_DQLP_MMR_DIR_PRIVEC0)); + /* open up everything */ + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, + SH1_MD_DQLP_MMR_DIR_PRIVEC0), + -1UL); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, + SH1_MD_DQRP_MMR_DIR_PRIVEC0), + -1UL); + } +} + +static enum xp_retval +xpc_get_partition_rsvd_page_pa_sn2(void *buf, u64 *cookie, unsigned long *rp_pa, + size_t *len) +{ + s64 status; + enum xp_retval ret; + + status = sn_partition_reserved_page_pa((u64)buf, cookie, + (u64 *)rp_pa, (u64 *)len); + if (status == SALRET_OK) + ret = xpSuccess; + else if (status == SALRET_MORE_PASSES) + ret = xpNeedMoreInfo; + else + ret = xpSalError; + + return ret; +} + + +static int +xpc_setup_rsvd_page_sn2(struct xpc_rsvd_page *rp) +{ + struct amo *amos_page; + int i; + int ret; + + xpc_vars_sn2 = XPC_RP_VARS(rp); + + rp->sn.sn2.vars_pa = xp_pa(xpc_vars_sn2); + + /* vars_part array follows immediately after vars */ + xpc_vars_part_sn2 = (struct xpc_vars_part_sn2 *)((u8 *)XPC_RP_VARS(rp) + + XPC_RP_VARS_SIZE); + + /* + * Before clearing xpc_vars_sn2, see if a page of amos had been + * previously allocated. If not we'll need to allocate one and set + * permissions so that cross-partition amos are allowed. + * + * The allocated amo page needs MCA reporting to remain disabled after + * XPC has unloaded. To make this work, we keep a copy of the pointer + * to this page (i.e., amos_page) in the struct xpc_vars_sn2 structure, + * which is pointed to by the reserved page, and re-use that saved copy + * on subsequent loads of XPC. This amo page is never freed, and its + * memory protections are never restricted. + */ + amos_page = xpc_vars_sn2->amos_page; + if (amos_page == NULL) { + amos_page = (struct amo *)TO_AMO(uncached_alloc_page(0, 1)); + if (amos_page == NULL) { + dev_err(xpc_part, "can't allocate page of amos\n"); + return -ENOMEM; + } + + /* + * Open up amo-R/W to cpu. This is done on Shub 1.1 systems + * when xpc_allow_amo_ops_shub_wars_1_1_sn2() is called. 
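+		 * On all other systems the xpc_allow_amo_ops_sn2() call
+		 * just below does the work instead, by expanding the
+		 * memory protections on the amos page.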
+ */ + ret = xpc_allow_amo_ops_sn2(amos_page); + if (ret != xpSuccess) { + dev_err(xpc_part, "can't allow amo operations\n"); + uncached_free_page(__IA64_UNCACHED_OFFSET | + TO_PHYS((u64)amos_page), 1); + return -EPERM; + } + } + + /* clear xpc_vars_sn2 */ + memset(xpc_vars_sn2, 0, sizeof(struct xpc_vars_sn2)); + + xpc_vars_sn2->version = XPC_V_VERSION; + xpc_vars_sn2->activate_IRQ_nasid = cpuid_to_nasid(0); + xpc_vars_sn2->activate_IRQ_phys_cpuid = cpu_physical_id(0); + xpc_vars_sn2->vars_part_pa = xp_pa(xpc_vars_part_sn2); + xpc_vars_sn2->amos_page_pa = ia64_tpa((u64)amos_page); + xpc_vars_sn2->amos_page = amos_page; /* save for next load of XPC */ + + /* clear xpc_vars_part_sn2 */ + memset((u64 *)xpc_vars_part_sn2, 0, sizeof(struct xpc_vars_part_sn2) * + XP_MAX_NPARTITIONS_SN2); + + /* initialize the activate IRQ related amo variables */ + for (i = 0; i < xpc_nasid_mask_nlongs; i++) + (void)xpc_init_IRQ_amo_sn2(XPC_ACTIVATE_IRQ_AMOS_SN2 + i); + + /* initialize the engaged remote partitions related amo variables */ + (void)xpc_init_IRQ_amo_sn2(XPC_ENGAGED_PARTITIONS_AMO_SN2); + (void)xpc_init_IRQ_amo_sn2(XPC_DEACTIVATE_REQUEST_AMO_SN2); + + return 0; +} + +static int +xpc_hb_allowed_sn2(short partid, void *heartbeating_to_mask) +{ + return test_bit(partid, heartbeating_to_mask); +} + +static void +xpc_allow_hb_sn2(short partid) +{ + DBUG_ON(xpc_vars_sn2 == NULL); + set_bit(partid, xpc_vars_sn2->heartbeating_to_mask); +} + +static void +xpc_disallow_hb_sn2(short partid) +{ + DBUG_ON(xpc_vars_sn2 == NULL); + clear_bit(partid, xpc_vars_sn2->heartbeating_to_mask); +} + +static void +xpc_disallow_all_hbs_sn2(void) +{ + DBUG_ON(xpc_vars_sn2 == NULL); + bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, xp_max_npartitions); +} + +static void +xpc_increment_heartbeat_sn2(void) +{ + xpc_vars_sn2->heartbeat++; +} + +static void +xpc_offline_heartbeat_sn2(void) +{ + xpc_increment_heartbeat_sn2(); + xpc_vars_sn2->heartbeat_offline = 1; +} + +static void +xpc_online_heartbeat_sn2(void) +{ + xpc_increment_heartbeat_sn2(); + xpc_vars_sn2->heartbeat_offline = 0; +} + +static void +xpc_heartbeat_init_sn2(void) +{ + DBUG_ON(xpc_vars_sn2 == NULL); + + bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2); + xpc_online_heartbeat_sn2(); +} + +static void +xpc_heartbeat_exit_sn2(void) +{ + xpc_offline_heartbeat_sn2(); +} + +static enum xp_retval +xpc_get_remote_heartbeat_sn2(struct xpc_partition *part) +{ + struct xpc_vars_sn2 *remote_vars; + enum xp_retval ret; + + remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2; + + /* pull the remote vars structure that contains the heartbeat */ + ret = xp_remote_memcpy(xp_pa(remote_vars), + part->sn.sn2.remote_vars_pa, + XPC_RP_VARS_SIZE); + if (ret != xpSuccess) + return ret; + + dev_dbg(xpc_part, "partid=%d, heartbeat=%lld, last_heartbeat=%lld, " + "heartbeat_offline=%lld, HB_mask[0]=0x%lx\n", XPC_PARTID(part), + remote_vars->heartbeat, part->last_heartbeat, + remote_vars->heartbeat_offline, + remote_vars->heartbeating_to_mask[0]); + + if ((remote_vars->heartbeat == part->last_heartbeat && + !remote_vars->heartbeat_offline) || + !xpc_hb_allowed_sn2(sn_partition_id, + remote_vars->heartbeating_to_mask)) { + ret = xpNoHeartbeat; + } else { + part->last_heartbeat = remote_vars->heartbeat; + } + + return ret; +} + +/* + * Get a copy of the remote partition's XPC variables from the reserved page. + * + * remote_vars points to a buffer that is cacheline aligned for BTE copies and + * assumed to be of size XPC_RP_VARS_SIZE. 
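+ *
+ * Typical use, as in xpc_identify_activate_IRQ_req_sn2():
+ *
+ *	remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2;
+ *	ret = xpc_get_remote_vars_sn2(remote_vars_pa, remote_vars);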
+ */ +static enum xp_retval +xpc_get_remote_vars_sn2(unsigned long remote_vars_pa, + struct xpc_vars_sn2 *remote_vars) +{ + enum xp_retval ret; + + if (remote_vars_pa == 0) + return xpVarsNotSet; + + /* pull over the cross partition variables */ + ret = xp_remote_memcpy(xp_pa(remote_vars), remote_vars_pa, + XPC_RP_VARS_SIZE); + if (ret != xpSuccess) + return ret; + + if (XPC_VERSION_MAJOR(remote_vars->version) != + XPC_VERSION_MAJOR(XPC_V_VERSION)) { + return xpBadVersion; + } + + return xpSuccess; +} + +static void +xpc_request_partition_activation_sn2(struct xpc_rsvd_page *remote_rp, + unsigned long remote_rp_pa, int nasid) +{ + xpc_send_local_activate_IRQ_sn2(nasid); +} + +static void +xpc_request_partition_reactivation_sn2(struct xpc_partition *part) +{ + xpc_send_local_activate_IRQ_sn2(part->sn.sn2.activate_IRQ_nasid); +} + +static void +xpc_request_partition_deactivation_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + unsigned long irq_flags; + struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa + + (XPC_DEACTIVATE_REQUEST_AMO_SN2 * + sizeof(struct amo))); + + local_irq_save(irq_flags); + + /* set bit corresponding to our partid in remote partition's amo */ + FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, + BIT(sn_partition_id)); + + /* + * We must always use the nofault function regardless of whether we + * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we + * didn't, we'd never know that the other partition is down and would + * keep sending IRQs and amos to it until the heartbeat times out. + */ + (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> + variable), + xp_nofault_PIOR_target)); + + local_irq_restore(irq_flags); + + /* + * Send activate IRQ to get other side to see that we've set our + * bit in their deactivate request amo. + */ + xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa, + cnodeid_to_nasid(0), + part_sn2->activate_IRQ_nasid, + part_sn2->activate_IRQ_phys_cpuid); +} + +static void +xpc_cancel_partition_deactivation_request_sn2(struct xpc_partition *part) +{ + unsigned long irq_flags; + struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa + + (XPC_DEACTIVATE_REQUEST_AMO_SN2 * + sizeof(struct amo))); + + local_irq_save(irq_flags); + + /* clear bit corresponding to our partid in remote partition's amo */ + FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND, + ~BIT(sn_partition_id)); + + /* + * We must always use the nofault function regardless of whether we + * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we + * didn't, we'd never know that the other partition is down and would + * keep sending IRQs and amos to it until the heartbeat times out. + */ + (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> + variable), + xp_nofault_PIOR_target)); + + local_irq_restore(irq_flags); +} + +static int +xpc_partition_deactivation_requested_sn2(short partid) +{ + struct amo *amo = xpc_vars_sn2->amos_page + + XPC_DEACTIVATE_REQUEST_AMO_SN2; + + /* our partition's amo variable ANDed with partid mask */ + return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) & + BIT(partid)) != 0; +} + +/* + * Update the remote partition's info. 
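+ *
+ * Note that last_heartbeat is primed to remote_vars->heartbeat - 1 so that
+ * the next xpc_get_remote_heartbeat_sn2() check sees forward progress.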
+ */
+static void
+xpc_update_partition_info_sn2(struct xpc_partition *part, u8 remote_rp_version,
+			      unsigned long *remote_rp_ts_jiffies,
+			      unsigned long remote_rp_pa,
+			      unsigned long remote_vars_pa,
+			      struct xpc_vars_sn2 *remote_vars)
+{
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+
+	part->remote_rp_version = remote_rp_version;
+	dev_dbg(xpc_part, "  remote_rp_version = 0x%016x\n",
+		part->remote_rp_version);
+
+	part->remote_rp_ts_jiffies = *remote_rp_ts_jiffies;
+	dev_dbg(xpc_part, "  remote_rp_ts_jiffies = 0x%016lx\n",
+		part->remote_rp_ts_jiffies);
+
+	part->remote_rp_pa = remote_rp_pa;
+	dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
+
+	part_sn2->remote_vars_pa = remote_vars_pa;
+	dev_dbg(xpc_part, "  remote_vars_pa = 0x%016lx\n",
+		part_sn2->remote_vars_pa);
+
+	part->last_heartbeat = remote_vars->heartbeat - 1;
+	dev_dbg(xpc_part, "  last_heartbeat = 0x%016llx\n",
+		part->last_heartbeat);
+
+	part_sn2->remote_vars_part_pa = remote_vars->vars_part_pa;
+	dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
+		part_sn2->remote_vars_part_pa);
+
+	part_sn2->activate_IRQ_nasid = remote_vars->activate_IRQ_nasid;
+	dev_dbg(xpc_part, "  activate_IRQ_nasid = 0x%x\n",
+		part_sn2->activate_IRQ_nasid);
+
+	part_sn2->activate_IRQ_phys_cpuid =
+	    remote_vars->activate_IRQ_phys_cpuid;
+	dev_dbg(xpc_part, "  activate_IRQ_phys_cpuid = 0x%x\n",
+		part_sn2->activate_IRQ_phys_cpuid);
+
+	part_sn2->remote_amos_page_pa = remote_vars->amos_page_pa;
+	dev_dbg(xpc_part, "  remote_amos_page_pa = 0x%lx\n",
+		part_sn2->remote_amos_page_pa);
+
+	part_sn2->remote_vars_version = remote_vars->version;
+	dev_dbg(xpc_part, "  remote_vars_version = 0x%x\n",
+		part_sn2->remote_vars_version);
+}
+
+/*
+ * Prior code has determined the nasid which generated an activate IRQ.
+ * Inspect that nasid to determine if its partition needs to be activated
+ * or deactivated.
+ *
+ * A partition is considered "awaiting activation" if our partition
+ * flags indicate it is not active and it has a heartbeat.  A
+ * partition is considered "awaiting deactivation" if our partition
+ * flags indicate it is active but it has no heartbeat or it is not
+ * sending its heartbeat to us.
+ *
+ * To determine the heartbeat, the remote nasid must have a properly
+ * initialized reserved page.
+ */ +static void +xpc_identify_activate_IRQ_req_sn2(int nasid) +{ + struct xpc_rsvd_page *remote_rp; + struct xpc_vars_sn2 *remote_vars; + unsigned long remote_rp_pa; + unsigned long remote_vars_pa; + int remote_rp_version; + int reactivate = 0; + unsigned long remote_rp_ts_jiffies = 0; + short partid; + struct xpc_partition *part; + struct xpc_partition_sn2 *part_sn2; + enum xp_retval ret; + + /* pull over the reserved page structure */ + + remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer_sn2; + + ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa); + if (ret != xpSuccess) { + dev_warn(xpc_part, "unable to get reserved page from nasid %d, " + "which sent interrupt, reason=%d\n", nasid, ret); + return; + } + + remote_vars_pa = remote_rp->sn.sn2.vars_pa; + remote_rp_version = remote_rp->version; + remote_rp_ts_jiffies = remote_rp->ts_jiffies; + + partid = remote_rp->SAL_partid; + part = &xpc_partitions[partid]; + part_sn2 = &part->sn.sn2; + + /* pull over the cross partition variables */ + + remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2; + + ret = xpc_get_remote_vars_sn2(remote_vars_pa, remote_vars); + if (ret != xpSuccess) { + dev_warn(xpc_part, "unable to get XPC variables from nasid %d, " + "which sent interrupt, reason=%d\n", nasid, ret); + + XPC_DEACTIVATE_PARTITION(part, ret); + return; + } + + part->activate_IRQ_rcvd++; + + dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = " + "%lld:0x%lx\n", (int)nasid, (int)partid, + part->activate_IRQ_rcvd, + remote_vars->heartbeat, remote_vars->heartbeating_to_mask[0]); + + if (xpc_partition_disengaged(part) && + part->act_state == XPC_P_AS_INACTIVE) { + + xpc_update_partition_info_sn2(part, remote_rp_version, + &remote_rp_ts_jiffies, + remote_rp_pa, remote_vars_pa, + remote_vars); + + if (xpc_partition_deactivation_requested_sn2(partid)) { + /* + * Other side is waiting on us to deactivate even though + * we already have. + */ + return; + } + + xpc_activate_partition(part); + return; + } + + DBUG_ON(part->remote_rp_version == 0); + DBUG_ON(part_sn2->remote_vars_version == 0); + + if (remote_rp_ts_jiffies != part->remote_rp_ts_jiffies) { + + /* the other side rebooted */ + + DBUG_ON(xpc_partition_engaged_sn2(partid)); + DBUG_ON(xpc_partition_deactivation_requested_sn2(partid)); + + xpc_update_partition_info_sn2(part, remote_rp_version, + &remote_rp_ts_jiffies, + remote_rp_pa, remote_vars_pa, + remote_vars); + reactivate = 1; + } + + if (part->disengage_timeout > 0 && !xpc_partition_disengaged(part)) { + /* still waiting on other side to disengage from us */ + return; + } + + if (reactivate) + XPC_DEACTIVATE_PARTITION(part, xpReactivating); + else if (xpc_partition_deactivation_requested_sn2(partid)) + XPC_DEACTIVATE_PARTITION(part, xpOtherGoingDown); +} + +/* + * Loop through the activation amo variables and process any bits + * which are set. Each bit indicates a nasid sending a partition + * activation or deactivation request. + * + * Return #of IRQs detected. 
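+ *
+ * Each amo word is fetched-and-cleared and then scanned with
+ * find_first_bit()/find_next_bit(); a set bit b in word l maps back to
+ * sending nasid (l * BITS_PER_LONG + b) * 2.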
+ */ +int +xpc_identify_activate_IRQ_sender_sn2(void) +{ + int l; + int b; + unsigned long nasid_mask_long; + u64 nasid; /* remote nasid */ + int n_IRQs_detected = 0; + struct amo *act_amos; + + act_amos = xpc_vars_sn2->amos_page + XPC_ACTIVATE_IRQ_AMOS_SN2; + + /* scan through activate amo variables looking for non-zero entries */ + for (l = 0; l < xpc_nasid_mask_nlongs; l++) { + + if (xpc_exiting) + break; + + nasid_mask_long = xpc_receive_IRQ_amo_sn2(&act_amos[l]); + + b = find_first_bit(&nasid_mask_long, BITS_PER_LONG); + if (b >= BITS_PER_LONG) { + /* no IRQs from nasids in this amo variable */ + continue; + } + + dev_dbg(xpc_part, "amo[%d] gave back 0x%lx\n", l, + nasid_mask_long); + + /* + * If this nasid has been added to the machine since + * our partition was reset, this will retain the + * remote nasid in our reserved pages machine mask. + * This is used in the event of module reload. + */ + xpc_mach_nasids[l] |= nasid_mask_long; + + /* locate the nasid(s) which sent interrupts */ + + do { + n_IRQs_detected++; + nasid = (l * BITS_PER_LONG + b) * 2; + dev_dbg(xpc_part, "interrupt from nasid %lld\n", nasid); + xpc_identify_activate_IRQ_req_sn2(nasid); + + b = find_next_bit(&nasid_mask_long, BITS_PER_LONG, + b + 1); + } while (b < BITS_PER_LONG); + } + return n_IRQs_detected; +} + +static void +xpc_process_activate_IRQ_rcvd_sn2(void) +{ + unsigned long irq_flags; + int n_IRQs_expected; + int n_IRQs_detected; + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + n_IRQs_expected = xpc_activate_IRQ_rcvd; + xpc_activate_IRQ_rcvd = 0; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + n_IRQs_detected = xpc_identify_activate_IRQ_sender_sn2(); + if (n_IRQs_detected < n_IRQs_expected) { + /* retry once to help avoid missing amo */ + (void)xpc_identify_activate_IRQ_sender_sn2(); + } +} + +/* + * Setup the channel structures that are sn2 specific. 
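+ *
+ * In order: allocate the local/remote GET/PUT values and the openclose
+ * args, register the shared SGI_XPC_NOTIFY IRQ handler, start the
+ * dropped-IRQ timer, and finally publish everything through
+ * xpc_vars_part_sn2[partid] by setting its magic.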
+ */ +static enum xp_retval +xpc_setup_ch_structures_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + struct xpc_channel_sn2 *ch_sn2; + enum xp_retval retval; + int ret; + int cpuid; + int ch_number; + struct timer_list *timer; + short partid = XPC_PARTID(part); + + /* allocate all the required GET/PUT values */ + + part_sn2->local_GPs = + xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL, + &part_sn2->local_GPs_base); + if (part_sn2->local_GPs == NULL) { + dev_err(xpc_chan, "can't get memory for local get/put " + "values\n"); + return xpNoMemory; + } + + part_sn2->remote_GPs = + xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL, + &part_sn2->remote_GPs_base); + if (part_sn2->remote_GPs == NULL) { + dev_err(xpc_chan, "can't get memory for remote get/put " + "values\n"); + retval = xpNoMemory; + goto out_1; + } + + part_sn2->remote_GPs_pa = 0; + + /* allocate all the required open and close args */ + + part_sn2->local_openclose_args = + xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, + GFP_KERNEL, &part_sn2-> + local_openclose_args_base); + if (part_sn2->local_openclose_args == NULL) { + dev_err(xpc_chan, "can't get memory for local connect args\n"); + retval = xpNoMemory; + goto out_2; + } + + part_sn2->remote_openclose_args_pa = 0; + + part_sn2->local_chctl_amo_va = xpc_init_IRQ_amo_sn2(partid); + + part_sn2->notify_IRQ_nasid = 0; + part_sn2->notify_IRQ_phys_cpuid = 0; + part_sn2->remote_chctl_amo_va = NULL; + + sprintf(part_sn2->notify_IRQ_owner, "xpc%02d", partid); + ret = request_irq(SGI_XPC_NOTIFY, xpc_handle_notify_IRQ_sn2, + IRQF_SHARED, part_sn2->notify_IRQ_owner, + (void *)(u64)partid); + if (ret != 0) { + dev_err(xpc_chan, "can't register NOTIFY IRQ handler, " + "errno=%d\n", -ret); + retval = xpLackOfResources; + goto out_3; + } + + /* Setup a timer to check for dropped notify IRQs */ + timer = &part_sn2->dropped_notify_IRQ_timer; + init_timer(timer); + timer->function = + (void (*)(unsigned long))xpc_check_for_dropped_notify_IRQ_sn2; + timer->data = (unsigned long)part; + timer->expires = jiffies + XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL; + add_timer(timer); + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch_sn2 = &part->channels[ch_number].sn.sn2; + + ch_sn2->local_GP = &part_sn2->local_GPs[ch_number]; + ch_sn2->local_openclose_args = + &part_sn2->local_openclose_args[ch_number]; + + mutex_init(&ch_sn2->msg_to_pull_mutex); + } + + /* + * Setup the per partition specific variables required by the + * remote partition to establish channel connections with us. + * + * The setting of the magic # indicates that these per partition + * specific variables are ready to be used. 
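+	 * (XPC_VP_MAGIC1_SN2 at this point; it is advanced to
+	 * XPC_VP_MAGIC2_SN2 once we have pulled the other side's copy, see
+	 * xpc_pull_remote_vars_part_sn2().)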
+ */ + xpc_vars_part_sn2[partid].GPs_pa = xp_pa(part_sn2->local_GPs); + xpc_vars_part_sn2[partid].openclose_args_pa = + xp_pa(part_sn2->local_openclose_args); + xpc_vars_part_sn2[partid].chctl_amo_pa = + xp_pa(part_sn2->local_chctl_amo_va); + cpuid = raw_smp_processor_id(); /* any CPU in this partition will do */ + xpc_vars_part_sn2[partid].notify_IRQ_nasid = cpuid_to_nasid(cpuid); + xpc_vars_part_sn2[partid].notify_IRQ_phys_cpuid = + cpu_physical_id(cpuid); + xpc_vars_part_sn2[partid].nchannels = part->nchannels; + xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC1_SN2; + + return xpSuccess; + + /* setup of ch structures failed */ +out_3: + kfree(part_sn2->local_openclose_args_base); + part_sn2->local_openclose_args = NULL; +out_2: + kfree(part_sn2->remote_GPs_base); + part_sn2->remote_GPs = NULL; +out_1: + kfree(part_sn2->local_GPs_base); + part_sn2->local_GPs = NULL; + return retval; +} + +/* + * Teardown the channel structures that are sn2 specific. + */ +static void +xpc_teardown_ch_structures_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + short partid = XPC_PARTID(part); + + /* + * Indicate that the variables specific to the remote partition are no + * longer available for its use. + */ + xpc_vars_part_sn2[partid].magic = 0; + + /* in case we've still got outstanding timers registered... */ + del_timer_sync(&part_sn2->dropped_notify_IRQ_timer); + free_irq(SGI_XPC_NOTIFY, (void *)(u64)partid); + + kfree(part_sn2->local_openclose_args_base); + part_sn2->local_openclose_args = NULL; + kfree(part_sn2->remote_GPs_base); + part_sn2->remote_GPs = NULL; + kfree(part_sn2->local_GPs_base); + part_sn2->local_GPs = NULL; + part_sn2->local_chctl_amo_va = NULL; +} + +/* + * Create a wrapper that hides the underlying mechanism for pulling a cacheline + * (or multiple cachelines) from a remote partition. + * + * src_pa must be a cacheline aligned physical address on the remote partition. + * dst must be a cacheline aligned virtual address on this partition. + * cnt must be cacheline sized + */ +/* ??? Replace this function by call to xp_remote_memcpy() or bte_copy()? */ +static enum xp_retval +xpc_pull_remote_cachelines_sn2(struct xpc_partition *part, void *dst, + const unsigned long src_pa, size_t cnt) +{ + enum xp_retval ret; + + DBUG_ON(src_pa != L1_CACHE_ALIGN(src_pa)); + DBUG_ON((unsigned long)dst != L1_CACHE_ALIGN((unsigned long)dst)); + DBUG_ON(cnt != L1_CACHE_ALIGN(cnt)); + + if (part->act_state == XPC_P_AS_DEACTIVATING) + return part->reason; + + ret = xp_remote_memcpy(xp_pa(dst), src_pa, cnt); + if (ret != xpSuccess) { + dev_dbg(xpc_chan, "xp_remote_memcpy() from partition %d failed," + " ret=%d\n", XPC_PARTID(part), ret); + } + return ret; +} + +/* + * Pull the remote per partition specific variables from the specified + * partition. 
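+ *
+ * Our entry lives at remote_vars_part_pa + sn_partition_id *
+ * sizeof(struct xpc_vars_part_sn2); the enclosing cacheline is pulled and
+ * the entry is then picked out by its offset within that line.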
+ */
+static enum xp_retval
+xpc_pull_remote_vars_part_sn2(struct xpc_partition *part)
+{
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+	u8 buffer[L1_CACHE_BYTES * 2];
+	struct xpc_vars_part_sn2 *pulled_entry_cacheline =
+	    (struct xpc_vars_part_sn2 *)L1_CACHE_ALIGN((u64)buffer);
+	struct xpc_vars_part_sn2 *pulled_entry;
+	unsigned long remote_entry_cacheline_pa;
+	unsigned long remote_entry_pa;
+	short partid = XPC_PARTID(part);
+	enum xp_retval ret;
+
+	/* pull the cacheline that contains the variables we're interested in */
+
+	DBUG_ON(part_sn2->remote_vars_part_pa !=
+		L1_CACHE_ALIGN(part_sn2->remote_vars_part_pa));
+	DBUG_ON(sizeof(struct xpc_vars_part_sn2) != L1_CACHE_BYTES / 2);
+
+	remote_entry_pa = part_sn2->remote_vars_part_pa +
+	    sn_partition_id * sizeof(struct xpc_vars_part_sn2);
+
+	remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1));
+
+	pulled_entry = (struct xpc_vars_part_sn2 *)((u64)pulled_entry_cacheline
+						    + (remote_entry_pa &
+						    (L1_CACHE_BYTES - 1)));
+
+	ret = xpc_pull_remote_cachelines_sn2(part, pulled_entry_cacheline,
+					     remote_entry_cacheline_pa,
+					     L1_CACHE_BYTES);
+	if (ret != xpSuccess) {
+		dev_dbg(xpc_chan, "failed to pull XPC vars_part from "
+			"partition %d, ret=%d\n", partid, ret);
+		return ret;
+	}
+
+	/* see if they've been set up yet */
+
+	if (pulled_entry->magic != XPC_VP_MAGIC1_SN2 &&
+	    pulled_entry->magic != XPC_VP_MAGIC2_SN2) {
+
+		if (pulled_entry->magic != 0) {
+			dev_dbg(xpc_chan, "partition %d's XPC vars_part for "
+				"partition %d has bad magic value (=0x%llx)\n",
+				partid, sn_partition_id, pulled_entry->magic);
+			return xpBadMagic;
+		}
+
+		/* they've not been initialized yet */
+		return xpRetry;
+	}
+
+	if (xpc_vars_part_sn2[partid].magic == XPC_VP_MAGIC1_SN2) {
+
+		/* validate the variables */
+
+		if (pulled_entry->GPs_pa == 0 ||
+		    pulled_entry->openclose_args_pa == 0 ||
+		    pulled_entry->chctl_amo_pa == 0) {
+
+			dev_err(xpc_chan, "partition %d's XPC vars_part for "
+				"partition %d are not valid\n", partid,
+				sn_partition_id);
+			return xpInvalidAddress;
+		}
+
+		/* the variables we imported look to be valid */
+
+		part_sn2->remote_GPs_pa = pulled_entry->GPs_pa;
+		part_sn2->remote_openclose_args_pa =
+		    pulled_entry->openclose_args_pa;
+		part_sn2->remote_chctl_amo_va =
+		    (struct amo *)__va(pulled_entry->chctl_amo_pa);
+		part_sn2->notify_IRQ_nasid = pulled_entry->notify_IRQ_nasid;
+		part_sn2->notify_IRQ_phys_cpuid =
+		    pulled_entry->notify_IRQ_phys_cpuid;
+
+		if (part->nchannels > pulled_entry->nchannels)
+			part->nchannels = pulled_entry->nchannels;
+
+		/* let the other side know that we've pulled their variables */
+
+		xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC2_SN2;
+	}
+
+	if (pulled_entry->magic == XPC_VP_MAGIC1_SN2)
+		return xpRetry;
+
+	return xpSuccess;
+}
+
+/*
+ * Establish first contact with the remote partition.  This involves pulling
+ * the XPC per partition variables from the remote partition and waiting for
+ * the remote partition to pull ours.
+ */
+static enum xp_retval
+xpc_make_first_contact_sn2(struct xpc_partition *part)
+{
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+	enum xp_retval ret;
+
+	/*
+	 * Register the remote partition's amos with SAL so it can handle
+	 * and clean up errors within that address range should the remote
+	 * partition go down.  We don't unregister this range because it is
+	 * difficult to tell when outstanding writes to the remote partition
+	 * are finished and thus when it is safe to unregister.
This should + * not result in wasted space in the SAL xp_addr_region table because + * we should get the same page for remote_amos_page_pa after module + * reloads and system reboots. + */ + if (sn_register_xp_addr_region(part_sn2->remote_amos_page_pa, + PAGE_SIZE, 1) < 0) { + dev_warn(xpc_part, "xpc_activating(%d) failed to register " + "xp_addr region\n", XPC_PARTID(part)); + + ret = xpPhysAddrRegFailed; + XPC_DEACTIVATE_PARTITION(part, ret); + return ret; + } + + /* + * Send activate IRQ to get other side to activate if they've not + * already begun to do so. + */ + xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa, + cnodeid_to_nasid(0), + part_sn2->activate_IRQ_nasid, + part_sn2->activate_IRQ_phys_cpuid); + + while ((ret = xpc_pull_remote_vars_part_sn2(part)) != xpSuccess) { + if (ret != xpRetry) { + XPC_DEACTIVATE_PARTITION(part, ret); + return ret; + } + + dev_dbg(xpc_part, "waiting to make first contact with " + "partition %d\n", XPC_PARTID(part)); + + /* wait a 1/4 of a second or so */ + (void)msleep_interruptible(250); + + if (part->act_state == XPC_P_AS_DEACTIVATING) + return part->reason; + } + + return xpSuccess; +} + +/* + * Get the chctl flags and pull the openclose args and/or remote GPs as needed. + */ +static u64 +xpc_get_chctl_all_flags_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + unsigned long irq_flags; + union xpc_channel_ctl_flags chctl; + enum xp_retval ret; + + /* + * See if there are any chctl flags to be handled. + */ + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + chctl = part->chctl; + if (chctl.all_flags != 0) + part->chctl.all_flags = 0; + + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + if (xpc_any_openclose_chctl_flags_set(&chctl)) { + ret = xpc_pull_remote_cachelines_sn2(part, part-> + remote_openclose_args, + part_sn2-> + remote_openclose_args_pa, + XPC_OPENCLOSE_ARGS_SIZE); + if (ret != xpSuccess) { + XPC_DEACTIVATE_PARTITION(part, ret); + + dev_dbg(xpc_chan, "failed to pull openclose args from " + "partition %d, ret=%d\n", XPC_PARTID(part), + ret); + + /* don't bother processing chctl flags anymore */ + chctl.all_flags = 0; + } + } + + if (xpc_any_msg_chctl_flags_set(&chctl)) { + ret = xpc_pull_remote_cachelines_sn2(part, part_sn2->remote_GPs, + part_sn2->remote_GPs_pa, + XPC_GP_SIZE); + if (ret != xpSuccess) { + XPC_DEACTIVATE_PARTITION(part, ret); + + dev_dbg(xpc_chan, "failed to pull GPs from partition " + "%d, ret=%d\n", XPC_PARTID(part), ret); + + /* don't bother processing chctl flags anymore */ + chctl.all_flags = 0; + } + } + + return chctl.all_flags; +} + +/* + * Allocate the local message queue and the notify queue. 
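+ *
+ * If a cacheline-aligned buffer for local_nentries entries can't be had,
+ * the entry count is backed off one at a time until an allocation
+ * succeeds, and ch->local_nentries is trimmed to match under ch->lock.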
+ */ +static enum xp_retval +xpc_allocate_local_msgqueue_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + unsigned long irq_flags; + int nentries; + size_t nbytes; + + for (nentries = ch->local_nentries; nentries > 0; nentries--) { + + nbytes = nentries * ch->entry_size; + ch_sn2->local_msgqueue = + xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL, + &ch_sn2->local_msgqueue_base); + if (ch_sn2->local_msgqueue == NULL) + continue; + + nbytes = nentries * sizeof(struct xpc_notify_sn2); + ch_sn2->notify_queue = kzalloc(nbytes, GFP_KERNEL); + if (ch_sn2->notify_queue == NULL) { + kfree(ch_sn2->local_msgqueue_base); + ch_sn2->local_msgqueue = NULL; + continue; + } + + spin_lock_irqsave(&ch->lock, irq_flags); + if (nentries < ch->local_nentries) { + dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, " + "partid=%d, channel=%d\n", nentries, + ch->local_nentries, ch->partid, ch->number); + + ch->local_nentries = nentries; + } + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpSuccess; + } + + dev_dbg(xpc_chan, "can't get memory for local message queue and notify " + "queue, partid=%d, channel=%d\n", ch->partid, ch->number); + return xpNoMemory; +} + +/* + * Allocate the cached remote message queue. + */ +static enum xp_retval +xpc_allocate_remote_msgqueue_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + unsigned long irq_flags; + int nentries; + size_t nbytes; + + DBUG_ON(ch->remote_nentries <= 0); + + for (nentries = ch->remote_nentries; nentries > 0; nentries--) { + + nbytes = nentries * ch->entry_size; + ch_sn2->remote_msgqueue = + xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL, &ch_sn2-> + remote_msgqueue_base); + if (ch_sn2->remote_msgqueue == NULL) + continue; + + spin_lock_irqsave(&ch->lock, irq_flags); + if (nentries < ch->remote_nentries) { + dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, " + "partid=%d, channel=%d\n", nentries, + ch->remote_nentries, ch->partid, ch->number); + + ch->remote_nentries = nentries; + } + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpSuccess; + } + + dev_dbg(xpc_chan, "can't get memory for cached remote message queue, " + "partid=%d, channel=%d\n", ch->partid, ch->number); + return xpNoMemory; +} + +/* + * Allocate message queues and other stuff associated with a channel. + * + * Note: Assumes all of the channel sizes are filled in. + */ +static enum xp_retval +xpc_setup_msg_structures_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + enum xp_retval ret; + + DBUG_ON(ch->flags & XPC_C_SETUP); + + ret = xpc_allocate_local_msgqueue_sn2(ch); + if (ret == xpSuccess) { + + ret = xpc_allocate_remote_msgqueue_sn2(ch); + if (ret != xpSuccess) { + kfree(ch_sn2->local_msgqueue_base); + ch_sn2->local_msgqueue = NULL; + kfree(ch_sn2->notify_queue); + ch_sn2->notify_queue = NULL; + } + } + return ret; +} + +/* + * Free up message queues and other stuff that were allocated for the specified + * channel. 
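+ *
+ * Caller must hold ch->lock (see the DBUG_ON below).  The GP values and
+ * next_msg_to_pull are reset unconditionally; the queues themselves are
+ * only freed if XPC_C_SETUP completed.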
+ */
+static void
+xpc_teardown_msg_structures_sn2(struct xpc_channel *ch)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+
+	DBUG_ON(!spin_is_locked(&ch->lock));
+
+	ch_sn2->remote_msgqueue_pa = 0;
+
+	ch_sn2->local_GP->get = 0;
+	ch_sn2->local_GP->put = 0;
+	ch_sn2->remote_GP.get = 0;
+	ch_sn2->remote_GP.put = 0;
+	ch_sn2->w_local_GP.get = 0;
+	ch_sn2->w_local_GP.put = 0;
+	ch_sn2->w_remote_GP.get = 0;
+	ch_sn2->w_remote_GP.put = 0;
+	ch_sn2->next_msg_to_pull = 0;
+
+	if (ch->flags & XPC_C_SETUP) {
+		dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n",
+			ch->flags, ch->partid, ch->number);
+
+		kfree(ch_sn2->local_msgqueue_base);
+		ch_sn2->local_msgqueue = NULL;
+		kfree(ch_sn2->remote_msgqueue_base);
+		ch_sn2->remote_msgqueue = NULL;
+		kfree(ch_sn2->notify_queue);
+		ch_sn2->notify_queue = NULL;
+	}
+}
+
+/*
+ * Notify those who wanted to be notified upon delivery of their message.
+ */
+static void
+xpc_notify_senders_sn2(struct xpc_channel *ch, enum xp_retval reason, s64 put)
+{
+	struct xpc_notify_sn2 *notify;
+	u8 notify_type;
+	s64 get = ch->sn.sn2.w_remote_GP.get - 1;
+
+	while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
+
+		notify = &ch->sn.sn2.notify_queue[get % ch->local_nentries];
+
+		/*
+		 * See if the notify entry indicates it was associated with
+		 * a message whose sender wants to be notified.  It is
+		 * possible that it is, but someone else is doing or has done
+		 * the notification.
+		 */
+		notify_type = notify->type;
+		if (notify_type == 0 ||
+		    cmpxchg(&notify->type, notify_type, 0) != notify_type) {
+			continue;
+		}
+
+		DBUG_ON(notify_type != XPC_N_CALL);
+
+		atomic_dec(&ch->n_to_notify);
+
+		if (notify->func != NULL) {
+			dev_dbg(xpc_chan, "notify->func() called, notify=0x%p "
+				"msg_number=%lld partid=%d channel=%d\n",
+				(void *)notify, get, ch->partid, ch->number);
+
+			notify->func(reason, ch->partid, ch->number,
+				     notify->key);
+
+			dev_dbg(xpc_chan, "notify->func() returned, notify=0x%p"
+				" msg_number=%lld partid=%d channel=%d\n",
+				(void *)notify, get, ch->partid, ch->number);
+		}
+	}
+}
+
+static void
+xpc_notify_senders_of_disconnect_sn2(struct xpc_channel *ch)
+{
+	xpc_notify_senders_sn2(ch, ch->reason, ch->sn.sn2.w_local_GP.put);
+}
+
+/*
+ * Clear some of the msg flags in the local message queue.
+ */
+static inline void
+xpc_clear_local_msgqueue_flags_sn2(struct xpc_channel *ch)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	struct xpc_msg_sn2 *msg;
+	s64 get;
+
+	get = ch_sn2->w_remote_GP.get;
+	do {
+		msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->local_msgqueue +
+					     (get % ch->local_nentries) *
+					     ch->entry_size);
+		DBUG_ON(!(msg->flags & XPC_M_SN2_READY));
+		msg->flags = 0;
+	} while (++get < ch_sn2->remote_GP.get);
+}
+
+/*
+ * Clear some of the msg flags in the remote message queue.
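+ *
+ * Clearing is only needed once the queue has wrapped
+ * (remote_GP.put >= remote_nentries); before that the kzalloc'd buffer is
+ * still zero, which is what the early return below relies on.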
+ */ +static inline void +xpc_clear_remote_msgqueue_flags_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg; + s64 put, remote_nentries = ch->remote_nentries; + + /* flags are zeroed when the buffer is allocated */ + if (ch_sn2->remote_GP.put < remote_nentries) + return; + + put = max(ch_sn2->w_remote_GP.put, remote_nentries); + do { + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue + + (put % remote_nentries) * + ch->entry_size); + DBUG_ON(!(msg->flags & XPC_M_SN2_READY)); + DBUG_ON(!(msg->flags & XPC_M_SN2_DONE)); + DBUG_ON(msg->number != put - remote_nentries); + msg->flags = 0; + } while (++put < ch_sn2->remote_GP.put); +} + +static int +xpc_n_of_deliverable_payloads_sn2(struct xpc_channel *ch) +{ + return ch->sn.sn2.w_remote_GP.put - ch->sn.sn2.w_local_GP.get; +} + +static void +xpc_process_msg_chctl_flags_sn2(struct xpc_partition *part, int ch_number) +{ + struct xpc_channel *ch = &part->channels[ch_number]; + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + int npayloads_sent; + + ch_sn2->remote_GP = part->sn.sn2.remote_GPs[ch_number]; + + /* See what, if anything, has changed for each connected channel */ + + xpc_msgqueue_ref(ch); + + if (ch_sn2->w_remote_GP.get == ch_sn2->remote_GP.get && + ch_sn2->w_remote_GP.put == ch_sn2->remote_GP.put) { + /* nothing changed since GPs were last pulled */ + xpc_msgqueue_deref(ch); + return; + } + + if (!(ch->flags & XPC_C_CONNECTED)) { + xpc_msgqueue_deref(ch); + return; + } + + /* + * First check to see if messages recently sent by us have been + * received by the other side. (The remote GET value will have + * changed since we last looked at it.) + */ + + if (ch_sn2->w_remote_GP.get != ch_sn2->remote_GP.get) { + + /* + * We need to notify any senders that want to be notified + * that their sent messages have been received by their + * intended recipients. We need to do this before updating + * w_remote_GP.get so that we don't allocate the same message + * queue entries prematurely (see xpc_allocate_msg()). + */ + if (atomic_read(&ch->n_to_notify) > 0) { + /* + * Notify senders that messages sent have been + * received and delivered by the other side. + */ + xpc_notify_senders_sn2(ch, xpMsgDelivered, + ch_sn2->remote_GP.get); + } + + /* + * Clear msg->flags in previously sent messages, so that + * they're ready for xpc_allocate_msg(). + */ + xpc_clear_local_msgqueue_flags_sn2(ch); + + ch_sn2->w_remote_GP.get = ch_sn2->remote_GP.get; + + dev_dbg(xpc_chan, "w_remote_GP.get changed to %lld, partid=%d, " + "channel=%d\n", ch_sn2->w_remote_GP.get, ch->partid, + ch->number); + + /* + * If anyone was waiting for message queue entries to become + * available, wake them up. + */ + if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) + wake_up(&ch->msg_allocate_wq); + } + + /* + * Now check for newly sent messages by the other side. (The remote + * PUT value will have changed since we last looked at it.) + */ + + if (ch_sn2->w_remote_GP.put != ch_sn2->remote_GP.put) { + /* + * Clear msg->flags in previously received messages, so that + * they're ready for xpc_get_deliverable_payload_sn2(). 
+ */
+		xpc_clear_remote_msgqueue_flags_sn2(ch);
+
+		smp_wmb(); /* ensure flags have been cleared before bte_copy */
+		ch_sn2->w_remote_GP.put = ch_sn2->remote_GP.put;
+
+		dev_dbg(xpc_chan, "w_remote_GP.put changed to %lld, partid=%d, "
+			"channel=%d\n", ch_sn2->w_remote_GP.put, ch->partid,
+			ch->number);
+
+		npayloads_sent = xpc_n_of_deliverable_payloads_sn2(ch);
+		if (npayloads_sent > 0) {
+			dev_dbg(xpc_chan, "msgs waiting to be copied and "
+				"delivered=%d, partid=%d, channel=%d\n",
+				npayloads_sent, ch->partid, ch->number);
+
+			if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)
+				xpc_activate_kthreads(ch, npayloads_sent);
+		}
+	}
+
+	xpc_msgqueue_deref(ch);
+}
+
+static struct xpc_msg_sn2 *
+xpc_pull_remote_msg_sn2(struct xpc_channel *ch, s64 get)
+{
+	struct xpc_partition *part = &xpc_partitions[ch->partid];
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	unsigned long remote_msg_pa;
+	struct xpc_msg_sn2 *msg;
+	u32 msg_index;
+	u32 nmsgs;
+	u64 msg_offset;
+	enum xp_retval ret;
+
+	if (mutex_lock_interruptible(&ch_sn2->msg_to_pull_mutex) != 0) {
+		/* we were interrupted by a signal */
+		return NULL;
+	}
+
+	while (get >= ch_sn2->next_msg_to_pull) {
+
+		/* pull as many messages as are ready and able to be pulled */
+
+		msg_index = ch_sn2->next_msg_to_pull % ch->remote_nentries;
+
+		DBUG_ON(ch_sn2->next_msg_to_pull >= ch_sn2->w_remote_GP.put);
+		nmsgs = ch_sn2->w_remote_GP.put - ch_sn2->next_msg_to_pull;
+		if (msg_index + nmsgs > ch->remote_nentries) {
+			/* ignore the ones that wrap the msg queue for now */
+			nmsgs = ch->remote_nentries - msg_index;
+		}
+
+		msg_offset = msg_index * ch->entry_size;
+		msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue +
+		    msg_offset);
+		remote_msg_pa = ch_sn2->remote_msgqueue_pa + msg_offset;
+
+		ret = xpc_pull_remote_cachelines_sn2(part, msg, remote_msg_pa,
+						     nmsgs * ch->entry_size);
+		if (ret != xpSuccess) {
+
+			dev_dbg(xpc_chan, "failed to pull %d msgs starting with"
+				" msg %lld from partition %d, channel=%d, "
+				"ret=%d\n", nmsgs, ch_sn2->next_msg_to_pull,
+				ch->partid, ch->number, ret);
+
+			XPC_DEACTIVATE_PARTITION(part, ret);
+
+			mutex_unlock(&ch_sn2->msg_to_pull_mutex);
+			return NULL;
+		}
+
+		ch_sn2->next_msg_to_pull += nmsgs;
+	}
+
+	mutex_unlock(&ch_sn2->msg_to_pull_mutex);
+
+	/* return the message we were looking for */
+	msg_offset = (get % ch->remote_nentries) * ch->entry_size;
+	msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue + msg_offset);
+
+	return msg;
+}
+
+/*
+ * Get the next deliverable message's payload.
+ */
+static void *
+xpc_get_deliverable_payload_sn2(struct xpc_channel *ch)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	struct xpc_msg_sn2 *msg;
+	void *payload = NULL;
+	s64 get;
+
+	do {
+		if (ch->flags & XPC_C_DISCONNECTING)
+			break;
+
+		get = ch_sn2->w_local_GP.get;
+		smp_rmb();	/* guarantee that .get loads before .put */
+		if (get == ch_sn2->w_remote_GP.put)
+			break;
+
+		/* There are messages waiting to be pulled and delivered.
+		 * We need to try to secure one for ourselves.  We'll do this
+		 * by trying to increment w_local_GP.get and hope that no one
+		 * else beats us to it.  If they do, we'll simply have
+		 * to try again for the next one.
+ */ + + if (cmpxchg(&ch_sn2->w_local_GP.get, get, get + 1) == get) { + /* we got the entry referenced by get */ + + dev_dbg(xpc_chan, "w_local_GP.get changed to %lld, " + "partid=%d, channel=%d\n", get + 1, + ch->partid, ch->number); + + /* pull the message from the remote partition */ + + msg = xpc_pull_remote_msg_sn2(ch, get); + + if (msg != NULL) { + DBUG_ON(msg->number != get); + DBUG_ON(msg->flags & XPC_M_SN2_DONE); + DBUG_ON(!(msg->flags & XPC_M_SN2_READY)); + + payload = &msg->payload; + } + break; + } + + } while (1); + + return payload; +} + +/* + * Now we actually send the messages that are ready to be sent by advancing + * the local message queue's Put value and then send a chctl msgrequest to the + * recipient partition. + */ +static void +xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg; + s64 put = initial_put + 1; + int send_msgrequest = 0; + + while (1) { + + while (1) { + if (put == ch_sn2->w_local_GP.put) + break; + + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2-> + local_msgqueue + (put % + ch->local_nentries) * + ch->entry_size); + + if (!(msg->flags & XPC_M_SN2_READY)) + break; + + put++; + } + + if (put == initial_put) { + /* nothing's changed */ + break; + } + + if (cmpxchg_rel(&ch_sn2->local_GP->put, initial_put, put) != + initial_put) { + /* someone else beat us to it */ + DBUG_ON(ch_sn2->local_GP->put < initial_put); + break; + } + + /* we just set the new value of local_GP->put */ + + dev_dbg(xpc_chan, "local_GP->put changed to %lld, partid=%d, " + "channel=%d\n", put, ch->partid, ch->number); + + send_msgrequest = 1; + + /* + * We need to ensure that the message referenced by + * local_GP->put is not XPC_M_SN2_READY or that local_GP->put + * equals w_local_GP.put, so we'll go have a look. + */ + initial_put = put; + } + + if (send_msgrequest) + xpc_send_chctl_msgrequest_sn2(ch); +} + +/* + * Allocate an entry for a message from the message queue associated with the + * specified channel. + */ +static enum xp_retval +xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags, + struct xpc_msg_sn2 **address_of_msg) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg; + enum xp_retval ret; + s64 put; + + /* + * Get the next available message entry from the local message queue. + * If none are available, we'll make sure that we grab the latest + * GP values. + */ + ret = xpTimeout; + + while (1) { + + put = ch_sn2->w_local_GP.put; + smp_rmb(); /* guarantee that .put loads before .get */ + if (put - ch_sn2->w_remote_GP.get < ch->local_nentries) { + + /* There are available message entries. We need to try + * to secure one for ourselves. We'll do this by trying + * to increment w_local_GP.put as long as someone else + * doesn't beat us to it. If they do, we'll have to + * try again. + */ + if (cmpxchg(&ch_sn2->w_local_GP.put, put, put + 1) == + put) { + /* we got the entry referenced by put */ + break; + } + continue; /* try again */ + } + + /* + * There aren't any available msg entries at this time. + * + * In waiting for a message entry to become available, + * we set a timeout in case the other side is not sending + * completion interrupts. This lets us fake a notify IRQ + * that will cause the notify IRQ handler to fetch the latest + * GP values as if an interrupt was sent by the other side. 
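+		 * (xpc_send_chctl_local_msgrequest_sn2() fakes the IRQ by
+		 * OR'ing the flag into our own chctl amo, just as a remote
+		 * sender would.)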
+		 */
+		if (ret == xpTimeout)
+			xpc_send_chctl_local_msgrequest_sn2(ch);
+
+		if (flags & XPC_NOWAIT)
+			return xpNoWait;
+
+		ret = xpc_allocate_msg_wait(ch);
+		if (ret != xpInterrupted && ret != xpTimeout)
+			return ret;
+	}
+
+	/* get the message's address and initialize it */
+	msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->local_msgqueue +
+				     (put % ch->local_nentries) *
+				     ch->entry_size);
+
+	DBUG_ON(msg->flags != 0);
+	msg->number = put;
+
+	dev_dbg(xpc_chan, "w_local_GP.put changed to %lld; msg=0x%p, "
+		"msg_number=%lld, partid=%d, channel=%d\n", put + 1,
+		(void *)msg, msg->number, ch->partid, ch->number);
+
+	*address_of_msg = msg;
+	return xpSuccess;
+}
+
+/*
+ * Common code that does the actual sending of the message by advancing the
+ * local message queue's Put value and sends a chctl msgrequest to the
+ * partition the message is being sent to.
+ */
+static enum xp_retval
+xpc_send_payload_sn2(struct xpc_channel *ch, u32 flags, void *payload,
+		     u16 payload_size, u8 notify_type, xpc_notify_func func,
+		     void *key)
+{
+	enum xp_retval ret = xpSuccess;
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	/* the self-initializations quiet "may be used uninitialized" warnings */
+	struct xpc_msg_sn2 *msg = msg;
+	struct xpc_notify_sn2 *notify = notify;
+	s64 msg_number;
+	s64 put;
+
+	DBUG_ON(notify_type == XPC_N_CALL && func == NULL);
+
+	if (XPC_MSG_SIZE(payload_size) > ch->entry_size)
+		return xpPayloadTooBig;
+
+	xpc_msgqueue_ref(ch);
+
+	if (ch->flags & XPC_C_DISCONNECTING) {
+		ret = ch->reason;
+		goto out_1;
+	}
+	if (!(ch->flags & XPC_C_CONNECTED)) {
+		ret = xpNotConnected;
+		goto out_1;
+	}
+
+	ret = xpc_allocate_msg_sn2(ch, flags, &msg);
+	if (ret != xpSuccess)
+		goto out_1;
+
+	msg_number = msg->number;
+
+	if (notify_type != 0) {
+		/*
+		 * Tell the remote side to send an ACK interrupt when the
+		 * message has been delivered.
+		 */
+		msg->flags |= XPC_M_SN2_INTERRUPT;
+
+		atomic_inc(&ch->n_to_notify);
+
+		notify = &ch_sn2->notify_queue[msg_number % ch->local_nentries];
+		notify->func = func;
+		notify->key = key;
+		notify->type = notify_type;
+
+		/* ??? Is a mb() needed here? */
+
+		if (ch->flags & XPC_C_DISCONNECTING) {
+			/*
+			 * An error occurred between our last error check and
+			 * this one.  We will try to clear the type field from
+			 * the notify entry.  If we succeed then
+			 * xpc_disconnect_channel() didn't already process
+			 * the notify entry.
+			 */
+			if (cmpxchg(&notify->type, notify_type, 0) ==
+			    notify_type) {
+				atomic_dec(&ch->n_to_notify);
+				ret = ch->reason;
+			}
+			goto out_1;
+		}
+	}
+
+	memcpy(&msg->payload, payload, payload_size);
+
+	msg->flags |= XPC_M_SN2_READY;
+
+	/*
+	 * The preceding store of msg->flags must occur before the following
+	 * load of local_GP->put.
+	 */
+	smp_mb();
+
+	/* see if the message is next in line to be sent, if so send it */
+
+	put = ch_sn2->local_GP->put;
+	if (put == msg_number)
+		xpc_send_msgs_sn2(ch, put);
+
+out_1:
+	xpc_msgqueue_deref(ch);
+	return ret;
+}
+
+/*
+ * Now we actually acknowledge the messages that have been delivered and ack'd
+ * by advancing the cached remote message queue's Get value and if requested
+ * send a chctl msgrequest to the message sender's partition.
+ *
+ * If a message has XPC_M_SN2_INTERRUPT set, send an interrupt to the partition
+ * that sent the message.
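+ *
+ * msg_flags accumulates the flags of every message acknowledged in this
+ * pass, so a single XPC_M_SN2_INTERRUPT among them is enough to trigger
+ * the msgrequest.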
+ */ +static void +xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg; + s64 get = initial_get + 1; + int send_msgrequest = 0; + + while (1) { + + while (1) { + if (get == ch_sn2->w_local_GP.get) + break; + + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2-> + remote_msgqueue + (get % + ch->remote_nentries) * + ch->entry_size); + + if (!(msg->flags & XPC_M_SN2_DONE)) + break; + + msg_flags |= msg->flags; + get++; + } + + if (get == initial_get) { + /* nothing's changed */ + break; + } + + if (cmpxchg_rel(&ch_sn2->local_GP->get, initial_get, get) != + initial_get) { + /* someone else beat us to it */ + DBUG_ON(ch_sn2->local_GP->get <= initial_get); + break; + } + + /* we just set the new value of local_GP->get */ + + dev_dbg(xpc_chan, "local_GP->get changed to %lld, partid=%d, " + "channel=%d\n", get, ch->partid, ch->number); + + send_msgrequest = (msg_flags & XPC_M_SN2_INTERRUPT); + + /* + * We need to ensure that the message referenced by + * local_GP->get is not XPC_M_SN2_DONE or that local_GP->get + * equals w_local_GP.get, so we'll go have a look. + */ + initial_get = get; + } + + if (send_msgrequest) + xpc_send_chctl_msgrequest_sn2(ch); +} + +static void +xpc_received_payload_sn2(struct xpc_channel *ch, void *payload) +{ + struct xpc_msg_sn2 *msg; + s64 msg_number; + s64 get; + + msg = container_of(payload, struct xpc_msg_sn2, payload); + msg_number = msg->number; + + dev_dbg(xpc_chan, "msg=0x%p, msg_number=%lld, partid=%d, channel=%d\n", + (void *)msg, msg_number, ch->partid, ch->number); + + DBUG_ON((((u64)msg - (u64)ch->sn.sn2.remote_msgqueue) / ch->entry_size) != + msg_number % ch->remote_nentries); + DBUG_ON(!(msg->flags & XPC_M_SN2_READY)); + DBUG_ON(msg->flags & XPC_M_SN2_DONE); + + msg->flags |= XPC_M_SN2_DONE; + + /* + * The preceding store of msg->flags must occur before the following + * load of local_GP->get. + */ + smp_mb(); + + /* + * See if this message is next in line to be acknowledged as having + * been delivered. 
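+	 * If it is not, the ack is deferred: whoever later marks the
+	 * earlier message(s) done will advance Get past this one as well.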
+ */ + get = ch->sn.sn2.local_GP->get; + if (get == msg_number) + xpc_acknowledge_msgs_sn2(ch, get, msg->flags); +} + +static struct xpc_arch_operations xpc_arch_ops_sn2 = { + .setup_partitions = xpc_setup_partitions_sn2, + .teardown_partitions = xpc_teardown_partitions_sn2, + .process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_sn2, + .get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_sn2, + .setup_rsvd_page = xpc_setup_rsvd_page_sn2, + + .allow_hb = xpc_allow_hb_sn2, + .disallow_hb = xpc_disallow_hb_sn2, + .disallow_all_hbs = xpc_disallow_all_hbs_sn2, + .increment_heartbeat = xpc_increment_heartbeat_sn2, + .offline_heartbeat = xpc_offline_heartbeat_sn2, + .online_heartbeat = xpc_online_heartbeat_sn2, + .heartbeat_init = xpc_heartbeat_init_sn2, + .heartbeat_exit = xpc_heartbeat_exit_sn2, + .get_remote_heartbeat = xpc_get_remote_heartbeat_sn2, + + .request_partition_activation = + xpc_request_partition_activation_sn2, + .request_partition_reactivation = + xpc_request_partition_reactivation_sn2, + .request_partition_deactivation = + xpc_request_partition_deactivation_sn2, + .cancel_partition_deactivation_request = + xpc_cancel_partition_deactivation_request_sn2, + + .setup_ch_structures = xpc_setup_ch_structures_sn2, + .teardown_ch_structures = xpc_teardown_ch_structures_sn2, + + .make_first_contact = xpc_make_first_contact_sn2, + + .get_chctl_all_flags = xpc_get_chctl_all_flags_sn2, + .send_chctl_closerequest = xpc_send_chctl_closerequest_sn2, + .send_chctl_closereply = xpc_send_chctl_closereply_sn2, + .send_chctl_openrequest = xpc_send_chctl_openrequest_sn2, + .send_chctl_openreply = xpc_send_chctl_openreply_sn2, + .send_chctl_opencomplete = xpc_send_chctl_opencomplete_sn2, + .process_msg_chctl_flags = xpc_process_msg_chctl_flags_sn2, + + .save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_sn2, + + .setup_msg_structures = xpc_setup_msg_structures_sn2, + .teardown_msg_structures = xpc_teardown_msg_structures_sn2, + + .indicate_partition_engaged = xpc_indicate_partition_engaged_sn2, + .indicate_partition_disengaged = xpc_indicate_partition_disengaged_sn2, + .partition_engaged = xpc_partition_engaged_sn2, + .any_partition_engaged = xpc_any_partition_engaged_sn2, + .assume_partition_disengaged = xpc_assume_partition_disengaged_sn2, + + .n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_sn2, + .send_payload = xpc_send_payload_sn2, + .get_deliverable_payload = xpc_get_deliverable_payload_sn2, + .received_payload = xpc_received_payload_sn2, + .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2, +}; + +int +xpc_init_sn2(void) +{ + int ret; + size_t buf_size; + + xpc_arch_ops = xpc_arch_ops_sn2; + + if (offsetof(struct xpc_msg_sn2, payload) > XPC_MSG_HDR_MAX_SIZE) { + dev_err(xpc_part, "header portion of struct xpc_msg_sn2 is " + "larger than %d\n", XPC_MSG_HDR_MAX_SIZE); + return -E2BIG; + } + + buf_size = max(XPC_RP_VARS_SIZE, + XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES_SN2); + xpc_remote_copy_buffer_sn2 = xpc_kmalloc_cacheline_aligned(buf_size, + GFP_KERNEL, + &xpc_remote_copy_buffer_base_sn2); + if (xpc_remote_copy_buffer_sn2 == NULL) { + dev_err(xpc_part, "can't get memory for remote copy buffer\n"); + return -ENOMEM; + } + + /* open up protections for IPI and [potentially] amo operations */ + xpc_allow_IPI_ops_sn2(); + xpc_allow_amo_ops_shub_wars_1_1_sn2(); + + /* + * This is safe to do before the xpc_hb_checker thread has started + * because the handler releases a wait queue. 
If an interrupt is + * received before the thread is waiting, it will not go to sleep, + * but rather immediately process the interrupt. + */ + ret = request_irq(SGI_XPC_ACTIVATE, xpc_handle_activate_IRQ_sn2, 0, + "xpc hb", NULL); + if (ret != 0) { + dev_err(xpc_part, "can't register ACTIVATE IRQ handler, " + "errno=%d\n", -ret); + xpc_disallow_IPI_ops_sn2(); + kfree(xpc_remote_copy_buffer_base_sn2); + } + return ret; +} + +void +xpc_exit_sn2(void) +{ + free_irq(SGI_XPC_ACTIVATE, NULL); + xpc_disallow_IPI_ops_sn2(); + kfree(xpc_remote_copy_buffer_base_sn2); +} diff --git a/kernel/drivers/misc/sgi-xp/xpc_uv.c b/kernel/drivers/misc/sgi-xp/xpc_uv.c new file mode 100644 index 000000000..95c894482 --- /dev/null +++ b/kernel/drivers/misc/sgi-xp/xpc_uv.c @@ -0,0 +1,1813 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) uv-based functions. + * + * Architecture specific implementation of common functions. + * + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/cpu.h> +#include <linux/module.h> +#include <linux/err.h> +#include <linux/slab.h> +#include <asm/uv/uv_hub.h> +#if defined CONFIG_X86_64 +#include <asm/uv/bios.h> +#include <asm/uv/uv_irq.h> +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV +#include <asm/sn/intr.h> +#include <asm/sn/sn_sal.h> +#endif +#include "../sgi-gru/gru.h" +#include "../sgi-gru/grukservices.h" +#include "xpc.h" + +#if defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV +struct uv_IO_APIC_route_entry { + __u64 vector : 8, + delivery_mode : 3, + dest_mode : 1, + delivery_status : 1, + polarity : 1, + __reserved_1 : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15, + dest : 32; +}; +#endif + +static struct xpc_heartbeat_uv *xpc_heartbeat_uv; + +#define XPC_ACTIVATE_MSG_SIZE_UV (1 * GRU_CACHE_LINE_BYTES) +#define XPC_ACTIVATE_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ + XPC_ACTIVATE_MSG_SIZE_UV) +#define XPC_ACTIVATE_IRQ_NAME "xpc_activate" + +#define XPC_NOTIFY_MSG_SIZE_UV (2 * GRU_CACHE_LINE_BYTES) +#define XPC_NOTIFY_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ + XPC_NOTIFY_MSG_SIZE_UV) +#define XPC_NOTIFY_IRQ_NAME "xpc_notify" + +static int xpc_mq_node = -1; + +static struct xpc_gru_mq_uv *xpc_activate_mq_uv; +static struct xpc_gru_mq_uv *xpc_notify_mq_uv; + +static int +xpc_setup_partitions_uv(void) +{ + short partid; + struct xpc_partition_uv *part_uv; + + for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) { + part_uv = &xpc_partitions[partid].sn.uv; + + mutex_init(&part_uv->cached_activate_gru_mq_desc_mutex); + spin_lock_init(&part_uv->flags_lock); + part_uv->remote_act_state = XPC_P_AS_INACTIVE; + } + return 0; +} + +static void +xpc_teardown_partitions_uv(void) +{ + short partid; + struct xpc_partition_uv *part_uv; + unsigned long irq_flags; + + for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) { + part_uv = &xpc_partitions[partid].sn.uv; + + if (part_uv->cached_activate_gru_mq_desc != NULL) { + mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex); + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + kfree(part_uv->cached_activate_gru_mq_desc); + 
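+			/*
+			 * NULL the cached pointer while still holding the
+			 * mutex so xpc_send_activate_IRQ_uv() cannot reuse
+			 * the just-freed descriptor.
+			 */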
part_uv->cached_activate_gru_mq_desc = NULL; + mutex_unlock(&part_uv-> + cached_activate_gru_mq_desc_mutex); + } + } +} + +static int +xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name) +{ + int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); + +#if defined CONFIG_X86_64 + mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset, + UV_AFFINITY_CPU); + if (mq->irq < 0) + return mq->irq; + + mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset); + +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV + if (strcmp(irq_name, XPC_ACTIVATE_IRQ_NAME) == 0) + mq->irq = SGI_XPC_ACTIVATE; + else if (strcmp(irq_name, XPC_NOTIFY_IRQ_NAME) == 0) + mq->irq = SGI_XPC_NOTIFY; + else + return -EINVAL; + + mq->mmr_value = (unsigned long)cpu_physical_id(cpu) << 32 | mq->irq; + uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mq->mmr_value); +#else + #error not a supported configuration +#endif + + return 0; +} + +static void +xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq) +{ +#if defined CONFIG_X86_64 + uv_teardown_irq(mq->irq); + +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV + int mmr_pnode; + unsigned long mmr_value; + + mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); + mmr_value = 1UL << 16; + + uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mmr_value); +#else + #error not a supported configuration +#endif +} + +static int +xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq) +{ + int ret; + +#if defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV + int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); + + ret = sn_mq_watchlist_alloc(mmr_pnode, (void *)uv_gpa(mq->address), + mq->order, &mq->mmr_offset); + if (ret < 0) { + dev_err(xpc_part, "sn_mq_watchlist_alloc() failed, ret=%d\n", + ret); + return -EBUSY; + } +#elif defined CONFIG_X86_64 + ret = uv_bios_mq_watchlist_alloc(uv_gpa(mq->address), + mq->order, &mq->mmr_offset); + if (ret < 0) { + dev_err(xpc_part, "uv_bios_mq_watchlist_alloc() failed, " + "ret=%d\n", ret); + return ret; + } +#else + #error not a supported configuration +#endif + + mq->watchlist_num = ret; + return 0; +} + +static void +xpc_gru_mq_watchlist_free_uv(struct xpc_gru_mq_uv *mq) +{ + int ret; + int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); + +#if defined CONFIG_X86_64 + ret = uv_bios_mq_watchlist_free(mmr_pnode, mq->watchlist_num); + BUG_ON(ret != BIOS_STATUS_SUCCESS); +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV + ret = sn_mq_watchlist_free(mmr_pnode, mq->watchlist_num); + BUG_ON(ret != SALRET_OK); +#else + #error not a supported configuration +#endif +} + +static struct xpc_gru_mq_uv * +xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name, + irq_handler_t irq_handler) +{ + enum xp_retval xp_ret; + int ret; + int nid; + int nasid; + int pg_order; + struct page *page; + struct xpc_gru_mq_uv *mq; + struct uv_IO_APIC_route_entry *mmr_value; + + mq = kmalloc(sizeof(struct xpc_gru_mq_uv), GFP_KERNEL); + if (mq == NULL) { + dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() " + "a xpc_gru_mq_uv structure\n"); + ret = -ENOMEM; + goto out_0; + } + + mq->gru_mq_desc = kzalloc(sizeof(struct gru_message_queue_desc), + GFP_KERNEL); + if (mq->gru_mq_desc == NULL) { + dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() " + "a gru_message_queue_desc structure\n"); + ret = -ENOMEM; + goto out_1; + } + + pg_order = get_order(mq_size); + mq->order = pg_order + PAGE_SHIFT; + mq_size = 1UL << mq->order; + + mq->mmr_blade = uv_cpu_to_blade_id(cpu); + + nid = 
cpu_to_node(cpu); + page = alloc_pages_exact_node(nid, + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, + pg_order); + if (page == NULL) { + dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d " + "bytes of memory on nid=%d for GRU mq\n", mq_size, nid); + ret = -ENOMEM; + goto out_2; + } + mq->address = page_address(page); + + /* enable generation of irq when GRU mq operation occurs to this mq */ + ret = xpc_gru_mq_watchlist_alloc_uv(mq); + if (ret != 0) + goto out_3; + + ret = xpc_get_gru_mq_irq_uv(mq, cpu, irq_name); + if (ret != 0) + goto out_4; + + ret = request_irq(mq->irq, irq_handler, 0, irq_name, NULL); + if (ret != 0) { + dev_err(xpc_part, "request_irq(irq=%d) returned error=%d\n", + mq->irq, -ret); + goto out_5; + } + + nasid = UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpu)); + + mmr_value = (struct uv_IO_APIC_route_entry *)&mq->mmr_value; + ret = gru_create_message_queue(mq->gru_mq_desc, mq->address, mq_size, + nasid, mmr_value->vector, mmr_value->dest); + if (ret != 0) { + dev_err(xpc_part, "gru_create_message_queue() returned " + "error=%d\n", ret); + ret = -EINVAL; + goto out_6; + } + + /* allow other partitions to access this GRU mq */ + xp_ret = xp_expand_memprotect(xp_pa(mq->address), mq_size); + if (xp_ret != xpSuccess) { + ret = -EACCES; + goto out_6; + } + + return mq; + + /* something went wrong */ +out_6: + free_irq(mq->irq, NULL); +out_5: + xpc_release_gru_mq_irq_uv(mq); +out_4: + xpc_gru_mq_watchlist_free_uv(mq); +out_3: + free_pages((unsigned long)mq->address, pg_order); +out_2: + kfree(mq->gru_mq_desc); +out_1: + kfree(mq); +out_0: + return ERR_PTR(ret); +} + +static void +xpc_destroy_gru_mq_uv(struct xpc_gru_mq_uv *mq) +{ + unsigned int mq_size; + int pg_order; + int ret; + + /* disallow other partitions to access GRU mq */ + mq_size = 1UL << mq->order; + ret = xp_restrict_memprotect(xp_pa(mq->address), mq_size); + BUG_ON(ret != xpSuccess); + + /* unregister irq handler and release mq irq/vector mapping */ + free_irq(mq->irq, NULL); + xpc_release_gru_mq_irq_uv(mq); + + /* disable generation of irq when GRU mq op occurs to this mq */ + xpc_gru_mq_watchlist_free_uv(mq); + + pg_order = mq->order - PAGE_SHIFT; + free_pages((unsigned long)mq->address, pg_order); + + kfree(mq); +} + +static enum xp_retval +xpc_send_gru_msg(struct gru_message_queue_desc *gru_mq_desc, void *msg, + size_t msg_size) +{ + enum xp_retval xp_ret; + int ret; + + while (1) { + ret = gru_send_message_gpa(gru_mq_desc, msg, msg_size); + if (ret == MQE_OK) { + xp_ret = xpSuccess; + break; + } + + if (ret == MQE_QUEUE_FULL) { + dev_dbg(xpc_chan, "gru_send_message_gpa() returned " + "error=MQE_QUEUE_FULL\n"); + /* !!! handle QLimit reached; delay & try again */ + /* ??? Do we add a limit to the number of retries? */ + (void)msleep_interruptible(10); + } else if (ret == MQE_CONGESTION) { + dev_dbg(xpc_chan, "gru_send_message_gpa() returned " + "error=MQE_CONGESTION\n"); + /* !!! handle LB Overflow; simply try again */ + /* ??? Do we add a limit to the number of retries? */ + } else { + /* !!! 
Currently this is MQE_UNEXPECTED_CB_ERR */ + dev_err(xpc_chan, "gru_send_message_gpa() returned " + "error=%d\n", ret); + xp_ret = xpGruSendMqError; + break; + } + } + return xp_ret; +} + +static void +xpc_process_activate_IRQ_rcvd_uv(void) +{ + unsigned long irq_flags; + short partid; + struct xpc_partition *part; + u8 act_state_req; + + DBUG_ON(xpc_activate_IRQ_rcvd == 0); + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) { + part = &xpc_partitions[partid]; + + if (part->sn.uv.act_state_req == 0) + continue; + + xpc_activate_IRQ_rcvd--; + BUG_ON(xpc_activate_IRQ_rcvd < 0); + + act_state_req = part->sn.uv.act_state_req; + part->sn.uv.act_state_req = 0; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + if (act_state_req == XPC_P_ASR_ACTIVATE_UV) { + if (part->act_state == XPC_P_AS_INACTIVE) + xpc_activate_partition(part); + else if (part->act_state == XPC_P_AS_DEACTIVATING) + XPC_DEACTIVATE_PARTITION(part, xpReactivating); + + } else if (act_state_req == XPC_P_ASR_REACTIVATE_UV) { + if (part->act_state == XPC_P_AS_INACTIVE) + xpc_activate_partition(part); + else + XPC_DEACTIVATE_PARTITION(part, xpReactivating); + + } else if (act_state_req == XPC_P_ASR_DEACTIVATE_UV) { + XPC_DEACTIVATE_PARTITION(part, part->sn.uv.reason); + + } else { + BUG(); + } + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (xpc_activate_IRQ_rcvd == 0) + break; + } + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + +} + +static void +xpc_handle_activate_mq_msg_uv(struct xpc_partition *part, + struct xpc_activate_mq_msghdr_uv *msg_hdr, + int part_setup, + int *wakeup_hb_checker) +{ + unsigned long irq_flags; + struct xpc_partition_uv *part_uv = &part->sn.uv; + struct xpc_openclose_args *args; + + part_uv->remote_act_state = msg_hdr->act_state; + + switch (msg_hdr->type) { + case XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV: + /* syncing of remote_act_state was just done above */ + break; + + case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: { + struct xpc_activate_mq_msg_activate_req_uv *msg; + + /* + * ??? Do we deal here with ts_jiffies being different + * ??? if act_state != XPC_P_AS_INACTIVE instead of + * ??? below? + */ + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_activate_req_uv, hdr); + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV; + part->remote_rp_pa = msg->rp_gpa; /* !!! 
_pa is _gpa */ + part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies; + part_uv->heartbeat_gpa = msg->heartbeat_gpa; + + if (msg->activate_gru_mq_desc_gpa != + part_uv->activate_gru_mq_desc_gpa) { + spin_lock(&part_uv->flags_lock); + part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV; + spin_unlock(&part_uv->flags_lock); + part_uv->activate_gru_mq_desc_gpa = + msg->activate_gru_mq_desc_gpa; + } + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + (*wakeup_hb_checker)++; + break; + } + case XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV: { + struct xpc_activate_mq_msg_deactivate_req_uv *msg; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_deactivate_req_uv, hdr); + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV; + part_uv->reason = msg->reason; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + (*wakeup_hb_checker)++; + return; + } + case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV: { + struct xpc_activate_mq_msg_chctl_closerequest_uv *msg; + + if (!part_setup) + break; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_closerequest_uv, + hdr); + args = &part->remote_openclose_args[msg->ch_number]; + args->reason = msg->reason; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREQUEST; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + break; + } + case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV: { + struct xpc_activate_mq_msg_chctl_closereply_uv *msg; + + if (!part_setup) + break; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_closereply_uv, + hdr); + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREPLY; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + break; + } + case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV: { + struct xpc_activate_mq_msg_chctl_openrequest_uv *msg; + + if (!part_setup) + break; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_openrequest_uv, + hdr); + args = &part->remote_openclose_args[msg->ch_number]; + args->entry_size = msg->entry_size; + args->local_nentries = msg->local_nentries; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREQUEST; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + break; + } + case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV: { + struct xpc_activate_mq_msg_chctl_openreply_uv *msg; + + if (!part_setup) + break; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_openreply_uv, hdr); + args = &part->remote_openclose_args[msg->ch_number]; + args->remote_nentries = msg->remote_nentries; + args->local_nentries = msg->local_nentries; + args->local_msgqueue_pa = msg->notify_gru_mq_desc_gpa; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREPLY; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + break; + } + case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV: { + struct xpc_activate_mq_msg_chctl_opencomplete_uv *msg; + + if (!part_setup) + break; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_opencomplete_uv, hdr); + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= 
XPC_CHCTL_OPENCOMPLETE;
+		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+		xpc_wakeup_channel_mgr(part);
+		break;
+	}
+	case XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV:
+		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+		part_uv->flags |= XPC_P_ENGAGED_UV;
+		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+		break;
+
+	case XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV:
+		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+		part_uv->flags &= ~XPC_P_ENGAGED_UV;
+		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+		break;
+
+	default:
+		dev_err(xpc_part, "received unknown activate_mq msg type=%d "
+			"from partition=%d\n", msg_hdr->type, XPC_PARTID(part));
+
+		/* get hb checker to deactivate from the remote partition */
+		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+		if (part_uv->act_state_req == 0)
+			xpc_activate_IRQ_rcvd++;
+		part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
+		part_uv->reason = xpBadMsgType;
+		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+		(*wakeup_hb_checker)++;
+		return;
+	}
+
+	if (msg_hdr->rp_ts_jiffies != part->remote_rp_ts_jiffies &&
+	    part->remote_rp_ts_jiffies != 0) {
+		/*
+		 * ??? Does what we do here need to be sensitive to
+		 * ??? act_state or remote_act_state?
+		 */
+		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+		if (part_uv->act_state_req == 0)
+			xpc_activate_IRQ_rcvd++;
+		part_uv->act_state_req = XPC_P_ASR_REACTIVATE_UV;
+		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+		(*wakeup_hb_checker)++;
+	}
+}
+
+static irqreturn_t
+xpc_handle_activate_IRQ_uv(int irq, void *dev_id)
+{
+	struct xpc_activate_mq_msghdr_uv *msg_hdr;
+	short partid;
+	struct xpc_partition *part;
+	int wakeup_hb_checker = 0;
+	int part_referenced;
+
+	while (1) {
+		msg_hdr = gru_get_next_message(xpc_activate_mq_uv->gru_mq_desc);
+		if (msg_hdr == NULL)
+			break;
+
+		partid = msg_hdr->partid;
+		if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
+			dev_err(xpc_part, "xpc_handle_activate_IRQ_uv() "
+				"received invalid partid=0x%x in message\n",
+				partid);
+		} else {
+			part = &xpc_partitions[partid];
+
+			part_referenced = xpc_part_ref(part);
+			xpc_handle_activate_mq_msg_uv(part, msg_hdr,
+						      part_referenced,
+						      &wakeup_hb_checker);
+			if (part_referenced)
+				xpc_part_deref(part);
+		}
+
+		gru_free_message(xpc_activate_mq_uv->gru_mq_desc, msg_hdr);
+	}
+
+	if (wakeup_hb_checker)
+		wake_up_interruptible(&xpc_activate_IRQ_wq);
+
+	return IRQ_HANDLED;
+}
+
+static enum xp_retval
+xpc_cache_remote_gru_mq_desc_uv(struct gru_message_queue_desc *gru_mq_desc,
+				unsigned long gru_mq_desc_gpa)
+{
+	enum xp_retval ret;
+
+	ret = xp_remote_memcpy(uv_gpa(gru_mq_desc), gru_mq_desc_gpa,
+			       sizeof(struct gru_message_queue_desc));
+	if (ret == xpSuccess)
+		gru_mq_desc->mq = NULL;
+
+	return ret;
+}
+
+static enum xp_retval
+xpc_send_activate_IRQ_uv(struct xpc_partition *part, void *msg, size_t msg_size,
+			 int msg_type)
+{
+	struct xpc_activate_mq_msghdr_uv *msg_hdr = msg;
+	struct xpc_partition_uv *part_uv = &part->sn.uv;
+	struct gru_message_queue_desc *gru_mq_desc;
+	unsigned long irq_flags;
+	enum xp_retval ret;
+
+	DBUG_ON(msg_size > XPC_ACTIVATE_MSG_SIZE_UV);
+
+	msg_hdr->type = msg_type;
+	msg_hdr->partid = xp_partition_id;
+	msg_hdr->act_state = part->act_state;
+	msg_hdr->rp_ts_jiffies = xpc_rsvd_page->ts_jiffies;
+
+	mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex);
+again:
+	if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV)) {
+		gru_mq_desc = part_uv->cached_activate_gru_mq_desc;
+		if (gru_mq_desc == NULL) {
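+			/*
+			 * First send since this partition was (re)activated:
+			 * allocate a buffer to hold our cached copy of the
+			 * remote partition's activate mq descriptor.
+			 */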
gru_mq_desc = kmalloc(sizeof(struct + gru_message_queue_desc), + GFP_KERNEL); + if (gru_mq_desc == NULL) { + ret = xpNoMemory; + goto done; + } + part_uv->cached_activate_gru_mq_desc = gru_mq_desc; + } + + ret = xpc_cache_remote_gru_mq_desc_uv(gru_mq_desc, + part_uv-> + activate_gru_mq_desc_gpa); + if (ret != xpSuccess) + goto done; + + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags |= XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + } + + /* ??? Is holding a spin_lock (ch->lock) during this call a bad idea? */ + ret = xpc_send_gru_msg(part_uv->cached_activate_gru_mq_desc, msg, + msg_size); + if (ret != xpSuccess) { + smp_rmb(); /* ensure a fresh copy of part_uv->flags */ + if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV)) + goto again; + } +done: + mutex_unlock(&part_uv->cached_activate_gru_mq_desc_mutex); + return ret; +} + +static void +xpc_send_activate_IRQ_part_uv(struct xpc_partition *part, void *msg, + size_t msg_size, int msg_type) +{ + enum xp_retval ret; + + ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type); + if (unlikely(ret != xpSuccess)) + XPC_DEACTIVATE_PARTITION(part, ret); +} + +static void +xpc_send_activate_IRQ_ch_uv(struct xpc_channel *ch, unsigned long *irq_flags, + void *msg, size_t msg_size, int msg_type) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + enum xp_retval ret; + + ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type); + if (unlikely(ret != xpSuccess)) { + if (irq_flags != NULL) + spin_unlock_irqrestore(&ch->lock, *irq_flags); + + XPC_DEACTIVATE_PARTITION(part, ret); + + if (irq_flags != NULL) + spin_lock_irqsave(&ch->lock, *irq_flags); + } +} + +static void +xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req) +{ + unsigned long irq_flags; + struct xpc_partition_uv *part_uv = &part->sn.uv; + + /* + * !!! Make our side think that the remote partition sent an activate + * !!! mq message our way by doing what the activate IRQ handler would + * !!! do had one really been sent. 
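+	 * !!! In effect this is a self-interrupt: we bump
+	 * !!! xpc_activate_IRQ_rcvd, record the request and wake
+	 * !!! xpc_activate_IRQ_wq, exactly as the activate IRQ handler does.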
+ */ + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = act_state_req; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + wake_up_interruptible(&xpc_activate_IRQ_wq); +} + +static enum xp_retval +xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa, + size_t *len) +{ + s64 status; + enum xp_retval ret; + +#if defined CONFIG_X86_64 + status = uv_bios_reserved_page_pa((u64)buf, cookie, (u64 *)rp_pa, + (u64 *)len); + if (status == BIOS_STATUS_SUCCESS) + ret = xpSuccess; + else if (status == BIOS_STATUS_MORE_PASSES) + ret = xpNeedMoreInfo; + else + ret = xpBiosError; + +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV + status = sn_partition_reserved_page_pa((u64)buf, cookie, rp_pa, len); + if (status == SALRET_OK) + ret = xpSuccess; + else if (status == SALRET_MORE_PASSES) + ret = xpNeedMoreInfo; + else + ret = xpSalError; + +#else + #error not a supported configuration +#endif + + return ret; +} + +static int +xpc_setup_rsvd_page_uv(struct xpc_rsvd_page *rp) +{ + xpc_heartbeat_uv = + &xpc_partitions[sn_partition_id].sn.uv.cached_heartbeat; + rp->sn.uv.heartbeat_gpa = uv_gpa(xpc_heartbeat_uv); + rp->sn.uv.activate_gru_mq_desc_gpa = + uv_gpa(xpc_activate_mq_uv->gru_mq_desc); + return 0; +} + +static void +xpc_allow_hb_uv(short partid) +{ +} + +static void +xpc_disallow_hb_uv(short partid) +{ +} + +static void +xpc_disallow_all_hbs_uv(void) +{ +} + +static void +xpc_increment_heartbeat_uv(void) +{ + xpc_heartbeat_uv->value++; +} + +static void +xpc_offline_heartbeat_uv(void) +{ + xpc_increment_heartbeat_uv(); + xpc_heartbeat_uv->offline = 1; +} + +static void +xpc_online_heartbeat_uv(void) +{ + xpc_increment_heartbeat_uv(); + xpc_heartbeat_uv->offline = 0; +} + +static void +xpc_heartbeat_init_uv(void) +{ + xpc_heartbeat_uv->value = 1; + xpc_heartbeat_uv->offline = 0; +} + +static void +xpc_heartbeat_exit_uv(void) +{ + xpc_offline_heartbeat_uv(); +} + +static enum xp_retval +xpc_get_remote_heartbeat_uv(struct xpc_partition *part) +{ + struct xpc_partition_uv *part_uv = &part->sn.uv; + enum xp_retval ret; + + ret = xp_remote_memcpy(uv_gpa(&part_uv->cached_heartbeat), + part_uv->heartbeat_gpa, + sizeof(struct xpc_heartbeat_uv)); + if (ret != xpSuccess) + return ret; + + if (part_uv->cached_heartbeat.value == part->last_heartbeat && + !part_uv->cached_heartbeat.offline) { + + ret = xpNoHeartbeat; + } else { + part->last_heartbeat = part_uv->cached_heartbeat.value; + } + return ret; +} + +static void +xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp, + unsigned long remote_rp_gpa, int nasid) +{ + short partid = remote_rp->SAL_partid; + struct xpc_partition *part = &xpc_partitions[partid]; + struct xpc_activate_mq_msg_activate_req_uv msg; + + part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */ + part->remote_rp_ts_jiffies = remote_rp->ts_jiffies; + part->sn.uv.heartbeat_gpa = remote_rp->sn.uv.heartbeat_gpa; + part->sn.uv.activate_gru_mq_desc_gpa = + remote_rp->sn.uv.activate_gru_mq_desc_gpa; + + /* + * ??? Is it a good idea to make this conditional on what is + * ??? potentially stale state information? 
+ */ + if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) { + msg.rp_gpa = uv_gpa(xpc_rsvd_page); + msg.heartbeat_gpa = xpc_rsvd_page->sn.uv.heartbeat_gpa; + msg.activate_gru_mq_desc_gpa = + xpc_rsvd_page->sn.uv.activate_gru_mq_desc_gpa; + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV); + } + + if (part->act_state == XPC_P_AS_INACTIVE) + xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV); +} + +static void +xpc_request_partition_reactivation_uv(struct xpc_partition *part) +{ + xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV); +} + +static void +xpc_request_partition_deactivation_uv(struct xpc_partition *part) +{ + struct xpc_activate_mq_msg_deactivate_req_uv msg; + + /* + * ??? Is it a good idea to make this conditional on what is + * ??? potentially stale state information? + */ + if (part->sn.uv.remote_act_state != XPC_P_AS_DEACTIVATING && + part->sn.uv.remote_act_state != XPC_P_AS_INACTIVE) { + + msg.reason = part->reason; + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV); + } +} + +static void +xpc_cancel_partition_deactivation_request_uv(struct xpc_partition *part) +{ + /* nothing needs to be done */ + return; +} + +static void +xpc_init_fifo_uv(struct xpc_fifo_head_uv *head) +{ + head->first = NULL; + head->last = NULL; + spin_lock_init(&head->lock); + head->n_entries = 0; +} + +static void * +xpc_get_fifo_entry_uv(struct xpc_fifo_head_uv *head) +{ + unsigned long irq_flags; + struct xpc_fifo_entry_uv *first; + + spin_lock_irqsave(&head->lock, irq_flags); + first = head->first; + if (head->first != NULL) { + head->first = first->next; + if (head->first == NULL) + head->last = NULL; + + head->n_entries--; + BUG_ON(head->n_entries < 0); + + first->next = NULL; + } + spin_unlock_irqrestore(&head->lock, irq_flags); + return first; +} + +static void +xpc_put_fifo_entry_uv(struct xpc_fifo_head_uv *head, + struct xpc_fifo_entry_uv *last) +{ + unsigned long irq_flags; + + last->next = NULL; + spin_lock_irqsave(&head->lock, irq_flags); + if (head->last != NULL) + head->last->next = last; + else + head->first = last; + head->last = last; + head->n_entries++; + spin_unlock_irqrestore(&head->lock, irq_flags); +} + +static int +xpc_n_of_fifo_entries_uv(struct xpc_fifo_head_uv *head) +{ + return head->n_entries; +} + +/* + * Setup the channel structures that are uv specific. + */ +static enum xp_retval +xpc_setup_ch_structures_uv(struct xpc_partition *part) +{ + struct xpc_channel_uv *ch_uv; + int ch_number; + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch_uv = &part->channels[ch_number].sn.uv; + + xpc_init_fifo_uv(&ch_uv->msg_slot_free_list); + xpc_init_fifo_uv(&ch_uv->recv_msg_list); + } + + return xpSuccess; +} + +/* + * Teardown the channel structures that are uv specific. + */ +static void +xpc_teardown_ch_structures_uv(struct xpc_partition *part) +{ + /* nothing needs to be done */ + return; +} + +static enum xp_retval +xpc_make_first_contact_uv(struct xpc_partition *part) +{ + struct xpc_activate_mq_msg_uv msg; + + /* + * We send a sync msg to get the remote partition's remote_act_state + * updated to our current act_state which at this point should + * be XPC_P_AS_ACTIVATING. 
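+	 *
+	 * The sync message itself carries no payload; the act_state field
+	 * of its header is what updates the remote side's remote_act_state.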
+ */ + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV); + + while (!((part->sn.uv.remote_act_state == XPC_P_AS_ACTIVATING) || + (part->sn.uv.remote_act_state == XPC_P_AS_ACTIVE))) { + + dev_dbg(xpc_part, "waiting to make first contact with " + "partition %d\n", XPC_PARTID(part)); + + /* wait a 1/4 of a second or so */ + (void)msleep_interruptible(250); + + if (part->act_state == XPC_P_AS_DEACTIVATING) + return part->reason; + } + + return xpSuccess; +} + +static u64 +xpc_get_chctl_all_flags_uv(struct xpc_partition *part) +{ + unsigned long irq_flags; + union xpc_channel_ctl_flags chctl; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + chctl = part->chctl; + if (chctl.all_flags != 0) + part->chctl.all_flags = 0; + + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + return chctl.all_flags; +} + +static enum xp_retval +xpc_allocate_send_msg_slot_uv(struct xpc_channel *ch) +{ + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + struct xpc_send_msg_slot_uv *msg_slot; + unsigned long irq_flags; + int nentries; + int entry; + size_t nbytes; + + for (nentries = ch->local_nentries; nentries > 0; nentries--) { + nbytes = nentries * sizeof(struct xpc_send_msg_slot_uv); + ch_uv->send_msg_slots = kzalloc(nbytes, GFP_KERNEL); + if (ch_uv->send_msg_slots == NULL) + continue; + + for (entry = 0; entry < nentries; entry++) { + msg_slot = &ch_uv->send_msg_slots[entry]; + + msg_slot->msg_slot_number = entry; + xpc_put_fifo_entry_uv(&ch_uv->msg_slot_free_list, + &msg_slot->next); + } + + spin_lock_irqsave(&ch->lock, irq_flags); + if (nentries < ch->local_nentries) + ch->local_nentries = nentries; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpSuccess; + } + + return xpNoMemory; +} + +static enum xp_retval +xpc_allocate_recv_msg_slot_uv(struct xpc_channel *ch) +{ + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + struct xpc_notify_mq_msg_uv *msg_slot; + unsigned long irq_flags; + int nentries; + int entry; + size_t nbytes; + + for (nentries = ch->remote_nentries; nentries > 0; nentries--) { + nbytes = nentries * ch->entry_size; + ch_uv->recv_msg_slots = kzalloc(nbytes, GFP_KERNEL); + if (ch_uv->recv_msg_slots == NULL) + continue; + + for (entry = 0; entry < nentries; entry++) { + msg_slot = ch_uv->recv_msg_slots + + entry * ch->entry_size; + + msg_slot->hdr.msg_slot_number = entry; + } + + spin_lock_irqsave(&ch->lock, irq_flags); + if (nentries < ch->remote_nentries) + ch->remote_nentries = nentries; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpSuccess; + } + + return xpNoMemory; +} + +/* + * Allocate msg_slots associated with the channel. + */ +static enum xp_retval +xpc_setup_msg_structures_uv(struct xpc_channel *ch) +{ + static enum xp_retval ret; + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + + DBUG_ON(ch->flags & XPC_C_SETUP); + + ch_uv->cached_notify_gru_mq_desc = kmalloc(sizeof(struct + gru_message_queue_desc), + GFP_KERNEL); + if (ch_uv->cached_notify_gru_mq_desc == NULL) + return xpNoMemory; + + ret = xpc_allocate_send_msg_slot_uv(ch); + if (ret == xpSuccess) { + + ret = xpc_allocate_recv_msg_slot_uv(ch); + if (ret != xpSuccess) { + kfree(ch_uv->send_msg_slots); + xpc_init_fifo_uv(&ch_uv->msg_slot_free_list); + } + } + return ret; +} + +/* + * Free up msg_slots and clear other stuff that were setup for the specified + * channel. 
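+ *
+ * Called with ch->lock held (see the DBUG_ON below).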
+ */ +static void +xpc_teardown_msg_structures_uv(struct xpc_channel *ch) +{ + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + + DBUG_ON(!spin_is_locked(&ch->lock)); + + kfree(ch_uv->cached_notify_gru_mq_desc); + ch_uv->cached_notify_gru_mq_desc = NULL; + + if (ch->flags & XPC_C_SETUP) { + xpc_init_fifo_uv(&ch_uv->msg_slot_free_list); + kfree(ch_uv->send_msg_slots); + xpc_init_fifo_uv(&ch_uv->recv_msg_list); + kfree(ch_uv->recv_msg_slots); + } +} + +static void +xpc_send_chctl_closerequest_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_closerequest_uv msg; + + msg.ch_number = ch->number; + msg.reason = ch->reason; + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV); +} + +static void +xpc_send_chctl_closereply_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_closereply_uv msg; + + msg.ch_number = ch->number; + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV); +} + +static void +xpc_send_chctl_openrequest_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_openrequest_uv msg; + + msg.ch_number = ch->number; + msg.entry_size = ch->entry_size; + msg.local_nentries = ch->local_nentries; + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV); +} + +static void +xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_openreply_uv msg; + + msg.ch_number = ch->number; + msg.local_nentries = ch->local_nentries; + msg.remote_nentries = ch->remote_nentries; + msg.notify_gru_mq_desc_gpa = uv_gpa(xpc_notify_mq_uv->gru_mq_desc); + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV); +} + +static void +xpc_send_chctl_opencomplete_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_opencomplete_uv msg; + + msg.ch_number = ch->number; + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV); +} + +static void +xpc_send_chctl_local_msgrequest_uv(struct xpc_partition *part, int ch_number) +{ + unsigned long irq_flags; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[ch_number] |= XPC_CHCTL_MSGREQUEST; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); +} + +static enum xp_retval +xpc_save_remote_msgqueue_pa_uv(struct xpc_channel *ch, + unsigned long gru_mq_desc_gpa) +{ + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + + DBUG_ON(ch_uv->cached_notify_gru_mq_desc == NULL); + return xpc_cache_remote_gru_mq_desc_uv(ch_uv->cached_notify_gru_mq_desc, + gru_mq_desc_gpa); +} + +static void +xpc_indicate_partition_engaged_uv(struct xpc_partition *part) +{ + struct xpc_activate_mq_msg_uv msg; + + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV); +} + +static void +xpc_indicate_partition_disengaged_uv(struct xpc_partition *part) +{ + struct xpc_activate_mq_msg_uv msg; + + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV); +} + +static void +xpc_assume_partition_disengaged_uv(short partid) +{ + struct xpc_partition_uv *part_uv = &xpc_partitions[partid].sn.uv; + unsigned long irq_flags; + + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags &= ~XPC_P_ENGAGED_UV; + 
spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); +} + +static int +xpc_partition_engaged_uv(short partid) +{ + return (xpc_partitions[partid].sn.uv.flags & XPC_P_ENGAGED_UV) != 0; +} + +static int +xpc_any_partition_engaged_uv(void) +{ + struct xpc_partition_uv *part_uv; + short partid; + + for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) { + part_uv = &xpc_partitions[partid].sn.uv; + if ((part_uv->flags & XPC_P_ENGAGED_UV) != 0) + return 1; + } + return 0; +} + +static enum xp_retval +xpc_allocate_msg_slot_uv(struct xpc_channel *ch, u32 flags, + struct xpc_send_msg_slot_uv **address_of_msg_slot) +{ + enum xp_retval ret; + struct xpc_send_msg_slot_uv *msg_slot; + struct xpc_fifo_entry_uv *entry; + + while (1) { + entry = xpc_get_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list); + if (entry != NULL) + break; + + if (flags & XPC_NOWAIT) + return xpNoWait; + + ret = xpc_allocate_msg_wait(ch); + if (ret != xpInterrupted && ret != xpTimeout) + return ret; + } + + msg_slot = container_of(entry, struct xpc_send_msg_slot_uv, next); + *address_of_msg_slot = msg_slot; + return xpSuccess; +} + +static void +xpc_free_msg_slot_uv(struct xpc_channel *ch, + struct xpc_send_msg_slot_uv *msg_slot) +{ + xpc_put_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list, &msg_slot->next); + + /* wakeup anyone waiting for a free msg slot */ + if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) + wake_up(&ch->msg_allocate_wq); +} + +static void +xpc_notify_sender_uv(struct xpc_channel *ch, + struct xpc_send_msg_slot_uv *msg_slot, + enum xp_retval reason) +{ + xpc_notify_func func = msg_slot->func; + + if (func != NULL && cmpxchg(&msg_slot->func, func, NULL) == func) { + + atomic_dec(&ch->n_to_notify); + + dev_dbg(xpc_chan, "msg_slot->func() called, msg_slot=0x%p " + "msg_slot_number=%d partid=%d channel=%d\n", msg_slot, + msg_slot->msg_slot_number, ch->partid, ch->number); + + func(reason, ch->partid, ch->number, msg_slot->key); + + dev_dbg(xpc_chan, "msg_slot->func() returned, msg_slot=0x%p " + "msg_slot_number=%d partid=%d channel=%d\n", msg_slot, + msg_slot->msg_slot_number, ch->partid, ch->number); + } +} + +static void +xpc_handle_notify_mq_ack_uv(struct xpc_channel *ch, + struct xpc_notify_mq_msg_uv *msg) +{ + struct xpc_send_msg_slot_uv *msg_slot; + int entry = msg->hdr.msg_slot_number % ch->local_nentries; + + msg_slot = &ch->sn.uv.send_msg_slots[entry]; + + BUG_ON(msg_slot->msg_slot_number != msg->hdr.msg_slot_number); + msg_slot->msg_slot_number += ch->local_nentries; + + if (msg_slot->func != NULL) + xpc_notify_sender_uv(ch, msg_slot, xpMsgDelivered); + + xpc_free_msg_slot_uv(ch, msg_slot); +} + +static void +xpc_handle_notify_mq_msg_uv(struct xpc_partition *part, + struct xpc_notify_mq_msg_uv *msg) +{ + struct xpc_partition_uv *part_uv = &part->sn.uv; + struct xpc_channel *ch; + struct xpc_channel_uv *ch_uv; + struct xpc_notify_mq_msg_uv *msg_slot; + unsigned long irq_flags; + int ch_number = msg->hdr.ch_number; + + if (unlikely(ch_number >= part->nchannels)) { + dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received invalid " + "channel number=0x%x in message from partid=%d\n", + ch_number, XPC_PARTID(part)); + + /* get hb checker to deactivate from the remote partition */ + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV; + part_uv->reason = xpBadChannelNumber; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + wake_up_interruptible(&xpc_activate_IRQ_wq); + 
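+		/* the hb checker will now deactivate the offending partition */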
return; + } + + ch = &part->channels[ch_number]; + xpc_msgqueue_ref(ch); + + if (!(ch->flags & XPC_C_CONNECTED)) { + xpc_msgqueue_deref(ch); + return; + } + + /* see if we're really dealing with an ACK for a previously sent msg */ + if (msg->hdr.size == 0) { + xpc_handle_notify_mq_ack_uv(ch, msg); + xpc_msgqueue_deref(ch); + return; + } + + /* we're dealing with a normal message sent via the notify_mq */ + ch_uv = &ch->sn.uv; + + msg_slot = ch_uv->recv_msg_slots + + (msg->hdr.msg_slot_number % ch->remote_nentries) * ch->entry_size; + + BUG_ON(msg_slot->hdr.size != 0); + + memcpy(msg_slot, msg, msg->hdr.size); + + xpc_put_fifo_entry_uv(&ch_uv->recv_msg_list, &msg_slot->hdr.u.next); + + if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) { + /* + * If there is an existing idle kthread get it to deliver + * the payload, otherwise we'll have to get the channel mgr + * for this partition to create a kthread to do the delivery. + */ + if (atomic_read(&ch->kthreads_idle) > 0) + wake_up_nr(&ch->idle_wq, 1); + else + xpc_send_chctl_local_msgrequest_uv(part, ch->number); + } + xpc_msgqueue_deref(ch); +} + +static irqreturn_t +xpc_handle_notify_IRQ_uv(int irq, void *dev_id) +{ + struct xpc_notify_mq_msg_uv *msg; + short partid; + struct xpc_partition *part; + + while ((msg = gru_get_next_message(xpc_notify_mq_uv->gru_mq_desc)) != + NULL) { + + partid = msg->hdr.partid; + if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) { + dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received " + "invalid partid=0x%x in message\n", partid); + } else { + part = &xpc_partitions[partid]; + + if (xpc_part_ref(part)) { + xpc_handle_notify_mq_msg_uv(part, msg); + xpc_part_deref(part); + } + } + + gru_free_message(xpc_notify_mq_uv->gru_mq_desc, msg); + } + + return IRQ_HANDLED; +} + +static int +xpc_n_of_deliverable_payloads_uv(struct xpc_channel *ch) +{ + return xpc_n_of_fifo_entries_uv(&ch->sn.uv.recv_msg_list); +} + +static void +xpc_process_msg_chctl_flags_uv(struct xpc_partition *part, int ch_number) +{ + struct xpc_channel *ch = &part->channels[ch_number]; + int ndeliverable_payloads; + + xpc_msgqueue_ref(ch); + + ndeliverable_payloads = xpc_n_of_deliverable_payloads_uv(ch); + + if (ndeliverable_payloads > 0 && + (ch->flags & XPC_C_CONNECTED) && + (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)) { + + xpc_activate_kthreads(ch, ndeliverable_payloads); + } + + xpc_msgqueue_deref(ch); +} + +static enum xp_retval +xpc_send_payload_uv(struct xpc_channel *ch, u32 flags, void *payload, + u16 payload_size, u8 notify_type, xpc_notify_func func, + void *key) +{ + enum xp_retval ret = xpSuccess; + struct xpc_send_msg_slot_uv *msg_slot = NULL; + struct xpc_notify_mq_msg_uv *msg; + u8 msg_buffer[XPC_NOTIFY_MSG_SIZE_UV]; + size_t msg_size; + + DBUG_ON(notify_type != XPC_N_CALL); + + msg_size = sizeof(struct xpc_notify_mq_msghdr_uv) + payload_size; + if (msg_size > ch->entry_size) + return xpPayloadTooBig; + + xpc_msgqueue_ref(ch); + + if (ch->flags & XPC_C_DISCONNECTING) { + ret = ch->reason; + goto out_1; + } + if (!(ch->flags & XPC_C_CONNECTED)) { + ret = xpNotConnected; + goto out_1; + } + + ret = xpc_allocate_msg_slot_uv(ch, flags, &msg_slot); + if (ret != xpSuccess) + goto out_1; + + if (func != NULL) { + atomic_inc(&ch->n_to_notify); + + msg_slot->key = key; + smp_wmb(); /* a non-NULL func must hit memory after the key */ + msg_slot->func = func; + + if (ch->flags & XPC_C_DISCONNECTING) { + ret = ch->reason; + goto out_2; + } + } + + msg = (struct xpc_notify_mq_msg_uv *)&msg_buffer; + msg->hdr.partid = xp_partition_id; + 
msg->hdr.ch_number = ch->number;
+	msg->hdr.size = msg_size;
+	msg->hdr.msg_slot_number = msg_slot->msg_slot_number;
+	memcpy(&msg->payload, payload, payload_size);
+
+	ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg,
+			       msg_size);
+	if (ret == xpSuccess)
+		goto out_1;
+
+	XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
+out_2:
+	if (func != NULL) {
+		/*
+		 * Try to NULL the msg_slot's func field. If we fail, then
+		 * xpc_notify_senders_of_disconnect_uv() beat us to it, in which
+		 * case we need to pretend we succeeded to send the message
+		 * since the user will get a callout for the disconnect error
+		 * from xpc_notify_senders_of_disconnect_uv(), and also getting
+		 * an error returned here would confuse them. Additionally,
+		 * since in this case the channel is being disconnected we
+		 * don't need to put the msg_slot back on the free list.
+		 */
+		if (cmpxchg(&msg_slot->func, func, NULL) != func) {
+			ret = xpSuccess;
+			goto out_1;
+		}
+
+		msg_slot->key = NULL;
+		atomic_dec(&ch->n_to_notify);
+	}
+	xpc_free_msg_slot_uv(ch, msg_slot);
+out_1:
+	xpc_msgqueue_deref(ch);
+	return ret;
+}
+
+/*
+ * Tell the callers of xpc_send_notify() that the status of their payloads
+ * is unknown because the channel is now disconnecting.
+ *
+ * We don't worry about putting these msg_slots on the free list since the
+ * msg_slots themselves are about to be kfree'd.
+ */
+static void
+xpc_notify_senders_of_disconnect_uv(struct xpc_channel *ch)
+{
+	struct xpc_send_msg_slot_uv *msg_slot;
+	int entry;
+
+	DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING));
+
+	for (entry = 0; entry < ch->local_nentries; entry++) {
+
+		if (atomic_read(&ch->n_to_notify) == 0)
+			break;
+
+		msg_slot = &ch->sn.uv.send_msg_slots[entry];
+		if (msg_slot->func != NULL)
+			xpc_notify_sender_uv(ch, msg_slot, ch->reason);
+	}
+}
+
+/*
+ * Get the next deliverable message's payload.
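+ * The caller must eventually hand the payload back via
+ * xpc_received_payload_uv() so that an ACK is returned to the sender and
+ * the sender's msg slot can be reclaimed.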
+ */ +static void * +xpc_get_deliverable_payload_uv(struct xpc_channel *ch) +{ + struct xpc_fifo_entry_uv *entry; + struct xpc_notify_mq_msg_uv *msg; + void *payload = NULL; + + if (!(ch->flags & XPC_C_DISCONNECTING)) { + entry = xpc_get_fifo_entry_uv(&ch->sn.uv.recv_msg_list); + if (entry != NULL) { + msg = container_of(entry, struct xpc_notify_mq_msg_uv, + hdr.u.next); + payload = &msg->payload; + } + } + return payload; +} + +static void +xpc_received_payload_uv(struct xpc_channel *ch, void *payload) +{ + struct xpc_notify_mq_msg_uv *msg; + enum xp_retval ret; + + msg = container_of(payload, struct xpc_notify_mq_msg_uv, payload); + + /* return an ACK to the sender of this message */ + + msg->hdr.partid = xp_partition_id; + msg->hdr.size = 0; /* size of zero indicates this is an ACK */ + + ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg, + sizeof(struct xpc_notify_mq_msghdr_uv)); + if (ret != xpSuccess) + XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret); +} + +static struct xpc_arch_operations xpc_arch_ops_uv = { + .setup_partitions = xpc_setup_partitions_uv, + .teardown_partitions = xpc_teardown_partitions_uv, + .process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv, + .get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv, + .setup_rsvd_page = xpc_setup_rsvd_page_uv, + + .allow_hb = xpc_allow_hb_uv, + .disallow_hb = xpc_disallow_hb_uv, + .disallow_all_hbs = xpc_disallow_all_hbs_uv, + .increment_heartbeat = xpc_increment_heartbeat_uv, + .offline_heartbeat = xpc_offline_heartbeat_uv, + .online_heartbeat = xpc_online_heartbeat_uv, + .heartbeat_init = xpc_heartbeat_init_uv, + .heartbeat_exit = xpc_heartbeat_exit_uv, + .get_remote_heartbeat = xpc_get_remote_heartbeat_uv, + + .request_partition_activation = + xpc_request_partition_activation_uv, + .request_partition_reactivation = + xpc_request_partition_reactivation_uv, + .request_partition_deactivation = + xpc_request_partition_deactivation_uv, + .cancel_partition_deactivation_request = + xpc_cancel_partition_deactivation_request_uv, + + .setup_ch_structures = xpc_setup_ch_structures_uv, + .teardown_ch_structures = xpc_teardown_ch_structures_uv, + + .make_first_contact = xpc_make_first_contact_uv, + + .get_chctl_all_flags = xpc_get_chctl_all_flags_uv, + .send_chctl_closerequest = xpc_send_chctl_closerequest_uv, + .send_chctl_closereply = xpc_send_chctl_closereply_uv, + .send_chctl_openrequest = xpc_send_chctl_openrequest_uv, + .send_chctl_openreply = xpc_send_chctl_openreply_uv, + .send_chctl_opencomplete = xpc_send_chctl_opencomplete_uv, + .process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv, + + .save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv, + + .setup_msg_structures = xpc_setup_msg_structures_uv, + .teardown_msg_structures = xpc_teardown_msg_structures_uv, + + .indicate_partition_engaged = xpc_indicate_partition_engaged_uv, + .indicate_partition_disengaged = xpc_indicate_partition_disengaged_uv, + .assume_partition_disengaged = xpc_assume_partition_disengaged_uv, + .partition_engaged = xpc_partition_engaged_uv, + .any_partition_engaged = xpc_any_partition_engaged_uv, + + .n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv, + .send_payload = xpc_send_payload_uv, + .get_deliverable_payload = xpc_get_deliverable_payload_uv, + .received_payload = xpc_received_payload_uv, + .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv, +}; + +static int +xpc_init_mq_node(int nid) +{ + int cpu; + + get_online_cpus(); + + for_each_cpu(cpu, cpumask_of_node(nid)) { + 
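+		/* try each cpu of this node until irq and mq setup succeed */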
+		xpc_activate_mq_uv =
+			xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, cpu,
+					     XPC_ACTIVATE_IRQ_NAME,
+					     xpc_handle_activate_IRQ_uv);
+		if (!IS_ERR(xpc_activate_mq_uv))
+			break;
+	}
+	if (IS_ERR(xpc_activate_mq_uv)) {
+		put_online_cpus();
+		return PTR_ERR(xpc_activate_mq_uv);
+	}
+
+	for_each_cpu(cpu, cpumask_of_node(nid)) {
+		xpc_notify_mq_uv =
+			xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, cpu,
+					     XPC_NOTIFY_IRQ_NAME,
+					     xpc_handle_notify_IRQ_uv);
+		if (!IS_ERR(xpc_notify_mq_uv))
+			break;
+	}
+	if (IS_ERR(xpc_notify_mq_uv)) {
+		xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
+		put_online_cpus();
+		return PTR_ERR(xpc_notify_mq_uv);
+	}
+
+	put_online_cpus();
+	return 0;
+}
+
+int
+xpc_init_uv(void)
+{
+	int nid;
+	int ret = 0;
+
+	xpc_arch_ops = xpc_arch_ops_uv;
+
+	if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) {
+		dev_err(xpc_part, "xpc_notify_mq_msghdr_uv is larger than %d\n",
+			XPC_MSG_HDR_MAX_SIZE);
+		return -E2BIG;
+	}
+
+	if (xpc_mq_node < 0)
+		for_each_online_node(nid) {
+			ret = xpc_init_mq_node(nid);
+
+			if (!ret)
+				break;
+		}
+	else
+		ret = xpc_init_mq_node(xpc_mq_node);
+
+	if (ret < 0)
+		dev_err(xpc_part, "xpc_init_mq_node() returned error=%d\n",
+			-ret);
+
+	return ret;
+}
+
+void
+xpc_exit_uv(void)
+{
+	xpc_destroy_gru_mq_uv(xpc_notify_mq_uv);
+	xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
+}
+
+module_param(xpc_mq_node, int, 0);
+MODULE_PARM_DESC(xpc_mq_node, "Node number on which to allocate message queues.");
diff --git a/kernel/drivers/misc/sgi-xp/xpnet.c b/kernel/drivers/misc/sgi-xp/xpnet.c
new file mode 100644
index 000000000..557f9782c
--- /dev/null
+++ b/kernel/drivers/misc/sgi-xp/xpnet.c
@@ -0,0 +1,608 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999-2009 Silicon Graphics, Inc. All rights reserved.
+ */
+
+/*
+ * Cross Partition Network Interface (XPNET) support
+ *
+ * XPNET provides a virtual network layered on top of the Cross
+ * Partition communication layer.
+ *
+ * XPNET provides direct point-to-point and broadcast-like support
+ * for an ethernet-like device. The ethernet broadcast medium is
+ * replaced with a point-to-point message structure which passes
+ * pointers to a DMA-capable block that a remote partition should
+ * retrieve and pass to the upper level networking layer.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include "xp.h"
+
+/*
+ * The message payload transferred by XPC.
+ *
+ * buf_pa is the physical address where the DMA should pull from.
+ *
+ * NOTE: for performance reasons, buf_pa should _ALWAYS_ begin on a
+ * cacheline boundary. To accomplish this, we record the number of
+ * bytes from the beginning of the first cacheline to the first useful
+ * byte of the skb (leadin_ignore) and the number of bytes from the
+ * last useful byte of the skb to the end of the last cacheline
+ * (tailout_ignore).
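+ *
+ * For example, with 128-byte cachelines, an skb whose useful data begins
+ * 40 bytes into its first cacheline and ends 100 bytes into its last one
+ * would have leadin_ignore = 40 and tailout_ignore = 28.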
+ *
+ * size is the number of bytes to transfer which includes the skb->len
+ * (useful bytes of the sender's skb) plus the leadin and tailout
+ */
+struct xpnet_message {
+	u16 version;		/* Version for this message */
+	u16 embedded_bytes;	/* #of bytes embedded in XPC message */
+	u32 magic;		/* Special number indicating this is xpnet */
+	unsigned long buf_pa;	/* phys address of buffer to retrieve */
+	u32 size;		/* #of bytes in buffer */
+	u8 leadin_ignore;	/* #of bytes to ignore at the beginning */
+	u8 tailout_ignore;	/* #of bytes to ignore at the end */
+	unsigned char data;	/* body of small packets */
+};
+
+/*
+ * Determine the size of our message, the cacheline aligned size,
+ * and then the number of messages we will request from XPC.
+ *
+ * XPC expects each message to exist in an individual cacheline.
+ */
+#define XPNET_MSG_SIZE		XPC_MSG_PAYLOAD_MAX_SIZE
+#define XPNET_MSG_DATA_MAX	\
+	(XPNET_MSG_SIZE - offsetof(struct xpnet_message, data))
+#define XPNET_MSG_NENTRIES	(PAGE_SIZE / XPC_MSG_MAX_SIZE)
+
+#define XPNET_MAX_KTHREADS	(XPNET_MSG_NENTRIES + 1)
+#define XPNET_MAX_IDLE_KTHREADS	(XPNET_MSG_NENTRIES + 1)
+
+/*
+ * Version number of XPNET implementation. XPNET can always talk to versions
+ * with same major #, and never talk to versions with a different major #.
+ */
+#define _XPNET_VERSION(_major, _minor)	(((_major) << 4) | (_minor))
+#define XPNET_VERSION_MAJOR(_v)		((_v) >> 4)
+#define XPNET_VERSION_MINOR(_v)		((_v) & 0xf)
+
+#define XPNET_VERSION _XPNET_VERSION(1, 0)	/* version 1.0 */
+#define XPNET_VERSION_EMBED _XPNET_VERSION(1, 1)	/* version 1.1 */
+#define XPNET_MAGIC	0x88786984 /* "XNET" */
+
+#define XPNET_VALID_MSG(_m) \
+   ((XPNET_VERSION_MAJOR(_m->version) == XPNET_VERSION_MAJOR(XPNET_VERSION)) \
+    && (msg->magic == XPNET_MAGIC))
+
+#define XPNET_DEVICE_NAME		"xp0"
+
+/*
+ * When messages are queued with xpc_send_notify, a kmalloc'd buffer
+ * of the following type is passed as a notification cookie. When the
+ * notification function is called, we use the cookie to decide
+ * whether all outstanding message sends have completed. The skb can
+ * then be released.
+ */
+struct xpnet_pending_msg {
+	struct sk_buff *skb;
+	atomic_t use_count;
+};
+
+struct net_device *xpnet_device;
+
+/*
+ * When we are notified of other partitions activating, we add them to
+ * our bitmask of partitions to which we broadcast.
+ */
+static unsigned long *xpnet_broadcast_partitions;
+/* protect above */
+static DEFINE_SPINLOCK(xpnet_broadcast_lock);
+
+/*
+ * Since the Block Transfer Engine (BTE) is being used for the transfer
+ * and it relies upon cache-line size transfers, we need to reserve at
+ * least one cache-line for head and tail alignment. The BTE is
+ * limited to 8MB transfers.
+ *
+ * Testing has shown that changing MTU to greater than 64KB has no effect
+ * on TCP as the two sides negotiate a Max Segment Size that is limited
+ * to 64K. Other protocols may use packets greater than this, but for
+ * now, the default is 64KB.
+ */
+#define XPNET_MAX_MTU (0x800000UL - L1_CACHE_BYTES)
+/* 32KB has been determined to be the ideal */
+#define XPNET_DEF_MTU (0x8000UL)
+
+/*
+ * The partid is encapsulated in the MAC address beginning in the following
+ * octet and it consists of two octets.
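+ * That is, octets 2 and 3 of the MAC address carry the sending
+ * partition's partid.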
+ */
+#define XPNET_PARTID_OCTET	2
+
+/* Define the XPNET debug device structures to be used with dev_dbg() et al */
+
+struct device_driver xpnet_dbg_name = {
+	.name = "xpnet"
+};
+
+struct device xpnet_dbg_subname = {
+	.init_name = "",	/* set to "" */
+	.driver = &xpnet_dbg_name
+};
+
+struct device *xpnet = &xpnet_dbg_subname;
+
+/*
+ * Packet was received by XPC and forwarded to us.
+ */
+static void
+xpnet_receive(short partid, int channel, struct xpnet_message *msg)
+{
+	struct sk_buff *skb;
+	void *dst;
+	enum xp_retval ret;
+
+	if (!XPNET_VALID_MSG(msg)) {
+		/*
+		 * Packet with a different XPC version. Ignore.
+		 */
+		xpc_received(partid, channel, (void *)msg);
+
+		xpnet_device->stats.rx_errors++;
+
+		return;
+	}
+	dev_dbg(xpnet, "received 0x%lx, %d, %d, %d\n", msg->buf_pa, msg->size,
+		msg->leadin_ignore, msg->tailout_ignore);
+
+	/* reserve an extra cache line */
+	skb = dev_alloc_skb(msg->size + L1_CACHE_BYTES);
+	if (!skb) {
+		dev_err(xpnet, "failed on dev_alloc_skb(%d)\n",
+			msg->size + L1_CACHE_BYTES);
+
+		xpc_received(partid, channel, (void *)msg);
+
+		xpnet_device->stats.rx_errors++;
+
+		return;
+	}
+
+	/*
+	 * The allocated skb has some reserved space.
+	 * In order to use xp_remote_memcpy(), we need to get the
+	 * skb->data pointer moved forward.
+	 */
+	skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data &
+					    (L1_CACHE_BYTES - 1)) +
+			  msg->leadin_ignore));
+
+	/*
+	 * Update the tail pointer to indicate data actually
+	 * transferred.
+	 */
+	skb_put(skb, (msg->size - msg->leadin_ignore - msg->tailout_ignore));
+
+	/*
+	 * Move the data over from the other side.
+	 */
+	if ((XPNET_VERSION_MINOR(msg->version) == 1) &&
+	    (msg->embedded_bytes != 0)) {
+		dev_dbg(xpnet, "copying embedded message. memcpy(0x%p, 0x%p, "
+			"%lu)\n", skb->data, &msg->data,
+			(size_t)msg->embedded_bytes);
+
+		skb_copy_to_linear_data(skb, &msg->data,
+					(size_t)msg->embedded_bytes);
+	} else {
+		dst = (void *)((u64)skb->data & ~(L1_CACHE_BYTES - 1));
+		dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t"
+			"xp_remote_memcpy(0x%p, 0x%p, %hu)\n", dst,
+			(void *)msg->buf_pa, msg->size);
+
+		ret = xp_remote_memcpy(xp_pa(dst), msg->buf_pa, msg->size);
+		if (ret != xpSuccess) {
+			/*
+			 * !!! Need better way of cleaning skb. Currently skb
+			 * !!! appears in_use and we can't just call
+			 * !!! dev_kfree_skb.
+			 */
+			dev_err(xpnet, "xp_remote_memcpy(0x%p, 0x%p, 0x%hx) "
+				"returned error=0x%x\n", dst,
+				(void *)msg->buf_pa, msg->size, ret);
+
+			xpc_received(partid, channel, (void *)msg);
+
+			xpnet_device->stats.rx_errors++;
+
+			return;
+		}
+	}
+
+	dev_dbg(xpnet, "<skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
+		"skb->end=0x%p skb->len=%d\n", (void *)skb->head,
+		(void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb),
+		skb->len);
+
+	skb->protocol = eth_type_trans(skb, xpnet_device);
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	dev_dbg(xpnet, "passing skb to network layer\n"
+		"\tskb->head=0x%p skb->data=0x%p skb->tail=0x%p "
+		"skb->end=0x%p skb->len=%d\n",
+		(void *)skb->head, (void *)skb->data, skb_tail_pointer(skb),
+		skb_end_pointer(skb), skb->len);
+
+	xpnet_device->stats.rx_packets++;
+	xpnet_device->stats.rx_bytes += skb->len + ETH_HLEN;
+
+	netif_rx_ni(skb);
+	xpc_received(partid, channel, (void *)msg);
+}
+
+/*
+ * This is the handler which XPC calls during any sort of change in
+ * state or message reception on a connection.
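+ *
+ * In outline: xpMsgReceived hands the payload to xpnet_receive(),
+ * xpConnected sets the partition's bit in xpnet_broadcast_partitions
+ * and raises the carrier, and any other reason clears that bit,
+ * dropping the carrier once no connected partitions remain.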
+ */ +static void +xpnet_connection_activity(enum xp_retval reason, short partid, int channel, + void *data, void *key) +{ + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); + DBUG_ON(channel != XPC_NET_CHANNEL); + + switch (reason) { + case xpMsgReceived: /* message received */ + DBUG_ON(data == NULL); + + xpnet_receive(partid, channel, (struct xpnet_message *)data); + break; + + case xpConnected: /* connection completed to a partition */ + spin_lock_bh(&xpnet_broadcast_lock); + __set_bit(partid, xpnet_broadcast_partitions); + spin_unlock_bh(&xpnet_broadcast_lock); + + netif_carrier_on(xpnet_device); + + dev_dbg(xpnet, "%s connected to partition %d\n", + xpnet_device->name, partid); + break; + + default: + spin_lock_bh(&xpnet_broadcast_lock); + __clear_bit(partid, xpnet_broadcast_partitions); + spin_unlock_bh(&xpnet_broadcast_lock); + + if (bitmap_empty((unsigned long *)xpnet_broadcast_partitions, + xp_max_npartitions)) { + netif_carrier_off(xpnet_device); + } + + dev_dbg(xpnet, "%s disconnected from partition %d\n", + xpnet_device->name, partid); + break; + } +} + +static int +xpnet_dev_open(struct net_device *dev) +{ + enum xp_retval ret; + + dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %ld, " + "%ld)\n", XPC_NET_CHANNEL, xpnet_connection_activity, + (unsigned long)XPNET_MSG_SIZE, + (unsigned long)XPNET_MSG_NENTRIES, + (unsigned long)XPNET_MAX_KTHREADS, + (unsigned long)XPNET_MAX_IDLE_KTHREADS); + + ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL, + XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, + XPNET_MAX_KTHREADS, XPNET_MAX_IDLE_KTHREADS); + if (ret != xpSuccess) { + dev_err(xpnet, "ifconfig up of %s failed on XPC connect, " + "ret=%d\n", dev->name, ret); + + return -ENOMEM; + } + + dev_dbg(xpnet, "ifconfig up of %s; XPC connected\n", dev->name); + + return 0; +} + +static int +xpnet_dev_stop(struct net_device *dev) +{ + xpc_disconnect(XPC_NET_CHANNEL); + + dev_dbg(xpnet, "ifconfig down of %s; XPC disconnected\n", dev->name); + + return 0; +} + +static int +xpnet_dev_change_mtu(struct net_device *dev, int new_mtu) +{ + /* 68 comes from min TCP+IP+MAC header */ + if ((new_mtu < 68) || (new_mtu > XPNET_MAX_MTU)) { + dev_err(xpnet, "ifconfig %s mtu %d failed; value must be " + "between 68 and %ld\n", dev->name, new_mtu, + XPNET_MAX_MTU); + return -EINVAL; + } + + dev->mtu = new_mtu; + dev_dbg(xpnet, "ifconfig %s mtu set to %d\n", dev->name, new_mtu); + return 0; +} + +/* + * Notification that the other end has received the message and + * DMA'd the skb information. At this point, they are done with + * our side. When all recipients are done processing, we + * release the skb and then release our pending message structure. 
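+ *
+ * The use_count starts at one (the transmit path's own reference)
+ * and is incremented once per xpc_send_notify(); whichever of the
+ * transmit path or the final notification drops it to zero frees
+ * both the skb and the xpnet_pending_msg.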
+ */
+static void
+xpnet_send_completed(enum xp_retval reason, short partid, int channel,
+		     void *__qm)
+{
+	struct xpnet_pending_msg *queued_msg = (struct xpnet_pending_msg *)__qm;
+
+	DBUG_ON(queued_msg == NULL);
+
+	dev_dbg(xpnet, "message to %d notified with reason %d\n",
+		partid, reason);
+
+	if (atomic_dec_return(&queued_msg->use_count) == 0) {
+		dev_dbg(xpnet, "all acks for skb->head=0x%p\n",
+			(void *)queued_msg->skb->head);
+
+		dev_kfree_skb_any(queued_msg->skb);
+		kfree(queued_msg);
+	}
+}
+
+static void
+xpnet_send(struct sk_buff *skb, struct xpnet_pending_msg *queued_msg,
+	   u64 start_addr, u64 end_addr, u16 embedded_bytes, int dest_partid)
+{
+	u8 msg_buffer[XPNET_MSG_SIZE];
+	struct xpnet_message *msg = (struct xpnet_message *)&msg_buffer;
+	u16 msg_size = sizeof(struct xpnet_message);
+	enum xp_retval ret;
+
+	msg->embedded_bytes = embedded_bytes;
+	if (unlikely(embedded_bytes != 0)) {
+		msg->version = XPNET_VERSION_EMBED;
+		dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n",
+			&msg->data, skb->data, (size_t)embedded_bytes);
+		skb_copy_from_linear_data(skb, &msg->data,
+					  (size_t)embedded_bytes);
+		msg_size += embedded_bytes - 1;
+	} else {
+		msg->version = XPNET_VERSION;
+	}
+	msg->magic = XPNET_MAGIC;
+	msg->size = end_addr - start_addr;
+	msg->leadin_ignore = (u64)skb->data - start_addr;
+	msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb);
+	msg->buf_pa = xp_pa((void *)start_addr);
+
+	dev_dbg(xpnet, "sending XPC message to %d:%d\n"
+		"msg->buf_pa=0x%lx, msg->size=%u, "
+		"msg->leadin_ignore=%u, msg->tailout_ignore=%u\n",
+		dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size,
+		msg->leadin_ignore, msg->tailout_ignore);
+
+	atomic_inc(&queued_msg->use_count);
+
+	ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, XPC_NOWAIT, msg,
+			      msg_size, xpnet_send_completed, queued_msg);
+	if (unlikely(ret != xpSuccess))
+		atomic_dec(&queued_msg->use_count);
+}
+
+/*
+ * Network layer has formatted a packet (skb) and is ready to place it
+ * "on the wire". Prepare and send an xpnet_message to all partitions
+ * which have connected with us and are targets of this packet.
+ *
+ * MAC-NOTE: For the XPNET driver, the MAC address contains the
+ * destination partid. If the destination partid octets are 0xffff,
+ * this packet is to be broadcast to all connected partitions.
+ */
+static int
+xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct xpnet_pending_msg *queued_msg;
+	u64 start_addr, end_addr;
+	short dest_partid;
+	u16 embedded_bytes = 0;
+
+	dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
+		"skb->end=0x%p skb->len=%d\n", (void *)skb->head,
+		(void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb),
+		skb->len);
+
+	if (skb->data[0] == 0x33) {
+		dev_kfree_skb(skb);
+		return NETDEV_TX_OK;	/* nothing needed to be done */
+	}
+
+	/*
+	 * The xpnet_pending_msg tracks how many outstanding
+	 * xpc_send_notifies are relying on this skb. When none
+	 * remain, release the skb.
+ */ + queued_msg = kmalloc(sizeof(struct xpnet_pending_msg), GFP_ATOMIC); + if (queued_msg == NULL) { + dev_warn(xpnet, "failed to kmalloc %ld bytes; dropping " + "packet\n", sizeof(struct xpnet_pending_msg)); + + dev->stats.tx_errors++; + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + + /* get the beginning of the first cacheline and end of last */ + start_addr = ((u64)skb->data & ~(L1_CACHE_BYTES - 1)); + end_addr = L1_CACHE_ALIGN((u64)skb_tail_pointer(skb)); + + /* calculate how many bytes to embed in the XPC message */ + if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) { + /* skb->data does fit so embed */ + embedded_bytes = skb->len; + } + + /* + * Since the send occurs asynchronously, we set the count to one + * and begin sending. Any sends that happen to complete before + * we are done sending will not free the skb. We will be left + * with that task during exit. This also handles the case of + * a packet destined for a partition which is no longer up. + */ + atomic_set(&queued_msg->use_count, 1); + queued_msg->skb = skb; + + if (skb->data[0] == 0xff) { + /* we are being asked to broadcast to all partitions */ + for_each_set_bit(dest_partid, xpnet_broadcast_partitions, + xp_max_npartitions) { + + xpnet_send(skb, queued_msg, start_addr, end_addr, + embedded_bytes, dest_partid); + } + } else { + dest_partid = (short)skb->data[XPNET_PARTID_OCTET + 1]; + dest_partid |= (short)skb->data[XPNET_PARTID_OCTET + 0] << 8; + + if (dest_partid >= 0 && + dest_partid < xp_max_npartitions && + test_bit(dest_partid, xpnet_broadcast_partitions) != 0) { + + xpnet_send(skb, queued_msg, start_addr, end_addr, + embedded_bytes, dest_partid); + } + } + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + if (atomic_dec_return(&queued_msg->use_count) == 0) { + dev_kfree_skb(skb); + kfree(queued_msg); + } + + return NETDEV_TX_OK; +} + +/* + * Deal with transmit timeouts coming from the network layer. + */ +static void +xpnet_dev_tx_timeout(struct net_device *dev) +{ + dev->stats.tx_errors++; +} + +static const struct net_device_ops xpnet_netdev_ops = { + .ndo_open = xpnet_dev_open, + .ndo_stop = xpnet_dev_stop, + .ndo_start_xmit = xpnet_dev_hard_start_xmit, + .ndo_change_mtu = xpnet_dev_change_mtu, + .ndo_tx_timeout = xpnet_dev_tx_timeout, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, +}; + +static int __init +xpnet_init(void) +{ + int result; + + if (!is_shub() && !is_uv()) + return -ENODEV; + + dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME); + + xpnet_broadcast_partitions = kzalloc(BITS_TO_LONGS(xp_max_npartitions) * + sizeof(long), GFP_KERNEL); + if (xpnet_broadcast_partitions == NULL) + return -ENOMEM; + + /* + * use ether_setup() to init the majority of our device + * structure and then override the necessary pieces. + */ + xpnet_device = alloc_netdev(0, XPNET_DEVICE_NAME, NET_NAME_UNKNOWN, + ether_setup); + if (xpnet_device == NULL) { + kfree(xpnet_broadcast_partitions); + return -ENOMEM; + } + + netif_carrier_off(xpnet_device); + + xpnet_device->netdev_ops = &xpnet_netdev_ops; + xpnet_device->mtu = XPNET_DEF_MTU; + + /* + * Multicast assumes the LSB of the first octet is set for multicast + * MAC addresses. We chose the first octet of the MAC to be unlikely + * to collide with any vendor's officially issued MAC. 
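+	 *
+	 * 0x02 keeps the multicast bit (bit 0) clear and sets the
+	 * locally-administered bit (bit 1), so the address is a
+	 * unicast MAC that cannot clash with an OUI-based vendor
+	 * address.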
+ */ + xpnet_device->dev_addr[0] = 0x02; /* locally administered, no OUI */ + + xpnet_device->dev_addr[XPNET_PARTID_OCTET + 1] = xp_partition_id; + xpnet_device->dev_addr[XPNET_PARTID_OCTET + 0] = (xp_partition_id >> 8); + + /* + * ether_setup() sets this to a multicast device. We are + * really not supporting multicast at this time. + */ + xpnet_device->flags &= ~IFF_MULTICAST; + + /* + * No need to checksum as it is a DMA transfer. The BTE will + * report an error if the data is not retrievable and the + * packet will be dropped. + */ + xpnet_device->features = NETIF_F_HW_CSUM; + + result = register_netdev(xpnet_device); + if (result != 0) { + free_netdev(xpnet_device); + kfree(xpnet_broadcast_partitions); + } + + return result; +} + +module_init(xpnet_init); + +static void __exit +xpnet_exit(void) +{ + dev_info(xpnet, "unregistering network device %s\n", + xpnet_device[0].name); + + unregister_netdev(xpnet_device); + free_netdev(xpnet_device); + kfree(xpnet_broadcast_partitions); +} + +module_exit(xpnet_exit); + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_DESCRIPTION("Cross Partition Network adapter (XPNET)"); +MODULE_LICENSE("GPL"); diff --git a/kernel/drivers/misc/spear13xx_pcie_gadget.c b/kernel/drivers/misc/spear13xx_pcie_gadget.c new file mode 100644 index 000000000..fe3ad0ca9 --- /dev/null +++ b/kernel/drivers/misc/spear13xx_pcie_gadget.c @@ -0,0 +1,871 @@ +/* + * drivers/misc/spear13xx_pcie_gadget.c + * + * Copyright (C) 2010 ST Microelectronics + * Pratyush Anand<pratyush.anand@st.com> + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include <linux/device.h> +#include <linux/clk.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/io.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/pci_regs.h> +#include <linux/configfs.h> +#include <mach/pcie.h> +#include <mach/misc_regs.h> + +#define IN0_MEM_SIZE (200 * 1024 * 1024 - 1) +/* In current implementation address translation is done using IN0 only. 
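+ * (IN1_MEM_SIZE below therefore works out to an empty window: its
+ * limit register is programmed one byte below its start.)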
+ * So IN1 start address and IN0 end address have been kept the same
+ */
+#define IN1_MEM_SIZE		(0 * 1024 * 1024 - 1)
+#define IN_IO_SIZE		(20 * 1024 * 1024 - 1)
+#define IN_CFG0_SIZE		(12 * 1024 * 1024 - 1)
+#define IN_CFG1_SIZE		(12 * 1024 * 1024 - 1)
+#define IN_MSG_SIZE		(12 * 1024 * 1024 - 1)
+/* Keep default BAR size as 4K */
+/* AORAM would be mapped by default */
+#define INBOUND_ADDR_MASK	(SPEAR13XX_SYSRAM1_SIZE - 1)
+
+#define INT_TYPE_NO_INT	0
+#define INT_TYPE_INTX		1
+#define INT_TYPE_MSI		2
+struct spear_pcie_gadget_config {
+	void __iomem *base;
+	void __iomem *va_app_base;
+	void __iomem *va_dbi_base;
+	char int_type[10];
+	ulong requested_msi;
+	ulong configured_msi;
+	ulong bar0_size;
+	ulong bar0_rw_offset;
+	void __iomem *va_bar0_address;
+};
+
+struct pcie_gadget_target {
+	struct configfs_subsystem subsys;
+	struct spear_pcie_gadget_config config;
+};
+
+struct pcie_gadget_target_attr {
+	struct configfs_attribute	attr;
+	ssize_t		(*show)(struct spear_pcie_gadget_config *config,
+				char *buf);
+	ssize_t		(*store)(struct spear_pcie_gadget_config *config,
+				 const char *buf,
+				 size_t count);
+};
+
+static void enable_dbi_access(struct pcie_app_reg __iomem *app_reg)
+{
+	/* Enable DBI access */
+	writel(readl(&app_reg->slv_armisc) | (1 << AXI_OP_DBI_ACCESS_ID),
+	       &app_reg->slv_armisc);
+	writel(readl(&app_reg->slv_awmisc) | (1 << AXI_OP_DBI_ACCESS_ID),
+	       &app_reg->slv_awmisc);
+
+}
+
+static void disable_dbi_access(struct pcie_app_reg __iomem *app_reg)
+{
+	/* disable DBI access */
+	writel(readl(&app_reg->slv_armisc) & ~(1 << AXI_OP_DBI_ACCESS_ID),
+	       &app_reg->slv_armisc);
+	writel(readl(&app_reg->slv_awmisc) & ~(1 << AXI_OP_DBI_ACCESS_ID),
+	       &app_reg->slv_awmisc);
+
+}
+
+static void spear_dbi_read_reg(struct spear_pcie_gadget_config *config,
+		int where, int size, u32 *val)
+{
+	struct pcie_app_reg __iomem *app_reg = config->va_app_base;
+	ulong va_address;
+
+	/* Enable DBI access */
+	enable_dbi_access(app_reg);
+
+	va_address = (ulong)config->va_dbi_base + (where & ~0x3);
+
+	*val = readl(va_address);
+
+	if (size == 1)
+		*val = (*val >> (8 * (where & 3))) & 0xff;
+	else if (size == 2)
+		*val = (*val >> (8 * (where & 3))) & 0xffff;
+
+	/* Disable DBI access */
+	disable_dbi_access(app_reg);
+}
+
+static void spear_dbi_write_reg(struct spear_pcie_gadget_config *config,
+		int where, int size, u32 val)
+{
+	struct pcie_app_reg __iomem *app_reg = config->va_app_base;
+	ulong va_address;
+
+	/* Enable DBI access */
+	enable_dbi_access(app_reg);
+
+	va_address = (ulong)config->va_dbi_base + (where & ~0x3);
+
+	if (size == 4)
+		writel(val, va_address);
+	else if (size == 2)
+		writew(val, va_address + (where & 2));
+	else if (size == 1)
+		writeb(val, va_address + (where & 3));
+
+	/* Disable DBI access */
+	disable_dbi_access(app_reg);
+}
+
+#define PCI_FIND_CAP_TTL	48
+
+static int pci_find_own_next_cap_ttl(struct spear_pcie_gadget_config *config,
+		u32 pos, int cap, int *ttl)
+{
+	u32 id;
+
+	while ((*ttl)--) {
+		spear_dbi_read_reg(config, pos, 1, &pos);
+		if (pos < 0x40)
+			break;
+		pos &= ~3;
+		spear_dbi_read_reg(config, pos + PCI_CAP_LIST_ID, 1, &id);
+		if (id == 0xff)
+			break;
+		if (id == cap)
+			return pos;
+		pos += PCI_CAP_LIST_NEXT;
+	}
+	return 0;
+}
+
+static int pci_find_own_next_cap(struct spear_pcie_gadget_config *config,
+		u32 pos, int cap)
+{
+	int ttl = PCI_FIND_CAP_TTL;
+
+	return pci_find_own_next_cap_ttl(config, pos, cap, &ttl);
+}
+
+static int pci_find_own_cap_start(struct spear_pcie_gadget_config *config,
+		u8 hdr_type)
+{
+	u32 status;
+
+	spear_dbi_read_reg(config,
PCI_STATUS, 2, &status); + if (!(status & PCI_STATUS_CAP_LIST)) + return 0; + + switch (hdr_type) { + case PCI_HEADER_TYPE_NORMAL: + case PCI_HEADER_TYPE_BRIDGE: + return PCI_CAPABILITY_LIST; + case PCI_HEADER_TYPE_CARDBUS: + return PCI_CB_CAPABILITY_LIST; + default: + return 0; + } + + return 0; +} + +/* + * Tell if a device supports a given PCI capability. + * Returns the address of the requested capability structure within the + * device's PCI configuration space or 0 in case the device does not + * support it. Possible values for @cap: + * + * %PCI_CAP_ID_PM Power Management + * %PCI_CAP_ID_AGP Accelerated Graphics Port + * %PCI_CAP_ID_VPD Vital Product Data + * %PCI_CAP_ID_SLOTID Slot Identification + * %PCI_CAP_ID_MSI Message Signalled Interrupts + * %PCI_CAP_ID_CHSWP CompactPCI HotSwap + * %PCI_CAP_ID_PCIX PCI-X + * %PCI_CAP_ID_EXP PCI Express + */ +static int pci_find_own_capability(struct spear_pcie_gadget_config *config, + int cap) +{ + u32 pos; + u32 hdr_type; + + spear_dbi_read_reg(config, PCI_HEADER_TYPE, 1, &hdr_type); + + pos = pci_find_own_cap_start(config, hdr_type); + if (pos) + pos = pci_find_own_next_cap(config, pos, cap); + + return pos; +} + +static irqreturn_t spear_pcie_gadget_irq(int irq, void *dev_id) +{ + return 0; +} + +/* + * configfs interfaces show/store functions + */ +static ssize_t pcie_gadget_show_link( + struct spear_pcie_gadget_config *config, + char *buf) +{ + struct pcie_app_reg __iomem *app_reg = config->va_app_base; + + if (readl(&app_reg->app_status_1) & ((u32)1 << XMLH_LINK_UP_ID)) + return sprintf(buf, "UP"); + else + return sprintf(buf, "DOWN"); +} + +static ssize_t pcie_gadget_store_link( + struct spear_pcie_gadget_config *config, + const char *buf, size_t count) +{ + struct pcie_app_reg __iomem *app_reg = config->va_app_base; + + if (sysfs_streq(buf, "UP")) + writel(readl(&app_reg->app_ctrl_0) | (1 << APP_LTSSM_ENABLE_ID), + &app_reg->app_ctrl_0); + else if (sysfs_streq(buf, "DOWN")) + writel(readl(&app_reg->app_ctrl_0) + & ~(1 << APP_LTSSM_ENABLE_ID), + &app_reg->app_ctrl_0); + else + return -EINVAL; + return count; +} + +static ssize_t pcie_gadget_show_int_type( + struct spear_pcie_gadget_config *config, + char *buf) +{ + return sprintf(buf, "%s", config->int_type); +} + +static ssize_t pcie_gadget_store_int_type( + struct spear_pcie_gadget_config *config, + const char *buf, size_t count) +{ + u32 cap, vec, flags; + ulong vector; + + if (sysfs_streq(buf, "INTA")) + spear_dbi_write_reg(config, PCI_INTERRUPT_LINE, 1, 1); + + else if (sysfs_streq(buf, "MSI")) { + vector = config->requested_msi; + vec = 0; + while (vector > 1) { + vector /= 2; + vec++; + } + spear_dbi_write_reg(config, PCI_INTERRUPT_LINE, 1, 0); + cap = pci_find_own_capability(config, PCI_CAP_ID_MSI); + spear_dbi_read_reg(config, cap + PCI_MSI_FLAGS, 1, &flags); + flags &= ~PCI_MSI_FLAGS_QMASK; + flags |= vec << 1; + spear_dbi_write_reg(config, cap + PCI_MSI_FLAGS, 1, flags); + } else + return -EINVAL; + + strcpy(config->int_type, buf); + + return count; +} + +static ssize_t pcie_gadget_show_no_of_msi( + struct spear_pcie_gadget_config *config, + char *buf) +{ + struct pcie_app_reg __iomem *app_reg = config->va_app_base; + u32 cap, vec, flags; + ulong vector; + + if ((readl(&app_reg->msg_status) & (1 << CFG_MSI_EN_ID)) + != (1 << CFG_MSI_EN_ID)) + vector = 0; + else { + cap = pci_find_own_capability(config, PCI_CAP_ID_MSI); + spear_dbi_read_reg(config, cap + PCI_MSI_FLAGS, 1, &flags); + flags &= ~PCI_MSI_FLAGS_QSIZE; + vec = flags >> 4; + vector = 1; + while (vec--) + vector *= 
2; + } + config->configured_msi = vector; + + return sprintf(buf, "%lu", vector); +} + +static ssize_t pcie_gadget_store_no_of_msi( + struct spear_pcie_gadget_config *config, + const char *buf, size_t count) +{ + int ret; + + ret = kstrtoul(buf, 0, &config->requested_msi); + if (ret) + return ret; + + if (config->requested_msi > 32) + config->requested_msi = 32; + + return count; +} + +static ssize_t pcie_gadget_store_inta( + struct spear_pcie_gadget_config *config, + const char *buf, size_t count) +{ + struct pcie_app_reg __iomem *app_reg = config->va_app_base; + ulong en; + int ret; + + ret = kstrtoul(buf, 0, &en); + if (ret) + return ret; + + if (en) + writel(readl(&app_reg->app_ctrl_0) | (1 << SYS_INT_ID), + &app_reg->app_ctrl_0); + else + writel(readl(&app_reg->app_ctrl_0) & ~(1 << SYS_INT_ID), + &app_reg->app_ctrl_0); + + return count; +} + +static ssize_t pcie_gadget_store_send_msi( + struct spear_pcie_gadget_config *config, + const char *buf, size_t count) +{ + struct pcie_app_reg __iomem *app_reg = config->va_app_base; + ulong vector; + u32 ven_msi; + int ret; + + ret = kstrtoul(buf, 0, &vector); + if (ret) + return ret; + + if (!config->configured_msi) + return -EINVAL; + + if (vector >= config->configured_msi) + return -EINVAL; + + ven_msi = readl(&app_reg->ven_msi_1); + ven_msi &= ~VEN_MSI_FUN_NUM_MASK; + ven_msi |= 0 << VEN_MSI_FUN_NUM_ID; + ven_msi &= ~VEN_MSI_TC_MASK; + ven_msi |= 0 << VEN_MSI_TC_ID; + ven_msi &= ~VEN_MSI_VECTOR_MASK; + ven_msi |= vector << VEN_MSI_VECTOR_ID; + + /* generating interrupt for msi vector */ + ven_msi |= VEN_MSI_REQ_EN; + writel(ven_msi, &app_reg->ven_msi_1); + udelay(1); + ven_msi &= ~VEN_MSI_REQ_EN; + writel(ven_msi, &app_reg->ven_msi_1); + + return count; +} + +static ssize_t pcie_gadget_show_vendor_id( + struct spear_pcie_gadget_config *config, + char *buf) +{ + u32 id; + + spear_dbi_read_reg(config, PCI_VENDOR_ID, 2, &id); + + return sprintf(buf, "%x", id); +} + +static ssize_t pcie_gadget_store_vendor_id( + struct spear_pcie_gadget_config *config, + const char *buf, size_t count) +{ + ulong id; + int ret; + + ret = kstrtoul(buf, 0, &id); + if (ret) + return ret; + + spear_dbi_write_reg(config, PCI_VENDOR_ID, 2, id); + + return count; +} + +static ssize_t pcie_gadget_show_device_id( + struct spear_pcie_gadget_config *config, + char *buf) +{ + u32 id; + + spear_dbi_read_reg(config, PCI_DEVICE_ID, 2, &id); + + return sprintf(buf, "%x", id); +} + +static ssize_t pcie_gadget_store_device_id( + struct spear_pcie_gadget_config *config, + const char *buf, size_t count) +{ + ulong id; + int ret; + + ret = kstrtoul(buf, 0, &id); + if (ret) + return ret; + + spear_dbi_write_reg(config, PCI_DEVICE_ID, 2, id); + + return count; +} + +static ssize_t pcie_gadget_show_bar0_size( + struct spear_pcie_gadget_config *config, + char *buf) +{ + return sprintf(buf, "%lx", config->bar0_size); +} + +static ssize_t pcie_gadget_store_bar0_size( + struct spear_pcie_gadget_config *config, + const char *buf, size_t count) +{ + ulong size; + u32 pos, pos1; + u32 no_of_bit = 0; + int ret; + + ret = kstrtoul(buf, 0, &size); + if (ret) + return ret; + + /* min bar size is 256 */ + if (size <= 0x100) + size = 0x100; + /* max bar size is 1MB*/ + else if (size >= 0x100000) + size = 0x100000; + else { + pos = 0; + pos1 = 0; + while (pos < 21) { + pos = find_next_bit((ulong *)&size, 21, pos); + if (pos != 21) + pos1 = pos + 1; + pos++; + no_of_bit++; + } + if (no_of_bit == 2) + pos1--; + + size = 1 << pos1; + } + config->bar0_size = size; + spear_dbi_write_reg(config, 
PCIE_BAR0_MASK_REG, 4, size - 1); + + return count; +} + +static ssize_t pcie_gadget_show_bar0_address( + struct spear_pcie_gadget_config *config, + char *buf) +{ + struct pcie_app_reg __iomem *app_reg = config->va_app_base; + + u32 address = readl(&app_reg->pim0_mem_addr_start); + + return sprintf(buf, "%x", address); +} + +static ssize_t pcie_gadget_store_bar0_address( + struct spear_pcie_gadget_config *config, + const char *buf, size_t count) +{ + struct pcie_app_reg __iomem *app_reg = config->va_app_base; + ulong address; + int ret; + + ret = kstrtoul(buf, 0, &address); + if (ret) + return ret; + + address &= ~(config->bar0_size - 1); + if (config->va_bar0_address) + iounmap(config->va_bar0_address); + config->va_bar0_address = ioremap(address, config->bar0_size); + if (!config->va_bar0_address) + return -ENOMEM; + + writel(address, &app_reg->pim0_mem_addr_start); + + return count; +} + +static ssize_t pcie_gadget_show_bar0_rw_offset( + struct spear_pcie_gadget_config *config, + char *buf) +{ + return sprintf(buf, "%lx", config->bar0_rw_offset); +} + +static ssize_t pcie_gadget_store_bar0_rw_offset( + struct spear_pcie_gadget_config *config, + const char *buf, size_t count) +{ + ulong offset; + int ret; + + ret = kstrtoul(buf, 0, &offset); + if (ret) + return ret; + + if (offset % 4) + return -EINVAL; + + config->bar0_rw_offset = offset; + + return count; +} + +static ssize_t pcie_gadget_show_bar0_data( + struct spear_pcie_gadget_config *config, + char *buf) +{ + ulong data; + + if (!config->va_bar0_address) + return -ENOMEM; + + data = readl((ulong)config->va_bar0_address + config->bar0_rw_offset); + + return sprintf(buf, "%lx", data); +} + +static ssize_t pcie_gadget_store_bar0_data( + struct spear_pcie_gadget_config *config, + const char *buf, size_t count) +{ + ulong data; + int ret; + + ret = kstrtoul(buf, 0, &data); + if (ret) + return ret; + + if (!config->va_bar0_address) + return -ENOMEM; + + writel(data, (ulong)config->va_bar0_address + config->bar0_rw_offset); + + return count; +} + +/* + * Attribute definitions. 
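+ *
+ * Once the subsystem is registered, each attribute appears as a file
+ * in the gadget's configfs directory. A hypothetical session (paths
+ * assume configfs mounted at /sys/kernel/config and pdev->id == 0):
+ *
+ *   echo 0x104a > /sys/kernel/config/pcie_gadget.0/vendor_id
+ *   echo UP     > /sys/kernel/config/pcie_gadget.0/link
+ *   cat /sys/kernel/config/pcie_gadget.0/link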
+ */ + +#define PCIE_GADGET_TARGET_ATTR_RO(_name) \ +static struct pcie_gadget_target_attr pcie_gadget_target_##_name = \ + __CONFIGFS_ATTR(_name, S_IRUGO, pcie_gadget_show_##_name, NULL) + +#define PCIE_GADGET_TARGET_ATTR_WO(_name) \ +static struct pcie_gadget_target_attr pcie_gadget_target_##_name = \ + __CONFIGFS_ATTR(_name, S_IWUSR, NULL, pcie_gadget_store_##_name) + +#define PCIE_GADGET_TARGET_ATTR_RW(_name) \ +static struct pcie_gadget_target_attr pcie_gadget_target_##_name = \ + __CONFIGFS_ATTR(_name, S_IRUGO | S_IWUSR, pcie_gadget_show_##_name, \ + pcie_gadget_store_##_name) +PCIE_GADGET_TARGET_ATTR_RW(link); +PCIE_GADGET_TARGET_ATTR_RW(int_type); +PCIE_GADGET_TARGET_ATTR_RW(no_of_msi); +PCIE_GADGET_TARGET_ATTR_WO(inta); +PCIE_GADGET_TARGET_ATTR_WO(send_msi); +PCIE_GADGET_TARGET_ATTR_RW(vendor_id); +PCIE_GADGET_TARGET_ATTR_RW(device_id); +PCIE_GADGET_TARGET_ATTR_RW(bar0_size); +PCIE_GADGET_TARGET_ATTR_RW(bar0_address); +PCIE_GADGET_TARGET_ATTR_RW(bar0_rw_offset); +PCIE_GADGET_TARGET_ATTR_RW(bar0_data); + +static struct configfs_attribute *pcie_gadget_target_attrs[] = { + &pcie_gadget_target_link.attr, + &pcie_gadget_target_int_type.attr, + &pcie_gadget_target_no_of_msi.attr, + &pcie_gadget_target_inta.attr, + &pcie_gadget_target_send_msi.attr, + &pcie_gadget_target_vendor_id.attr, + &pcie_gadget_target_device_id.attr, + &pcie_gadget_target_bar0_size.attr, + &pcie_gadget_target_bar0_address.attr, + &pcie_gadget_target_bar0_rw_offset.attr, + &pcie_gadget_target_bar0_data.attr, + NULL, +}; + +static struct pcie_gadget_target *to_target(struct config_item *item) +{ + return item ? + container_of(to_configfs_subsystem(to_config_group(item)), + struct pcie_gadget_target, subsys) : NULL; +} + +/* + * Item operations and type for pcie_gadget_target. 
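+ *
+ * The generic show/store handlers below recover the enclosing
+ * pcie_gadget_target from the config_item and the
+ * pcie_gadget_target_attr from the configfs_attribute, then dispatch
+ * to that attribute's own show()/store() with the driver config.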
+ */ + +static ssize_t pcie_gadget_target_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *buf) +{ + ssize_t ret = -EINVAL; + struct pcie_gadget_target *target = to_target(item); + struct pcie_gadget_target_attr *t_attr = + container_of(attr, struct pcie_gadget_target_attr, attr); + + if (t_attr->show) + ret = t_attr->show(&target->config, buf); + return ret; +} + +static ssize_t pcie_gadget_target_attr_store(struct config_item *item, + struct configfs_attribute *attr, + const char *buf, + size_t count) +{ + ssize_t ret = -EINVAL; + struct pcie_gadget_target *target = to_target(item); + struct pcie_gadget_target_attr *t_attr = + container_of(attr, struct pcie_gadget_target_attr, attr); + + if (t_attr->store) + ret = t_attr->store(&target->config, buf, count); + return ret; +} + +static struct configfs_item_operations pcie_gadget_target_item_ops = { + .show_attribute = pcie_gadget_target_attr_show, + .store_attribute = pcie_gadget_target_attr_store, +}; + +static struct config_item_type pcie_gadget_target_type = { + .ct_attrs = pcie_gadget_target_attrs, + .ct_item_ops = &pcie_gadget_target_item_ops, + .ct_owner = THIS_MODULE, +}; + +static void spear13xx_pcie_device_init(struct spear_pcie_gadget_config *config) +{ + struct pcie_app_reg __iomem *app_reg = config->va_app_base; + + /*setup registers for outbound translation */ + + writel(config->base, &app_reg->in0_mem_addr_start); + writel(app_reg->in0_mem_addr_start + IN0_MEM_SIZE, + &app_reg->in0_mem_addr_limit); + writel(app_reg->in0_mem_addr_limit + 1, &app_reg->in1_mem_addr_start); + writel(app_reg->in1_mem_addr_start + IN1_MEM_SIZE, + &app_reg->in1_mem_addr_limit); + writel(app_reg->in1_mem_addr_limit + 1, &app_reg->in_io_addr_start); + writel(app_reg->in_io_addr_start + IN_IO_SIZE, + &app_reg->in_io_addr_limit); + writel(app_reg->in_io_addr_limit + 1, &app_reg->in_cfg0_addr_start); + writel(app_reg->in_cfg0_addr_start + IN_CFG0_SIZE, + &app_reg->in_cfg0_addr_limit); + writel(app_reg->in_cfg0_addr_limit + 1, &app_reg->in_cfg1_addr_start); + writel(app_reg->in_cfg1_addr_start + IN_CFG1_SIZE, + &app_reg->in_cfg1_addr_limit); + writel(app_reg->in_cfg1_addr_limit + 1, &app_reg->in_msg_addr_start); + writel(app_reg->in_msg_addr_start + IN_MSG_SIZE, + &app_reg->in_msg_addr_limit); + + writel(app_reg->in0_mem_addr_start, &app_reg->pom0_mem_addr_start); + writel(app_reg->in1_mem_addr_start, &app_reg->pom1_mem_addr_start); + writel(app_reg->in_io_addr_start, &app_reg->pom_io_addr_start); + + /*setup registers for inbound translation */ + + /* Keep AORAM mapped at BAR0 as default */ + config->bar0_size = INBOUND_ADDR_MASK + 1; + spear_dbi_write_reg(config, PCIE_BAR0_MASK_REG, 4, INBOUND_ADDR_MASK); + spear_dbi_write_reg(config, PCI_BASE_ADDRESS_0, 4, 0xC); + config->va_bar0_address = ioremap(SPEAR13XX_SYSRAM1_BASE, + config->bar0_size); + + writel(SPEAR13XX_SYSRAM1_BASE, &app_reg->pim0_mem_addr_start); + writel(0, &app_reg->pim1_mem_addr_start); + writel(INBOUND_ADDR_MASK + 1, &app_reg->mem0_addr_offset_limit); + + writel(0x0, &app_reg->pim_io_addr_start); + writel(0x0, &app_reg->pim_io_addr_start); + writel(0x0, &app_reg->pim_rom_addr_start); + + writel(DEVICE_TYPE_EP | (1 << MISCTRL_EN_ID) + | ((u32)1 << REG_TRANSLATION_ENABLE), + &app_reg->app_ctrl_0); + /* disable all rx interrupts */ + writel(0, &app_reg->int_mask); + + /* Select INTA as default*/ + spear_dbi_write_reg(config, PCI_INTERRUPT_LINE, 1, 1); +} + +static int spear_pcie_gadget_probe(struct platform_device *pdev) +{ + struct resource *res0, *res1; + 
	unsigned int status = 0;
+	int irq;
+	struct clk *clk;
+	static struct pcie_gadget_target *target;
+	struct spear_pcie_gadget_config *config;
+	struct config_item		*cg_item;
+	struct configfs_subsystem *subsys;
+
+	target = devm_kzalloc(&pdev->dev, sizeof(*target), GFP_KERNEL);
+	if (!target) {
+		dev_err(&pdev->dev, "out of memory\n");
+		return -ENOMEM;
+	}
+
+	cg_item = &target->subsys.su_group.cg_item;
+	sprintf(cg_item->ci_namebuf, "pcie_gadget.%d", pdev->id);
+	cg_item->ci_type	= &pcie_gadget_target_type;
+	config = &target->config;
+
+	/* get resource for application registers */
+	res0 = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	config->va_app_base = devm_ioremap_resource(&pdev->dev, res0);
+	if (IS_ERR(config->va_app_base)) {
+		dev_err(&pdev->dev, "ioremap fail\n");
+		return PTR_ERR(config->va_app_base);
+	}
+
+	/* get resource for dbi registers */
+	res1 = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	config->base = (void __iomem *)res1->start;
+
+	config->va_dbi_base = devm_ioremap_resource(&pdev->dev, res1);
+	if (IS_ERR(config->va_dbi_base)) {
+		dev_err(&pdev->dev, "ioremap fail\n");
+		return PTR_ERR(config->va_dbi_base);
+	}
+
+	platform_set_drvdata(pdev, target);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "no update irq?\n");
+		return irq;
+	}
+
+	status = devm_request_irq(&pdev->dev, irq, spear_pcie_gadget_irq,
+				  0, pdev->name, NULL);
+	if (status) {
+		dev_err(&pdev->dev,
+			"pcie gadget interrupt IRQ%d already claimed\n", irq);
+		return status;
+	}
+
+	/* Register configfs hooks */
+	subsys = &target->subsys;
+	config_group_init(&subsys->su_group);
+	mutex_init(&subsys->su_mutex);
+	status = configfs_register_subsystem(subsys);
+	if (status)
+		return status;
+
+	/*
+	 * init basic pcie application registers.
+	 * Do not enable clock if it is PCIE0. Ideally, all controllers
+	 * should have been independent from others with respect to clock.
+	 * But PCIE1 and 2 depend on PCIE0, so the PCIE0 clk is provided
+	 * during board init.
+	 */
+	if (pdev->id == 1) {
+		/*
+		 * Ideally CFG Clock should have been also enabled here. But
+		 * it is done currently during board init routine
+		 */
+		clk = clk_get_sys("pcie1", NULL);
+		if (IS_ERR(clk)) {
+			pr_err("%s:couldn't get clk for pcie1\n", __func__);
+			return PTR_ERR(clk);
+		}
+		status = clk_enable(clk);
+		if (status) {
+			pr_err("%s:couldn't enable clk for pcie1\n", __func__);
+			return status;
+		}
+	} else if (pdev->id == 2) {
+		/*
+		 * Ideally CFG Clock should have been also enabled here. But
		 * it is done currently during board init routine
+		 */
+		clk = clk_get_sys("pcie2", NULL);
+		if (IS_ERR(clk)) {
+			pr_err("%s:couldn't get clk for pcie2\n", __func__);
+			return PTR_ERR(clk);
+		}
+		status = clk_enable(clk);
+		if (status) {
+			pr_err("%s:couldn't enable clk for pcie2\n", __func__);
+			return status;
+		}
+	}
+	spear13xx_pcie_device_init(config);
+
+	return 0;
+}
+
+static int spear_pcie_gadget_remove(struct platform_device *pdev)
+{
+	static struct pcie_gadget_target *target;
+
+	target = platform_get_drvdata(pdev);
+
+	configfs_unregister_subsystem(&target->subsys);
+
+	return 0;
+}
+
+static void spear_pcie_gadget_shutdown(struct platform_device *pdev)
+{
+}
+
+static struct platform_driver spear_pcie_gadget_driver = {
+	.probe = spear_pcie_gadget_probe,
+	.remove = spear_pcie_gadget_remove,
+	.shutdown = spear_pcie_gadget_shutdown,
+	.driver = {
+		.name = "pcie-gadget-spear",
+		.bus = &platform_bus_type
+	},
+};
+
+module_platform_driver(spear_pcie_gadget_driver);
+
+MODULE_ALIAS("platform:pcie-gadget-spear");
+MODULE_AUTHOR("Pratyush Anand");
+MODULE_LICENSE("GPL");
diff --git a/kernel/drivers/misc/sram.c b/kernel/drivers/misc/sram.c
new file mode 100644
index 000000000..eeaaf5fca
--- /dev/null
+++ b/kernel/drivers/misc/sram.c
@@ -0,0 +1,239 @@
+/*
+ * Generic on-chip SRAM allocation driver
+ *
+ * Copyright (C) 2012 Philipp Zabel, Pengutronix
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/list.h> +#include <linux/list_sort.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/genalloc.h> + +#define SRAM_GRANULARITY 32 + +struct sram_dev { + struct gen_pool *pool; + struct clk *clk; +}; + +struct sram_reserve { + struct list_head list; + u32 start; + u32 size; +}; + +static int sram_reserve_cmp(void *priv, struct list_head *a, + struct list_head *b) +{ + struct sram_reserve *ra = list_entry(a, struct sram_reserve, list); + struct sram_reserve *rb = list_entry(b, struct sram_reserve, list); + + return ra->start - rb->start; +} + +static int sram_probe(struct platform_device *pdev) +{ + void __iomem *virt_base; + struct sram_dev *sram; + struct resource *res; + struct device_node *np = pdev->dev.of_node, *child; + unsigned long size, cur_start, cur_size; + struct sram_reserve *rblocks, *block; + struct list_head reserve_list; + unsigned int nblocks; + int ret; + + INIT_LIST_HEAD(&reserve_list); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "found no memory resource\n"); + return -EINVAL; + } + + size = resource_size(res); + + if (!devm_request_mem_region(&pdev->dev, + res->start, size, pdev->name)) { + dev_err(&pdev->dev, "could not request region for resource\n"); + return -EBUSY; + } + + virt_base = devm_ioremap_wc(&pdev->dev, res->start, size); + if (IS_ERR(virt_base)) + return PTR_ERR(virt_base); + + sram = devm_kzalloc(&pdev->dev, sizeof(*sram), GFP_KERNEL); + if (!sram) + return -ENOMEM; + + sram->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(sram->clk)) + sram->clk = NULL; + else + clk_prepare_enable(sram->clk); + + sram->pool = devm_gen_pool_create(&pdev->dev, ilog2(SRAM_GRANULARITY), -1); + if (!sram->pool) + return -ENOMEM; + + /* + * We need an additional block to mark the end of the memory region + * after the reserved blocks from the dt are processed. + */ + nblocks = (np) ? 
of_get_available_child_count(np) + 1 : 1; + rblocks = kmalloc((nblocks) * sizeof(*rblocks), GFP_KERNEL); + if (!rblocks) { + ret = -ENOMEM; + goto err_alloc; + } + + block = &rblocks[0]; + for_each_available_child_of_node(np, child) { + struct resource child_res; + + ret = of_address_to_resource(child, 0, &child_res); + if (ret < 0) { + dev_err(&pdev->dev, + "could not get address for node %s\n", + child->full_name); + goto err_chunks; + } + + if (child_res.start < res->start || child_res.end > res->end) { + dev_err(&pdev->dev, + "reserved block %s outside the sram area\n", + child->full_name); + ret = -EINVAL; + goto err_chunks; + } + + block->start = child_res.start - res->start; + block->size = resource_size(&child_res); + list_add_tail(&block->list, &reserve_list); + + dev_dbg(&pdev->dev, "found reserved block 0x%x-0x%x\n", + block->start, + block->start + block->size); + + block++; + } + + /* the last chunk marks the end of the region */ + rblocks[nblocks - 1].start = size; + rblocks[nblocks - 1].size = 0; + list_add_tail(&rblocks[nblocks - 1].list, &reserve_list); + + list_sort(NULL, &reserve_list, sram_reserve_cmp); + + cur_start = 0; + + list_for_each_entry(block, &reserve_list, list) { + /* can only happen if sections overlap */ + if (block->start < cur_start) { + dev_err(&pdev->dev, + "block at 0x%x starts after current offset 0x%lx\n", + block->start, cur_start); + ret = -EINVAL; + goto err_chunks; + } + + /* current start is in a reserved block, so continue after it */ + if (block->start == cur_start) { + cur_start = block->start + block->size; + continue; + } + + /* + * allocate the space between the current starting + * address and the following reserved block, or the + * end of the region. + */ + cur_size = block->start - cur_start; + + dev_dbg(&pdev->dev, "adding chunk 0x%lx-0x%lx\n", + cur_start, cur_start + cur_size); + ret = gen_pool_add_virt(sram->pool, + (unsigned long)virt_base + cur_start, + res->start + cur_start, cur_size, -1); + if (ret < 0) + goto err_chunks; + + /* next allocation after this reserved block */ + cur_start = block->start + block->size; + } + + kfree(rblocks); + + platform_set_drvdata(pdev, sram); + + dev_dbg(&pdev->dev, "SRAM pool: %ld KiB @ 0x%p\n", size / 1024, virt_base); + + return 0; + +err_chunks: + kfree(rblocks); +err_alloc: + if (sram->clk) + clk_disable_unprepare(sram->clk); + return ret; +} + +static int sram_remove(struct platform_device *pdev) +{ + struct sram_dev *sram = platform_get_drvdata(pdev); + + if (gen_pool_avail(sram->pool) < gen_pool_size(sram->pool)) + dev_dbg(&pdev->dev, "removed while SRAM allocated\n"); + + if (sram->clk) + clk_disable_unprepare(sram->clk); + + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id sram_dt_ids[] = { + { .compatible = "mmio-sram" }, + {} +}; +#endif + +static struct platform_driver sram_driver = { + .driver = { + .name = "sram", + .of_match_table = of_match_ptr(sram_dt_ids), + }, + .probe = sram_probe, + .remove = sram_remove, +}; + +static int __init sram_init(void) +{ + return platform_driver_register(&sram_driver); +} + +postcore_initcall(sram_init); diff --git a/kernel/drivers/misc/ti-st/Kconfig b/kernel/drivers/misc/ti-st/Kconfig new file mode 100644 index 000000000..f34dcc514 --- /dev/null +++ b/kernel/drivers/misc/ti-st/Kconfig @@ -0,0 +1,17 @@ +# +# TI's shared transport line discipline and the protocol +# drivers (BT, FM and GPS) +# +menu "Texas Instruments shared transport line discipline" +config TI_ST + tristate "Shared transport core driver" + depends on NET && 
GPIOLIB && TTY
+	select FW_LOADER
+	help
+	  This enables the shared transport core driver for TI
+	  BT / FM and GPS combo chips. This enables protocol drivers
+	  to register themselves with the core and send data; responses
+	  are returned to the relevant protocol drivers based on their
+	  packet types.
+
+endmenu
diff --git a/kernel/drivers/misc/ti-st/Makefile b/kernel/drivers/misc/ti-st/Makefile
new file mode 100644
index 000000000..78d7ebb14
--- /dev/null
+++ b/kernel/drivers/misc/ti-st/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for TI's shared transport line discipline
+# and its protocol drivers (BT, FM, GPS)
+#
+obj-$(CONFIG_TI_ST) += st_drv.o
+st_drv-objs := st_core.o st_kim.o st_ll.o
diff --git a/kernel/drivers/misc/ti-st/st_core.c b/kernel/drivers/misc/ti-st/st_core.c
new file mode 100644
index 000000000..c8c6a3630
--- /dev/null
+++ b/kernel/drivers/misc/ti-st/st_core.c
@@ -0,0 +1,909 @@
+/*
+ *  Shared Transport Line discipline driver Core
+ *	This hooks up ST KIM driver and ST LL driver
+ *  Copyright (C) 2009-2010 Texas Instruments
+ *  Author: Pavan Savoy <pavan_savoy@ti.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#define pr_fmt(fmt)	"(stc): " fmt
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/tty.h>
+
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
+
+#include <linux/ti_wilink_st.h>
+
+extern void st_kim_recv(void *, const unsigned char *, long);
+void st_int_recv(void *, const unsigned char *, long);
+/* function pointer pointing to either,
+ * st_kim_recv during registration to receive fw download responses
+ * st_int_recv after registration to receive proto stack responses
+ */
+static void (*st_recv) (void *, const unsigned char *, long);
+
+/********************************************************************/
+static void add_channel_to_table(struct st_data_s *st_gdata,
+		struct st_proto_s *new_proto)
+{
+	pr_info("%s: id %d\n", __func__, new_proto->chnl_id);
+	/* list now has the channel id as index itself */
+	st_gdata->list[new_proto->chnl_id] = new_proto;
+	st_gdata->is_registered[new_proto->chnl_id] = true;
+}
+
+static void remove_channel_from_table(struct st_data_s *st_gdata,
+		struct st_proto_s *proto)
+{
+	pr_info("%s: id %d\n", __func__, proto->chnl_id);
+/*	st_gdata->list[proto->chnl_id] = NULL; */
+	st_gdata->is_registered[proto->chnl_id] = false;
+}
+
+/*
+ * called from KIM during firmware download.
+ *
+ * This is a wrapper function to tty->ops->write_room.
+ * It returns the amount of free space available in the
+ * uart tx buffer.
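+ *
+ * A negative return (-1 here) means no tty is attached and must be
+ * treated as "cannot write", not as an amount of room.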
+ */ +int st_get_uart_wr_room(struct st_data_s *st_gdata) +{ + struct tty_struct *tty; + if (unlikely(st_gdata == NULL || st_gdata->tty == NULL)) { + pr_err("tty unavailable to perform write"); + return -1; + } + tty = st_gdata->tty; + return tty->ops->write_room(tty); +} + +/* can be called in from + * -- KIM (during fw download) + * -- ST Core (during st_write) + * + * This is the internal write function - a wrapper + * to tty->ops->write + */ +int st_int_write(struct st_data_s *st_gdata, + const unsigned char *data, int count) +{ + struct tty_struct *tty; + if (unlikely(st_gdata == NULL || st_gdata->tty == NULL)) { + pr_err("tty unavailable to perform write"); + return -EINVAL; + } + tty = st_gdata->tty; +#ifdef VERBOSE + print_hex_dump(KERN_DEBUG, "<out<", DUMP_PREFIX_NONE, + 16, 1, data, count, 0); +#endif + return tty->ops->write(tty, data, count); + +} + +/* + * push the skb received to relevant + * protocol stacks + */ +static void st_send_frame(unsigned char chnl_id, struct st_data_s *st_gdata) +{ + pr_debug(" %s(prot:%d) ", __func__, chnl_id); + + if (unlikely + (st_gdata == NULL || st_gdata->rx_skb == NULL + || st_gdata->is_registered[chnl_id] == false)) { + pr_err("chnl_id %d not registered, no data to send?", + chnl_id); + kfree_skb(st_gdata->rx_skb); + return; + } + /* this cannot fail + * this shouldn't take long + * - should be just skb_queue_tail for the + * protocol stack driver + */ + if (likely(st_gdata->list[chnl_id]->recv != NULL)) { + if (unlikely + (st_gdata->list[chnl_id]->recv + (st_gdata->list[chnl_id]->priv_data, st_gdata->rx_skb) + != 0)) { + pr_err(" proto stack %d's ->recv failed", chnl_id); + kfree_skb(st_gdata->rx_skb); + return; + } + } else { + pr_err(" proto stack %d's ->recv null", chnl_id); + kfree_skb(st_gdata->rx_skb); + } + return; +} + +/** + * st_reg_complete - + * to call registration complete callbacks + * of all protocol stack drivers + * This function is being called with spin lock held, protocol drivers are + * only expected to complete their waits and do nothing more than that. + */ +static void st_reg_complete(struct st_data_s *st_gdata, char err) +{ + unsigned char i = 0; + pr_info(" %s ", __func__); + for (i = 0; i < ST_MAX_CHANNELS; i++) { + if (likely(st_gdata != NULL && + st_gdata->is_registered[i] == true && + st_gdata->list[i]->reg_complete_cb != NULL)) { + st_gdata->list[i]->reg_complete_cb + (st_gdata->list[i]->priv_data, err); + pr_info("protocol %d's cb sent %d\n", i, err); + if (err) { /* cleanup registered protocol */ + st_gdata->is_registered[i] = false; + if (st_gdata->protos_registered) + st_gdata->protos_registered--; + } + } + } +} + +static inline int st_check_data_len(struct st_data_s *st_gdata, + unsigned char chnl_id, int len) +{ + int room = skb_tailroom(st_gdata->rx_skb); + + pr_debug("len %d room %d", len, room); + + if (!len) { + /* Received packet has only packet header and + * has zero length payload. So, ask ST CORE to + * forward the packet to protocol driver (BT/FM/GPS) + */ + st_send_frame(chnl_id, st_gdata); + + } else if (len > room) { + /* Received packet's payload length is larger. + * We can't accommodate it in created skb. + */ + pr_err("Data length is too large len %d room %d", len, + room); + kfree_skb(st_gdata->rx_skb); + } else { + /* Packet header has non-zero payload length and + * we have enough space in created skb. 
Let's read
+		 * the payload data. */
+		st_gdata->rx_state = ST_W4_DATA;
+		st_gdata->rx_count = len;
+		return len;
+	}
+
+	/* Change ST state to continue to process next
+	 * packet */
+	st_gdata->rx_state = ST_W4_PACKET_TYPE;
+	st_gdata->rx_skb = NULL;
+	st_gdata->rx_count = 0;
+	st_gdata->rx_chnl = 0;
+
+	return 0;
+}
+
+/**
+ * st_wakeup_ack - internal function for action when wake-up ack
+ *	received
+ */
+static inline void st_wakeup_ack(struct st_data_s *st_gdata,
+	unsigned char cmd)
+{
+	struct sk_buff *waiting_skb;
+	unsigned long flags = 0;
+
+	spin_lock_irqsave(&st_gdata->lock, flags);
+	/* de-Q from waitQ and Q in txQ now that the
+	 * chip is awake
+	 */
+	while ((waiting_skb = skb_dequeue(&st_gdata->tx_waitq)))
+		skb_queue_tail(&st_gdata->txq, waiting_skb);
+
+	/* state forwarded to ST LL */
+	st_ll_sleep_state(st_gdata, (unsigned long)cmd);
+	spin_unlock_irqrestore(&st_gdata->lock, flags);
+
+	/* wake up to send the recently copied skbs from waitQ */
+	st_tx_wakeup(st_gdata);
+}
+
+/**
+ * st_int_recv - ST's internal receive function.
+ *	Decodes received RAW data and forwards to corresponding
+ *	client drivers (Bluetooth,FM,GPS..etc).
+ *	This can receive various types of packets,
+ *	HCI-Events, ACL, SCO, 4 types of HCI-LL PM packets
+ *	CH-8 packets from FM, CH-9 packets from GPS cores.
+ */
+void st_int_recv(void *disc_data,
+	const unsigned char *data, long count)
+{
+	char *ptr;
+	struct st_proto_s *proto;
+	unsigned short payload_len = 0;
+	int len = 0;
+	unsigned char type = 0;
+	unsigned char *plen;
+	struct st_data_s *st_gdata = (struct st_data_s *)disc_data;
+	unsigned long flags;
+
+	ptr = (char *)data;
+	/* tty_receive sent null ? */
+	if (unlikely(ptr == NULL) || (st_gdata == NULL)) {
+		pr_err(" received null from TTY ");
+		return;
+	}
+
+	pr_debug("count %ld rx_state %ld "
+		"rx_count %ld", count, st_gdata->rx_state,
+		st_gdata->rx_count);
+
+	spin_lock_irqsave(&st_gdata->lock, flags);
+	/* Decode received bytes here */
+	while (count) {
+		if (st_gdata->rx_count) {
+			len = min_t(unsigned int, st_gdata->rx_count, count);
+			memcpy(skb_put(st_gdata->rx_skb, len), ptr, len);
+			st_gdata->rx_count -= len;
+			count -= len;
+			ptr += len;
+
+			if (st_gdata->rx_count)
+				continue;
+
+			/* Check ST RX state machine, where are we? */
+			switch (st_gdata->rx_state) {
+			/* Waiting for complete packet ?
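+			 * (The receive state machine cycles
+			 * ST_W4_PACKET_TYPE -> ST_W4_HEADER ->
+			 * ST_W4_DATA: the first byte selects the
+			 * channel, the header supplies the payload
+			 * length, then the completed skb is handed
+			 * upstream by st_send_frame().)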
			 */
+			case ST_W4_DATA:
+				pr_debug("Complete pkt received");
+				/* Ask ST CORE to forward
+				 * the packet to protocol driver */
+				st_send_frame(st_gdata->rx_chnl, st_gdata);
+
+				st_gdata->rx_state = ST_W4_PACKET_TYPE;
+				st_gdata->rx_skb = NULL;
+				continue;
+			/* parse the header to know details */
+			case ST_W4_HEADER:
+				proto = st_gdata->list[st_gdata->rx_chnl];
+				plen =
+				&st_gdata->rx_skb->data
+				[proto->offset_len_in_hdr];
+				pr_debug("plen pointing to %x\n", *plen);
+				if (proto->len_size == 1)/* 1 byte len field */
+					payload_len = *(unsigned char *)plen;
+				else if (proto->len_size == 2)
+					payload_len =
+					__le16_to_cpu(*(unsigned short *)plen);
+				else
+					pr_info("%s: invalid length "
+					"for id %d\n",
+					__func__, proto->chnl_id);
+				st_check_data_len(st_gdata, proto->chnl_id,
+						payload_len);
+				pr_debug("off %d, pay len %d\n",
+					proto->offset_len_in_hdr, payload_len);
+				continue;
+			}	/* end of switch rx_state */
+		}
+
+		/* end of if rx_count */
+		/* Check first byte of packet and identify module
+		 * owner (BT/FM/GPS) */
+		switch (*ptr) {
+		case LL_SLEEP_IND:
+		case LL_SLEEP_ACK:
+		case LL_WAKE_UP_IND:
+			pr_debug("PM packet");
+			/* this takes appropriate action based on
+			 * sleep state received --
+			 */
+			st_ll_sleep_state(st_gdata, *ptr);
+			/* if WAKEUP_IND collides copy from waitq to txq
+			 * and assume chip awake
+			 */
+			spin_unlock_irqrestore(&st_gdata->lock, flags);
+			if (st_ll_getstate(st_gdata) == ST_LL_AWAKE)
+				st_wakeup_ack(st_gdata, LL_WAKE_UP_ACK);
+			spin_lock_irqsave(&st_gdata->lock, flags);
+
+			ptr++;
+			count--;
+			continue;
+		case LL_WAKE_UP_ACK:
+			pr_debug("PM packet");
+
+			spin_unlock_irqrestore(&st_gdata->lock, flags);
+			/* wake up ack received */
+			st_wakeup_ack(st_gdata, *ptr);
+			spin_lock_irqsave(&st_gdata->lock, flags);
+
+			ptr++;
+			count--;
+			continue;
+		/* Unknown packet? */
+		default:
+			type = *ptr;
+
+			/* Default case means non-HCILL packets,
+			 * possibilities are packets for:
+			 * (a) valid protocol -  Supported Protocols within
+			 *     the ST_MAX_CHANNELS.
+			 * (b) registered protocol - Checked by
+			 *     "st_gdata->list[type] == NULL)" are supported
+			 *     protocols only.
+			 *  Rules out any invalid protocol and
+			 *  unregistered protocols with channel ID < 16.
+			 */
+
+			if ((type >= ST_MAX_CHANNELS) ||
+					(st_gdata->list[type] == NULL)) {
+				pr_err("chip/interface misbehavior: "
+						"dropping frame starting "
+						"with 0x%02x\n", type);
+				goto done;
+			}
+
+			st_gdata->rx_skb = alloc_skb(
+					st_gdata->list[type]->max_frame_size,
+					GFP_ATOMIC);
+			if (st_gdata->rx_skb == NULL) {
+				pr_err("out of memory: dropping\n");
+				goto done;
+			}
+
+			skb_reserve(st_gdata->rx_skb,
+					st_gdata->list[type]->reserve);
+			/* next 2 required for BT only */
+			st_gdata->rx_skb->cb[0] = type; /*pkt_type*/
+			st_gdata->rx_skb->cb[1] = 0; /*incoming*/
+			st_gdata->rx_chnl = *ptr;
+			st_gdata->rx_state = ST_W4_HEADER;
+			st_gdata->rx_count = st_gdata->list[type]->hdr_len;
+			pr_debug("rx_count %ld\n", st_gdata->rx_count);
+		}
+		ptr++;
+		count--;
+	}
+done:
+	spin_unlock_irqrestore(&st_gdata->lock, flags);
+	pr_debug("done %s", __func__);
+	return;
+}
+
+/**
+ * st_int_dequeue - internal de-Q function.
+ * If the previous data set was not written
+ * completely, return that skb which has the pending data.
+ * In normal cases, return top of txq.
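+ *
+ * A partially written skb is parked in st_gdata->tx_skb by
+ * st_tx_wakeup() whenever tty->ops->write() accepts fewer bytes than
+ * skb->len, so it is always resumed ahead of anything queued in txq.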
+ */ +static struct sk_buff *st_int_dequeue(struct st_data_s *st_gdata) +{ + struct sk_buff *returning_skb; + + pr_debug("%s", __func__); + if (st_gdata->tx_skb != NULL) { + returning_skb = st_gdata->tx_skb; + st_gdata->tx_skb = NULL; + return returning_skb; + } + return skb_dequeue(&st_gdata->txq); +} + +/** + * st_int_enqueue - internal Q-ing function. + * Will either Q the skb to txq or the tx_waitq + * depending on the ST LL state. + * If the chip is asleep, then Q it onto waitq and + * wakeup the chip. + * txq and waitq needs protection since the other contexts + * may be sending data, waking up chip. + */ +static void st_int_enqueue(struct st_data_s *st_gdata, struct sk_buff *skb) +{ + unsigned long flags = 0; + + pr_debug("%s", __func__); + spin_lock_irqsave(&st_gdata->lock, flags); + + switch (st_ll_getstate(st_gdata)) { + case ST_LL_AWAKE: + pr_debug("ST LL is AWAKE, sending normally"); + skb_queue_tail(&st_gdata->txq, skb); + break; + case ST_LL_ASLEEP_TO_AWAKE: + skb_queue_tail(&st_gdata->tx_waitq, skb); + break; + case ST_LL_AWAKE_TO_ASLEEP: + pr_err("ST LL is illegal state(%ld)," + "purging received skb.", st_ll_getstate(st_gdata)); + kfree_skb(skb); + break; + case ST_LL_ASLEEP: + skb_queue_tail(&st_gdata->tx_waitq, skb); + st_ll_wakeup(st_gdata); + break; + default: + pr_err("ST LL is illegal state(%ld)," + "purging received skb.", st_ll_getstate(st_gdata)); + kfree_skb(skb); + break; + } + + spin_unlock_irqrestore(&st_gdata->lock, flags); + pr_debug("done %s", __func__); + return; +} + +/* + * internal wakeup function + * called from either + * - TTY layer when write's finished + * - st_write (in context of the protocol stack) + */ +void st_tx_wakeup(struct st_data_s *st_data) +{ + struct sk_buff *skb; + unsigned long flags; /* for irq save flags */ + pr_debug("%s", __func__); + /* check for sending & set flag sending here */ + if (test_and_set_bit(ST_TX_SENDING, &st_data->tx_state)) { + pr_debug("ST already sending"); + /* keep sending */ + set_bit(ST_TX_WAKEUP, &st_data->tx_state); + return; + /* TX_WAKEUP will be checked in another + * context + */ + } + do { /* come back if st_tx_wakeup is set */ + /* woke-up to write */ + clear_bit(ST_TX_WAKEUP, &st_data->tx_state); + while ((skb = st_int_dequeue(st_data))) { + int len; + spin_lock_irqsave(&st_data->lock, flags); + /* enable wake-up from TTY */ + set_bit(TTY_DO_WRITE_WAKEUP, &st_data->tty->flags); + len = st_int_write(st_data, skb->data, skb->len); + skb_pull(skb, len); + /* if skb->len = len as expected, skb->len=0 */ + if (skb->len) { + /* would be the next skb to be sent */ + st_data->tx_skb = skb; + spin_unlock_irqrestore(&st_data->lock, flags); + break; + } + kfree_skb(skb); + spin_unlock_irqrestore(&st_data->lock, flags); + } + /* if wake-up is set in another context- restart sending */ + } while (test_bit(ST_TX_WAKEUP, &st_data->tx_state)); + + /* clear flag sending */ + clear_bit(ST_TX_SENDING, &st_data->tx_state); +} + +/********************************************************************/ +/* functions called from ST KIM +*/ +void kim_st_list_protocols(struct st_data_s *st_gdata, void *buf) +{ + seq_printf(buf, "[%d]\nBT=%c\nFM=%c\nGPS=%c\n", + st_gdata->protos_registered, + st_gdata->is_registered[0x04] == true ? 'R' : 'U', + st_gdata->is_registered[0x08] == true ? 'R' : 'U', + st_gdata->is_registered[0x09] == true ? 
'R' : 'U'); +} + +/********************************************************************/ +/* + * functions called from protocol stack drivers + * to be EXPORT-ed + */ +long st_register(struct st_proto_s *new_proto) +{ + struct st_data_s *st_gdata; + long err = 0; + unsigned long flags = 0; + + st_kim_ref(&st_gdata, 0); + if (st_gdata == NULL || new_proto == NULL || new_proto->recv == NULL + || new_proto->reg_complete_cb == NULL) { + pr_err("gdata/new_proto/recv or reg_complete_cb not ready"); + return -EINVAL; + } + + if (new_proto->chnl_id >= ST_MAX_CHANNELS) { + pr_err("chnl_id %d not supported", new_proto->chnl_id); + return -EPROTONOSUPPORT; + } + + if (st_gdata->is_registered[new_proto->chnl_id] == true) { + pr_err("chnl_id %d already registered", new_proto->chnl_id); + return -EALREADY; + } + + /* can be from process context only */ + spin_lock_irqsave(&st_gdata->lock, flags); + + if (test_bit(ST_REG_IN_PROGRESS, &st_gdata->st_state)) { + pr_info(" ST_REG_IN_PROGRESS:%d ", new_proto->chnl_id); + /* fw download in progress */ + + add_channel_to_table(st_gdata, new_proto); + st_gdata->protos_registered++; + new_proto->write = st_write; + + set_bit(ST_REG_PENDING, &st_gdata->st_state); + spin_unlock_irqrestore(&st_gdata->lock, flags); + return -EINPROGRESS; + } else if (st_gdata->protos_registered == ST_EMPTY) { + pr_info(" chnl_id list empty :%d ", new_proto->chnl_id); + set_bit(ST_REG_IN_PROGRESS, &st_gdata->st_state); + st_recv = st_kim_recv; + + /* enable the ST LL - to set default chip state */ + st_ll_enable(st_gdata); + + /* release lock previously held - re-locked below */ + spin_unlock_irqrestore(&st_gdata->lock, flags); + + /* this may take a while to complete + * since it involves BT fw download + */ + err = st_kim_start(st_gdata->kim_data); + if (err != 0) { + clear_bit(ST_REG_IN_PROGRESS, &st_gdata->st_state); + if ((st_gdata->protos_registered != ST_EMPTY) && + (test_bit(ST_REG_PENDING, &st_gdata->st_state))) { + pr_err(" KIM failure complete callback "); + spin_lock_irqsave(&st_gdata->lock, flags); + st_reg_complete(st_gdata, err); + spin_unlock_irqrestore(&st_gdata->lock, flags); + clear_bit(ST_REG_PENDING, &st_gdata->st_state); + } + return -EINVAL; + } + + spin_lock_irqsave(&st_gdata->lock, flags); + + clear_bit(ST_REG_IN_PROGRESS, &st_gdata->st_state); + st_recv = st_int_recv; + + /* this is where all pending registration + * are signalled to be complete by calling callback functions + */ + if ((st_gdata->protos_registered != ST_EMPTY) && + (test_bit(ST_REG_PENDING, &st_gdata->st_state))) { + pr_debug(" call reg complete callback "); + st_reg_complete(st_gdata, 0); + } + clear_bit(ST_REG_PENDING, &st_gdata->st_state); + + /* check for already registered once more, + * since the above check is old + */ + if (st_gdata->is_registered[new_proto->chnl_id] == true) { + pr_err(" proto %d already registered ", + new_proto->chnl_id); + spin_unlock_irqrestore(&st_gdata->lock, flags); + return -EALREADY; + } + + add_channel_to_table(st_gdata, new_proto); + st_gdata->protos_registered++; + new_proto->write = st_write; + spin_unlock_irqrestore(&st_gdata->lock, flags); + return err; + } + /* if fw is already downloaded & new stack registers protocol */ + else { + add_channel_to_table(st_gdata, new_proto); + st_gdata->protos_registered++; + new_proto->write = st_write; + + /* lock already held before entering else */ + spin_unlock_irqrestore(&st_gdata->lock, flags); + return err; + } + pr_debug("done %s(%d) ", __func__, new_proto->chnl_id); +} +EXPORT_SYMBOL_GPL(st_register); + +/* 
to unregister a protocol -
+ * to be called from protocol stack driver
+ */
+long st_unregister(struct st_proto_s *proto)
+{
+	long err = 0;
+	unsigned long flags = 0;
+	struct st_data_s *st_gdata;
+
+	pr_debug("%s: %d ", __func__, proto->chnl_id);
+
+	st_kim_ref(&st_gdata, 0);
+	if (!st_gdata || proto->chnl_id >= ST_MAX_CHANNELS) {
+		pr_err(" chnl_id %d not supported", proto->chnl_id);
+		return -EPROTONOSUPPORT;
+	}
+
+	spin_lock_irqsave(&st_gdata->lock, flags);
+
+	if (st_gdata->is_registered[proto->chnl_id] == false) {
+		pr_err(" chnl_id %d not registered", proto->chnl_id);
+		spin_unlock_irqrestore(&st_gdata->lock, flags);
+		return -EPROTONOSUPPORT;
+	}
+
+	if (st_gdata->protos_registered)
+		st_gdata->protos_registered--;
+
+	remove_channel_from_table(st_gdata, proto);
+	spin_unlock_irqrestore(&st_gdata->lock, flags);
+
+	if ((st_gdata->protos_registered == ST_EMPTY) &&
+			(!test_bit(ST_REG_PENDING, &st_gdata->st_state))) {
+		pr_info(" all chnl_ids unregistered ");
+
+		/* stop traffic on tty */
+		if (st_gdata->tty) {
+			tty_ldisc_flush(st_gdata->tty);
+			stop_tty(st_gdata->tty);
+		}
+
+		/* all chnl_ids now unregistered */
+		st_kim_stop(st_gdata->kim_data);
+		/* disable ST LL */
+		st_ll_disable(st_gdata);
+	}
+	return err;
+}
+
+/*
+ * called in protocol stack drivers
+ * via the write function pointer
+ */
+long st_write(struct sk_buff *skb)
+{
+	struct st_data_s *st_gdata;
+	long len;
+
+	st_kim_ref(&st_gdata, 0);
+	if (unlikely(skb == NULL || st_gdata == NULL
+			|| st_gdata->tty == NULL)) {
+		pr_err("data/tty unavailable to perform write");
+		return -EINVAL;
+	}
+
+	pr_debug("%d to be written", skb->len);
+	len = skb->len;
+
+	/* st_ll to decide where to enqueue the skb */
+	st_int_enqueue(st_gdata, skb);
+	/* wake up */
+	st_tx_wakeup(st_gdata);
+
+	/* return number of bytes written */
+	return len;
+}
+
+/* for protocols making use of shared transport */
+EXPORT_SYMBOL_GPL(st_unregister);
+
+/********************************************************************/
+/*
+ * functions called from TTY layer
+ */
+static int st_tty_open(struct tty_struct *tty)
+{
+	int err = 0;
+	struct st_data_s *st_gdata;
+	pr_info("%s ", __func__);
+
+	st_kim_ref(&st_gdata, 0);
+	st_gdata->tty = tty;
+	tty->disc_data = st_gdata;
+
+	/* don't do a wakeup for now */
+	clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+
+	/* mem already allocated
+	 */
+	tty->receive_room = 65536;
+	/* Flush any pending characters in the driver and discipline. */
+	tty_ldisc_flush(tty);
+	tty_driver_flush_buffer(tty);
+	/*
+	 * signal to UIM via KIM that -
+	 * installation of N_TI_WL ldisc is complete
+	 */
+	st_kim_complete(st_gdata->kim_data);
+	pr_debug("done %s", __func__);
+	return err;
+}
+
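st_tty_open() runs when the user-space UIM daemon installs the line discipline on the UART. A sketch of that user-space side, assuming the usual TIOCSETD ioctl; the device node shown is illustrative (the real UIM reads it from the dev_name sysfs entry), and the N_TI_WL number (22 in mainline linux/tty.h) would come from the kernel headers:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	int ldisc = 22;	/* N_TI_WL, per mainline linux/tty.h */
	int fd = open("/dev/ttyS1", O_RDWR | O_NOCTTY);	/* hypothetical node */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Installing the ldisc triggers st_tty_open(), which in turn
	 * signals KIM through st_kim_complete(). */
	if (ioctl(fd, TIOCSETD, &ldisc) < 0) {
		perror("TIOCSETD");
		close(fd);
		return 1;
	}
	pause();	/* keep the fd open so the ldisc stays installed */
	return 0;
}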
+static void st_tty_close(struct tty_struct *tty)
+{
+	unsigned char i = ST_MAX_CHANNELS;
+	unsigned long flags = 0;
+	struct st_data_s *st_gdata = tty->disc_data;
+
+	pr_info("%s ", __func__);
+
+	/* TODO:
+	 * if a protocol has been registered & line discipline
+	 * un-installed for some reason - what should be done?
+	 */
+	spin_lock_irqsave(&st_gdata->lock, flags);
+	for (i = ST_BT; i < ST_MAX_CHANNELS; i++) {
+		if (st_gdata->is_registered[i] == true)
+			pr_err("%d not un-registered", i);
+		st_gdata->list[i] = NULL;
+		st_gdata->is_registered[i] = false;
+	}
+	st_gdata->protos_registered = 0;
+	spin_unlock_irqrestore(&st_gdata->lock, flags);
+	/*
+	 * signal to UIM via KIM that -
+	 * N_TI_WL ldisc is un-installed
+	 */
+	st_kim_complete(st_gdata->kim_data);
+	st_gdata->tty = NULL;
+	/* Flush any pending characters in the driver and discipline. */
+	tty_ldisc_flush(tty);
+	tty_driver_flush_buffer(tty);
+
+	spin_lock_irqsave(&st_gdata->lock, flags);
+	/* empty out txq and tx_waitq */
+	skb_queue_purge(&st_gdata->txq);
+	skb_queue_purge(&st_gdata->tx_waitq);
+	/* reset the TTY Rx states of ST */
+	st_gdata->rx_count = 0;
+	st_gdata->rx_state = ST_W4_PACKET_TYPE;
+	kfree_skb(st_gdata->rx_skb);
+	st_gdata->rx_skb = NULL;
+	spin_unlock_irqrestore(&st_gdata->lock, flags);
+
+	pr_debug("%s: done ", __func__);
+}
+
+static void st_tty_receive(struct tty_struct *tty, const unsigned char *data,
+			   char *tty_flags, int count)
+{
+#ifdef VERBOSE
+	print_hex_dump(KERN_DEBUG, ">in>", DUMP_PREFIX_NONE,
+		16, 1, data, count, 0);
+#endif
+
+	/*
+	 * if fw download is in progress then route incoming data
+	 * to KIM for validation
+	 */
+	st_recv(tty->disc_data, data, count);
+	pr_debug("done %s", __func__);
+}
+
+/* wake-up function called in from the TTY layer;
+ * our internal wakeup function is called from here
+ */
+static void st_tty_wakeup(struct tty_struct *tty)
+{
+	struct st_data_s *st_gdata = tty->disc_data;
+	pr_debug("%s ", __func__);
+	/* don't do a wakeup for now */
+	clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+
+	/* call our internal wakeup */
+	st_tx_wakeup((void *)st_gdata);
+}
+
+static void st_tty_flush_buffer(struct tty_struct *tty)
+{
+	struct st_data_s *st_gdata = tty->disc_data;
+	pr_debug("%s ", __func__);
+
+	kfree_skb(st_gdata->tx_skb);
+	st_gdata->tx_skb = NULL;
+
+	tty_driver_flush_buffer(tty);
+	return;
+}
+
+static struct tty_ldisc_ops st_ldisc_ops = {
+	.magic = TTY_LDISC_MAGIC,
+	.name = "n_st",
+	.open = st_tty_open,
+	.close = st_tty_close,
+	.receive_buf = st_tty_receive,
+	.write_wakeup = st_tty_wakeup,
+	.flush_buffer = st_tty_flush_buffer,
+	.owner = THIS_MODULE
+};
+
+/********************************************************************/
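For reference before the core's init/exit pair: a protocol driver sitting on top of this core fills in a struct st_proto_s and calls st_register(); the framing fields below are exactly what drives the ST_W4_HEADER parsing shown earlier. A hedged sketch against the ti_wilink_st.h API used throughout this diff, with illustrative channel and framing values:

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/ti_wilink_st.h>

/* Called back once firmware download finishes (or fails). */
static void demo_reg_complete(void *priv_data, int data)
{
	pr_info("demo: registration complete, status %d\n", data);
}

/* Called with each complete frame the core reassembles for this channel. */
static long demo_recv(void *priv_data, struct sk_buff *skb)
{
	kfree_skb(skb);
	return 0;
}

static struct st_proto_s demo_proto = {
	.chnl_id		= 0x08,	/* illustrative: the FM channel */
	.max_frame_size		= 0xff,
	.hdr_len		= 3,	/* header bytes before the payload */
	.offset_len_in_hdr	= 2,	/* where the length field sits */
	.len_size		= 1,	/* 1-byte length field */
	.reserve		= 1,
	.recv			= demo_recv,
	.reg_complete_cb	= demo_reg_complete,
};

static int demo_attach(void)
{
	long err = st_register(&demo_proto);

	if (err == -EINPROGRESS)
		return 0;	/* reg_complete_cb fires after fw download */
	if (err)
		return err;
	/* on success st_register() filled in demo_proto.write for us */
	return 0;
}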
+int st_core_init(struct st_data_s **core_data)
+{
+	struct st_data_s *st_gdata;
+	long err;
+
+	err = tty_register_ldisc(N_TI_WL, &st_ldisc_ops);
+	if (err) {
+		pr_err("error registering %d line discipline %ld",
+			N_TI_WL, err);
+		return err;
+	}
+	pr_debug("registered n_shared line discipline");
+
+	st_gdata = kzalloc(sizeof(struct st_data_s), GFP_KERNEL);
+	if (!st_gdata) {
+		pr_err("memory allocation failed");
+		err = tty_unregister_ldisc(N_TI_WL);
+		if (err)
+			pr_err("unable to un-register ldisc %ld", err);
+		err = -ENOMEM;
+		return err;
+	}
+
+	/* Initialize ST TxQ and Tx waitQ queue head. All BT/FM/GPS module
+	 * skbs will be pushed in this queue for actual transmission.
+	 */
+	skb_queue_head_init(&st_gdata->txq);
+	skb_queue_head_init(&st_gdata->tx_waitq);
+
+	/* Locking used in st_int_enqueue() to avoid multiple execution */
+	spin_lock_init(&st_gdata->lock);
+
+	err = st_ll_init(st_gdata);
+	if (err) {
+		pr_err("error during st_ll initialization(%ld)", err);
+		kfree(st_gdata);
+		err = tty_unregister_ldisc(N_TI_WL);
+		if (err)
+			pr_err("unable to un-register ldisc");
+		return err;
+	}
+	*core_data = st_gdata;
+	return 0;
+}
+
+void st_core_exit(struct st_data_s *st_gdata)
+{
+	long err;
+
+	if (st_gdata != NULL) {
+		/* internal module cleanup */
+		err = st_ll_deinit(st_gdata);
+		if (err)
+			pr_err("error during deinit of ST LL %ld", err);
+		/* Free ST Tx Qs and skbs */
+		skb_queue_purge(&st_gdata->txq);
+		skb_queue_purge(&st_gdata->tx_waitq);
+		kfree_skb(st_gdata->rx_skb);
+		kfree_skb(st_gdata->tx_skb);
+		/* TTY ldisc cleanup */
+		err = tty_unregister_ldisc(N_TI_WL);
+		if (err)
+			pr_err("unable to un-register ldisc %ld", err);
+		/* free the global data pointer */
+		kfree(st_gdata);
+	}
+}
diff --git a/kernel/drivers/misc/ti-st/st_kim.c b/kernel/drivers/misc/ti-st/st_kim.c
new file mode 100644
index 000000000..18e7a0398
--- /dev/null
+++ b/kernel/drivers/misc/ti-st/st_kim.c
@@ -0,0 +1,952 @@
+/*
+ * Shared Transport Line discipline driver Core
+ * Init Manager module responsible for GPIO control
+ * and firmware download
+ * Copyright (C) 2009-2010 Texas Instruments
+ * Author: Pavan Savoy <pavan_savoy@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#define pr_fmt(fmt) "(stk) :" fmt
+#include <linux/platform_device.h>
+#include <linux/jiffies.h>
+#include <linux/firmware.h>
+#include <linux/delay.h>
+#include <linux/wait.h>
+#include <linux/gpio.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/sched.h>
+#include <linux/sysfs.h>
+#include <linux/tty.h>
+
+#include <linux/skbuff.h>
+#include <linux/ti_wilink_st.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#define MAX_ST_DEVICES	3	/* Imagine 1 on each UART for now */
+static struct platform_device *st_kim_devices[MAX_ST_DEVICES];
+
+/**********************************************************************/
+/* internal functions */
+
+struct ti_st_plat_data *dt_pdata;
+static struct ti_st_plat_data *get_platform_data(struct device *dev);
+
+/**
+ * st_get_plat_device -
+ * function which returns the reference to the platform device
+ * requested by id. As of now only 1 such device exists (id=0);
+ * the context requesting a reference is either
+ * (a) the protocol driver which is registering, or
+ * (b) the tty device which has been opened.
+ */
+static struct platform_device *st_get_plat_device(int id)
+{
+	return st_kim_devices[id];
+}
+
+/**
+ * validate_firmware_response -
+ * function to return whether the firmware response was proper
+ * in case of error don't complete so that waiting for proper
+ * response times out
+ */
+static void validate_firmware_response(struct kim_data_s *kim_gdata)
+{
+	struct sk_buff *skb = kim_gdata->rx_skb;
+	if (!skb)
+		return;
+
+	/* these magic numbers are the positions in the response buffer which
+	 * allow us to distinguish whether the response is for the read
+	 * version info command
+	 */
+	if (skb->data[2] == 0x01 && skb->data[3] == 0x01 &&
+			skb->data[4] == 0x10 && skb->data[5] == 0x00) {
+		/* fw version response */
+		memcpy(kim_gdata->resp_buffer,
+				kim_gdata->rx_skb->data,
+				kim_gdata->rx_skb->len);
+		complete_all(&kim_gdata->kim_rcvd);
+		kim_gdata->rx_state = ST_W4_PACKET_TYPE;
+		kim_gdata->rx_skb = NULL;
+		kim_gdata->rx_count = 0;
+	} else if (unlikely(skb->data[5] != 0)) {
+		pr_err("no proper response during fw download");
+		pr_err("data6 %x", skb->data[5]);
+		kfree_skb(skb);
+		return;	/* keep waiting for the proper response */
+	}
+	/* complete because the entire script has been downloaded */
+	complete_all(&kim_gdata->kim_rcvd);
+	kfree_skb(skb);
+}
+
+/* check for data len received inside kim_int_recv
+ * most often hit the last case to update state to waiting for data
+ */
+static inline int kim_check_data_len(struct kim_data_s *kim_gdata, int len)
+{
+	register int room = skb_tailroom(kim_gdata->rx_skb);
+
+	pr_debug("len %d room %d", len, room);
+
+	if (!len) {
+		validate_firmware_response(kim_gdata);
+	} else if (len > room) {
+		/* Received packet's payload length is larger than
+		 * what we can accommodate in the created skb.
+		 */
+		pr_err("Data length is too large len %d room %d", len,
+				room);
+		kfree_skb(kim_gdata->rx_skb);
+	} else {
+		/* Packet header has non-zero payload length and
+		 * we have enough space in the created skb, so let's read
+		 * the payload data */
+		kim_gdata->rx_state = ST_W4_DATA;
+		kim_gdata->rx_count = len;
+		return len;
+	}
+
+	/* Change ST LL state to continue to process next
+	 * packet */
+	kim_gdata->rx_state = ST_W4_PACKET_TYPE;
+	kim_gdata->rx_skb = NULL;
+	kim_gdata->rx_count = 0;
+
+	return 0;
+}
+
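kim_int_recv() below only ever reassembles HCI event packets (type byte 0x04, then a two-byte event header). The "magic numbers" checked in validate_firmware_response() above are simply the Command Complete layout for HCI_Read_Local_Version (opcode 0x1001). A model of that layout; the struct and field names are my own, not from a kernel header:

#include <stdint.h>

struct cmd_complete_model {
	uint8_t evt;		/* 0x0e, Command Complete */
	uint8_t plen;		/* parameter length, fed to kim_check_data_len() */
	uint8_t num_cmd_pkts;	/* skb->data[2] == 0x01 */
	uint8_t opcode_lo;	/* skb->data[3] == 0x01 */
	uint8_t opcode_hi;	/* skb->data[4] == 0x10 -> opcode 0x1001 (LE) */
	uint8_t status;		/* skb->data[5] == 0x00 on success */
} __attribute__((packed));

/* Same test validate_firmware_response() performs on the raw bytes. */
static int is_read_version_resp(const uint8_t *d)
{
	const struct cmd_complete_model *e = (const void *)d;

	return e->num_cmd_pkts == 0x01 && e->opcode_lo == 0x01 &&
	       e->opcode_hi == 0x10 && e->status == 0x00;
}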
+/**
+ * kim_int_recv - receive function called during firmware download
+ * firmware download responses on different UART drivers
+ * have been observed to arrive in bursts split across multiple
+ * tty receive calls, hence this reassembly logic
+ */
+static void kim_int_recv(struct kim_data_s *kim_gdata,
+	const unsigned char *data, long count)
+{
+	const unsigned char *ptr;
+	int len = 0, type = 0;
+	unsigned char *plen;
+
+	pr_debug("%s", __func__);
+	/* Decode received bytes here */
+	ptr = data;
+	if (unlikely(ptr == NULL)) {
+		pr_err(" received null from TTY ");
+		return;
+	}
+
+	while (count) {
+		if (kim_gdata->rx_count) {
+			len = min_t(unsigned int, kim_gdata->rx_count, count);
+			memcpy(skb_put(kim_gdata->rx_skb, len), ptr, len);
+			kim_gdata->rx_count -= len;
+			count -= len;
+			ptr += len;
+
+			if (kim_gdata->rx_count)
+				continue;
+
+			/* Check ST RX state machine: where are we? */
+			switch (kim_gdata->rx_state) {
+			/* Waiting for complete packet? */
+			case ST_W4_DATA:
+				pr_debug("Complete pkt received");
+				validate_firmware_response(kim_gdata);
+				kim_gdata->rx_state = ST_W4_PACKET_TYPE;
+				kim_gdata->rx_skb = NULL;
+				continue;
+			/* Waiting for Bluetooth event header? */
+			case ST_W4_HEADER:
+				plen =
+				(unsigned char *)&kim_gdata->rx_skb->data[1];
+				pr_debug("event hdr: plen 0x%02x\n", *plen);
+				kim_check_data_len(kim_gdata, *plen);
+				continue;
+			}	/* end of switch */
+		}	/* end of if rx_state */
+		switch (*ptr) {
+		/* Bluetooth event packet? */
+		case 0x04:
+			kim_gdata->rx_state = ST_W4_HEADER;
+			kim_gdata->rx_count = 2;
+			type = *ptr;
+			break;
+		default:
+			pr_info("unknown packet");
+			ptr++;
+			count--;
+			continue;
+		}
+		ptr++;
+		count--;
+		kim_gdata->rx_skb =
+			alloc_skb(1024+8, GFP_ATOMIC);
+		if (!kim_gdata->rx_skb) {
+			pr_err("can't allocate mem for new packet");
+			kim_gdata->rx_state = ST_W4_PACKET_TYPE;
+			kim_gdata->rx_count = 0;
+			return;
+		}
+		skb_reserve(kim_gdata->rx_skb, 8);
+		kim_gdata->rx_skb->cb[0] = 4;
+		kim_gdata->rx_skb->cb[1] = 0;
+
+	}
+	return;
+}
+
+static long read_local_version(struct kim_data_s *kim_gdata, char *bts_scr_name)
+{
+	unsigned short version = 0, chip = 0, min_ver = 0, maj_ver = 0;
+	const char read_ver_cmd[] = { 0x01, 0x01, 0x10, 0x00 };
+	long timeout;
+
+	pr_debug("%s", __func__);
+
+	reinit_completion(&kim_gdata->kim_rcvd);
+	if (4 != st_int_write(kim_gdata->core_data, read_ver_cmd, 4)) {
+		pr_err("kim: couldn't write 4 bytes");
+		return -EIO;
+	}
+
+	timeout = wait_for_completion_interruptible_timeout(
+		&kim_gdata->kim_rcvd, msecs_to_jiffies(CMD_RESP_TIME));
+	if (timeout <= 0) {
+		pr_err(" waiting for ver info- timed out or received signal");
+		return timeout ? -ERESTARTSYS : -ETIMEDOUT;
+	}
+	reinit_completion(&kim_gdata->kim_rcvd);
+	/* positions 12 & 13 in the response buffer provide the
+	 * chip, major & minor numbers
+	 */
+
+	version =
+		MAKEWORD(kim_gdata->resp_buffer[12],
+				kim_gdata->resp_buffer[13]);
+	chip = (version & 0x7C00) >> 10;
+	min_ver = (version & 0x007F);
+	maj_ver = (version & 0x0380) >> 7;
+
+	if (version & 0x8000)
+		maj_ver |= 0x0008;
+
+	sprintf(bts_scr_name, "ti-connectivity/TIInit_%d.%d.%d.bts",
+			chip, maj_ver, min_ver);
+
+	/* to be accessed later via sysfs entry */
+	kim_gdata->version.full = version;
+	kim_gdata->version.chip = chip;
+	kim_gdata->version.maj_ver = maj_ver;
+	kim_gdata->version.min_ver = min_ver;
+
+	pr_info("%s", bts_scr_name);
+	return 0;
+}
+
+static void skip_change_remote_baud(unsigned char **ptr, long *len)
+{
+	unsigned char *nxt_action, *cur_action;
+	cur_action = *ptr;
+
+	nxt_action = cur_action + sizeof(struct bts_action) +
+		((struct bts_action *) cur_action)->size;
+
+	if (((struct bts_action *) nxt_action)->type != ACTION_WAIT_EVENT) {
+		pr_err("invalid action after change remote baud command");
+	} else {
+		*ptr = *ptr + sizeof(struct bts_action) +
+			((struct bts_action *)cur_action)->size;
+		*len = *len - (sizeof(struct bts_action) +
+				((struct bts_action *)cur_action)->size);
+		/* warn the user about not commenting these out in firmware */
+		pr_warn("skipping the wait event of change remote baud");
+	}
+}
+
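download_firmware() below walks the .bts script as a bts_header (magic plus version) followed by variable-length action records. A sketch of that walk; the struct shape is modelled on the casts in the code rather than copied from ti_wilink_st.h:

#include <stdint.h>
#include <stddef.h>

struct bts_action_model {
	uint16_t type;	/* ACTION_SEND_COMMAND, ACTION_WAIT_EVENT, ACTION_DELAY */
	uint16_t size;	/* number of bytes in data[] */
	uint8_t  data[];
} __attribute__((packed));

/* Advance to the next action record and shrink the remaining length,
 * exactly as the while (len > 0 && ptr) loop in download_firmware() does. */
static const struct bts_action_model *
next_action(const struct bts_action_model *act, long *remaining)
{
	size_t step = sizeof(*act) + act->size;

	*remaining -= (long)step;
	return (const struct bts_action_model *)((const uint8_t *)act + step);
}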
+/**
+ * download_firmware -
+ * internal function which parses through the .bts firmware
+ * script file and interprets SEND, WAIT and DELAY actions only as of now
+ */
+static long download_firmware(struct kim_data_s *kim_gdata)
+{
+	long err = 0;
+	long len = 0;
+	unsigned char *ptr = NULL;
+	unsigned char *action_ptr = NULL;
+	unsigned char bts_scr_name[40] = { 0 };	/* 40 char long bts scr name? */
+	int wr_room_space;
+	int cmd_size;
+	unsigned long timeout;
+
+	err = read_local_version(kim_gdata, bts_scr_name);
+	if (err != 0) {
+		pr_err("kim: failed to read local ver");
+		return err;
+	}
+	err =
+		request_firmware(&kim_gdata->fw_entry, bts_scr_name,
+				&kim_gdata->kim_pdev->dev);
+	if (unlikely((err != 0) || (kim_gdata->fw_entry->data == NULL) ||
+			(kim_gdata->fw_entry->size == 0))) {
+		pr_err(" request_firmware failed(errno %ld) for %s", err,
+				bts_scr_name);
+		return -EINVAL;
+	}
+	ptr = (void *)kim_gdata->fw_entry->data;
+	len = kim_gdata->fw_entry->size;
+	/* bts_header to remove out magic number and
+	 * version
+	 */
+	ptr += sizeof(struct bts_header);
+	len -= sizeof(struct bts_header);
+
+	while (len > 0 && ptr) {
+		pr_debug(" action size %d, type %d ",
+				((struct bts_action *)ptr)->size,
+				((struct bts_action *)ptr)->type);
+
+		switch (((struct bts_action *)ptr)->type) {
+		case ACTION_SEND_COMMAND:	/* action send */
+			pr_debug("S");
+			action_ptr = &(((struct bts_action *)ptr)->data[0]);
+			if (unlikely
+				(((struct hci_command *)action_ptr)->opcode ==
+				0xFF36)) {
+				/* ignore remote change
+				 * baud rate HCI VS command */
+				pr_warn("change remote baud"
+					" rate command in firmware");
+				skip_change_remote_baud(&ptr, &len);
+				break;
+			}
+			/*
+			 * Make sure we have enough free space in uart
+			 * tx buffer to write current firmware command
+			 */
+			cmd_size = ((struct bts_action *)ptr)->size;
+			timeout = jiffies + msecs_to_jiffies(CMD_WR_TIME);
+			do {
+				wr_room_space =
+					st_get_uart_wr_room(kim_gdata->core_data);
+				if (wr_room_space < 0) {
+					pr_err("Unable to get free space"
+						" info from uart tx buffer");
+					release_firmware(kim_gdata->fw_entry);
+					return wr_room_space;
+				}
+				mdelay(1); /* wait 1ms before checking room */
+			} while ((wr_room_space < cmd_size) &&
+					time_before(jiffies, timeout));
+
+			/* Timeout happened? */
+			if (time_after_eq(jiffies, timeout)) {
+				pr_err("Timeout while waiting for free"
+					" space in uart tx buffer");
+				release_firmware(kim_gdata->fw_entry);
+				return -ETIMEDOUT;
+			}
+			/* reinit completion before sending for the
+			 * relevant wait
+			 */
+			reinit_completion(&kim_gdata->kim_rcvd);
+
+			/*
+			 * Free space found in uart buffer, call st_int_write
+			 * to send current firmware command to the uart tx
+			 * buffer.
+			 */
+			err = st_int_write(kim_gdata->core_data,
+				((struct bts_action_send *)action_ptr)->data,
+				((struct bts_action *)ptr)->size);
+			if (unlikely(err < 0)) {
+				release_firmware(kim_gdata->fw_entry);
+				return err;
+			}
+			/*
+			 * Check number of bytes written to the uart tx buffer
+			 * against the requested command write size
+			 */
+			if (err != cmd_size) {
+				pr_err("Number of bytes written to uart "
+					"tx buffer does not match the "
+					"requested cmd write size");
+				release_firmware(kim_gdata->fw_entry);
+				return -EIO;
+			}
+			break;
+		case ACTION_WAIT_EVENT:	/* wait */
+			pr_debug("W");
+			err = wait_for_completion_interruptible_timeout(
+					&kim_gdata->kim_rcvd,
+					msecs_to_jiffies(CMD_RESP_TIME));
+			if (err <= 0) {
+				pr_err("response timeout/signaled during fw download ");
+				/* timed out */
+				release_firmware(kim_gdata->fw_entry);
+				return err ?
-ERESTARTSYS : -ETIMEDOUT; + } + reinit_completion(&kim_gdata->kim_rcvd); + break; + case ACTION_DELAY: /* sleep */ + pr_info("sleep command in scr"); + action_ptr = &(((struct bts_action *)ptr)->data[0]); + mdelay(((struct bts_action_delay *)action_ptr)->msec); + break; + } + len = + len - (sizeof(struct bts_action) + + ((struct bts_action *)ptr)->size); + ptr = + ptr + sizeof(struct bts_action) + + ((struct bts_action *)ptr)->size; + } + /* fw download complete */ + release_firmware(kim_gdata->fw_entry); + return 0; +} + +/**********************************************************************/ +/* functions called from ST core */ +/* called from ST Core, when REG_IN_PROGRESS (registration in progress) + * can be because of + * 1. response to read local version + * 2. during send/recv's of firmware download + */ +void st_kim_recv(void *disc_data, const unsigned char *data, long count) +{ + struct st_data_s *st_gdata = (struct st_data_s *)disc_data; + struct kim_data_s *kim_gdata = st_gdata->kim_data; + + /* proceed to gather all data and distinguish read fw version response + * from other fw responses when data gathering is complete + */ + kim_int_recv(kim_gdata, data, count); + return; +} + +/* to signal completion of line discipline installation + * called from ST Core, upon tty_open + */ +void st_kim_complete(void *kim_data) +{ + struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data; + complete(&kim_gdata->ldisc_installed); +} + +/** + * st_kim_start - called from ST Core upon 1st registration + * This involves toggling the chip enable gpio, reading + * the firmware version from chip, forming the fw file name + * based on the chip version, requesting the fw, parsing it + * and perform download(send/recv). + */ +long st_kim_start(void *kim_data) +{ + long err = 0; + long retry = POR_RETRY_COUNT; + struct ti_st_plat_data *pdata; + struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data; + + pr_info(" %s", __func__); + if (kim_gdata->kim_pdev->dev.of_node) { + pr_debug("use device tree data"); + pdata = dt_pdata; + } else { + pdata = kim_gdata->kim_pdev->dev.platform_data; + } + + do { + /* platform specific enabling code here */ + if (pdata->chip_enable) + pdata->chip_enable(kim_gdata); + + /* Configure BT nShutdown to HIGH state */ + gpio_set_value(kim_gdata->nshutdown, GPIO_LOW); + mdelay(5); /* FIXME: a proper toggle */ + gpio_set_value(kim_gdata->nshutdown, GPIO_HIGH); + mdelay(100); + /* re-initialize the completion */ + reinit_completion(&kim_gdata->ldisc_installed); + /* send notification to UIM */ + kim_gdata->ldisc_install = 1; + pr_info("ldisc_install = 1"); + sysfs_notify(&kim_gdata->kim_pdev->dev.kobj, + NULL, "install"); + /* wait for ldisc to be installed */ + err = wait_for_completion_interruptible_timeout( + &kim_gdata->ldisc_installed, msecs_to_jiffies(LDISC_TIME)); + if (!err) { + /* ldisc installation timeout, + * flush uart, power cycle BT_EN */ + pr_err("ldisc installation timeout"); + err = st_kim_stop(kim_gdata); + continue; + } else { + /* ldisc installed now */ + pr_info("line discipline installed"); + err = download_firmware(kim_gdata); + if (err != 0) { + /* ldisc installed but fw download failed, + * flush uart & power cycle BT_EN */ + pr_err("download firmware failed"); + err = st_kim_stop(kim_gdata); + continue; + } else { /* on success don't retry */ + break; + } + } + } while (retry--); + return err; +} + +/** + * st_kim_stop - stop communication with chip. 
+ * This can be called from ST Core/KIM, on the- + * (a) last un-register when chip need not be powered there-after, + * (b) upon failure to either install ldisc or download firmware. + * The function is responsible to (a) notify UIM about un-installation, + * (b) flush UART if the ldisc was installed. + * (c) reset BT_EN - pull down nshutdown at the end. + * (d) invoke platform's chip disabling routine. + */ +long st_kim_stop(void *kim_data) +{ + long err = 0; + struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data; + struct ti_st_plat_data *pdata; + struct tty_struct *tty = kim_gdata->core_data->tty; + + reinit_completion(&kim_gdata->ldisc_installed); + + if (kim_gdata->kim_pdev->dev.of_node) { + pr_debug("use device tree data"); + pdata = dt_pdata; + } else + pdata = kim_gdata->kim_pdev->dev.platform_data; + + + if (tty) { /* can be called before ldisc is installed */ + /* Flush any pending characters in the driver and discipline. */ + tty_ldisc_flush(tty); + tty_driver_flush_buffer(tty); + } + + /* send uninstall notification to UIM */ + pr_info("ldisc_install = 0"); + kim_gdata->ldisc_install = 0; + sysfs_notify(&kim_gdata->kim_pdev->dev.kobj, NULL, "install"); + + /* wait for ldisc to be un-installed */ + err = wait_for_completion_interruptible_timeout( + &kim_gdata->ldisc_installed, msecs_to_jiffies(LDISC_TIME)); + if (!err) { /* timeout */ + pr_err(" timed out waiting for ldisc to be un-installed"); + err = -ETIMEDOUT; + } + + /* By default configure BT nShutdown to LOW state */ + gpio_set_value(kim_gdata->nshutdown, GPIO_LOW); + mdelay(1); + gpio_set_value(kim_gdata->nshutdown, GPIO_HIGH); + mdelay(1); + gpio_set_value(kim_gdata->nshutdown, GPIO_LOW); + + /* platform specific disable */ + if (pdata->chip_disable) + pdata->chip_disable(kim_gdata); + return err; +} + +/**********************************************************************/ +/* functions called from subsystems */ +/* called when debugfs entry is read from */ + +static int show_version(struct seq_file *s, void *unused) +{ + struct kim_data_s *kim_gdata = (struct kim_data_s *)s->private; + seq_printf(s, "%04X %d.%d.%d\n", kim_gdata->version.full, + kim_gdata->version.chip, kim_gdata->version.maj_ver, + kim_gdata->version.min_ver); + return 0; +} + +static int show_list(struct seq_file *s, void *unused) +{ + struct kim_data_s *kim_gdata = (struct kim_data_s *)s->private; + kim_st_list_protocols(kim_gdata->core_data, s); + return 0; +} + +static ssize_t show_install(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kim_data_s *kim_data = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", kim_data->ldisc_install); +} + +#ifdef DEBUG +static ssize_t store_dev_name(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kim_data_s *kim_data = dev_get_drvdata(dev); + pr_debug("storing dev name >%s<", buf); + strncpy(kim_data->dev_name, buf, count); + pr_debug("stored dev name >%s<", kim_data->dev_name); + return count; +} + +static ssize_t store_baud_rate(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kim_data_s *kim_data = dev_get_drvdata(dev); + pr_debug("storing baud rate >%s<", buf); + sscanf(buf, "%ld", &kim_data->baud_rate); + pr_debug("stored baud rate >%ld<", kim_data->baud_rate); + return count; +} +#endif /* if DEBUG */ + +static ssize_t show_dev_name(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kim_data_s *kim_data = dev_get_drvdata(dev); + return sprintf(buf, 
"%s\n", kim_data->dev_name); +} + +static ssize_t show_baud_rate(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kim_data_s *kim_data = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", kim_data->baud_rate); +} + +static ssize_t show_flow_cntrl(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kim_data_s *kim_data = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", kim_data->flow_cntrl); +} + +/* structures specific for sysfs entries */ +static struct kobj_attribute ldisc_install = +__ATTR(install, 0444, (void *)show_install, NULL); + +static struct kobj_attribute uart_dev_name = +#ifdef DEBUG /* TODO: move this to debug-fs if possible */ +__ATTR(dev_name, 0644, (void *)show_dev_name, (void *)store_dev_name); +#else +__ATTR(dev_name, 0444, (void *)show_dev_name, NULL); +#endif + +static struct kobj_attribute uart_baud_rate = +#ifdef DEBUG /* TODO: move to debugfs */ +__ATTR(baud_rate, 0644, (void *)show_baud_rate, (void *)store_baud_rate); +#else +__ATTR(baud_rate, 0444, (void *)show_baud_rate, NULL); +#endif + +static struct kobj_attribute uart_flow_cntrl = +__ATTR(flow_cntrl, 0444, (void *)show_flow_cntrl, NULL); + +static struct attribute *uim_attrs[] = { + &ldisc_install.attr, + &uart_dev_name.attr, + &uart_baud_rate.attr, + &uart_flow_cntrl.attr, + NULL, +}; + +static struct attribute_group uim_attr_grp = { + .attrs = uim_attrs, +}; + +/** + * st_kim_ref - reference the core's data + * This references the per-ST platform device in the arch/xx/ + * board-xx.c file. + * This would enable multiple such platform devices to exist + * on a given platform + */ +void st_kim_ref(struct st_data_s **core_data, int id) +{ + struct platform_device *pdev; + struct kim_data_s *kim_gdata; + /* get kim_gdata reference from platform device */ + pdev = st_get_plat_device(id); + if (!pdev) + goto err; + kim_gdata = platform_get_drvdata(pdev); + if (!kim_gdata) + goto err; + + *core_data = kim_gdata->core_data; + return; +err: + *core_data = NULL; +} + +static int kim_version_open(struct inode *i, struct file *f) +{ + return single_open(f, show_version, i->i_private); +} + +static int kim_list_open(struct inode *i, struct file *f) +{ + return single_open(f, show_list, i->i_private); +} + +static const struct file_operations version_debugfs_fops = { + /* version info */ + .open = kim_version_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +static const struct file_operations list_debugfs_fops = { + /* protocols info */ + .open = kim_list_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/**********************************************************************/ +/* functions called from platform device driver subsystem + * need to have a relevant platform device entry in the platform's + * board-*.c file + */ + +static const struct of_device_id kim_of_match[] = { +{ + .compatible = "kim", + }, + {} +}; +MODULE_DEVICE_TABLE(of, kim_of_match); + +static struct ti_st_plat_data *get_platform_data(struct device *dev) +{ + struct device_node *np = dev->of_node; + const u32 *dt_property; + int len; + + dt_pdata = kzalloc(sizeof(*dt_pdata), GFP_KERNEL); + + if (!dt_pdata) + pr_err("Can't allocate device_tree platform data\n"); + + dt_property = of_get_property(np, "dev_name", &len); + if (dt_property) + memcpy(&dt_pdata->dev_name, dt_property, len); + of_property_read_u32(np, "nshutdown_gpio", + &dt_pdata->nshutdown_gpio); + of_property_read_u32(np, "flow_cntrl", &dt_pdata->flow_cntrl); + 
+static struct dentry *kim_debugfs_dir;
+static int kim_probe(struct platform_device *pdev)
+{
+	struct kim_data_s *kim_gdata;
+	struct ti_st_plat_data *pdata;
+	int err;
+
+	if (pdev->dev.of_node)
+		pdata = get_platform_data(&pdev->dev);
+	else
+		pdata = pdev->dev.platform_data;
+
+	if (pdata == NULL) {
+		dev_err(&pdev->dev, "Platform Data is missing\n");
+		return -ENXIO;
+	}
+
+	if ((pdev->id != -1) && (pdev->id < MAX_ST_DEVICES)) {
+		/* multiple devices could exist */
+		st_kim_devices[pdev->id] = pdev;
+	} else {
+		/* platform's sure about existence of 1 device */
+		st_kim_devices[0] = pdev;
+	}
+
+	kim_gdata = kzalloc(sizeof(struct kim_data_s), GFP_ATOMIC);
+	if (!kim_gdata) {
+		pr_err("no mem to allocate");
+		return -ENOMEM;
+	}
+	platform_set_drvdata(pdev, kim_gdata);
+
+	err = st_core_init(&kim_gdata->core_data);
+	if (err != 0) {
+		pr_err(" ST core init failed");
+		err = -EIO;
+		goto err_core_init;
+	}
+	/* refer to itself */
+	kim_gdata->core_data->kim_data = kim_gdata;
+
+	/* Claim the chip enable nShutdown gpio from the system */
+	kim_gdata->nshutdown = pdata->nshutdown_gpio;
+	err = gpio_request(kim_gdata->nshutdown, "kim");
+	if (unlikely(err)) {
+		pr_err(" gpio %d request failed ", kim_gdata->nshutdown);
+		goto err_sysfs_group;
+	}
+
+	/* Configure nShutdown GPIO as output=0 */
+	err = gpio_direction_output(kim_gdata->nshutdown, 0);
+	if (unlikely(err)) {
+		pr_err(" unable to configure gpio %d", kim_gdata->nshutdown);
+		gpio_free(kim_gdata->nshutdown);
+		goto err_sysfs_group;
+	}
+	/* get reference of pdev for request_firmware
+	 */
+	kim_gdata->kim_pdev = pdev;
+	init_completion(&kim_gdata->kim_rcvd);
+	init_completion(&kim_gdata->ldisc_installed);
+
+	err = sysfs_create_group(&pdev->dev.kobj, &uim_attr_grp);
+	if (err) {
+		pr_err("failed to create sysfs entries");
+		goto err_sysfs_group;
+	}
+
+	/* copying platform data */
+	strncpy(kim_gdata->dev_name, pdata->dev_name, UART_DEV_NAME_LEN);
+	kim_gdata->flow_cntrl = pdata->flow_cntrl;
+	kim_gdata->baud_rate = pdata->baud_rate;
+	pr_info("sysfs entries created\n");
+
+	kim_debugfs_dir = debugfs_create_dir("ti-st", NULL);
+	if (!kim_debugfs_dir) {
+		pr_err(" debugfs entries creation failed ");
+		return 0;
+	}
+
+	debugfs_create_file("version", S_IRUGO, kim_debugfs_dir,
+				kim_gdata, &version_debugfs_fops);
+	debugfs_create_file("protocols", S_IRUGO, kim_debugfs_dir,
+				kim_gdata, &list_debugfs_fops);
+	return 0;
+
+err_sysfs_group:
+	st_core_exit(kim_gdata->core_data);
+
+err_core_init:
+	kfree(kim_gdata);
+
+	return err;
+}
+
+static int kim_remove(struct platform_device *pdev)
+{
+	/* free the GPIOs requested */
+	struct ti_st_plat_data *pdata;
+	struct kim_data_s *kim_gdata;
+
+	if (pdev->dev.of_node) {
+		pr_debug("use device tree data");
+		pdata = dt_pdata;
+	} else {
+		pdata = pdev->dev.platform_data;
+	}
+
+	kim_gdata = platform_get_drvdata(pdev);
+
+	/* Free the Bluetooth/FM/GPS
+	 * nShutdown gpio from the system
+	 */
+	gpio_free(pdata->nshutdown_gpio);
+	pr_info("nshutdown GPIO Freed");
+
+	debugfs_remove_recursive(kim_debugfs_dir);
+	sysfs_remove_group(&pdev->dev.kobj, &uim_attr_grp);
+	pr_info("sysfs entries removed");
+
+	kim_gdata->kim_pdev = NULL;
+	st_core_exit(kim_gdata->core_data);
+
+	kfree(kim_gdata);
+	kim_gdata = NULL;
+	kfree(dt_pdata);
+	dt_pdata = NULL;
+
+	return 0;
+}
+
+static int kim_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct ti_st_plat_data *pdata;
+
+	if (pdev->dev.of_node) {
+		pr_debug("use device tree data");
+		pdata = dt_pdata;
+	} else {
+ pdata = pdev->dev.platform_data; + } + + if (pdata->suspend) + return pdata->suspend(pdev, state); + + return 0; +} + +static int kim_resume(struct platform_device *pdev) +{ + struct ti_st_plat_data *pdata; + + if (pdev->dev.of_node) { + pr_debug("use device tree data"); + pdata = dt_pdata; + } else { + pdata = pdev->dev.platform_data; + } + + if (pdata->resume) + return pdata->resume(pdev); + + return 0; +} + +/**********************************************************************/ +/* entry point for ST KIM module, called in from ST Core */ +static struct platform_driver kim_platform_driver = { + .probe = kim_probe, + .remove = kim_remove, + .suspend = kim_suspend, + .resume = kim_resume, + .driver = { + .name = "kim", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(kim_of_match), + }, +}; + +module_platform_driver(kim_platform_driver); + +MODULE_AUTHOR("Pavan Savoy <pavan_savoy@ti.com>"); +MODULE_DESCRIPTION("Shared Transport Driver for TI BT/FM/GPS combo chips "); +MODULE_LICENSE("GPL"); diff --git a/kernel/drivers/misc/ti-st/st_ll.c b/kernel/drivers/misc/ti-st/st_ll.c new file mode 100644 index 000000000..518e1b7f2 --- /dev/null +++ b/kernel/drivers/misc/ti-st/st_ll.c @@ -0,0 +1,182 @@ +/* + * Shared Transport driver + * HCI-LL module responsible for TI proprietary HCI_LL protocol + * Copyright (C) 2009-2010 Texas Instruments + * Author: Pavan Savoy <pavan_savoy@ti.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#define pr_fmt(fmt) "(stll) :" fmt
+#include <linux/skbuff.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/ti_wilink_st.h>
+
+/**********************************************************************/
+
+/* internal functions */
+static void send_ll_cmd(struct st_data_s *st_data,
+	unsigned char cmd)
+{
+	pr_debug("%s: writing %x", __func__, cmd);
+	st_int_write(st_data, &cmd, 1);
+	return;
+}
+
+static void ll_device_want_to_sleep(struct st_data_s *st_data)
+{
+	struct kim_data_s *kim_data;
+	struct ti_st_plat_data *pdata;
+
+	pr_debug("%s", __func__);
+	/* sanity check */
+	if (st_data->ll_state != ST_LL_AWAKE)
+		pr_err("ERR hcill: ST_LL_GO_TO_SLEEP_IND "
+			"in state %ld", st_data->ll_state);
+
+	send_ll_cmd(st_data, LL_SLEEP_ACK);
+	/* update state */
+	st_data->ll_state = ST_LL_ASLEEP;
+
+	/* communicate to platform about chip asleep */
+	kim_data = st_data->kim_data;
+	if (kim_data->kim_pdev->dev.of_node) {
+		pr_debug("use device tree data");
+		pdata = dt_pdata;
+	} else {
+		pdata = kim_data->kim_pdev->dev.platform_data;
+	}
+
+	if (pdata->chip_asleep)
+		pdata->chip_asleep(NULL);
+}
+
+static void ll_device_want_to_wakeup(struct st_data_s *st_data)
+{
+	struct kim_data_s *kim_data;
+	struct ti_st_plat_data *pdata;
+
+	/* different actions in different states */
+	switch (st_data->ll_state) {
+	case ST_LL_ASLEEP:
+		send_ll_cmd(st_data, LL_WAKE_UP_ACK);	/* send wake_ack */
+		break;
+	case ST_LL_ASLEEP_TO_AWAKE:
+		/* duplicate wake_ind */
+		pr_err("duplicate wake_ind while waiting for Wake ack");
+		break;
+	case ST_LL_AWAKE:
+		/* duplicate wake_ind */
+		pr_err("duplicate wake_ind already AWAKE");
+		break;
+	case ST_LL_AWAKE_TO_ASLEEP:
+		/* duplicate wake_ind */
+		pr_err("duplicate wake_ind");
+		break;
+	}
+	/* update state */
+	st_data->ll_state = ST_LL_AWAKE;
+
+	/* communicate to platform about chip wakeup */
+	kim_data = st_data->kim_data;
+	if (kim_data->kim_pdev->dev.of_node) {
+		pr_debug("use device tree data");
+		pdata = dt_pdata;
+	} else {
+		pdata = kim_data->kim_pdev->dev.platform_data;
+	}
+
+	if (pdata->chip_awake)
+		pdata->chip_awake(NULL);
+}
+
+/**********************************************************************/
+/* functions invoked by ST Core */
+
+/* called when ST Core wants to
+ * enable ST LL */
+void st_ll_enable(struct st_data_s *ll)
+{
+	ll->ll_state = ST_LL_AWAKE;
+}
+
+/* called when ST Core / local module wants to
+ * disable ST LL */
+void st_ll_disable(struct st_data_s *ll)
+{
+	ll->ll_state = ST_LL_INVALID;
+}
+
+/* called when ST Core wants to update the state */
+void st_ll_wakeup(struct st_data_s *ll)
+{
+	if (likely(ll->ll_state != ST_LL_AWAKE)) {
+		send_ll_cmd(ll, LL_WAKE_UP_IND);	/* WAKE_IND */
+		ll->ll_state = ST_LL_ASLEEP_TO_AWAKE;
+	} else {
+		/* don't send the duplicate wake_indication */
+		pr_err(" Chip already AWAKE ");
+	}
+}
+
+/* called when ST Core wants the state */
+unsigned long st_ll_getstate(struct st_data_s *ll)
+{
+	pr_debug(" returning state %ld", ll->ll_state);
+	return ll->ll_state;
+}
+
+/* called from ST Core, when a PM related packet arrives */
+unsigned long st_ll_sleep_state(struct st_data_s *st_data,
+	unsigned char cmd)
+{
+	switch (cmd) {
+	case LL_SLEEP_IND:	/* sleep ind */
+		pr_debug("sleep indication recvd");
+		ll_device_want_to_sleep(st_data);
+		break;
+	case LL_SLEEP_ACK:
/* sleep ack */ + pr_err("sleep ack rcvd: host shouldn't"); + break; + case LL_WAKE_UP_IND: /* wake ind */ + pr_debug("wake indication recvd"); + ll_device_want_to_wakeup(st_data); + break; + case LL_WAKE_UP_ACK: /* wake ack */ + pr_debug("wake ack rcvd"); + st_data->ll_state = ST_LL_AWAKE; + break; + default: + pr_err(" unknown input/state "); + return -EINVAL; + } + return 0; +} + +/* Called from ST CORE to initialize ST LL */ +long st_ll_init(struct st_data_s *ll) +{ + /* set state to invalid */ + ll->ll_state = ST_LL_INVALID; + return 0; +} + +/* Called from ST CORE to de-initialize ST LL */ +long st_ll_deinit(struct st_data_s *ll) +{ + return 0; +} diff --git a/kernel/drivers/misc/ti_dac7512.c b/kernel/drivers/misc/ti_dac7512.c new file mode 100644 index 000000000..cb0289b44 --- /dev/null +++ b/kernel/drivers/misc/ti_dac7512.c @@ -0,0 +1,104 @@ +/* + * dac7512.c - Linux kernel module for + * Texas Instruments DAC7512 + * + * Copyright (c) 2009 Daniel Mack <daniel@caiaq.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/module.h> +#include <linux/spi/spi.h> +#include <linux/of.h> + +static ssize_t dac7512_store_val(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct spi_device *spi = to_spi_device(dev); + unsigned char tmp[2]; + unsigned long val; + int ret; + + ret = kstrtoul(buf, 10, &val); + if (ret) + return ret; + + tmp[0] = val >> 8; + tmp[1] = val & 0xff; + spi_write(spi, tmp, sizeof(tmp)); + return count; +} + +static DEVICE_ATTR(value, S_IWUSR, NULL, dac7512_store_val); + +static struct attribute *dac7512_attributes[] = { + &dev_attr_value.attr, + NULL +}; + +static const struct attribute_group dac7512_attr_group = { + .attrs = dac7512_attributes, +}; + +static int dac7512_probe(struct spi_device *spi) +{ + int ret; + + spi->bits_per_word = 8; + spi->mode = SPI_MODE_0; + ret = spi_setup(spi); + if (ret < 0) + return ret; + + return sysfs_create_group(&spi->dev.kobj, &dac7512_attr_group); +} + +static int dac7512_remove(struct spi_device *spi) +{ + sysfs_remove_group(&spi->dev.kobj, &dac7512_attr_group); + return 0; +} + +static const struct spi_device_id dac7512_id_table[] = { + { "dac7512", 0 }, + { } +}; +MODULE_DEVICE_TABLE(spi, dac7512_id_table); + +#ifdef CONFIG_OF +static const struct of_device_id dac7512_of_match[] = { + { .compatible = "ti,dac7512", }, + { } +}; +MODULE_DEVICE_TABLE(of, dac7512_of_match); +#endif + +static struct spi_driver dac7512_driver = { + .driver = { + .name = "dac7512", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(dac7512_of_match), + }, + .probe = dac7512_probe, + .remove = dac7512_remove, + .id_table = dac7512_id_table, +}; + +module_spi_driver(dac7512_driver); + +MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>"); +MODULE_DESCRIPTION("DAC7512 16-bit DAC"); +MODULE_LICENSE("GPL v2"); diff --git 
a/kernel/drivers/misc/tifm_7xx1.c b/kernel/drivers/misc/tifm_7xx1.c new file mode 100644 index 000000000..a37a42f67 --- /dev/null +++ b/kernel/drivers/misc/tifm_7xx1.c @@ -0,0 +1,442 @@ +/* + * tifm_7xx1.c - TI FlashMedia driver + * + * Copyright (C) 2006 Alex Dubov <oakad@yahoo.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/tifm.h> +#include <linux/dma-mapping.h> +#include <linux/module.h> + +#define DRIVER_NAME "tifm_7xx1" +#define DRIVER_VERSION "0.8" + +#define TIFM_IRQ_ENABLE 0x80000000 +#define TIFM_IRQ_SOCKMASK(x) (x) +#define TIFM_IRQ_CARDMASK(x) ((x) << 8) +#define TIFM_IRQ_FIFOMASK(x) ((x) << 16) +#define TIFM_IRQ_SETALL 0xffffffff + +static void tifm_7xx1_dummy_eject(struct tifm_adapter *fm, + struct tifm_dev *sock) +{ +} + +static void tifm_7xx1_eject(struct tifm_adapter *fm, struct tifm_dev *sock) +{ + unsigned long flags; + + spin_lock_irqsave(&fm->lock, flags); + fm->socket_change_set |= 1 << sock->socket_id; + tifm_queue_work(&fm->media_switcher); + spin_unlock_irqrestore(&fm->lock, flags); +} + +static irqreturn_t tifm_7xx1_isr(int irq, void *dev_id) +{ + struct tifm_adapter *fm = dev_id; + struct tifm_dev *sock; + unsigned int irq_status, cnt; + + spin_lock(&fm->lock); + irq_status = readl(fm->addr + FM_INTERRUPT_STATUS); + if (irq_status == 0 || irq_status == (~0)) { + spin_unlock(&fm->lock); + return IRQ_NONE; + } + + if (irq_status & TIFM_IRQ_ENABLE) { + writel(TIFM_IRQ_ENABLE, fm->addr + FM_CLEAR_INTERRUPT_ENABLE); + + for (cnt = 0; cnt < fm->num_sockets; cnt++) { + sock = fm->sockets[cnt]; + if (sock) { + if ((irq_status >> cnt) & TIFM_IRQ_FIFOMASK(1)) + sock->data_event(sock); + if ((irq_status >> cnt) & TIFM_IRQ_CARDMASK(1)) + sock->card_event(sock); + } + } + + fm->socket_change_set |= irq_status + & ((1 << fm->num_sockets) - 1); + } + writel(irq_status, fm->addr + FM_INTERRUPT_STATUS); + + if (fm->finish_me) + complete_all(fm->finish_me); + else if (!fm->socket_change_set) + writel(TIFM_IRQ_ENABLE, fm->addr + FM_SET_INTERRUPT_ENABLE); + else + tifm_queue_work(&fm->media_switcher); + + spin_unlock(&fm->lock); + return IRQ_HANDLED; +} + +static unsigned char tifm_7xx1_toggle_sock_power(char __iomem *sock_addr) +{ + unsigned int s_state; + int cnt; + + writel(0x0e00, sock_addr + SOCK_CONTROL); + + for (cnt = 16; cnt <= 256; cnt <<= 1) { + if (!(TIFM_SOCK_STATE_POWERED + & readl(sock_addr + SOCK_PRESENT_STATE))) + break; + + msleep(cnt); + } + + s_state = readl(sock_addr + SOCK_PRESENT_STATE); + if (!(TIFM_SOCK_STATE_OCCUPIED & s_state)) + return 0; + + writel(readl(sock_addr + SOCK_CONTROL) | TIFM_CTRL_LED, + sock_addr + SOCK_CONTROL); + + /* xd needs some extra time before power on */ + if (((readl(sock_addr + SOCK_PRESENT_STATE) >> 4) & 7) + == TIFM_TYPE_XD) + msleep(40); + + writel((s_state & TIFM_CTRL_POWER_MASK) | 0x0c00, + sock_addr + SOCK_CONTROL); + /* wait for power to stabilize */ + msleep(20); + for (cnt = 16; cnt <= 256; cnt <<= 1) { + if ((TIFM_SOCK_STATE_POWERED + & readl(sock_addr + SOCK_PRESENT_STATE))) + break; + + msleep(cnt); + } + + writel(readl(sock_addr + SOCK_CONTROL) & (~TIFM_CTRL_LED), + sock_addr + SOCK_CONTROL); + + return (readl(sock_addr + SOCK_PRESENT_STATE) >> 4) & 7; +} + +inline static void tifm_7xx1_sock_power_off(char __iomem *sock_addr) +{ + writel((~TIFM_CTRL_POWER_MASK) & readl(sock_addr + SOCK_CONTROL), + sock_addr + SOCK_CONTROL); +} + +inline static 
char __iomem * +tifm_7xx1_sock_addr(char __iomem *base_addr, unsigned int sock_num) +{ + return base_addr + ((sock_num + 1) << 10); +} + +static void tifm_7xx1_switch_media(struct work_struct *work) +{ + struct tifm_adapter *fm = container_of(work, struct tifm_adapter, + media_switcher); + struct tifm_dev *sock; + char __iomem *sock_addr; + unsigned long flags; + unsigned char media_id; + unsigned int socket_change_set, cnt; + + spin_lock_irqsave(&fm->lock, flags); + socket_change_set = fm->socket_change_set; + fm->socket_change_set = 0; + + dev_dbg(fm->dev.parent, "checking media set %x\n", + socket_change_set); + + if (!socket_change_set) { + spin_unlock_irqrestore(&fm->lock, flags); + return; + } + + for (cnt = 0; cnt < fm->num_sockets; cnt++) { + if (!(socket_change_set & (1 << cnt))) + continue; + sock = fm->sockets[cnt]; + if (sock) { + printk(KERN_INFO + "%s : demand removing card from socket %u:%u\n", + dev_name(&fm->dev), fm->id, cnt); + fm->sockets[cnt] = NULL; + sock_addr = sock->addr; + spin_unlock_irqrestore(&fm->lock, flags); + device_unregister(&sock->dev); + spin_lock_irqsave(&fm->lock, flags); + tifm_7xx1_sock_power_off(sock_addr); + writel(0x0e00, sock_addr + SOCK_CONTROL); + } + + spin_unlock_irqrestore(&fm->lock, flags); + + media_id = tifm_7xx1_toggle_sock_power( + tifm_7xx1_sock_addr(fm->addr, cnt)); + + // tifm_alloc_device will check if media_id is valid + sock = tifm_alloc_device(fm, cnt, media_id); + if (sock) { + sock->addr = tifm_7xx1_sock_addr(fm->addr, cnt); + + if (!device_register(&sock->dev)) { + spin_lock_irqsave(&fm->lock, flags); + if (!fm->sockets[cnt]) { + fm->sockets[cnt] = sock; + sock = NULL; + } + spin_unlock_irqrestore(&fm->lock, flags); + } + if (sock) + tifm_free_device(&sock->dev); + } + spin_lock_irqsave(&fm->lock, flags); + } + + writel(TIFM_IRQ_FIFOMASK(socket_change_set) + | TIFM_IRQ_CARDMASK(socket_change_set), + fm->addr + FM_CLEAR_INTERRUPT_ENABLE); + + writel(TIFM_IRQ_FIFOMASK(socket_change_set) + | TIFM_IRQ_CARDMASK(socket_change_set), + fm->addr + FM_SET_INTERRUPT_ENABLE); + + writel(TIFM_IRQ_ENABLE, fm->addr + FM_SET_INTERRUPT_ENABLE); + spin_unlock_irqrestore(&fm->lock, flags); +} + +#ifdef CONFIG_PM + +static int tifm_7xx1_suspend(struct pci_dev *dev, pm_message_t state) +{ + struct tifm_adapter *fm = pci_get_drvdata(dev); + int cnt; + + dev_dbg(&dev->dev, "suspending host\n"); + + for (cnt = 0; cnt < fm->num_sockets; cnt++) { + if (fm->sockets[cnt]) + tifm_7xx1_sock_power_off(fm->sockets[cnt]->addr); + } + + pci_save_state(dev); + pci_enable_wake(dev, pci_choose_state(dev, state), 0); + pci_disable_device(dev); + pci_set_power_state(dev, pci_choose_state(dev, state)); + return 0; +} + +static int tifm_7xx1_resume(struct pci_dev *dev) +{ + struct tifm_adapter *fm = pci_get_drvdata(dev); + int rc; + unsigned long timeout; + unsigned int good_sockets = 0, bad_sockets = 0; + unsigned long flags; + unsigned char new_ids[fm->num_sockets]; + DECLARE_COMPLETION_ONSTACK(finish_resume); + + pci_set_power_state(dev, PCI_D0); + pci_restore_state(dev); + rc = pci_enable_device(dev); + if (rc) + return rc; + pci_set_master(dev); + + dev_dbg(&dev->dev, "resuming host\n"); + + for (rc = 0; rc < fm->num_sockets; rc++) + new_ids[rc] = tifm_7xx1_toggle_sock_power( + tifm_7xx1_sock_addr(fm->addr, rc)); + spin_lock_irqsave(&fm->lock, flags); + for (rc = 0; rc < fm->num_sockets; rc++) { + if (fm->sockets[rc]) { + if (fm->sockets[rc]->type == new_ids[rc]) + good_sockets |= 1 << rc; + else + bad_sockets |= 1 << rc; + } + } + + writel(TIFM_IRQ_ENABLE | 
TIFM_IRQ_SOCKMASK((1 << fm->num_sockets) - 1), + fm->addr + FM_SET_INTERRUPT_ENABLE); + dev_dbg(&dev->dev, "change sets on resume: good %x, bad %x\n", + good_sockets, bad_sockets); + + fm->socket_change_set = 0; + if (good_sockets) { + fm->finish_me = &finish_resume; + spin_unlock_irqrestore(&fm->lock, flags); + timeout = wait_for_completion_timeout(&finish_resume, HZ); + dev_dbg(&dev->dev, "wait returned %lu\n", timeout); + writel(TIFM_IRQ_FIFOMASK(good_sockets) + | TIFM_IRQ_CARDMASK(good_sockets), + fm->addr + FM_CLEAR_INTERRUPT_ENABLE); + writel(TIFM_IRQ_FIFOMASK(good_sockets) + | TIFM_IRQ_CARDMASK(good_sockets), + fm->addr + FM_SET_INTERRUPT_ENABLE); + spin_lock_irqsave(&fm->lock, flags); + fm->finish_me = NULL; + fm->socket_change_set ^= good_sockets & fm->socket_change_set; + } + + fm->socket_change_set |= bad_sockets; + if (fm->socket_change_set) + tifm_queue_work(&fm->media_switcher); + + spin_unlock_irqrestore(&fm->lock, flags); + writel(TIFM_IRQ_ENABLE, + fm->addr + FM_SET_INTERRUPT_ENABLE); + + return 0; +} + +#else + +#define tifm_7xx1_suspend NULL +#define tifm_7xx1_resume NULL + +#endif /* CONFIG_PM */ + +static int tifm_7xx1_dummy_has_ms_pif(struct tifm_adapter *fm, + struct tifm_dev *sock) +{ + return 0; +} + +static int tifm_7xx1_has_ms_pif(struct tifm_adapter *fm, struct tifm_dev *sock) +{ + if (((fm->num_sockets == 4) && (sock->socket_id == 2)) + || ((fm->num_sockets == 2) && (sock->socket_id == 0))) + return 1; + + return 0; +} + +static int tifm_7xx1_probe(struct pci_dev *dev, + const struct pci_device_id *dev_id) +{ + struct tifm_adapter *fm; + int pci_dev_busy = 0; + int rc; + + rc = pci_set_dma_mask(dev, DMA_BIT_MASK(32)); + if (rc) + return rc; + + rc = pci_enable_device(dev); + if (rc) + return rc; + + pci_set_master(dev); + + rc = pci_request_regions(dev, DRIVER_NAME); + if (rc) { + pci_dev_busy = 1; + goto err_out; + } + + pci_intx(dev, 1); + + fm = tifm_alloc_adapter(dev->device == PCI_DEVICE_ID_TI_XX21_XX11_FM + ? 
4 : 2, &dev->dev); + if (!fm) { + rc = -ENOMEM; + goto err_out_int; + } + + INIT_WORK(&fm->media_switcher, tifm_7xx1_switch_media); + fm->eject = tifm_7xx1_eject; + fm->has_ms_pif = tifm_7xx1_has_ms_pif; + pci_set_drvdata(dev, fm); + + fm->addr = pci_ioremap_bar(dev, 0); + if (!fm->addr) { + rc = -ENODEV; + goto err_out_free; + } + + rc = request_irq(dev->irq, tifm_7xx1_isr, IRQF_SHARED, DRIVER_NAME, fm); + if (rc) + goto err_out_unmap; + + rc = tifm_add_adapter(fm); + if (rc) + goto err_out_irq; + + writel(TIFM_IRQ_ENABLE | TIFM_IRQ_SOCKMASK((1 << fm->num_sockets) - 1), + fm->addr + FM_CLEAR_INTERRUPT_ENABLE); + writel(TIFM_IRQ_ENABLE | TIFM_IRQ_SOCKMASK((1 << fm->num_sockets) - 1), + fm->addr + FM_SET_INTERRUPT_ENABLE); + return 0; + +err_out_irq: + free_irq(dev->irq, fm); +err_out_unmap: + iounmap(fm->addr); +err_out_free: + tifm_free_adapter(fm); +err_out_int: + pci_intx(dev, 0); + pci_release_regions(dev); +err_out: + if (!pci_dev_busy) + pci_disable_device(dev); + return rc; +} + +static void tifm_7xx1_remove(struct pci_dev *dev) +{ + struct tifm_adapter *fm = pci_get_drvdata(dev); + int cnt; + + fm->eject = tifm_7xx1_dummy_eject; + fm->has_ms_pif = tifm_7xx1_dummy_has_ms_pif; + writel(TIFM_IRQ_SETALL, fm->addr + FM_CLEAR_INTERRUPT_ENABLE); + mmiowb(); + free_irq(dev->irq, fm); + + tifm_remove_adapter(fm); + + for (cnt = 0; cnt < fm->num_sockets; cnt++) + tifm_7xx1_sock_power_off(tifm_7xx1_sock_addr(fm->addr, cnt)); + + iounmap(fm->addr); + pci_intx(dev, 0); + pci_release_regions(dev); + + pci_disable_device(dev); + tifm_free_adapter(fm); +} + +static struct pci_device_id tifm_7xx1_pci_tbl [] = { + { PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XX21_XX11_FM, PCI_ANY_ID, + PCI_ANY_ID, 0, 0, 0 }, /* xx21 - the one I have */ + { PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XX12_FM, PCI_ANY_ID, + PCI_ANY_ID, 0, 0, 0 }, + { PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XX20_FM, PCI_ANY_ID, + PCI_ANY_ID, 0, 0, 0 }, + { } +}; + +static struct pci_driver tifm_7xx1_driver = { + .name = DRIVER_NAME, + .id_table = tifm_7xx1_pci_tbl, + .probe = tifm_7xx1_probe, + .remove = tifm_7xx1_remove, + .suspend = tifm_7xx1_suspend, + .resume = tifm_7xx1_resume, +}; + +module_pci_driver(tifm_7xx1_driver); +MODULE_AUTHOR("Alex Dubov"); +MODULE_DESCRIPTION("TI FlashMedia host driver"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, tifm_7xx1_pci_tbl); +MODULE_VERSION(DRIVER_VERSION); diff --git a/kernel/drivers/misc/tifm_core.c b/kernel/drivers/misc/tifm_core.c new file mode 100644 index 000000000..a511b2a71 --- /dev/null +++ b/kernel/drivers/misc/tifm_core.c @@ -0,0 +1,371 @@ +/* + * tifm_core.c - TI FlashMedia driver + * + * Copyright (C) 2006 Alex Dubov <oakad@yahoo.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include <linux/tifm.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/idr.h> +#include <linux/module.h> + +#define DRIVER_NAME "tifm_core" +#define DRIVER_VERSION "0.8" + +static struct workqueue_struct *workqueue; +static DEFINE_IDR(tifm_adapter_idr); +static DEFINE_SPINLOCK(tifm_adapter_lock); + +static const char *tifm_media_type_name(unsigned char type, unsigned char nt) +{ + const char *card_type_name[3][3] = { + { "SmartMedia/xD", "MemoryStick", "MMC/SD" }, + { "XD", "MS", "SD"}, + { "xd", "ms", "sd"} + }; + + if (nt > 2 || type < 1 || type > 3) + return NULL; + return card_type_name[nt][type - 1]; +} + +static int tifm_dev_match(struct tifm_dev *sock, struct tifm_device_id *id) +{ + if (sock->type == id->type) + return 1; + return 0; +} + +static int tifm_bus_match(struct device *dev, struct device_driver *drv) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + struct tifm_driver *fm_drv = container_of(drv, struct tifm_driver, + driver); + struct tifm_device_id *ids = fm_drv->id_table; + + if (ids) { + while (ids->type) { + if (tifm_dev_match(sock, ids)) + return 1; + ++ids; + } + } + return 0; +} + +static int tifm_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + + if (add_uevent_var(env, "TIFM_CARD_TYPE=%s", tifm_media_type_name(sock->type, 1))) + return -ENOMEM; + + return 0; +} + +static int tifm_device_probe(struct device *dev) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver, + driver); + int rc = -ENODEV; + + get_device(dev); + if (dev->driver && drv->probe) { + rc = drv->probe(sock); + if (!rc) + return 0; + } + put_device(dev); + return rc; +} + +static void tifm_dummy_event(struct tifm_dev *sock) +{ + return; +} + +static int tifm_device_remove(struct device *dev) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver, + driver); + + if (dev->driver && drv->remove) { + sock->card_event = tifm_dummy_event; + sock->data_event = tifm_dummy_event; + drv->remove(sock); + sock->dev.driver = NULL; + } + + put_device(dev); + return 0; +} + +#ifdef CONFIG_PM + +static int tifm_device_suspend(struct device *dev, pm_message_t state) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver, + driver); + + if (dev->driver && drv->suspend) + return drv->suspend(sock, state); + return 0; +} + +static int tifm_device_resume(struct device *dev) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver, + driver); + + if (dev->driver && drv->resume) + return drv->resume(sock); + return 0; +} + +#else + +#define tifm_device_suspend NULL +#define tifm_device_resume NULL + +#endif /* CONFIG_PM */ + +static ssize_t type_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + return sprintf(buf, "%x", sock->type); +} +static DEVICE_ATTR_RO(type); + +static struct attribute *tifm_dev_attrs[] = { + &dev_attr_type.attr, + NULL, +}; +ATTRIBUTE_GROUPS(tifm_dev); + +static struct bus_type tifm_bus_type = { + .name = "tifm", + .dev_groups = tifm_dev_groups, + .match = tifm_bus_match, + .uevent = tifm_uevent, + .probe = tifm_device_probe, + 
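/* probe/remove wrap the socket driver's callbacks in get/put_device */
+	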
.remove = tifm_device_remove, + .suspend = tifm_device_suspend, + .resume = tifm_device_resume +}; + +static void tifm_free(struct device *dev) +{ + struct tifm_adapter *fm = container_of(dev, struct tifm_adapter, dev); + + kfree(fm); +} + +static struct class tifm_adapter_class = { + .name = "tifm_adapter", + .dev_release = tifm_free +}; + +struct tifm_adapter *tifm_alloc_adapter(unsigned int num_sockets, + struct device *dev) +{ + struct tifm_adapter *fm; + + fm = kzalloc(sizeof(struct tifm_adapter) + + sizeof(struct tifm_dev*) * num_sockets, GFP_KERNEL); + if (fm) { + fm->dev.class = &tifm_adapter_class; + fm->dev.parent = dev; + device_initialize(&fm->dev); + spin_lock_init(&fm->lock); + fm->num_sockets = num_sockets; + } + return fm; +} +EXPORT_SYMBOL(tifm_alloc_adapter); + +int tifm_add_adapter(struct tifm_adapter *fm) +{ + int rc; + + idr_preload(GFP_KERNEL); + spin_lock(&tifm_adapter_lock); + rc = idr_alloc(&tifm_adapter_idr, fm, 0, 0, GFP_NOWAIT); + if (rc >= 0) + fm->id = rc; + spin_unlock(&tifm_adapter_lock); + idr_preload_end(); + if (rc < 0) + return rc; + + dev_set_name(&fm->dev, "tifm%u", fm->id); + rc = device_add(&fm->dev); + if (rc) { + spin_lock(&tifm_adapter_lock); + idr_remove(&tifm_adapter_idr, fm->id); + spin_unlock(&tifm_adapter_lock); + } + + return rc; +} +EXPORT_SYMBOL(tifm_add_adapter); + +void tifm_remove_adapter(struct tifm_adapter *fm) +{ + unsigned int cnt; + + flush_workqueue(workqueue); + for (cnt = 0; cnt < fm->num_sockets; ++cnt) { + if (fm->sockets[cnt]) + device_unregister(&fm->sockets[cnt]->dev); + } + + spin_lock(&tifm_adapter_lock); + idr_remove(&tifm_adapter_idr, fm->id); + spin_unlock(&tifm_adapter_lock); + device_del(&fm->dev); +} +EXPORT_SYMBOL(tifm_remove_adapter); + +void tifm_free_adapter(struct tifm_adapter *fm) +{ + put_device(&fm->dev); +} +EXPORT_SYMBOL(tifm_free_adapter); + +void tifm_free_device(struct device *dev) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + kfree(sock); +} +EXPORT_SYMBOL(tifm_free_device); + +struct tifm_dev *tifm_alloc_device(struct tifm_adapter *fm, unsigned int id, + unsigned char type) +{ + struct tifm_dev *sock = NULL; + + if (!tifm_media_type_name(type, 0)) + return sock; + + sock = kzalloc(sizeof(struct tifm_dev), GFP_KERNEL); + if (sock) { + spin_lock_init(&sock->lock); + sock->type = type; + sock->socket_id = id; + sock->card_event = tifm_dummy_event; + sock->data_event = tifm_dummy_event; + + sock->dev.parent = fm->dev.parent; + sock->dev.bus = &tifm_bus_type; + sock->dev.dma_mask = fm->dev.parent->dma_mask; + sock->dev.release = tifm_free_device; + + dev_set_name(&sock->dev, "tifm_%s%u:%u", + tifm_media_type_name(type, 2), fm->id, id); + printk(KERN_INFO DRIVER_NAME + ": %s card detected in socket %u:%u\n", + tifm_media_type_name(type, 0), fm->id, id); + } + return sock; +} +EXPORT_SYMBOL(tifm_alloc_device); + +void tifm_eject(struct tifm_dev *sock) +{ + struct tifm_adapter *fm = dev_get_drvdata(sock->dev.parent); + fm->eject(fm, sock); +} +EXPORT_SYMBOL(tifm_eject); + +int tifm_has_ms_pif(struct tifm_dev *sock) +{ + struct tifm_adapter *fm = dev_get_drvdata(sock->dev.parent); + return fm->has_ms_pif(fm, sock); +} +EXPORT_SYMBOL(tifm_has_ms_pif); + +int tifm_map_sg(struct tifm_dev *sock, struct scatterlist *sg, int nents, + int direction) +{ + return pci_map_sg(to_pci_dev(sock->dev.parent), sg, nents, direction); +} +EXPORT_SYMBOL(tifm_map_sg); + +void tifm_unmap_sg(struct tifm_dev *sock, struct scatterlist *sg, int nents, + int direction) +{ + 
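/*
+	 * Like tifm_map_sg() above, this simply forwards to the PCI DMA
+	 * API; the adapter's parent device is the underlying PCI device,
+	 * so socket drivers never touch the PCI layer directly.
+	 */
+	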
pci_unmap_sg(to_pci_dev(sock->dev.parent), sg, nents, direction);
+}
+EXPORT_SYMBOL(tifm_unmap_sg);
+
+void tifm_queue_work(struct work_struct *work)
+{
+	queue_work(workqueue, work);
+}
+EXPORT_SYMBOL(tifm_queue_work);
+
+int tifm_register_driver(struct tifm_driver *drv)
+{
+	drv->driver.bus = &tifm_bus_type;
+
+	return driver_register(&drv->driver);
+}
+EXPORT_SYMBOL(tifm_register_driver);
+
+void tifm_unregister_driver(struct tifm_driver *drv)
+{
+	driver_unregister(&drv->driver);
+}
+EXPORT_SYMBOL(tifm_unregister_driver);
+
+static int __init tifm_init(void)
+{
+	int rc;
+
+	workqueue = create_freezable_workqueue("tifm");
+	if (!workqueue)
+		return -ENOMEM;
+
+	rc = bus_register(&tifm_bus_type);
+	if (rc)
+		goto err_out_wq;
+
+	rc = class_register(&tifm_adapter_class);
+	if (!rc)
+		return 0;
+
+	bus_unregister(&tifm_bus_type);
+
+err_out_wq:
+	destroy_workqueue(workqueue);
+
+	return rc;
+}
+
+static void __exit tifm_exit(void)
+{
+	class_unregister(&tifm_adapter_class);
+	bus_unregister(&tifm_bus_type);
+	destroy_workqueue(workqueue);
+}
+
+subsys_initcall(tifm_init);
+module_exit(tifm_exit);
+
+MODULE_AUTHOR("Alex Dubov");
+MODULE_DESCRIPTION("TI FlashMedia core driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRIVER_VERSION);
diff --git a/kernel/drivers/misc/tsl2550.c b/kernel/drivers/misc/tsl2550.c
new file mode 100644
index 000000000..b00335652
--- /dev/null
+++ b/kernel/drivers/misc/tsl2550.c
@@ -0,0 +1,462 @@
+/*
+ * tsl2550.c - Linux kernel modules for ambient light sensor
+ *
+ * Copyright (C) 2007 Rodolfo Giometti <giometti@linux.it>
+ * Copyright (C) 2007 Eurotech S.p.A. <info@eurotech.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/i2c.h> +#include <linux/mutex.h> + +#define TSL2550_DRV_NAME "tsl2550" +#define DRIVER_VERSION "1.2" + +/* + * Defines + */ + +#define TSL2550_POWER_DOWN 0x00 +#define TSL2550_POWER_UP 0x03 +#define TSL2550_STANDARD_RANGE 0x18 +#define TSL2550_EXTENDED_RANGE 0x1d +#define TSL2550_READ_ADC0 0x43 +#define TSL2550_READ_ADC1 0x83 + +/* + * Structs + */ + +struct tsl2550_data { + struct i2c_client *client; + struct mutex update_lock; + + unsigned int power_state:1; + unsigned int operating_mode:1; +}; + +/* + * Global data + */ + +static const u8 TSL2550_MODE_RANGE[2] = { + TSL2550_STANDARD_RANGE, TSL2550_EXTENDED_RANGE, +}; + +/* + * Management functions + */ + +static int tsl2550_set_operating_mode(struct i2c_client *client, int mode) +{ + struct tsl2550_data *data = i2c_get_clientdata(client); + + int ret = i2c_smbus_write_byte(client, TSL2550_MODE_RANGE[mode]); + + data->operating_mode = mode; + + return ret; +} + +static int tsl2550_set_power_state(struct i2c_client *client, int state) +{ + struct tsl2550_data *data = i2c_get_clientdata(client); + int ret; + + if (state == 0) + ret = i2c_smbus_write_byte(client, TSL2550_POWER_DOWN); + else { + ret = i2c_smbus_write_byte(client, TSL2550_POWER_UP); + + /* On power up we should reset operating mode also... */ + tsl2550_set_operating_mode(client, data->operating_mode); + } + + data->power_state = state; + + return ret; +} + +static int tsl2550_get_adc_value(struct i2c_client *client, u8 cmd) +{ + int ret; + + ret = i2c_smbus_read_byte_data(client, cmd); + if (ret < 0) + return ret; + if (!(ret & 0x80)) + return -EAGAIN; + return ret & 0x7f; /* remove the "valid" bit */ +} + +/* + * LUX calculation + */ + +#define TSL2550_MAX_LUX 1846 + +static const u8 ratio_lut[] = { + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 98, 98, 98, 98, 98, + 98, 98, 97, 97, 97, 97, 97, 96, + 96, 96, 96, 95, 95, 95, 94, 94, + 93, 93, 93, 92, 92, 91, 91, 90, + 89, 89, 88, 87, 87, 86, 85, 84, + 83, 82, 81, 80, 79, 78, 77, 75, + 74, 73, 71, 69, 68, 66, 64, 62, + 60, 58, 56, 54, 52, 49, 47, 44, + 42, 41, 40, 40, 39, 39, 38, 38, + 37, 37, 37, 36, 36, 36, 35, 35, + 35, 35, 34, 34, 34, 34, 33, 33, + 33, 33, 32, 32, 32, 32, 32, 31, + 31, 31, 31, 31, 30, 30, 30, 30, + 30, +}; + +static const u16 count_lut[] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, + 49, 53, 57, 61, 65, 69, 73, 77, + 81, 85, 89, 93, 97, 101, 105, 109, + 115, 123, 131, 139, 147, 155, 163, 171, + 179, 187, 195, 203, 211, 219, 227, 235, + 247, 263, 279, 295, 311, 327, 343, 359, + 375, 391, 407, 423, 439, 455, 471, 487, + 511, 543, 575, 607, 639, 671, 703, 735, + 767, 799, 831, 863, 895, 927, 959, 991, + 1039, 1103, 1167, 1231, 1295, 1359, 1423, 1487, + 1551, 1615, 1679, 1743, 1807, 1871, 1935, 1999, + 2095, 2223, 2351, 2479, 2607, 2735, 2863, 2991, + 3119, 3247, 3375, 3503, 3631, 3759, 3887, 4015, +}; + +/* + * This function is described into Taos TSL2550 Designer's Notebook + * pages 2, 3. + */ +static int tsl2550_calculate_lux(u8 ch0, u8 ch1) +{ + unsigned int lux; + + /* Look up count from channel values */ + u16 c0 = count_lut[ch0]; + u16 c1 = count_lut[ch1]; + + /* + * Calculate ratio. 
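+	 * With c0 and c1 the linearized counts, the ratio is
+	 * r = (128 * c1) / c0 (meaningful only while c1 <= c0), and the
+	 * result computed below is lux = (c0 - c1) * ratio_lut[r] / 256.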
+ * Note: the "128" is a scaling factor + */ + u8 r = 128; + + /* Avoid division by 0 and count 1 cannot be greater than count 0 */ + if (c1 <= c0) + if (c0) { + r = c1 * 128 / c0; + + /* Calculate LUX */ + lux = ((c0 - c1) * ratio_lut[r]) / 256; + } else + lux = 0; + else + return -EAGAIN; + + /* LUX range check */ + return lux > TSL2550_MAX_LUX ? TSL2550_MAX_LUX : lux; +} + +/* + * SysFS support + */ + +static ssize_t tsl2550_show_power_state(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct tsl2550_data *data = i2c_get_clientdata(to_i2c_client(dev)); + + return sprintf(buf, "%u\n", data->power_state); +} + +static ssize_t tsl2550_store_power_state(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct tsl2550_data *data = i2c_get_clientdata(client); + unsigned long val = simple_strtoul(buf, NULL, 10); + int ret; + + if (val > 1) + return -EINVAL; + + mutex_lock(&data->update_lock); + ret = tsl2550_set_power_state(client, val); + mutex_unlock(&data->update_lock); + + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(power_state, S_IWUSR | S_IRUGO, + tsl2550_show_power_state, tsl2550_store_power_state); + +static ssize_t tsl2550_show_operating_mode(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct tsl2550_data *data = i2c_get_clientdata(to_i2c_client(dev)); + + return sprintf(buf, "%u\n", data->operating_mode); +} + +static ssize_t tsl2550_store_operating_mode(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct tsl2550_data *data = i2c_get_clientdata(client); + unsigned long val = simple_strtoul(buf, NULL, 10); + int ret; + + if (val > 1) + return -EINVAL; + + if (data->power_state == 0) + return -EBUSY; + + mutex_lock(&data->update_lock); + ret = tsl2550_set_operating_mode(client, val); + mutex_unlock(&data->update_lock); + + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(operating_mode, S_IWUSR | S_IRUGO, + tsl2550_show_operating_mode, tsl2550_store_operating_mode); + +static ssize_t __tsl2550_show_lux(struct i2c_client *client, char *buf) +{ + struct tsl2550_data *data = i2c_get_clientdata(client); + u8 ch0, ch1; + int ret; + + ret = tsl2550_get_adc_value(client, TSL2550_READ_ADC0); + if (ret < 0) + return ret; + ch0 = ret; + + ret = tsl2550_get_adc_value(client, TSL2550_READ_ADC1); + if (ret < 0) + return ret; + ch1 = ret; + + /* Do the job */ + ret = tsl2550_calculate_lux(ch0, ch1); + if (ret < 0) + return ret; + if (data->operating_mode == 1) + ret *= 5; + + return sprintf(buf, "%d\n", ret); +} + +static ssize_t tsl2550_show_lux1_input(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct tsl2550_data *data = i2c_get_clientdata(client); + int ret; + + /* No LUX data if not operational */ + if (!data->power_state) + return -EBUSY; + + mutex_lock(&data->update_lock); + ret = __tsl2550_show_lux(client, buf); + mutex_unlock(&data->update_lock); + + return ret; +} + +static DEVICE_ATTR(lux1_input, S_IRUGO, + tsl2550_show_lux1_input, NULL); + +static struct attribute *tsl2550_attributes[] = { + &dev_attr_power_state.attr, + &dev_attr_operating_mode.attr, + &dev_attr_lux1_input.attr, + NULL +}; + +static const struct attribute_group tsl2550_attr_group = { + .attrs = tsl2550_attributes, +}; + +/* + * Initialization function + */ + +static int 
tsl2550_init_client(struct i2c_client *client) +{ + struct tsl2550_data *data = i2c_get_clientdata(client); + int err; + + /* + * Probe the chip. To do so we try to power up the device and then to + * read back the 0x03 code + */ + err = i2c_smbus_read_byte_data(client, TSL2550_POWER_UP); + if (err < 0) + return err; + if (err != TSL2550_POWER_UP) + return -ENODEV; + data->power_state = 1; + + /* Set the default operating mode */ + err = i2c_smbus_write_byte(client, + TSL2550_MODE_RANGE[data->operating_mode]); + if (err < 0) + return err; + + return 0; +} + +/* + * I2C init/probing/exit functions + */ + +static struct i2c_driver tsl2550_driver; +static int tsl2550_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); + struct tsl2550_data *data; + int *opmode, err = 0; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WRITE_BYTE + | I2C_FUNC_SMBUS_READ_BYTE_DATA)) { + err = -EIO; + goto exit; + } + + data = kzalloc(sizeof(struct tsl2550_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + data->client = client; + i2c_set_clientdata(client, data); + + /* Check platform data */ + opmode = client->dev.platform_data; + if (opmode) { + if (*opmode < 0 || *opmode > 1) { + dev_err(&client->dev, "invalid operating_mode (%d)\n", + *opmode); + err = -EINVAL; + goto exit_kfree; + } + data->operating_mode = *opmode; + } else + data->operating_mode = 0; /* default mode is standard */ + dev_info(&client->dev, "%s operating mode\n", + data->operating_mode ? "extended" : "standard"); + + mutex_init(&data->update_lock); + + /* Initialize the TSL2550 chip */ + err = tsl2550_init_client(client); + if (err) + goto exit_kfree; + + /* Register sysfs hooks */ + err = sysfs_create_group(&client->dev.kobj, &tsl2550_attr_group); + if (err) + goto exit_kfree; + + dev_info(&client->dev, "support ver. 
%s enabled\n", DRIVER_VERSION); + + return 0; + +exit_kfree: + kfree(data); +exit: + return err; +} + +static int tsl2550_remove(struct i2c_client *client) +{ + sysfs_remove_group(&client->dev.kobj, &tsl2550_attr_group); + + /* Power down the device */ + tsl2550_set_power_state(client, 0); + + kfree(i2c_get_clientdata(client)); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP + +static int tsl2550_suspend(struct device *dev) +{ + return tsl2550_set_power_state(to_i2c_client(dev), 0); +} + +static int tsl2550_resume(struct device *dev) +{ + return tsl2550_set_power_state(to_i2c_client(dev), 1); +} + +static SIMPLE_DEV_PM_OPS(tsl2550_pm_ops, tsl2550_suspend, tsl2550_resume); +#define TSL2550_PM_OPS (&tsl2550_pm_ops) + +#else + +#define TSL2550_PM_OPS NULL + +#endif /* CONFIG_PM_SLEEP */ + +static const struct i2c_device_id tsl2550_id[] = { + { "tsl2550", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, tsl2550_id); + +static struct i2c_driver tsl2550_driver = { + .driver = { + .name = TSL2550_DRV_NAME, + .owner = THIS_MODULE, + .pm = TSL2550_PM_OPS, + }, + .probe = tsl2550_probe, + .remove = tsl2550_remove, + .id_table = tsl2550_id, +}; + +module_i2c_driver(tsl2550_driver); + +MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>"); +MODULE_DESCRIPTION("TSL2550 ambient light sensor driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRIVER_VERSION); diff --git a/kernel/drivers/misc/vexpress-syscfg.c b/kernel/drivers/misc/vexpress-syscfg.c new file mode 100644 index 000000000..c344483fa --- /dev/null +++ b/kernel/drivers/misc/vexpress-syscfg.c @@ -0,0 +1,294 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2014 ARM Limited + */ + +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/syscore_ops.h> +#include <linux/vexpress.h> + + +#define SYS_CFGDATA 0x0 + +#define SYS_CFGCTRL 0x4 +#define SYS_CFGCTRL_START (1 << 31) +#define SYS_CFGCTRL_WRITE (1 << 30) +#define SYS_CFGCTRL_DCC(n) (((n) & 0xf) << 26) +#define SYS_CFGCTRL_FUNC(n) (((n) & 0x3f) << 20) +#define SYS_CFGCTRL_SITE(n) (((n) & 0x3) << 16) +#define SYS_CFGCTRL_POSITION(n) (((n) & 0xf) << 12) +#define SYS_CFGCTRL_DEVICE(n) (((n) & 0xfff) << 0) + +#define SYS_CFGSTAT 0x8 +#define SYS_CFGSTAT_ERR (1 << 1) +#define SYS_CFGSTAT_COMPLETE (1 << 0) + + +struct vexpress_syscfg { + struct device *dev; + void __iomem *base; + struct list_head funcs; +}; + +struct vexpress_syscfg_func { + struct list_head list; + struct vexpress_syscfg *syscfg; + struct regmap *regmap; + int num_templates; + u32 template[0]; /* Keep it last! */ +}; + + +static int vexpress_syscfg_exec(struct vexpress_syscfg_func *func, + int index, bool write, u32 *data) +{ + struct vexpress_syscfg *syscfg = func->syscfg; + u32 command, status; + int tries; + long timeout; + + if (WARN_ON(index > func->num_templates)) + return -EINVAL; + + command = readl(syscfg->base + SYS_CFGCTRL); + if (WARN_ON(command & SYS_CFGCTRL_START)) + return -EBUSY; + + command = func->template[index]; + command |= SYS_CFGCTRL_START; + command |= write ? 
SYS_CFGCTRL_WRITE : 0; + + /* Use a canary for reads */ + if (!write) + *data = 0xdeadbeef; + + dev_dbg(syscfg->dev, "func %p, command %x, data %x\n", + func, command, *data); + writel(*data, syscfg->base + SYS_CFGDATA); + writel(0, syscfg->base + SYS_CFGSTAT); + writel(command, syscfg->base + SYS_CFGCTRL); + mb(); + + /* The operation can take ages... Go to sleep, 100us initially */ + tries = 100; + timeout = 100; + do { + if (!irqs_disabled()) { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(timeout)); + if (signal_pending(current)) + return -EINTR; + } else { + udelay(timeout); + } + + status = readl(syscfg->base + SYS_CFGSTAT); + if (status & SYS_CFGSTAT_ERR) + return -EFAULT; + + if (timeout > 20) + timeout -= 20; + } while (--tries && !(status & SYS_CFGSTAT_COMPLETE)); + if (WARN_ON_ONCE(!tries)) + return -ETIMEDOUT; + + if (!write) { + *data = readl(syscfg->base + SYS_CFGDATA); + dev_dbg(syscfg->dev, "func %p, read data %x\n", func, *data); + } + + return 0; +} + +static int vexpress_syscfg_read(void *context, unsigned int index, + unsigned int *val) +{ + struct vexpress_syscfg_func *func = context; + + return vexpress_syscfg_exec(func, index, false, val); +} + +static int vexpress_syscfg_write(void *context, unsigned int index, + unsigned int val) +{ + struct vexpress_syscfg_func *func = context; + + return vexpress_syscfg_exec(func, index, true, &val); +} + +static struct regmap_config vexpress_syscfg_regmap_config = { + .lock = vexpress_config_lock, + .unlock = vexpress_config_unlock, + .reg_bits = 32, + .val_bits = 32, + .reg_read = vexpress_syscfg_read, + .reg_write = vexpress_syscfg_write, + .reg_format_endian = REGMAP_ENDIAN_LITTLE, + .val_format_endian = REGMAP_ENDIAN_LITTLE, +}; + + +static struct regmap *vexpress_syscfg_regmap_init(struct device *dev, + void *context) +{ + int err; + struct vexpress_syscfg *syscfg = context; + struct vexpress_syscfg_func *func; + struct property *prop; + const __be32 *val = NULL; + __be32 energy_quirk[4]; + int num; + u32 site, position, dcc; + int i; + + err = vexpress_config_get_topo(dev->of_node, &site, + &position, &dcc); + if (err) + return ERR_PTR(err); + + prop = of_find_property(dev->of_node, + "arm,vexpress-sysreg,func", NULL); + if (!prop) + return ERR_PTR(-EINVAL); + + num = prop->length / sizeof(u32) / 2; + val = prop->value; + + /* + * "arm,vexpress-energy" function used to be described + * by its first device only, now it requires both + */ + if (num == 1 && of_device_is_compatible(dev->of_node, + "arm,vexpress-energy")) { + num = 2; + energy_quirk[0] = *val; + energy_quirk[2] = *val++; + energy_quirk[1] = *val; + energy_quirk[3] = cpu_to_be32(be32_to_cpup(val) + 1); + val = energy_quirk; + } + + func = kzalloc(sizeof(*func) + sizeof(*func->template) * num, + GFP_KERNEL); + if (!func) + return ERR_PTR(-ENOMEM); + + func->syscfg = syscfg; + func->num_templates = num; + + for (i = 0; i < num; i++) { + u32 function, device; + + function = be32_to_cpup(val++); + device = be32_to_cpup(val++); + + dev_dbg(dev, "func %p: %u/%u/%u/%u/%u\n", + func, site, position, dcc, + function, device); + + func->template[i] = SYS_CFGCTRL_DCC(dcc); + func->template[i] |= SYS_CFGCTRL_SITE(site); + func->template[i] |= SYS_CFGCTRL_POSITION(position); + func->template[i] |= SYS_CFGCTRL_FUNC(function); + func->template[i] |= SYS_CFGCTRL_DEVICE(device); + } + + vexpress_syscfg_regmap_config.max_register = num - 1; + + func->regmap = regmap_init(dev, NULL, func, + &vexpress_syscfg_regmap_config); + + if 
(IS_ERR(func->regmap)) {
+		void *err = func->regmap;
+
+		kfree(func);
+		return err;
+	}
+
+	list_add(&func->list, &syscfg->funcs);
+
+	return func->regmap;
+}
+
+static void vexpress_syscfg_regmap_exit(struct regmap *regmap, void *context)
+{
+	struct vexpress_syscfg *syscfg = context;
+	struct vexpress_syscfg_func *func, *tmp;
+
+	regmap_exit(regmap);
+
+	list_for_each_entry_safe(func, tmp, &syscfg->funcs, list) {
+		if (func->regmap == regmap) {
+			list_del(&func->list);
+			kfree(func);
+			break;
+		}
+	}
+}
+
+static struct vexpress_config_bridge_ops vexpress_syscfg_bridge_ops = {
+	.regmap_init = vexpress_syscfg_regmap_init,
+	.regmap_exit = vexpress_syscfg_regmap_exit,
+};
+
+
+static int vexpress_syscfg_probe(struct platform_device *pdev)
+{
+	struct vexpress_syscfg *syscfg;
+	struct resource *res;
+	struct device *bridge;
+
+	syscfg = devm_kzalloc(&pdev->dev, sizeof(*syscfg), GFP_KERNEL);
+	if (!syscfg)
+		return -ENOMEM;
+	syscfg->dev = &pdev->dev;
+	INIT_LIST_HEAD(&syscfg->funcs);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!devm_request_mem_region(&pdev->dev, res->start,
+			resource_size(res), pdev->name))
+		return -EBUSY;
+
+	syscfg->base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
+	if (!syscfg->base)
+		return -EFAULT;
+
+	/* Must use dev.parent (MFD), as that's where DT phandle points at... */
+	bridge = vexpress_config_bridge_register(pdev->dev.parent,
+			&vexpress_syscfg_bridge_ops, syscfg);
+	if (IS_ERR(bridge))
+		return PTR_ERR(bridge);
+
+	return 0;
+}
+
+static const struct platform_device_id vexpress_syscfg_id_table[] = {
+	{ "vexpress-syscfg", },
+	{},
+};
+
+static struct platform_driver vexpress_syscfg_driver = {
+	.driver.name = "vexpress-syscfg",
+	.id_table = vexpress_syscfg_id_table,
+	.probe = vexpress_syscfg_probe,
+};
+
+static int __init vexpress_syscfg_init(void)
+{
+	return platform_driver_register(&vexpress_syscfg_driver);
+}
+core_initcall(vexpress_syscfg_init);
diff --git a/kernel/drivers/misc/vmw_balloon.c b/kernel/drivers/misc/vmw_balloon.c
new file mode 100644
index 000000000..191617492
--- /dev/null
+++ b/kernel/drivers/misc/vmw_balloon.c
@@ -0,0 +1,839 @@
+/*
+ * VMware Balloon driver.
+ *
+ * Copyright (C) 2000-2010, VMware, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA.
+ *
+ * Maintained by:	Xavier Deguillard <xdeguillard@vmware.com>
+ *			Philip Moltmann <moltmann@vmware.com>
+ */
+
+/*
+ * This is the VMware physical memory management driver for Linux. The
+ * driver acts like a "balloon" that can be inflated to reclaim physical
+ * pages by reserving them in the guest and invalidating them in the
+ * monitor, freeing up the underlying machine pages so they can be
+ * allocated to other guests.  The balloon can also be deflated to allow
+ * the guest to use more physical memory. 
Higher level policies can control the sizes
+ * of balloons in VMs in order to manage physical memory resources.
+ */
+
+//#define DEBUG
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <asm/hypervisor.h>
+
+MODULE_AUTHOR("VMware, Inc.");
+MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
+MODULE_VERSION("1.2.1.3-k");
+MODULE_ALIAS("dmi:*:svnVMware*:*");
+MODULE_ALIAS("vmware_vmmemctl");
+MODULE_LICENSE("GPL");
+
+/*
+ * Various constants controlling the rate of inflating/deflating the
+ * balloon, measured in pages.
+ */
+
+/*
+ * Rate of allocating memory when there is no memory pressure
+ * (driver performs non-sleeping allocations).
+ */
+#define VMW_BALLOON_NOSLEEP_ALLOC_MAX	16384U
+
+/*
+ * Rates of memory allocation when guest experiences memory pressure
+ * (driver performs sleeping allocations).
+ */
+#define VMW_BALLOON_RATE_ALLOC_MIN	512U
+#define VMW_BALLOON_RATE_ALLOC_MAX	2048U
+#define VMW_BALLOON_RATE_ALLOC_INC	16U
+
+/*
+ * Rates for releasing pages while deflating balloon.
+ */
+#define VMW_BALLOON_RATE_FREE_MIN	512U
+#define VMW_BALLOON_RATE_FREE_MAX	16384U
+#define VMW_BALLOON_RATE_FREE_INC	16U
+
+/*
+ * When guest is under memory pressure, use a reduced page allocation
+ * rate for next several cycles.
+ */
+#define VMW_BALLOON_SLOW_CYCLES		4
+
+/*
+ * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't
+ * allow wait (__GFP_WAIT) for NOSLEEP page allocations. Use
+ * __GFP_NOWARN to suppress page allocation failure warnings.
+ */
+#define VMW_PAGE_ALLOC_NOSLEEP		(__GFP_HIGHMEM|__GFP_NOWARN)
+
+/*
+ * Use GFP_HIGHUSER when executing in a separate kernel thread
+ * context and allocation can sleep.  This is less stressful to
+ * the guest memory system, since it allows the thread to block
+ * while memory is reclaimed, and won't take pages from emergency
+ * low-memory pools.
+ */
+#define VMW_PAGE_ALLOC_CANSLEEP		(GFP_HIGHUSER)
+
+/* Maximum number of page allocations without yielding processor */
+#define VMW_BALLOON_YIELD_THRESHOLD	1024
+
+/* Maximum number of refused pages we accumulate during inflation cycle */
+#define VMW_BALLOON_MAX_REFUSED		16
+
+/*
+ * Hypervisor communication port definitions. 
+ */ +#define VMW_BALLOON_HV_PORT 0x5670 +#define VMW_BALLOON_HV_MAGIC 0x456c6d6f +#define VMW_BALLOON_PROTOCOL_VERSION 2 +#define VMW_BALLOON_GUEST_ID 1 /* Linux */ + +#define VMW_BALLOON_CMD_START 0 +#define VMW_BALLOON_CMD_GET_TARGET 1 +#define VMW_BALLOON_CMD_LOCK 2 +#define VMW_BALLOON_CMD_UNLOCK 3 +#define VMW_BALLOON_CMD_GUEST_ID 4 + +/* error codes */ +#define VMW_BALLOON_SUCCESS 0 +#define VMW_BALLOON_FAILURE -1 +#define VMW_BALLOON_ERROR_CMD_INVALID 1 +#define VMW_BALLOON_ERROR_PPN_INVALID 2 +#define VMW_BALLOON_ERROR_PPN_LOCKED 3 +#define VMW_BALLOON_ERROR_PPN_UNLOCKED 4 +#define VMW_BALLOON_ERROR_PPN_PINNED 5 +#define VMW_BALLOON_ERROR_PPN_NOTNEEDED 6 +#define VMW_BALLOON_ERROR_RESET 7 +#define VMW_BALLOON_ERROR_BUSY 8 + +#define VMWARE_BALLOON_CMD(cmd, data, result) \ +({ \ + unsigned long __stat, __dummy1, __dummy2; \ + __asm__ __volatile__ ("inl %%dx" : \ + "=a"(__stat), \ + "=c"(__dummy1), \ + "=d"(__dummy2), \ + "=b"(result) : \ + "0"(VMW_BALLOON_HV_MAGIC), \ + "1"(VMW_BALLOON_CMD_##cmd), \ + "2"(VMW_BALLOON_HV_PORT), \ + "3"(data) : \ + "memory"); \ + result &= -1UL; \ + __stat & -1UL; \ +}) + +#ifdef CONFIG_DEBUG_FS +struct vmballoon_stats { + unsigned int timer; + + /* allocation statistics */ + unsigned int alloc; + unsigned int alloc_fail; + unsigned int sleep_alloc; + unsigned int sleep_alloc_fail; + unsigned int refused_alloc; + unsigned int refused_free; + unsigned int free; + + /* monitor operations */ + unsigned int lock; + unsigned int lock_fail; + unsigned int unlock; + unsigned int unlock_fail; + unsigned int target; + unsigned int target_fail; + unsigned int start; + unsigned int start_fail; + unsigned int guest_type; + unsigned int guest_type_fail; +}; + +#define STATS_INC(stat) (stat)++ +#else +#define STATS_INC(stat) +#endif + +struct vmballoon { + + /* list of reserved physical pages */ + struct list_head pages; + + /* transient list of non-balloonable pages */ + struct list_head refused_pages; + unsigned int n_refused_pages; + + /* balloon size in pages */ + unsigned int size; + unsigned int target; + + /* reset flag */ + bool reset_required; + + /* adjustment rates (pages per second) */ + unsigned int rate_alloc; + unsigned int rate_free; + + /* slowdown page allocations for next few cycles */ + unsigned int slow_allocation_cycles; + +#ifdef CONFIG_DEBUG_FS + /* statistics */ + struct vmballoon_stats stats; + + /* debugfs file exporting statistics */ + struct dentry *dbg_entry; +#endif + + struct sysinfo sysinfo; + + struct delayed_work dwork; +}; + +static struct vmballoon balloon; + +/* + * Send "start" command to the host, communicating supported version + * of the protocol. + */ +static bool vmballoon_send_start(struct vmballoon *b) +{ + unsigned long status, dummy; + + STATS_INC(b->stats.start); + + status = VMWARE_BALLOON_CMD(START, VMW_BALLOON_PROTOCOL_VERSION, dummy); + if (status == VMW_BALLOON_SUCCESS) + return true; + + pr_debug("%s - failed, hv returns %ld\n", __func__, status); + STATS_INC(b->stats.start_fail); + return false; +} + +static bool vmballoon_check_status(struct vmballoon *b, unsigned long status) +{ + switch (status) { + case VMW_BALLOON_SUCCESS: + return true; + + case VMW_BALLOON_ERROR_RESET: + b->reset_required = true; + /* fall through */ + + default: + return false; + } +} + +/* + * Communicate guest type to the host so that it can adjust ballooning + * algorithm to the one most appropriate for the guest. This command + * is normally issued after sending "start" command and is part of + * standard reset sequence. 
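+ *
+ * For reference, the VMWARE_BALLOON_CMD(GUEST_ID, ...) invocation below
+ * expands to roughly the following (an illustrative sketch only; the
+ * local names are hypothetical):
+ *
+ *	unsigned long status, d1, d2, result;
+ *
+ *	__asm__ __volatile__("inl %%dx"
+ *		: "=a"(status), "=c"(d1), "=d"(d2), "=b"(result)
+ *		: "0"(VMW_BALLOON_HV_MAGIC),     // magic in EAX
+ *		  "1"(VMW_BALLOON_CMD_GUEST_ID), // command in ECX
+ *		  "2"(VMW_BALLOON_HV_PORT),      // port 0x5670 in DX
+ *		  "3"(VMW_BALLOON_GUEST_ID)      // argument in EBX
+ *		: "memory");
+ *
+ * The status comes back in EAX, any reply data in EBX.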
+ */ +static bool vmballoon_send_guest_id(struct vmballoon *b) +{ + unsigned long status, dummy; + + status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy); + + STATS_INC(b->stats.guest_type); + + if (vmballoon_check_status(b, status)) + return true; + + pr_debug("%s - failed, hv returns %ld\n", __func__, status); + STATS_INC(b->stats.guest_type_fail); + return false; +} + +/* + * Retrieve desired balloon size from the host. + */ +static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target) +{ + unsigned long status; + unsigned long target; + unsigned long limit; + u32 limit32; + + /* + * si_meminfo() is cheap. Moreover, we want to provide dynamic + * max balloon size later. So let us call si_meminfo() every + * iteration. + */ + si_meminfo(&b->sysinfo); + limit = b->sysinfo.totalram; + + /* Ensure limit fits in 32-bits */ + limit32 = (u32)limit; + if (limit != limit32) + return false; + + /* update stats */ + STATS_INC(b->stats.target); + + status = VMWARE_BALLOON_CMD(GET_TARGET, limit, target); + if (vmballoon_check_status(b, status)) { + *new_target = target; + return true; + } + + pr_debug("%s - failed, hv returns %ld\n", __func__, status); + STATS_INC(b->stats.target_fail); + return false; +} + +/* + * Notify the host about allocated page so that host can use it without + * fear that guest will need it. Host may reject some pages, we need to + * check the return value and maybe submit a different page. + */ +static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn, + unsigned int *hv_status) +{ + unsigned long status, dummy; + u32 pfn32; + + pfn32 = (u32)pfn; + if (pfn32 != pfn) + return -1; + + STATS_INC(b->stats.lock); + + *hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy); + if (vmballoon_check_status(b, status)) + return 0; + + pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status); + STATS_INC(b->stats.lock_fail); + return 1; +} + +/* + * Notify the host that guest intends to release given page back into + * the pool of available (to the guest) pages. + */ +static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn) +{ + unsigned long status, dummy; + u32 pfn32; + + pfn32 = (u32)pfn; + if (pfn32 != pfn) + return false; + + STATS_INC(b->stats.unlock); + + status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy); + if (vmballoon_check_status(b, status)) + return true; + + pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status); + STATS_INC(b->stats.unlock_fail); + return false; +} + +/* + * Quickly release all pages allocated for the balloon. This function is + * called when host decides to "reset" balloon for one reason or another. + * Unlike normal "deflate" we do not (shall not) notify host of the pages + * being released. + */ +static void vmballoon_pop(struct vmballoon *b) +{ + struct page *page, *next; + unsigned int count = 0; + + list_for_each_entry_safe(page, next, &b->pages, lru) { + list_del(&page->lru); + __free_page(page); + STATS_INC(b->stats.free); + b->size--; + + if (++count >= b->rate_free) { + count = 0; + cond_resched(); + } + } +} + +/* + * Perform standard reset sequence by popping the balloon (in case it + * is not empty) and then restarting protocol. This operation normally + * happens when host responds with VMW_BALLOON_ERROR_RESET to a command. 
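+ * The start command renegotiates the protocol version, after which the
+ * guest ID must be announced to the host again, as done below.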
+ */ +static void vmballoon_reset(struct vmballoon *b) +{ + /* free all pages, skipping monitor unlock */ + vmballoon_pop(b); + + if (vmballoon_send_start(b)) { + b->reset_required = false; + if (!vmballoon_send_guest_id(b)) + pr_err("failed to send guest ID to the host\n"); + } +} + +/* + * Allocate (or reserve) a page for the balloon and notify the host. If host + * refuses the page put it on "refuse" list and allocate another one until host + * is satisfied. "Refused" pages are released at the end of inflation cycle + * (when we allocate b->rate_alloc pages). + */ +static int vmballoon_reserve_page(struct vmballoon *b, bool can_sleep) +{ + struct page *page; + gfp_t flags; + unsigned int hv_status; + int locked; + flags = can_sleep ? VMW_PAGE_ALLOC_CANSLEEP : VMW_PAGE_ALLOC_NOSLEEP; + + do { + if (!can_sleep) + STATS_INC(b->stats.alloc); + else + STATS_INC(b->stats.sleep_alloc); + + page = alloc_page(flags); + if (!page) { + if (!can_sleep) + STATS_INC(b->stats.alloc_fail); + else + STATS_INC(b->stats.sleep_alloc_fail); + return -ENOMEM; + } + + /* inform monitor */ + locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status); + if (locked > 0) { + STATS_INC(b->stats.refused_alloc); + + if (hv_status == VMW_BALLOON_ERROR_RESET || + hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED) { + __free_page(page); + return -EIO; + } + + /* + * Place page on the list of non-balloonable pages + * and retry allocation, unless we already accumulated + * too many of them, in which case take a breather. + */ + list_add(&page->lru, &b->refused_pages); + if (++b->n_refused_pages >= VMW_BALLOON_MAX_REFUSED) + return -EIO; + } + } while (locked != 0); + + /* track allocated page */ + list_add(&page->lru, &b->pages); + + /* update balloon size */ + b->size++; + + return 0; +} + +/* + * Release the page allocated for the balloon. Note that we first notify + * the host so it can make sure the page will be available for the guest + * to use, if needed. + */ +static int vmballoon_release_page(struct vmballoon *b, struct page *page) +{ + if (!vmballoon_send_unlock_page(b, page_to_pfn(page))) + return -EIO; + + list_del(&page->lru); + + /* deallocate page */ + __free_page(page); + STATS_INC(b->stats.free); + + /* update balloon size */ + b->size--; + + return 0; +} + +/* + * Release pages that were allocated while attempting to inflate the + * balloon but were refused by the host for one reason or another. + */ +static void vmballoon_release_refused_pages(struct vmballoon *b) +{ + struct page *page, *next; + + list_for_each_entry_safe(page, next, &b->refused_pages, lru) { + list_del(&page->lru); + __free_page(page); + STATS_INC(b->stats.refused_free); + } + + b->n_refused_pages = 0; +} + +/* + * Inflate the balloon towards its target size. Note that we try to limit + * the rate of allocation to make sure we are not choking the rest of the + * system. + */ +static void vmballoon_inflate(struct vmballoon *b) +{ + unsigned int goal; + unsigned int rate; + unsigned int i; + unsigned int allocations = 0; + int error = 0; + bool alloc_can_sleep = false; + + pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target); + + /* + * First try NOSLEEP page allocations to inflate balloon. + * + * If we do not throttle nosleep allocations, we can drain all + * free pages in the guest quickly (if the balloon target is high). + * As a side-effect, draining free pages helps to inform (force) + * the guest to start swapping if balloon target is not met yet, + * which is a desired behavior. 
However, balloon driver can consume + * all available CPU cycles if too many pages are allocated in a + * second. Therefore, we throttle nosleep allocations even when + * the guest is not under memory pressure. OTOH, if we have already + * predicted that the guest is under memory pressure, then we + * slowdown page allocations considerably. + */ + + goal = b->target - b->size; + /* + * Start with no sleep allocation rate which may be higher + * than sleeping allocation rate. + */ + rate = b->slow_allocation_cycles ? + b->rate_alloc : VMW_BALLOON_NOSLEEP_ALLOC_MAX; + + pr_debug("%s - goal: %d, no-sleep rate: %d, sleep rate: %d\n", + __func__, goal, rate, b->rate_alloc); + + for (i = 0; i < goal; i++) { + + error = vmballoon_reserve_page(b, alloc_can_sleep); + if (error) { + if (error != -ENOMEM) { + /* + * Not a page allocation failure, stop this + * cycle. Maybe we'll get new target from + * the host soon. + */ + break; + } + + if (alloc_can_sleep) { + /* + * CANSLEEP page allocation failed, so guest + * is under severe memory pressure. Quickly + * decrease allocation rate. + */ + b->rate_alloc = max(b->rate_alloc / 2, + VMW_BALLOON_RATE_ALLOC_MIN); + break; + } + + /* + * NOSLEEP page allocation failed, so the guest is + * under memory pressure. Let us slow down page + * allocations for next few cycles so that the guest + * gets out of memory pressure. Also, if we already + * allocated b->rate_alloc pages, let's pause, + * otherwise switch to sleeping allocations. + */ + b->slow_allocation_cycles = VMW_BALLOON_SLOW_CYCLES; + + if (i >= b->rate_alloc) + break; + + alloc_can_sleep = true; + /* Lower rate for sleeping allocations. */ + rate = b->rate_alloc; + } + + if (++allocations > VMW_BALLOON_YIELD_THRESHOLD) { + cond_resched(); + allocations = 0; + } + + if (i >= rate) { + /* We allocated enough pages, let's take a break. */ + break; + } + } + + /* + * We reached our goal without failures so try increasing + * allocation rate. + */ + if (error == 0 && i >= b->rate_alloc) { + unsigned int mult = i / b->rate_alloc; + + b->rate_alloc = + min(b->rate_alloc + mult * VMW_BALLOON_RATE_ALLOC_INC, + VMW_BALLOON_RATE_ALLOC_MAX); + } + + vmballoon_release_refused_pages(b); +} + +/* + * Decrease the size of the balloon allowing guest to use more memory. + */ +static void vmballoon_deflate(struct vmballoon *b) +{ + struct page *page, *next; + unsigned int i = 0; + unsigned int goal; + int error; + + pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target); + + /* limit deallocation rate */ + goal = min(b->size - b->target, b->rate_free); + + pr_debug("%s - goal: %d, rate: %d\n", __func__, goal, b->rate_free); + + /* free pages to reach target */ + list_for_each_entry_safe(page, next, &b->pages, lru) { + error = vmballoon_release_page(b, page); + if (error) { + /* quickly decrease rate in case of error */ + b->rate_free = max(b->rate_free / 2, + VMW_BALLOON_RATE_FREE_MIN); + return; + } + + if (++i >= goal) + break; + } + + /* slowly increase rate if there were no errors */ + b->rate_free = min(b->rate_free + VMW_BALLOON_RATE_FREE_INC, + VMW_BALLOON_RATE_FREE_MAX); +} + +/* + * Balloon work function: reset protocol, if needed, get the new size and + * adjust balloon as needed. Repeat in 1 sec. 
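+ * round_jiffies_relative(HZ) rounds each period to a whole-second tick
+ * boundary, so the wakeup can be batched with other periodic timers.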
+ */ +static void vmballoon_work(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct vmballoon *b = container_of(dwork, struct vmballoon, dwork); + unsigned int target; + + STATS_INC(b->stats.timer); + + if (b->reset_required) + vmballoon_reset(b); + + if (b->slow_allocation_cycles > 0) + b->slow_allocation_cycles--; + + if (vmballoon_send_get_target(b, &target)) { + /* update target, adjust size */ + b->target = target; + + if (b->size < target) + vmballoon_inflate(b); + else if (b->size > target) + vmballoon_deflate(b); + } + + /* + * We are using a freezable workqueue so that balloon operations are + * stopped while the system transitions to/from sleep/hibernation. + */ + queue_delayed_work(system_freezable_wq, + dwork, round_jiffies_relative(HZ)); +} + +/* + * DEBUGFS Interface + */ +#ifdef CONFIG_DEBUG_FS + +static int vmballoon_debug_show(struct seq_file *f, void *offset) +{ + struct vmballoon *b = f->private; + struct vmballoon_stats *stats = &b->stats; + + /* format size info */ + seq_printf(f, + "target: %8d pages\n" + "current: %8d pages\n", + b->target, b->size); + + /* format rate info */ + seq_printf(f, + "rateNoSleepAlloc: %8d pages/sec\n" + "rateSleepAlloc: %8d pages/sec\n" + "rateFree: %8d pages/sec\n", + VMW_BALLOON_NOSLEEP_ALLOC_MAX, + b->rate_alloc, b->rate_free); + + seq_printf(f, + "\n" + "timer: %8u\n" + "start: %8u (%4u failed)\n" + "guestType: %8u (%4u failed)\n" + "lock: %8u (%4u failed)\n" + "unlock: %8u (%4u failed)\n" + "target: %8u (%4u failed)\n" + "primNoSleepAlloc: %8u (%4u failed)\n" + "primCanSleepAlloc: %8u (%4u failed)\n" + "primFree: %8u\n" + "errAlloc: %8u\n" + "errFree: %8u\n", + stats->timer, + stats->start, stats->start_fail, + stats->guest_type, stats->guest_type_fail, + stats->lock, stats->lock_fail, + stats->unlock, stats->unlock_fail, + stats->target, stats->target_fail, + stats->alloc, stats->alloc_fail, + stats->sleep_alloc, stats->sleep_alloc_fail, + stats->free, + stats->refused_alloc, stats->refused_free); + + return 0; +} + +static int vmballoon_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, vmballoon_debug_show, inode->i_private); +} + +static const struct file_operations vmballoon_debug_fops = { + .owner = THIS_MODULE, + .open = vmballoon_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init vmballoon_debugfs_init(struct vmballoon *b) +{ + int error; + + b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b, + &vmballoon_debug_fops); + if (IS_ERR(b->dbg_entry)) { + error = PTR_ERR(b->dbg_entry); + pr_err("failed to create debugfs entry, error: %d\n", error); + return error; + } + + return 0; +} + +static void __exit vmballoon_debugfs_exit(struct vmballoon *b) +{ + debugfs_remove(b->dbg_entry); +} + +#else + +static inline int vmballoon_debugfs_init(struct vmballoon *b) +{ + return 0; +} + +static inline void vmballoon_debugfs_exit(struct vmballoon *b) +{ +} + +#endif /* CONFIG_DEBUG_FS */ + +static int __init vmballoon_init(void) +{ + int error; + + /* + * Check if we are running on VMware's hypervisor and bail out + * if we are not. + */ + if (x86_hyper != &x86_hyper_vmware) + return -ENODEV; + + INIT_LIST_HEAD(&balloon.pages); + INIT_LIST_HEAD(&balloon.refused_pages); + + /* initialize rates */ + balloon.rate_alloc = VMW_BALLOON_RATE_ALLOC_MAX; + balloon.rate_free = VMW_BALLOON_RATE_FREE_MAX; + + INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work); + + /* + * Start balloon. 
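+	 * If either handshake below is refused, module load fails with
+	 * -EIO; there is nothing to drive without a protocol session.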
+ */ + if (!vmballoon_send_start(&balloon)) { + pr_err("failed to send start command to the host\n"); + return -EIO; + } + + if (!vmballoon_send_guest_id(&balloon)) { + pr_err("failed to send guest ID to the host\n"); + return -EIO; + } + + error = vmballoon_debugfs_init(&balloon); + if (error) + return error; + + queue_delayed_work(system_freezable_wq, &balloon.dwork, 0); + + return 0; +} +module_init(vmballoon_init); + +static void __exit vmballoon_exit(void) +{ + cancel_delayed_work_sync(&balloon.dwork); + + vmballoon_debugfs_exit(&balloon); + + /* + * Deallocate all reserved memory, and reset connection with monitor. + * Reset connection before deallocating memory to avoid potential for + * additional spurious resets from guest touching deallocated pages. + */ + vmballoon_send_start(&balloon); + vmballoon_pop(&balloon); +} +module_exit(vmballoon_exit); diff --git a/kernel/drivers/misc/vmw_vmci/Kconfig b/kernel/drivers/misc/vmw_vmci/Kconfig new file mode 100644 index 000000000..39c2ecadb --- /dev/null +++ b/kernel/drivers/misc/vmw_vmci/Kconfig @@ -0,0 +1,16 @@ +# +# VMware VMCI device +# + +config VMWARE_VMCI + tristate "VMware VMCI Driver" + depends on X86 && PCI + help + This is VMware's Virtual Machine Communication Interface. It enables + high-speed communication between host and guest in a virtual + environment via the VMCI virtual device. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called vmw_vmci. diff --git a/kernel/drivers/misc/vmw_vmci/Makefile b/kernel/drivers/misc/vmw_vmci/Makefile new file mode 100644 index 000000000..4da9893c3 --- /dev/null +++ b/kernel/drivers/misc/vmw_vmci/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_VMWARE_VMCI) += vmw_vmci.o +vmw_vmci-y += vmci_context.o vmci_datagram.o vmci_doorbell.o \ + vmci_driver.o vmci_event.o vmci_guest.o vmci_handle_array.o \ + vmci_host.o vmci_queue_pair.o vmci_resource.o vmci_route.o diff --git a/kernel/drivers/misc/vmw_vmci/vmci_context.c b/kernel/drivers/misc/vmw_vmci/vmci_context.c new file mode 100644 index 000000000..f866a4bae --- /dev/null +++ b/kernel/drivers/misc/vmw_vmci/vmci_context.c @@ -0,0 +1,1214 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include <linux/vmw_vmci_defs.h> +#include <linux/vmw_vmci_api.h> +#include <linux/highmem.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/slab.h> + +#include "vmci_queue_pair.h" +#include "vmci_datagram.h" +#include "vmci_doorbell.h" +#include "vmci_context.h" +#include "vmci_driver.h" +#include "vmci_event.h" + +/* + * List of current VMCI contexts. Contexts can be added by + * vmci_ctx_create() and removed via vmci_ctx_destroy(). + * These, along with context lookup, are protected by the + * list structure's lock. 
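+ *
+ * Mutations take ctx_list.lock while lookups run under RCU; the pattern,
+ * condensed from the functions below (illustrative sketch):
+ *
+ *	spin_lock(&ctx_list.lock);                 // writers serialize
+ *	list_add_tail_rcu(&ctx->list_item, &ctx_list.head);
+ *	spin_unlock(&ctx_list.lock);
+ *
+ *	rcu_read_lock();                           // readers are lockless
+ *	list_for_each_entry_rcu(c, &ctx_list.head, list_item)
+ *		if (c->cid == cid) {
+ *			kref_get(&c->kref);        // pin before use
+ *			break;
+ *		}
+ *	rcu_read_unlock();
+ *
+ * Removal does list_del_rcu() under the lock and then synchronize_rcu()
+ * before the final reference is dropped.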
+ */
+static struct {
+	struct list_head head;
+	spinlock_t lock; /* Spinlock for context list operations */
+} ctx_list = {
+	.head = LIST_HEAD_INIT(ctx_list.head),
+	.lock = __SPIN_LOCK_UNLOCKED(ctx_list.lock),
+};
+
+/* Used by contexts that did not set up notify flag pointers */
+static bool ctx_dummy_notify;
+
+static void ctx_signal_notify(struct vmci_ctx *context)
+{
+	*context->notify = true;
+}
+
+static void ctx_clear_notify(struct vmci_ctx *context)
+{
+	*context->notify = false;
+}
+
+/*
+ * If nothing requires the attention of the guest, clears both
+ * notify flag and call.
+ */
+static void ctx_clear_notify_call(struct vmci_ctx *context)
+{
+	if (context->pending_datagrams == 0 &&
+	    vmci_handle_arr_get_size(context->pending_doorbell_array) == 0)
+		ctx_clear_notify(context);
+}
+
+/*
+ * Sets the context's notify flag iff datagrams are pending for this
+ * context. Called from vmci_setup_notify().
+ */
+void vmci_ctx_check_signal_notify(struct vmci_ctx *context)
+{
+	spin_lock(&context->lock);
+	if (context->pending_datagrams)
+		ctx_signal_notify(context);
+	spin_unlock(&context->lock);
+}
+
+/*
+ * Allocates and initializes a VMCI context.
+ */
+struct vmci_ctx *vmci_ctx_create(u32 cid, u32 priv_flags,
+				 uintptr_t event_hnd,
+				 int user_version,
+				 const struct cred *cred)
+{
+	struct vmci_ctx *context;
+	int error;
+
+	if (cid == VMCI_INVALID_ID) {
+		pr_devel("Invalid context ID for VMCI context\n");
+		error = -EINVAL;
+		goto err_out;
+	}
+
+	if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS) {
+		pr_devel("Invalid flag (flags=0x%x) for VMCI context\n",
+			 priv_flags);
+		error = -EINVAL;
+		goto err_out;
+	}
+
+	if (user_version == 0) {
+		pr_devel("Invalid user_version %d\n", user_version);
+		error = -EINVAL;
+		goto err_out;
+	}
+
+	context = kzalloc(sizeof(*context), GFP_KERNEL);
+	if (!context) {
+		pr_warn("Failed to allocate memory for VMCI context\n");
+		error = -ENOMEM;
+		goto err_out;
+	}
+
+	kref_init(&context->kref);
+	spin_lock_init(&context->lock);
+	INIT_LIST_HEAD(&context->list_item);
+	INIT_LIST_HEAD(&context->datagram_queue);
+	INIT_LIST_HEAD(&context->notifier_list);
+
+	/* Initialize host-specific VMCI context. */
+	init_waitqueue_head(&context->host_context.wait_queue);
+
+	context->queue_pair_array = vmci_handle_arr_create(0);
+	if (!context->queue_pair_array) {
+		error = -ENOMEM;
+		goto err_free_ctx;
+	}
+
+	context->doorbell_array = vmci_handle_arr_create(0);
+	if (!context->doorbell_array) {
+		error = -ENOMEM;
+		goto err_free_qp_array;
+	}
+
+	context->pending_doorbell_array = vmci_handle_arr_create(0);
+	if (!context->pending_doorbell_array) {
+		error = -ENOMEM;
+		goto err_free_db_array;
+	}
+
+	context->user_version = user_version;
+
+	context->priv_flags = priv_flags;
+
+	if (cred)
+		context->cred = get_cred(cred);
+
+	context->notify = &ctx_dummy_notify;
+	context->notify_page = NULL;
+
+	/*
+	 * If we collide with an existing context we generate a new one
+	 * and use it instead. The VMX will determine if regeneration
+	 * is okay. Since there aren't 4B - 16 VMs running on a given
+	 * host, the below loop will terminate.
+	 */
+	spin_lock(&ctx_list.lock);

+	while (vmci_ctx_exists(cid)) {
+		/* We reserve the lowest 16 ids for fixed contexts. 
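Regenerated IDs therefore stay within [VMCI_RESERVED_CID_LIMIT, VMCI_INVALID_ID). 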
*/ + cid = max(cid, VMCI_RESERVED_CID_LIMIT - 1) + 1; + if (cid == VMCI_INVALID_ID) + cid = VMCI_RESERVED_CID_LIMIT; + } + context->cid = cid; + + list_add_tail_rcu(&context->list_item, &ctx_list.head); + spin_unlock(&ctx_list.lock); + + return context; + + err_free_db_array: + vmci_handle_arr_destroy(context->doorbell_array); + err_free_qp_array: + vmci_handle_arr_destroy(context->queue_pair_array); + err_free_ctx: + kfree(context); + err_out: + return ERR_PTR(error); +} + +/* + * Destroy VMCI context. + */ +void vmci_ctx_destroy(struct vmci_ctx *context) +{ + spin_lock(&ctx_list.lock); + list_del_rcu(&context->list_item); + spin_unlock(&ctx_list.lock); + synchronize_rcu(); + + vmci_ctx_put(context); +} + +/* + * Fire notification for all contexts interested in given cid. + */ +static int ctx_fire_notification(u32 context_id, u32 priv_flags) +{ + u32 i, array_size; + struct vmci_ctx *sub_ctx; + struct vmci_handle_arr *subscriber_array; + struct vmci_handle context_handle = + vmci_make_handle(context_id, VMCI_EVENT_HANDLER); + + /* + * We create an array to hold the subscribers we find when + * scanning through all contexts. + */ + subscriber_array = vmci_handle_arr_create(0); + if (subscriber_array == NULL) + return VMCI_ERROR_NO_MEM; + + /* + * Scan all contexts to find who is interested in being + * notified about given contextID. + */ + rcu_read_lock(); + list_for_each_entry_rcu(sub_ctx, &ctx_list.head, list_item) { + struct vmci_handle_list *node; + + /* + * We only deliver notifications of the removal of + * contexts, if the two contexts are allowed to + * interact. + */ + if (vmci_deny_interaction(priv_flags, sub_ctx->priv_flags)) + continue; + + list_for_each_entry_rcu(node, &sub_ctx->notifier_list, node) { + if (!vmci_handle_is_equal(node->handle, context_handle)) + continue; + + vmci_handle_arr_append_entry(&subscriber_array, + vmci_make_handle(sub_ctx->cid, + VMCI_EVENT_HANDLER)); + } + } + rcu_read_unlock(); + + /* Fire event to all subscribers. */ + array_size = vmci_handle_arr_get_size(subscriber_array); + for (i = 0; i < array_size; i++) { + int result; + struct vmci_event_ctx ev; + + ev.msg.hdr.dst = vmci_handle_arr_get_entry(subscriber_array, i); + ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_CONTEXT_RESOURCE_ID); + ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr); + ev.msg.event_data.event = VMCI_EVENT_CTX_REMOVED; + ev.payload.context_id = context_id; + + result = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID, + &ev.msg.hdr, false); + if (result < VMCI_SUCCESS) { + pr_devel("Failed to enqueue event datagram (type=%d) for context (ID=0x%x)\n", + ev.msg.event_data.event, + ev.msg.hdr.dst.context); + /* We continue to enqueue on next subscriber. */ + } + } + vmci_handle_arr_destroy(subscriber_array); + + return VMCI_SUCCESS; +} + +/* + * Returns the current number of pending datagrams. The call may + * also serve as a synchronization point for the datagram queue, + * as no enqueue operations can occur concurrently. + */ +int vmci_ctx_pending_datagrams(u32 cid, u32 *pending) +{ + struct vmci_ctx *context; + + context = vmci_ctx_get(cid); + if (context == NULL) + return VMCI_ERROR_INVALID_ARGS; + + spin_lock(&context->lock); + if (pending) + *pending = context->pending_datagrams; + spin_unlock(&context->lock); + vmci_ctx_put(context); + + return VMCI_SUCCESS; +} + +/* + * Queues a VMCI datagram for the appropriate target VM context. 
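+ *
+ * Admission control, condensed from the checks below (sketch): a
+ * datagram from a peer context must keep the queue under
+ * VMCI_MAX_DATAGRAM_QUEUE_SIZE, while one from the hypervisor is only
+ * bounded by the larger VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE:
+ *
+ *	new_size = queue_size + VMCI_DG_SIZE(dg);
+ *	limit = from_hypervisor ? VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE
+ *				: VMCI_MAX_DATAGRAM_QUEUE_SIZE;
+ *	if (new_size >= limit)
+ *		return VMCI_ERROR_NO_RESOURCES;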
+ */ +int vmci_ctx_enqueue_datagram(u32 cid, struct vmci_datagram *dg) +{ + struct vmci_datagram_queue_entry *dq_entry; + struct vmci_ctx *context; + struct vmci_handle dg_src; + size_t vmci_dg_size; + + vmci_dg_size = VMCI_DG_SIZE(dg); + if (vmci_dg_size > VMCI_MAX_DG_SIZE) { + pr_devel("Datagram too large (bytes=%Zu)\n", vmci_dg_size); + return VMCI_ERROR_INVALID_ARGS; + } + + /* Get the target VM's VMCI context. */ + context = vmci_ctx_get(cid); + if (!context) { + pr_devel("Invalid context (ID=0x%x)\n", cid); + return VMCI_ERROR_INVALID_ARGS; + } + + /* Allocate guest call entry and add it to the target VM's queue. */ + dq_entry = kmalloc(sizeof(*dq_entry), GFP_KERNEL); + if (dq_entry == NULL) { + pr_warn("Failed to allocate memory for datagram\n"); + vmci_ctx_put(context); + return VMCI_ERROR_NO_MEM; + } + dq_entry->dg = dg; + dq_entry->dg_size = vmci_dg_size; + dg_src = dg->src; + INIT_LIST_HEAD(&dq_entry->list_item); + + spin_lock(&context->lock); + + /* + * We put a higher limit on datagrams from the hypervisor. If + * the pending datagram is not from hypervisor, then we check + * if enqueueing it would exceed the + * VMCI_MAX_DATAGRAM_QUEUE_SIZE limit on the destination. If + * the pending datagram is from hypervisor, we allow it to be + * queued at the destination side provided we don't reach the + * VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE limit. + */ + if (context->datagram_queue_size + vmci_dg_size >= + VMCI_MAX_DATAGRAM_QUEUE_SIZE && + (!vmci_handle_is_equal(dg_src, + vmci_make_handle + (VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_CONTEXT_RESOURCE_ID)) || + context->datagram_queue_size + vmci_dg_size >= + VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE)) { + spin_unlock(&context->lock); + vmci_ctx_put(context); + kfree(dq_entry); + pr_devel("Context (ID=0x%x) receive queue is full\n", cid); + return VMCI_ERROR_NO_RESOURCES; + } + + list_add(&dq_entry->list_item, &context->datagram_queue); + context->pending_datagrams++; + context->datagram_queue_size += vmci_dg_size; + ctx_signal_notify(context); + wake_up(&context->host_context.wait_queue); + spin_unlock(&context->lock); + vmci_ctx_put(context); + + return vmci_dg_size; +} + +/* + * Verifies whether a context with the specified context ID exists. + * FIXME: utility is dubious as no decisions can be reliably made + * using this data as context can appear and disappear at any time. + */ +bool vmci_ctx_exists(u32 cid) +{ + struct vmci_ctx *context; + bool exists = false; + + rcu_read_lock(); + + list_for_each_entry_rcu(context, &ctx_list.head, list_item) { + if (context->cid == cid) { + exists = true; + break; + } + } + + rcu_read_unlock(); + return exists; +} + +/* + * Retrieves VMCI context corresponding to the given cid. + */ +struct vmci_ctx *vmci_ctx_get(u32 cid) +{ + struct vmci_ctx *c, *context = NULL; + + if (cid == VMCI_INVALID_ID) + return NULL; + + rcu_read_lock(); + list_for_each_entry_rcu(c, &ctx_list.head, list_item) { + if (c->cid == cid) { + /* + * The context owner drops its own reference to the + * context only after removing it from the list and + * waiting for RCU grace period to expire. This + * means that we are not about to increase the + * reference count of something that is in the + * process of being destroyed. + */ + context = c; + kref_get(&context->kref); + break; + } + } + rcu_read_unlock(); + + return context; +} + +/* + * Deallocates all parts of a context data structure. This + * function doesn't lock the context, because it assumes that + * the caller was holding the last reference to context. 
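+ *
+ * It only ever runs as the kref release callback, i.e. from
+ * vmci_ctx_put() via
+ *
+ *	kref_put(&context->kref, ctx_free_ctx);
+ *
+ * at which point no other thread can take a new reference.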
+ */
+static void ctx_free_ctx(struct kref *kref)
+{
+	struct vmci_ctx *context = container_of(kref, struct vmci_ctx, kref);
+	struct vmci_datagram_queue_entry *dq_entry, *dq_entry_tmp;
+	struct vmci_handle temp_handle;
+	struct vmci_handle_list *notifier, *tmp;
+
+	/*
+	 * Fire event to all contexts interested in knowing this
+	 * context is dying.
+	 */
+	ctx_fire_notification(context->cid, context->priv_flags);
+
+	/*
+	 * Cleanup all queue pair resources attached to context. If
+	 * the VM dies without cleaning up, this code will make sure
+	 * that no resources are leaked.
+	 */
+	temp_handle = vmci_handle_arr_get_entry(context->queue_pair_array, 0);
+	while (!vmci_handle_is_equal(temp_handle, VMCI_INVALID_HANDLE)) {
+		if (vmci_qp_broker_detach(temp_handle,
+					  context) < VMCI_SUCCESS) {
+			/*
+			 * When vmci_qp_broker_detach() succeeds it
+			 * removes the handle from the array. If
+			 * detach fails, we must remove the handle
+			 * ourselves.
+			 */
+			vmci_handle_arr_remove_entry(context->queue_pair_array,
+						     temp_handle);
+		}
+		temp_handle =
+		    vmci_handle_arr_get_entry(context->queue_pair_array, 0);
+	}
+
+	/*
+	 * It is fine to destroy this without locking the callQueue, as
+	 * this is the only thread having a reference to the context.
+	 */
+	list_for_each_entry_safe(dq_entry, dq_entry_tmp,
+				 &context->datagram_queue, list_item) {
+		WARN_ON(dq_entry->dg_size != VMCI_DG_SIZE(dq_entry->dg));
+		list_del(&dq_entry->list_item);
+		kfree(dq_entry->dg);
+		kfree(dq_entry);
+	}
+
+	list_for_each_entry_safe(notifier, tmp,
+				 &context->notifier_list, node) {
+		list_del(&notifier->node);
+		kfree(notifier);
+	}
+
+	vmci_handle_arr_destroy(context->queue_pair_array);
+	vmci_handle_arr_destroy(context->doorbell_array);
+	vmci_handle_arr_destroy(context->pending_doorbell_array);
+	vmci_ctx_unset_notify(context);
+	if (context->cred)
+		put_cred(context->cred);
+	kfree(context);
+}
+
+/*
+ * Drops reference to VMCI context. If this is the last reference to
+ * the context it will be deallocated. A context is created with
+ * a reference count of one, and on destroy, it is removed from
+ * the context list before its reference count is decremented. Thus,
+ * if we reach zero, we are sure that nobody else is about to increment
+ * it (they need the entry in the context list for that), and so there
+ * is no need for locking.
+ */
+void vmci_ctx_put(struct vmci_ctx *context)
+{
+	kref_put(&context->kref, ctx_free_ctx);
+}
+
+/*
+ * Dequeues the next datagram and returns it to the caller.
+ * The caller passes in a pointer to the max size datagram
+ * it can handle, and the datagram is only unqueued if its
+ * size is less than max_size. If it is larger, max_size is set to
+ * the size of the datagram to give the caller a chance to
+ * set up a larger buffer for the guestcall.
+ */
+int vmci_ctx_dequeue_datagram(struct vmci_ctx *context,
+			      size_t *max_size,
+			      struct vmci_datagram **dg)
+{
+	struct vmci_datagram_queue_entry *dq_entry;
+	struct list_head *list_item;
+	int rv;
+
+	/* Dequeue the next datagram entry. */
+	spin_lock(&context->lock);
+	if (context->pending_datagrams == 0) {
+		ctx_clear_notify_call(context);
+		spin_unlock(&context->lock);
+		pr_devel("No datagrams pending\n");
+		return VMCI_ERROR_NO_MORE_DATAGRAMS;
+	}
+
+	list_item = context->datagram_queue.next;
+
+	dq_entry =
+	    list_entry(list_item, struct vmci_datagram_queue_entry, list_item);
+
+	/* Check size of caller's buffer.
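+	 * If it is too small, *max_size is updated to the datagram's
+	 * size and VMCI_ERROR_NO_MEM is returned, so that a
+	 * (hypothetical) caller can grow its buffer and retry:
+	 *
+	 *	size_t size = buf_len;
+	 *	int rv = vmci_ctx_dequeue_datagram(context, &size, &dg);
+	 *
+	 *	if (rv == VMCI_ERROR_NO_MEM)
+	 *		... grow the buffer to 'size' and call again ...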
 */
+	if (*max_size < dq_entry->dg_size) {
+		*max_size = dq_entry->dg_size;
+		spin_unlock(&context->lock);
+		pr_devel("Caller's buffer should be at least (size=%u bytes)\n",
+			 (u32) *max_size);
+		return VMCI_ERROR_NO_MEM;
+	}
+
+	list_del(list_item);
+	context->pending_datagrams--;
+	context->datagram_queue_size -= dq_entry->dg_size;
+	if (context->pending_datagrams == 0) {
+		ctx_clear_notify_call(context);
+		rv = VMCI_SUCCESS;
+	} else {
+		/*
+		 * Return the size of the next datagram.
+		 */
+		struct vmci_datagram_queue_entry *next_entry;
+
+		list_item = context->datagram_queue.next;
+		next_entry =
+		    list_entry(list_item, struct vmci_datagram_queue_entry,
+			       list_item);
+
+		/*
+		 * The following size_t -> int truncation is fine as
+		 * the maximum size of a (routable) datagram is 68KB.
+		 */
+		rv = (int)next_entry->dg_size;
+	}
+	spin_unlock(&context->lock);
+
+	/* Caller must free datagram. */
+	*dg = dq_entry->dg;
+	dq_entry->dg = NULL;
+	kfree(dq_entry);
+
+	return rv;
+}
+
+/*
+ * Reverts actions set up by vmci_setup_notify(). Unmaps and unlocks the
+ * page mapped/locked by vmci_setup_notify().
+ */
+void vmci_ctx_unset_notify(struct vmci_ctx *context)
+{
+	struct page *notify_page;
+
+	spin_lock(&context->lock);
+
+	notify_page = context->notify_page;
+	context->notify = &ctx_dummy_notify;
+	context->notify_page = NULL;
+
+	spin_unlock(&context->lock);
+
+	if (notify_page) {
+		kunmap(notify_page);
+		put_page(notify_page);
+	}
+}
+
+/*
+ * Adds remote_cid to the list of contexts the current context wants
+ * notifications from/about.
+ */
+int vmci_ctx_add_notification(u32 context_id, u32 remote_cid)
+{
+	struct vmci_ctx *context;
+	struct vmci_handle_list *notifier, *n;
+	int result;
+	bool exists = false;
+
+	context = vmci_ctx_get(context_id);
+	if (!context)
+		return VMCI_ERROR_NOT_FOUND;
+
+	if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(remote_cid)) {
+		pr_devel("Context removed notifications for other VMs not supported (src=0x%x, remote=0x%x)\n",
+			 context_id, remote_cid);
+		result = VMCI_ERROR_DST_UNREACHABLE;
+		goto out;
+	}
+
+	if (context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) {
+		result = VMCI_ERROR_NO_ACCESS;
+		goto out;
+	}
+
+	notifier = kmalloc(sizeof(struct vmci_handle_list), GFP_KERNEL);
+	if (!notifier) {
+		result = VMCI_ERROR_NO_MEM;
+		goto out;
+	}
+
+	INIT_LIST_HEAD(&notifier->node);
+	notifier->handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
+
+	spin_lock(&context->lock);
+
+	list_for_each_entry(n, &context->notifier_list, node) {
+		if (vmci_handle_is_equal(n->handle, notifier->handle)) {
+			exists = true;
+			break;
+		}
+	}
+
+	if (exists) {
+		kfree(notifier);
+		result = VMCI_ERROR_ALREADY_EXISTS;
+	} else {
+		list_add_tail_rcu(&notifier->node, &context->notifier_list);
+		context->n_notifiers++;
+		result = VMCI_SUCCESS;
+	}
+
+	spin_unlock(&context->lock);
+
+ out:
+	vmci_ctx_put(context);
+	return result;
+}
+
+/*
+ * Removes remote_cid from the current context's list of contexts it is
+ * interested in getting notifications from/about.
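+ *
+ * The entry is unlinked with list_del_rcu() under the context lock
+ * but only freed after synchronize_rcu(), because
+ * ctx_fire_notification() walks notifier_list under rcu_read_lock()
+ * without taking that lock.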
+ */
+int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid)
+{
+	struct vmci_ctx *context;
+	struct vmci_handle_list *notifier, *tmp;
+	struct vmci_handle handle;
+	bool found = false;
+
+	context = vmci_ctx_get(context_id);
+	if (!context)
+		return VMCI_ERROR_NOT_FOUND;
+
+	handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
+
+	spin_lock(&context->lock);
+	list_for_each_entry_safe(notifier, tmp,
+				 &context->notifier_list, node) {
+		if (vmci_handle_is_equal(notifier->handle, handle)) {
+			list_del_rcu(&notifier->node);
+			context->n_notifiers--;
+			found = true;
+			break;
+		}
+	}
+	spin_unlock(&context->lock);
+
+	if (found) {
+		synchronize_rcu();
+		kfree(notifier);
+	}
+
+	vmci_ctx_put(context);
+
+	return found ? VMCI_SUCCESS : VMCI_ERROR_NOT_FOUND;
+}
+
+static int vmci_ctx_get_chkpt_notifiers(struct vmci_ctx *context,
+					u32 *buf_size, void **pbuf)
+{
+	u32 *notifiers;
+	size_t data_size;
+	struct vmci_handle_list *entry;
+	int i = 0;
+
+	if (context->n_notifiers == 0) {
+		*buf_size = 0;
+		*pbuf = NULL;
+		return VMCI_SUCCESS;
+	}
+
+	data_size = context->n_notifiers * sizeof(*notifiers);
+	if (*buf_size < data_size) {
+		*buf_size = data_size;
+		return VMCI_ERROR_MORE_DATA;
+	}
+
+	notifiers = kmalloc(data_size, GFP_ATOMIC);	/* FIXME: want GFP_KERNEL */
+	if (!notifiers)
+		return VMCI_ERROR_NO_MEM;
+
+	list_for_each_entry(entry, &context->notifier_list, node)
+		notifiers[i++] = entry->handle.context;
+
+	*buf_size = data_size;
+	*pbuf = notifiers;
+	return VMCI_SUCCESS;
+}
+
+static int vmci_ctx_get_chkpt_doorbells(struct vmci_ctx *context,
+					u32 *buf_size, void **pbuf)
+{
+	struct dbell_cpt_state *dbells;
+	size_t n_doorbells;
+	int i;
+
+	n_doorbells = vmci_handle_arr_get_size(context->doorbell_array);
+	if (n_doorbells > 0) {
+		size_t data_size = n_doorbells * sizeof(*dbells);
+		if (*buf_size < data_size) {
+			*buf_size = data_size;
+			return VMCI_ERROR_MORE_DATA;
+		}
+
+		dbells = kmalloc(data_size, GFP_ATOMIC);
+		if (!dbells)
+			return VMCI_ERROR_NO_MEM;
+
+		for (i = 0; i < n_doorbells; i++)
+			dbells[i].handle = vmci_handle_arr_get_entry(
+						context->doorbell_array, i);
+
+		*buf_size = data_size;
+		*pbuf = dbells;
+	} else {
+		*buf_size = 0;
+		*pbuf = NULL;
+	}
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Get current context's checkpoint state of given type.
+ */
+int vmci_ctx_get_chkpt_state(u32 context_id,
+			     u32 cpt_type,
+			     u32 *buf_size,
+			     void **pbuf)
+{
+	struct vmci_ctx *context;
+	int result;
+
+	context = vmci_ctx_get(context_id);
+	if (!context)
+		return VMCI_ERROR_NOT_FOUND;
+
+	spin_lock(&context->lock);
+
+	switch (cpt_type) {
+	case VMCI_NOTIFICATION_CPT_STATE:
+		result = vmci_ctx_get_chkpt_notifiers(context, buf_size, pbuf);
+		break;
+
+	case VMCI_WELLKNOWN_CPT_STATE:
+		/*
+		 * For compatibility with VMX'en with VM to VM communication, we
+		 * always return zero wellknown handles.
+		 */
+
+		*buf_size = 0;
+		*pbuf = NULL;
+		result = VMCI_SUCCESS;
+		break;
+
+	case VMCI_DOORBELL_CPT_STATE:
+		result = vmci_ctx_get_chkpt_doorbells(context, buf_size, pbuf);
+		break;
+
+	default:
+		pr_devel("Invalid cpt state (type=%d)\n", cpt_type);
+		result = VMCI_ERROR_INVALID_ARGS;
+		break;
+	}
+
+	spin_unlock(&context->lock);
+	vmci_ctx_put(context);
+
+	return result;
+}
+
+/*
+ * Set current context's checkpoint state of given type.
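+ *
+ * Together with vmci_ctx_get_chkpt_state() this forms a save/restore
+ * round trip. A hypothetical save path, using the
+ * VMCI_ERROR_MORE_DATA convention to size the buffer:
+ *
+ *	u32 size = user_buf_size;
+ *	void *buf;
+ *	int rv = vmci_ctx_get_chkpt_state(cid, cpt_type, &size, &buf);
+ *
+ *	if (rv == VMCI_ERROR_MORE_DATA)
+ *		... 'size' now holds the required length; retry ...
+ *	else if (rv == VMCI_SUCCESS && size > 0)
+ *		... copy out 'size' bytes from 'buf', then kfree(buf) ...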
+ */ +int vmci_ctx_set_chkpt_state(u32 context_id, + u32 cpt_type, + u32 buf_size, + void *cpt_buf) +{ + u32 i; + u32 current_id; + int result = VMCI_SUCCESS; + u32 num_ids = buf_size / sizeof(u32); + + if (cpt_type == VMCI_WELLKNOWN_CPT_STATE && num_ids > 0) { + /* + * We would end up here if VMX with VM to VM communication + * attempts to restore a checkpoint with wellknown handles. + */ + pr_warn("Attempt to restore checkpoint with obsolete wellknown handles\n"); + return VMCI_ERROR_OBSOLETE; + } + + if (cpt_type != VMCI_NOTIFICATION_CPT_STATE) { + pr_devel("Invalid cpt state (type=%d)\n", cpt_type); + return VMCI_ERROR_INVALID_ARGS; + } + + for (i = 0; i < num_ids && result == VMCI_SUCCESS; i++) { + current_id = ((u32 *)cpt_buf)[i]; + result = vmci_ctx_add_notification(context_id, current_id); + if (result != VMCI_SUCCESS) + break; + } + if (result != VMCI_SUCCESS) + pr_devel("Failed to set cpt state (type=%d) (error=%d)\n", + cpt_type, result); + + return result; +} + +/* + * Retrieves the specified context's pending notifications in the + * form of a handle array. The handle arrays returned are the + * actual data - not a copy and should not be modified by the + * caller. They must be released using + * vmci_ctx_rcv_notifications_release. + */ +int vmci_ctx_rcv_notifications_get(u32 context_id, + struct vmci_handle_arr **db_handle_array, + struct vmci_handle_arr **qp_handle_array) +{ + struct vmci_ctx *context; + int result = VMCI_SUCCESS; + + context = vmci_ctx_get(context_id); + if (context == NULL) + return VMCI_ERROR_NOT_FOUND; + + spin_lock(&context->lock); + + *db_handle_array = context->pending_doorbell_array; + context->pending_doorbell_array = vmci_handle_arr_create(0); + if (!context->pending_doorbell_array) { + context->pending_doorbell_array = *db_handle_array; + *db_handle_array = NULL; + result = VMCI_ERROR_NO_MEM; + } + *qp_handle_array = NULL; + + spin_unlock(&context->lock); + vmci_ctx_put(context); + + return result; +} + +/* + * Releases handle arrays with pending notifications previously + * retrieved using vmci_ctx_rcv_notifications_get. If the + * notifications were not successfully handed over to the guest, + * success must be false. + */ +void vmci_ctx_rcv_notifications_release(u32 context_id, + struct vmci_handle_arr *db_handle_array, + struct vmci_handle_arr *qp_handle_array, + bool success) +{ + struct vmci_ctx *context = vmci_ctx_get(context_id); + + spin_lock(&context->lock); + if (!success) { + struct vmci_handle handle; + + /* + * New notifications may have been added while we were not + * holding the context lock, so we transfer any new pending + * doorbell notifications to the old array, and reinstate the + * old array. + */ + + handle = vmci_handle_arr_remove_tail( + context->pending_doorbell_array); + while (!vmci_handle_is_invalid(handle)) { + if (!vmci_handle_arr_has_entry(db_handle_array, + handle)) { + vmci_handle_arr_append_entry( + &db_handle_array, handle); + } + handle = vmci_handle_arr_remove_tail( + context->pending_doorbell_array); + } + vmci_handle_arr_destroy(context->pending_doorbell_array); + context->pending_doorbell_array = db_handle_array; + db_handle_array = NULL; + } else { + ctx_clear_notify_call(context); + } + spin_unlock(&context->lock); + vmci_ctx_put(context); + + if (db_handle_array) + vmci_handle_arr_destroy(db_handle_array); + + if (qp_handle_array) + vmci_handle_arr_destroy(qp_handle_array); +} + +/* + * Registers that a new doorbell handle has been allocated by the + * context. 
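+ *
+ * A hypothetical host-side lifecycle, paired with the notify and
+ * destroy calls defined below:
+ *
+ *	vmci_ctx_dbell_create(cid, handle);
+ *	...
+ *	vmci_ctx_notify_dbell(src_cid, handle, priv_flags);
+ *	...
+ *	vmci_ctx_dbell_destroy(cid, handle);
+ *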
+ * Only registered doorbell handles can be notified.
+ */
+int vmci_ctx_dbell_create(u32 context_id, struct vmci_handle handle)
+{
+	struct vmci_ctx *context;
+	int result;
+
+	if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
+		return VMCI_ERROR_INVALID_ARGS;
+
+	context = vmci_ctx_get(context_id);
+	if (context == NULL)
+		return VMCI_ERROR_NOT_FOUND;
+
+	spin_lock(&context->lock);
+	if (!vmci_handle_arr_has_entry(context->doorbell_array, handle)) {
+		vmci_handle_arr_append_entry(&context->doorbell_array, handle);
+		result = VMCI_SUCCESS;
+	} else {
+		result = VMCI_ERROR_DUPLICATE_ENTRY;
+	}
+
+	spin_unlock(&context->lock);
+	vmci_ctx_put(context);
+
+	return result;
+}
+
+/*
+ * Unregisters a doorbell handle that was previously registered
+ * with vmci_ctx_dbell_create.
+ */
+int vmci_ctx_dbell_destroy(u32 context_id, struct vmci_handle handle)
+{
+	struct vmci_ctx *context;
+	struct vmci_handle removed_handle;
+
+	if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
+		return VMCI_ERROR_INVALID_ARGS;
+
+	context = vmci_ctx_get(context_id);
+	if (context == NULL)
+		return VMCI_ERROR_NOT_FOUND;
+
+	spin_lock(&context->lock);
+	removed_handle =
+	    vmci_handle_arr_remove_entry(context->doorbell_array, handle);
+	vmci_handle_arr_remove_entry(context->pending_doorbell_array, handle);
+	spin_unlock(&context->lock);
+
+	vmci_ctx_put(context);
+
+	return vmci_handle_is_invalid(removed_handle) ?
+	    VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS;
+}
+
+/*
+ * Unregisters all doorbell handles that were previously
+ * registered with vmci_ctx_dbell_create.
+ */
+int vmci_ctx_dbell_destroy_all(u32 context_id)
+{
+	struct vmci_ctx *context;
+	struct vmci_handle handle;
+
+	if (context_id == VMCI_INVALID_ID)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	context = vmci_ctx_get(context_id);
+	if (context == NULL)
+		return VMCI_ERROR_NOT_FOUND;
+
+	spin_lock(&context->lock);
+	do {
+		struct vmci_handle_arr *arr = context->doorbell_array;
+		handle = vmci_handle_arr_remove_tail(arr);
+	} while (!vmci_handle_is_invalid(handle));
+	do {
+		struct vmci_handle_arr *arr = context->pending_doorbell_array;
+		handle = vmci_handle_arr_remove_tail(arr);
+	} while (!vmci_handle_is_invalid(handle));
+	spin_unlock(&context->lock);
+
+	vmci_ctx_put(context);
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Registers a notification of a doorbell handle initiated by the
+ * specified source context. Notifications of doorbells are
+ * subject to the same isolation rules as datagram delivery. To
+ * allow host side senders of notifications a finer granularity
+ * of sender rights than those assigned to the sending context
+ * itself, the host context is required to specify a different
+ * set of privilege flags that will override the privileges of
+ * the source context.
+ */
+int vmci_ctx_notify_dbell(u32 src_cid,
+			  struct vmci_handle handle,
+			  u32 src_priv_flags)
+{
+	struct vmci_ctx *dst_context;
+	int result;
+
+	if (vmci_handle_is_invalid(handle))
+		return VMCI_ERROR_INVALID_ARGS;
+
+	/* Get the target VM's VMCI context.
*/ + dst_context = vmci_ctx_get(handle.context); + if (!dst_context) { + pr_devel("Invalid context (ID=0x%x)\n", handle.context); + return VMCI_ERROR_NOT_FOUND; + } + + if (src_cid != handle.context) { + u32 dst_priv_flags; + + if (VMCI_CONTEXT_IS_VM(src_cid) && + VMCI_CONTEXT_IS_VM(handle.context)) { + pr_devel("Doorbell notification from VM to VM not supported (src=0x%x, dst=0x%x)\n", + src_cid, handle.context); + result = VMCI_ERROR_DST_UNREACHABLE; + goto out; + } + + result = vmci_dbell_get_priv_flags(handle, &dst_priv_flags); + if (result < VMCI_SUCCESS) { + pr_warn("Failed to get privilege flags for destination (handle=0x%x:0x%x)\n", + handle.context, handle.resource); + goto out; + } + + if (src_cid != VMCI_HOST_CONTEXT_ID || + src_priv_flags == VMCI_NO_PRIVILEGE_FLAGS) { + src_priv_flags = vmci_context_get_priv_flags(src_cid); + } + + if (vmci_deny_interaction(src_priv_flags, dst_priv_flags)) { + result = VMCI_ERROR_NO_ACCESS; + goto out; + } + } + + if (handle.context == VMCI_HOST_CONTEXT_ID) { + result = vmci_dbell_host_context_notify(src_cid, handle); + } else { + spin_lock(&dst_context->lock); + + if (!vmci_handle_arr_has_entry(dst_context->doorbell_array, + handle)) { + result = VMCI_ERROR_NOT_FOUND; + } else { + if (!vmci_handle_arr_has_entry( + dst_context->pending_doorbell_array, + handle)) { + vmci_handle_arr_append_entry( + &dst_context->pending_doorbell_array, + handle); + + ctx_signal_notify(dst_context); + wake_up(&dst_context->host_context.wait_queue); + + } + result = VMCI_SUCCESS; + } + spin_unlock(&dst_context->lock); + } + + out: + vmci_ctx_put(dst_context); + + return result; +} + +bool vmci_ctx_supports_host_qp(struct vmci_ctx *context) +{ + return context && context->user_version >= VMCI_VERSION_HOSTQP; +} + +/* + * Registers that a new queue pair handle has been allocated by + * the context. + */ +int vmci_ctx_qp_create(struct vmci_ctx *context, struct vmci_handle handle) +{ + int result; + + if (context == NULL || vmci_handle_is_invalid(handle)) + return VMCI_ERROR_INVALID_ARGS; + + if (!vmci_handle_arr_has_entry(context->queue_pair_array, handle)) { + vmci_handle_arr_append_entry(&context->queue_pair_array, + handle); + result = VMCI_SUCCESS; + } else { + result = VMCI_ERROR_DUPLICATE_ENTRY; + } + + return result; +} + +/* + * Unregisters a queue pair handle that was previously registered + * with vmci_ctx_qp_create. + */ +int vmci_ctx_qp_destroy(struct vmci_ctx *context, struct vmci_handle handle) +{ + struct vmci_handle hndl; + + if (context == NULL || vmci_handle_is_invalid(handle)) + return VMCI_ERROR_INVALID_ARGS; + + hndl = vmci_handle_arr_remove_entry(context->queue_pair_array, handle); + + return vmci_handle_is_invalid(hndl) ? + VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS; +} + +/* + * Determines whether a given queue pair handle is registered + * with the given context. + */ +bool vmci_ctx_qp_exists(struct vmci_ctx *context, struct vmci_handle handle) +{ + if (context == NULL || vmci_handle_is_invalid(handle)) + return false; + + return vmci_handle_arr_has_entry(context->queue_pair_array, handle); +} + +/* + * vmci_context_get_priv_flags() - Retrieve privilege flags. + * @context_id: The context ID of the VMCI context. + * + * Retrieves privilege flags of the given VMCI context ID. 
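+ *
+ * A hypothetical access-control check built on top of it:
+ *
+ *	if (vmci_deny_interaction(vmci_context_get_priv_flags(src_cid),
+ *				  vmci_context_get_priv_flags(dst_cid)))
+ *		return VMCI_ERROR_NO_ACCESS;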
+ */
+u32 vmci_context_get_priv_flags(u32 context_id)
+{
+	if (vmci_host_code_active()) {
+		u32 flags;
+		struct vmci_ctx *context;
+
+		context = vmci_ctx_get(context_id);
+		if (!context)
+			return VMCI_LEAST_PRIVILEGE_FLAGS;
+
+		flags = context->priv_flags;
+		vmci_ctx_put(context);
+		return flags;
+	}
+	return VMCI_NO_PRIVILEGE_FLAGS;
+}
+EXPORT_SYMBOL_GPL(vmci_context_get_priv_flags);
+
+/*
+ * vmci_is_context_owner() - Determines if user is the context owner
+ * @context_id: The context ID of the VMCI context.
+ * @uid: The host user id (real kernel value).
+ *
+ * Determines whether a given UID is the owner of given VMCI context.
+ */
+bool vmci_is_context_owner(u32 context_id, kuid_t uid)
+{
+	bool is_owner = false;
+
+	if (vmci_host_code_active()) {
+		struct vmci_ctx *context = vmci_ctx_get(context_id);
+		if (context) {
+			if (context->cred)
+				is_owner = uid_eq(context->cred->uid, uid);
+			vmci_ctx_put(context);
+		}
+	}
+
+	return is_owner;
+}
+EXPORT_SYMBOL_GPL(vmci_is_context_owner);
diff --git a/kernel/drivers/misc/vmw_vmci/vmci_context.h b/kernel/drivers/misc/vmw_vmci/vmci_context.h
new file mode 100644
index 000000000..24a88e68a
--- /dev/null
+++ b/kernel/drivers/misc/vmw_vmci/vmci_context.h
@@ -0,0 +1,182 @@
+/*
+ * VMware VMCI driver (vmciContext.h)
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _VMCI_CONTEXT_H_
+#define _VMCI_CONTEXT_H_
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/atomic.h>
+#include <linux/kref.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+
+#include "vmci_handle_array.h"
+#include "vmci_datagram.h"
+
+/* Used to determine what checkpoint state to get and set. */
+enum {
+	VMCI_NOTIFICATION_CPT_STATE = 1,
+	VMCI_WELLKNOWN_CPT_STATE = 2,
+	VMCI_DG_OUT_STATE = 3,
+	VMCI_DG_IN_STATE = 4,
+	VMCI_DG_IN_SIZE_STATE = 5,
+	VMCI_DOORBELL_CPT_STATE = 6,
+};
+
+/* Host specific struct used for signalling */
+struct vmci_host {
+	wait_queue_head_t wait_queue;
+};
+
+struct vmci_handle_list {
+	struct list_head node;
+	struct vmci_handle handle;
+};
+
+struct vmci_ctx {
+	struct list_head list_item;	/* For global VMCI list. */
+	u32 cid;
+	struct kref kref;
+	struct list_head datagram_queue;	/* Head of per VM queue. */
+	u32 pending_datagrams;
+	size_t datagram_queue_size;	/* Size of datagram queue in bytes. */
+
+	/*
+	 * Version of the code that created
+	 * this context; e.g., VMX.
+	 */
+	int user_version;
+	spinlock_t lock;	/* Locks callQueue and handle_arrays. */
+
+	/*
+	 * queue_pairs attached to. The array of
+	 * handles for queue pairs is accessed
+	 * from the code for QP API, and there
+	 * it is protected by the QP lock. It
+	 * is also accessed from the context
+	 * clean up path, which does not
+	 * require a lock. VMCILock is not
+	 * used to protect the QP array field.
+	 */
+	struct vmci_handle_arr *queue_pair_array;
+
+	/* Doorbells created by context. */
+	struct vmci_handle_arr *doorbell_array;
+
+	/* Doorbells pending for context. */
+	struct vmci_handle_arr *pending_doorbell_array;
+
+	/* Contexts current context is subscribing to.
 */
+	struct list_head notifier_list;
+	unsigned int n_notifiers;
+
+	struct vmci_host host_context;
+	u32 priv_flags;
+
+	const struct cred *cred;
+	bool *notify;	/* Notify flag pointer - hosted only. */
+	struct page *notify_page;	/* Page backing the notify UVA. */
+};
+
+/* VMCINotifyAddRemoveInfo: Used to add/remove remote context notifications. */
+struct vmci_ctx_info {
+	u32 remote_cid;
+	int result;
+};
+
+/* VMCICptBufInfo: Used to set/get current context's checkpoint state. */
+struct vmci_ctx_chkpt_buf_info {
+	u64 cpt_buf;
+	u32 cpt_type;
+	u32 buf_size;
+	s32 result;
+	u32 _pad;
+};
+
+/*
+ * VMCINotificationReceiveInfo: Used to receive pending notifications
+ * for doorbells and queue pairs.
+ */
+struct vmci_ctx_notify_recv_info {
+	u64 db_handle_buf_uva;
+	u64 db_handle_buf_size;
+	u64 qp_handle_buf_uva;
+	u64 qp_handle_buf_size;
+	s32 result;
+	u32 _pad;
+};
+
+/*
+ * Utility function that checks whether two entities are allowed
+ * to interact. If one of them is restricted, the other one must
+ * be trusted.
+ */
+static inline bool vmci_deny_interaction(u32 part_one, u32 part_two)
+{
+	return ((part_one & VMCI_PRIVILEGE_FLAG_RESTRICTED) &&
+		!(part_two & VMCI_PRIVILEGE_FLAG_TRUSTED)) ||
+	       ((part_two & VMCI_PRIVILEGE_FLAG_RESTRICTED) &&
+		!(part_one & VMCI_PRIVILEGE_FLAG_TRUSTED));
}

+struct vmci_ctx *vmci_ctx_create(u32 cid, u32 flags,
+				 uintptr_t event_hnd, int version,
+				 const struct cred *cred);
+void vmci_ctx_destroy(struct vmci_ctx *context);
+
+bool vmci_ctx_supports_host_qp(struct vmci_ctx *context);
+int vmci_ctx_enqueue_datagram(u32 cid, struct vmci_datagram *dg);
+int vmci_ctx_dequeue_datagram(struct vmci_ctx *context,
+			      size_t *max_size, struct vmci_datagram **dg);
+int vmci_ctx_pending_datagrams(u32 cid, u32 *pending);
+struct vmci_ctx *vmci_ctx_get(u32 cid);
+void vmci_ctx_put(struct vmci_ctx *context);
+bool vmci_ctx_exists(u32 cid);
+
+int vmci_ctx_add_notification(u32 context_id, u32 remote_cid);
+int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid);
+int vmci_ctx_get_chkpt_state(u32 context_id, u32 cpt_type,
+			     u32 *num_cids, void **cpt_buf_ptr);
+int vmci_ctx_set_chkpt_state(u32 context_id, u32 cpt_type,
+			     u32 num_cids, void *cpt_buf);
+
+int vmci_ctx_qp_create(struct vmci_ctx *context, struct vmci_handle handle);
+int vmci_ctx_qp_destroy(struct vmci_ctx *context, struct vmci_handle handle);
+bool vmci_ctx_qp_exists(struct vmci_ctx *context, struct vmci_handle handle);
+
+void vmci_ctx_check_signal_notify(struct vmci_ctx *context);
+void vmci_ctx_unset_notify(struct vmci_ctx *context);
+
+int vmci_ctx_dbell_create(u32 context_id, struct vmci_handle handle);
+int vmci_ctx_dbell_destroy(u32 context_id, struct vmci_handle handle);
+int vmci_ctx_dbell_destroy_all(u32 context_id);
+int vmci_ctx_notify_dbell(u32 cid, struct vmci_handle handle,
+			  u32 src_priv_flags);
+
+int vmci_ctx_rcv_notifications_get(u32 context_id, struct vmci_handle_arr
+				   **db_handle_array, struct vmci_handle_arr
+				   **qp_handle_array);
+void vmci_ctx_rcv_notifications_release(u32 context_id, struct vmci_handle_arr
+					*db_handle_array, struct vmci_handle_arr
+					*qp_handle_array, bool success);
+
+static inline u32 vmci_ctx_get_id(struct vmci_ctx *context)
+{
+	if (!context)
+		return VMCI_INVALID_ID;
+	return context->cid;
+}
+
+#endif /* _VMCI_CONTEXT_H_ */
diff --git a/kernel/drivers/misc/vmw_vmci/vmci_datagram.c b/kernel/drivers/misc/vmw_vmci/vmci_datagram.c
new file mode 100644
index 000000000..822665245
--- /dev/null
+++ b/kernel/drivers/misc/vmw_vmci/vmci_datagram.c
@@
-0,0 +1,503 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include <linux/vmw_vmci_defs.h> +#include <linux/vmw_vmci_api.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/bug.h> + +#include "vmci_datagram.h" +#include "vmci_resource.h" +#include "vmci_context.h" +#include "vmci_driver.h" +#include "vmci_event.h" +#include "vmci_route.h" + +/* + * struct datagram_entry describes the datagram entity. It is used for datagram + * entities created only on the host. + */ +struct datagram_entry { + struct vmci_resource resource; + u32 flags; + bool run_delayed; + vmci_datagram_recv_cb recv_cb; + void *client_data; + u32 priv_flags; +}; + +struct delayed_datagram_info { + struct datagram_entry *entry; + struct work_struct work; + bool in_dg_host_queue; + /* msg and msg_payload must be together. */ + struct vmci_datagram msg; + u8 msg_payload[]; +}; + +/* Number of in-flight host->host datagrams */ +static atomic_t delayed_dg_host_queue_size = ATOMIC_INIT(0); + +/* + * Create a datagram entry given a handle pointer. + */ +static int dg_create_handle(u32 resource_id, + u32 flags, + u32 priv_flags, + vmci_datagram_recv_cb recv_cb, + void *client_data, struct vmci_handle *out_handle) +{ + int result; + u32 context_id; + struct vmci_handle handle; + struct datagram_entry *entry; + + if ((flags & VMCI_FLAG_WELLKNOWN_DG_HND) != 0) + return VMCI_ERROR_INVALID_ARGS; + + if ((flags & VMCI_FLAG_ANYCID_DG_HND) != 0) { + context_id = VMCI_INVALID_ID; + } else { + context_id = vmci_get_context_id(); + if (context_id == VMCI_INVALID_ID) + return VMCI_ERROR_NO_RESOURCES; + } + + handle = vmci_make_handle(context_id, resource_id); + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + pr_warn("Failed allocating memory for datagram entry\n"); + return VMCI_ERROR_NO_MEM; + } + + entry->run_delayed = (flags & VMCI_FLAG_DG_DELAYED_CB) ? true : false; + entry->flags = flags; + entry->recv_cb = recv_cb; + entry->client_data = client_data; + entry->priv_flags = priv_flags; + + /* Make datagram resource live. */ + result = vmci_resource_add(&entry->resource, + VMCI_RESOURCE_TYPE_DATAGRAM, + handle); + if (result != VMCI_SUCCESS) { + pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d\n", + handle.context, handle.resource, result); + kfree(entry); + return result; + } + + *out_handle = vmci_resource_handle(&entry->resource); + return VMCI_SUCCESS; +} + +/* + * Internal utility function with the same purpose as + * vmci_datagram_get_priv_flags that also takes a context_id. 
+ */ +static int vmci_datagram_get_priv_flags(u32 context_id, + struct vmci_handle handle, + u32 *priv_flags) +{ + if (context_id == VMCI_INVALID_ID) + return VMCI_ERROR_INVALID_ARGS; + + if (context_id == VMCI_HOST_CONTEXT_ID) { + struct datagram_entry *src_entry; + struct vmci_resource *resource; + + resource = vmci_resource_by_handle(handle, + VMCI_RESOURCE_TYPE_DATAGRAM); + if (!resource) + return VMCI_ERROR_INVALID_ARGS; + + src_entry = container_of(resource, struct datagram_entry, + resource); + *priv_flags = src_entry->priv_flags; + vmci_resource_put(resource); + } else if (context_id == VMCI_HYPERVISOR_CONTEXT_ID) + *priv_flags = VMCI_MAX_PRIVILEGE_FLAGS; + else + *priv_flags = vmci_context_get_priv_flags(context_id); + + return VMCI_SUCCESS; +} + +/* + * Calls the specified callback in a delayed context. + */ +static void dg_delayed_dispatch(struct work_struct *work) +{ + struct delayed_datagram_info *dg_info = + container_of(work, struct delayed_datagram_info, work); + + dg_info->entry->recv_cb(dg_info->entry->client_data, &dg_info->msg); + + vmci_resource_put(&dg_info->entry->resource); + + if (dg_info->in_dg_host_queue) + atomic_dec(&delayed_dg_host_queue_size); + + kfree(dg_info); +} + +/* + * Dispatch datagram as a host, to the host, or other vm context. This + * function cannot dispatch to hypervisor context handlers. This should + * have been handled before we get here by vmci_datagram_dispatch. + * Returns number of bytes sent on success, error code otherwise. + */ +static int dg_dispatch_as_host(u32 context_id, struct vmci_datagram *dg) +{ + int retval; + size_t dg_size; + u32 src_priv_flags; + + dg_size = VMCI_DG_SIZE(dg); + + /* Host cannot send to the hypervisor. */ + if (dg->dst.context == VMCI_HYPERVISOR_CONTEXT_ID) + return VMCI_ERROR_DST_UNREACHABLE; + + /* Check that source handle matches sending context. */ + if (dg->src.context != context_id) { + pr_devel("Sender context (ID=0x%x) is not owner of src datagram entry (handle=0x%x:0x%x)\n", + context_id, dg->src.context, dg->src.resource); + return VMCI_ERROR_NO_ACCESS; + } + + /* Get hold of privileges of sending endpoint. */ + retval = vmci_datagram_get_priv_flags(context_id, dg->src, + &src_priv_flags); + if (retval != VMCI_SUCCESS) { + pr_warn("Couldn't get privileges (handle=0x%x:0x%x)\n", + dg->src.context, dg->src.resource); + return retval; + } + + /* Determine if we should route to host or guest destination. */ + if (dg->dst.context == VMCI_HOST_CONTEXT_ID) { + /* Route to host datagram entry. */ + struct datagram_entry *dst_entry; + struct vmci_resource *resource; + + if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID && + dg->dst.resource == VMCI_EVENT_HANDLER) { + return vmci_event_dispatch(dg); + } + + resource = vmci_resource_by_handle(dg->dst, + VMCI_RESOURCE_TYPE_DATAGRAM); + if (!resource) { + pr_devel("Sending to invalid destination (handle=0x%x:0x%x)\n", + dg->dst.context, dg->dst.resource); + return VMCI_ERROR_INVALID_RESOURCE; + } + dst_entry = container_of(resource, struct datagram_entry, + resource); + if (vmci_deny_interaction(src_priv_flags, + dst_entry->priv_flags)) { + vmci_resource_put(resource); + return VMCI_ERROR_NO_ACCESS; + } + + /* + * If a VMCI datagram destined for the host is also sent by the + * host, we always run it delayed. This ensures that no locks + * are held when the datagram callback runs. 
+		 */
+		if (dst_entry->run_delayed ||
+		    dg->src.context == VMCI_HOST_CONTEXT_ID) {
+			struct delayed_datagram_info *dg_info;
+
+			if (atomic_add_return(1, &delayed_dg_host_queue_size)
+			    == VMCI_MAX_DELAYED_DG_HOST_QUEUE_SIZE) {
+				atomic_dec(&delayed_dg_host_queue_size);
+				vmci_resource_put(resource);
+				return VMCI_ERROR_NO_MEM;
+			}
+
+			dg_info = kmalloc(sizeof(*dg_info) +
+					  (size_t) dg->payload_size, GFP_ATOMIC);
+			if (!dg_info) {
+				atomic_dec(&delayed_dg_host_queue_size);
+				vmci_resource_put(resource);
+				return VMCI_ERROR_NO_MEM;
+			}
+
+			dg_info->in_dg_host_queue = true;
+			dg_info->entry = dst_entry;
+			memcpy(&dg_info->msg, dg, dg_size);
+
+			INIT_WORK(&dg_info->work, dg_delayed_dispatch);
+			schedule_work(&dg_info->work);
+			retval = VMCI_SUCCESS;
+
+		} else {
+			retval = dst_entry->recv_cb(dst_entry->client_data, dg);
+			vmci_resource_put(resource);
+			if (retval < VMCI_SUCCESS)
+				return retval;
+		}
+	} else {
+		/* Route to destination VM context. */
+		struct vmci_datagram *new_dg;
+
+		if (context_id != dg->dst.context) {
+			if (vmci_deny_interaction(src_priv_flags,
+						  vmci_context_get_priv_flags
+						  (dg->dst.context))) {
+				return VMCI_ERROR_NO_ACCESS;
+			} else if (VMCI_CONTEXT_IS_VM(context_id)) {
+				/*
+				 * If the sending context is a VM, it
+				 * cannot reach another VM.
+				 */
+
+				pr_devel("Datagram communication between VMs not supported (src=0x%x, dst=0x%x)\n",
+					 context_id, dg->dst.context);
+				return VMCI_ERROR_DST_UNREACHABLE;
+			}
+		}
+
+		/* We make a copy to enqueue. */
+		new_dg = kmalloc(dg_size, GFP_KERNEL);
+		if (new_dg == NULL)
+			return VMCI_ERROR_NO_MEM;
+
+		memcpy(new_dg, dg, dg_size);
+		retval = vmci_ctx_enqueue_datagram(dg->dst.context, new_dg);
+		if (retval < VMCI_SUCCESS) {
+			kfree(new_dg);
+			return retval;
+		}
+	}
+
+	/*
+	 * We currently truncate the size to signed 32 bits. This doesn't
+	 * matter for this handler as it only supports 4Kb messages.
+	 */
+	return (int)dg_size;
+}
+
+/*
+ * Dispatch datagram as a guest, down through the VMX and potentially to
+ * the host.
+ * Returns number of bytes sent on success, error code otherwise.
+ */
+static int dg_dispatch_as_guest(struct vmci_datagram *dg)
+{
+	int retval;
+	struct vmci_resource *resource;
+
+	resource = vmci_resource_by_handle(dg->src,
+					   VMCI_RESOURCE_TYPE_DATAGRAM);
+	if (!resource)
+		return VMCI_ERROR_NO_HANDLE;
+
+	retval = vmci_send_datagram(dg);
+	vmci_resource_put(resource);
+	return retval;
+}
+
+/*
+ * Dispatch datagram. This will determine the routing for the datagram
+ * and dispatch it accordingly.
+ * Returns number of bytes sent on success, error code otherwise.
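+ *
+ * A minimal, hypothetical payload-less datagram could be built and
+ * dispatched like this:
+ *
+ *	struct vmci_datagram dg = {
+ *		.dst = vmci_make_handle(dst_cid, dst_rid),
+ *		.src = vmci_make_handle(src_cid, src_rid),
+ *		.payload_size = 0,
+ *	};
+ *
+ *	retval = vmci_datagram_dispatch(src_cid, &dg, false);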
+ */
+int vmci_datagram_dispatch(u32 context_id,
+			   struct vmci_datagram *dg, bool from_guest)
+{
+	int retval;
+	enum vmci_route route;
+
+	BUILD_BUG_ON(sizeof(struct vmci_datagram) != 24);
+
+	if (dg->payload_size > VMCI_MAX_DG_SIZE ||
+	    VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE) {
+		pr_devel("Payload (size=%llu bytes) too big to send\n",
+			 (unsigned long long)dg->payload_size);
+		return VMCI_ERROR_INVALID_ARGS;
+	}
+
+	retval = vmci_route(&dg->src, &dg->dst, from_guest, &route);
+	if (retval < VMCI_SUCCESS) {
+		pr_devel("Failed to route datagram (src=0x%x, dst=0x%x, err=%d)\n",
+			 dg->src.context, dg->dst.context, retval);
+		return retval;
+	}
+
+	if (VMCI_ROUTE_AS_HOST == route) {
+		if (VMCI_INVALID_ID == context_id)
+			context_id = VMCI_HOST_CONTEXT_ID;
+		return dg_dispatch_as_host(context_id, dg);
+	}
+
+	if (VMCI_ROUTE_AS_GUEST == route)
+		return dg_dispatch_as_guest(dg);
+
+	pr_warn("Unknown route (%d) for datagram\n", route);
+	return VMCI_ERROR_DST_UNREACHABLE;
+}
+
+/*
+ * Invoke the handler for the given datagram. This is intended to be
+ * called only when acting as a guest and receiving a datagram from the
+ * virtual device.
+ */
+int vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg)
+{
+	struct vmci_resource *resource;
+	struct datagram_entry *dst_entry;
+
+	resource = vmci_resource_by_handle(dg->dst,
+					   VMCI_RESOURCE_TYPE_DATAGRAM);
+	if (!resource) {
+		pr_devel("destination (handle=0x%x:0x%x) doesn't exist\n",
+			 dg->dst.context, dg->dst.resource);
+		return VMCI_ERROR_NO_HANDLE;
+	}
+
+	dst_entry = container_of(resource, struct datagram_entry, resource);
+	if (dst_entry->run_delayed) {
+		struct delayed_datagram_info *dg_info;
+
+		dg_info = kmalloc(sizeof(*dg_info) + (size_t)dg->payload_size,
+				  GFP_ATOMIC);
+		if (!dg_info) {
+			vmci_resource_put(resource);
+			return VMCI_ERROR_NO_MEM;
+		}
+
+		dg_info->in_dg_host_queue = false;
+		dg_info->entry = dst_entry;
+		memcpy(&dg_info->msg, dg, VMCI_DG_SIZE(dg));
+
+		INIT_WORK(&dg_info->work, dg_delayed_dispatch);
+		schedule_work(&dg_info->work);
+	} else {
+		dst_entry->recv_cb(dst_entry->client_data, dg);
+		vmci_resource_put(resource);
+	}
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * vmci_datagram_create_handle_priv() - Create host context datagram endpoint
+ * @resource_id:	The resource ID.
+ * @flags:	Datagram Flags.
+ * @priv_flags:	Privilege Flags.
+ * @recv_cb:	Callback when receiving datagrams.
+ * @client_data:	Pointer for a datagram_entry struct
+ * @out_handle:	vmci_handle that is populated as a result of this function.
+ *
+ * Creates a host context datagram endpoint and returns a handle to it.
+ */
+int vmci_datagram_create_handle_priv(u32 resource_id,
+				     u32 flags,
+				     u32 priv_flags,
+				     vmci_datagram_recv_cb recv_cb,
+				     void *client_data,
+				     struct vmci_handle *out_handle)
+{
+	if (out_handle == NULL)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	if (recv_cb == NULL) {
+		pr_devel("Client callback needed when creating datagram\n");
+		return VMCI_ERROR_INVALID_ARGS;
+	}
+
+	if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	return dg_create_handle(resource_id, flags, priv_flags, recv_cb,
+				client_data, out_handle);
+}
+EXPORT_SYMBOL_GPL(vmci_datagram_create_handle_priv);
+
+/*
+ * vmci_datagram_create_handle() - Create host context datagram endpoint
+ * @resource_id:	Resource ID.
+ * @flags:	Datagram Flags.
+ * @recv_cb:	Callback when receiving datagrams.
+ * @client_data:	Pointer for a datagram_entry struct
+ * @out_handle:	vmci_handle that is populated as a result of this function.
+ *
+ * Creates a host context datagram endpoint and returns a handle to
+ * it. Same as vmci_datagram_create_handle_priv without the privilege
+ * flags argument.
+ */
+int vmci_datagram_create_handle(u32 resource_id,
+				u32 flags,
+				vmci_datagram_recv_cb recv_cb,
+				void *client_data,
+				struct vmci_handle *out_handle)
+{
+	return vmci_datagram_create_handle_priv(
+		resource_id, flags,
+		VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS,
+		recv_cb, client_data,
+		out_handle);
+}
+EXPORT_SYMBOL_GPL(vmci_datagram_create_handle);
+
+/*
+ * vmci_datagram_destroy_handle() - Destroys datagram handle
+ * @handle:	vmci_handle to be destroyed and reaped.
+ *
+ * Use this function to destroy any datagram handles created by
+ * vmci_datagram_create_handle{,Priv} functions.
+ */
+int vmci_datagram_destroy_handle(struct vmci_handle handle)
+{
+	struct datagram_entry *entry;
+	struct vmci_resource *resource;
+
+	resource = vmci_resource_by_handle(handle, VMCI_RESOURCE_TYPE_DATAGRAM);
+	if (!resource) {
+		pr_devel("Failed to destroy datagram (handle=0x%x:0x%x)\n",
+			 handle.context, handle.resource);
+		return VMCI_ERROR_NOT_FOUND;
+	}
+
+	entry = container_of(resource, struct datagram_entry, resource);
+
+	vmci_resource_put(&entry->resource);
+	vmci_resource_remove(&entry->resource);
+	kfree(entry);
+
+	return VMCI_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(vmci_datagram_destroy_handle);
+
+/*
+ * vmci_datagram_send() - Send a datagram
+ * @msg:	The datagram to send.
+ *
+ * Sends the provided datagram on its merry way.
+ */
+int vmci_datagram_send(struct vmci_datagram *msg)
+{
+	if (msg == NULL)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	return vmci_datagram_dispatch(VMCI_INVALID_ID, msg, false);
+}
+EXPORT_SYMBOL_GPL(vmci_datagram_send);
diff --git a/kernel/drivers/misc/vmw_vmci/vmci_datagram.h b/kernel/drivers/misc/vmw_vmci/vmci_datagram.h
new file mode 100644
index 000000000..eb4aab7f6
--- /dev/null
+++ b/kernel/drivers/misc/vmw_vmci/vmci_datagram.h
@@ -0,0 +1,52 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _VMCI_DATAGRAM_H_
+#define _VMCI_DATAGRAM_H_
+
+#include <linux/types.h>
+#include <linux/list.h>
+
+#include "vmci_context.h"
+
+#define VMCI_MAX_DELAYED_DG_HOST_QUEUE_SIZE 256
+
+/*
+ * The struct vmci_datagram_queue_entry is a queue header for the in-kernel VMCI
+ * datagram queues. It is allocated in non-paged memory, as the
+ * content is accessed while holding a spinlock. The pending datagram
+ * itself may be allocated from paged memory. We shadow the size of
+ * the datagram in the non-paged queue entry as this size is used
+ * while holding the same spinlock as above.
+ */
+struct vmci_datagram_queue_entry {
+	struct list_head list_item;	/* For queuing. */
+	size_t dg_size;		/* Size of datagram. */
+	struct vmci_datagram *dg;	/* Pending datagram. */
+};
+
+/* VMCIDatagramSendRecvInfo */
+struct vmci_datagram_snd_rcv_info {
+	u64 addr;
+	u32 len;
+	s32 result;
+};
+
+/* Datagram API for non-public use.
 */
+int vmci_datagram_dispatch(u32 context_id, struct vmci_datagram *dg,
+			   bool from_guest);
+int vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg);
+
+#endif /* _VMCI_DATAGRAM_H_ */
diff --git a/kernel/drivers/misc/vmw_vmci/vmci_doorbell.c b/kernel/drivers/misc/vmw_vmci/vmci_doorbell.c
new file mode 100644
index 000000000..a8cee33ae
--- /dev/null
+++ b/kernel/drivers/misc/vmw_vmci/vmci_doorbell.c
@@ -0,0 +1,601 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/completion.h>
+#include <linux/hash.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include "vmci_datagram.h"
+#include "vmci_doorbell.h"
+#include "vmci_resource.h"
+#include "vmci_driver.h"
+#include "vmci_route.h"
+
+
+#define VMCI_DOORBELL_INDEX_BITS	6
+#define VMCI_DOORBELL_INDEX_TABLE_SIZE	(1 << VMCI_DOORBELL_INDEX_BITS)
+#define VMCI_DOORBELL_HASH(_idx)	hash_32(_idx, VMCI_DOORBELL_INDEX_BITS)
+
+/*
+ * DoorbellEntry describes a doorbell notification handle allocated by the
+ * host.
+ */
+struct dbell_entry {
+	struct vmci_resource resource;
+	struct hlist_node node;
+	struct work_struct work;
+	vmci_callback notify_cb;
+	void *client_data;
+	u32 idx;
+	u32 priv_flags;
+	bool run_delayed;
+	atomic_t active;	/* Only used by guest personality */
+};
+
+/* The VMCI index table keeps track of currently registered doorbells. */
+struct dbell_index_table {
+	spinlock_t lock;	/* Index table lock */
+	struct hlist_head entries[VMCI_DOORBELL_INDEX_TABLE_SIZE];
+};
+
+static struct dbell_index_table vmci_doorbell_it = {
+	.lock = __SPIN_LOCK_UNLOCKED(vmci_doorbell_it.lock),
+};
+
+/*
+ * The max_notify_idx is one larger than the currently known bitmap index in
+ * use, and is used to determine how much of the bitmap needs to be scanned.
+ */
+static u32 max_notify_idx;
+
+/*
+ * The notify_idx_count is used for determining whether there are free entries
+ * within the bitmap (if notify_idx_count + 1 < max_notify_idx).
+ */
+static u32 notify_idx_count;
+
+/*
+ * The last_notify_idx_reserved is used to track the last index handed out - in
+ * the case where multiple handles share a notification index, we hand out
+ * indexes round robin based on last_notify_idx_reserved.
+ */
+static u32 last_notify_idx_reserved;
+
+/* This is a one entry cache used by the index allocation. */
+static u32 last_notify_idx_released = PAGE_SIZE;
+
+
+/*
+ * Utility function that retrieves the privilege flags associated
+ * with a given doorbell handle. For guest endpoints, the
+ * privileges are determined by the context ID, but for host
+ * endpoints privileges are associated with the complete
+ * handle. Hypervisor endpoints are not yet supported.
+ */
+int vmci_dbell_get_priv_flags(struct vmci_handle handle, u32 *priv_flags)
+{
+	if (priv_flags == NULL || handle.context == VMCI_INVALID_ID)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	if (handle.context == VMCI_HOST_CONTEXT_ID) {
+		struct dbell_entry *entry;
+		struct vmci_resource *resource;
+
+		resource = vmci_resource_by_handle(handle,
+						   VMCI_RESOURCE_TYPE_DOORBELL);
+		if (!resource)
+			return VMCI_ERROR_NOT_FOUND;
+
+		entry = container_of(resource, struct dbell_entry, resource);
+		*priv_flags = entry->priv_flags;
+		vmci_resource_put(resource);
+	} else if (handle.context == VMCI_HYPERVISOR_CONTEXT_ID) {
+		/*
+		 * Hypervisor endpoints for notifications are not
+		 * supported (yet).
+		 */
+		return VMCI_ERROR_INVALID_ARGS;
+	} else {
+		*priv_flags = vmci_context_get_priv_flags(handle.context);
+	}
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Find doorbell entry by bitmap index.
+ */
+static struct dbell_entry *dbell_index_table_find(u32 idx)
+{
+	u32 bucket = VMCI_DOORBELL_HASH(idx);
+	struct dbell_entry *dbell;
+
+	hlist_for_each_entry(dbell, &vmci_doorbell_it.entries[bucket],
+			     node) {
+		if (idx == dbell->idx)
+			return dbell;
+	}
+
+	return NULL;
+}
+
+/*
+ * Add the given entry to the index table. This will take a reference to the
+ * entry's resource so that the entry is not deleted before it is removed from
+ * the table.
+ */
+static void dbell_index_table_add(struct dbell_entry *entry)
+{
+	u32 bucket;
+	u32 new_notify_idx;
+
+	vmci_resource_get(&entry->resource);
+
+	spin_lock_bh(&vmci_doorbell_it.lock);
+
+	/*
+	 * Below we try to allocate an index in the notification
+	 * bitmap with "not too much" sharing between resources. If we
+	 * use less than the full bitmap, we either add to the end if
+	 * there are no unused flags within the currently used area,
+	 * or we search for unused ones. If we use the full bitmap, we
+	 * allocate the index round robin.
+	 */
+	if (max_notify_idx < PAGE_SIZE || notify_idx_count < PAGE_SIZE) {
+		if (last_notify_idx_released < max_notify_idx &&
+		    !dbell_index_table_find(last_notify_idx_released)) {
+			new_notify_idx = last_notify_idx_released;
+			last_notify_idx_released = PAGE_SIZE;
+		} else {
+			bool reused = false;
+			new_notify_idx = last_notify_idx_reserved;
+			if (notify_idx_count + 1 < max_notify_idx) {
+				do {
+					if (!dbell_index_table_find
+					    (new_notify_idx)) {
+						reused = true;
+						break;
+					}
+					new_notify_idx = (new_notify_idx + 1) %
+					    max_notify_idx;
+				} while (new_notify_idx !=
+					 last_notify_idx_released);
+			}
+			if (!reused) {
+				new_notify_idx = max_notify_idx;
+				max_notify_idx++;
+			}
+		}
+	} else {
+		new_notify_idx = (last_notify_idx_reserved + 1) % PAGE_SIZE;
+	}
+
+	last_notify_idx_reserved = new_notify_idx;
+	notify_idx_count++;
+
+	entry->idx = new_notify_idx;
+	bucket = VMCI_DOORBELL_HASH(entry->idx);
+	hlist_add_head(&entry->node, &vmci_doorbell_it.entries[bucket]);
+
+	spin_unlock_bh(&vmci_doorbell_it.lock);
+}
+
+/*
+ * Remove the given entry from the index table. This will release() the
+ * entry's resource.
+ */
+static void dbell_index_table_remove(struct dbell_entry *entry)
+{
+	spin_lock_bh(&vmci_doorbell_it.lock);
+
+	hlist_del_init(&entry->node);
+
+	notify_idx_count--;
+	if (entry->idx == max_notify_idx - 1) {
+		/*
+		 * If we delete an entry with the maximum known
+		 * notification index, we take the opportunity to
+		 * prune the current max. As there might be other
+		 * unused indices immediately below, we lower the
+		 * maximum until we hit an index in use.
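+		 *
+		 * For example, with entries at indices 2 and 7,
+		 * max_notify_idx is 8; removing the entry at 7 steps
+		 * the maximum down until index 2 is found in use,
+		 * leaving max_notify_idx at 3.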
+		 */
+		while (max_notify_idx > 0 &&
+		       !dbell_index_table_find(max_notify_idx - 1))
+			max_notify_idx--;
+	}
+
+	last_notify_idx_released = entry->idx;
+
+	spin_unlock_bh(&vmci_doorbell_it.lock);
+
+	vmci_resource_put(&entry->resource);
+}
+
+/*
+ * Creates a link between the given doorbell handle and the given
+ * index in the bitmap in the device backend. A notification state
+ * is created in the hypervisor.
+ */
+static int dbell_link(struct vmci_handle handle, u32 notify_idx)
+{
+	struct vmci_doorbell_link_msg link_msg;
+
+	link_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+					    VMCI_DOORBELL_LINK);
+	link_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+	link_msg.hdr.payload_size = sizeof(link_msg) - VMCI_DG_HEADERSIZE;
+	link_msg.handle = handle;
+	link_msg.notify_idx = notify_idx;
+
+	return vmci_send_datagram(&link_msg.hdr);
+}
+
+/*
+ * Unlinks the given doorbell handle from an index in the bitmap in
+ * the device backend. The notification state is destroyed in the hypervisor.
+ */
+static int dbell_unlink(struct vmci_handle handle)
+{
+	struct vmci_doorbell_unlink_msg unlink_msg;
+
+	unlink_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+					      VMCI_DOORBELL_UNLINK);
+	unlink_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+	unlink_msg.hdr.payload_size = sizeof(unlink_msg) - VMCI_DG_HEADERSIZE;
+	unlink_msg.handle = handle;
+
+	return vmci_send_datagram(&unlink_msg.hdr);
+}
+
+/*
+ * Notify another guest or the host. We send a datagram down to the
+ * host via the hypervisor with the notification info.
+ */
+static int dbell_notify_as_guest(struct vmci_handle handle, u32 priv_flags)
+{
+	struct vmci_doorbell_notify_msg notify_msg;
+
+	notify_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+					      VMCI_DOORBELL_NOTIFY);
+	notify_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+	notify_msg.hdr.payload_size = sizeof(notify_msg) - VMCI_DG_HEADERSIZE;
+	notify_msg.handle = handle;
+
+	return vmci_send_datagram(&notify_msg.hdr);
+}
+
+/*
+ * Calls the specified callback in a delayed context.
+ */
+static void dbell_delayed_dispatch(struct work_struct *work)
+{
+	struct dbell_entry *entry = container_of(work,
+						 struct dbell_entry, work);
+
+	entry->notify_cb(entry->client_data);
+	vmci_resource_put(&entry->resource);
+}
+
+/*
+ * Dispatches a doorbell notification to the host context.
+ */
+int vmci_dbell_host_context_notify(u32 src_cid, struct vmci_handle handle)
+{
+	struct dbell_entry *entry;
+	struct vmci_resource *resource;
+
+	if (vmci_handle_is_invalid(handle)) {
+		pr_devel("Notifying an invalid doorbell (handle=0x%x:0x%x)\n",
+			 handle.context, handle.resource);
+		return VMCI_ERROR_INVALID_ARGS;
+	}
+
+	resource = vmci_resource_by_handle(handle,
+					   VMCI_RESOURCE_TYPE_DOORBELL);
+	if (!resource) {
+		pr_devel("Notifying an unknown doorbell (handle=0x%x:0x%x)\n",
+			 handle.context, handle.resource);
+		return VMCI_ERROR_NOT_FOUND;
+	}
+
+	entry = container_of(resource, struct dbell_entry, resource);
+	if (entry->run_delayed) {
+		schedule_work(&entry->work);
+	} else {
+		entry->notify_cb(entry->client_data);
+		vmci_resource_put(resource);
+	}
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Register the notification bitmap with the host.
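+ *
+ * The caller passes the page frame number of the guest page backing
+ * the bitmap; a hypothetical init-time call:
+ *
+ *	if (!vmci_dbell_register_notification_bitmap(
+ *				page_to_pfn(bitmap_page)))
+ *		... run without a notification bitmap ...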
+ */
+bool vmci_dbell_register_notification_bitmap(u32 bitmap_ppn)
+{
+	int result;
+	struct vmci_notify_bm_set_msg bitmap_set_msg;
+
+	bitmap_set_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+						  VMCI_SET_NOTIFY_BITMAP);
+	bitmap_set_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+	bitmap_set_msg.hdr.payload_size = sizeof(bitmap_set_msg) -
+	    VMCI_DG_HEADERSIZE;
+	bitmap_set_msg.bitmap_ppn = bitmap_ppn;
+
+	result = vmci_send_datagram(&bitmap_set_msg.hdr);
+	if (result != VMCI_SUCCESS) {
+		pr_devel("Failed to register (PPN=%u) as notification bitmap (error=%d)\n",
+			 bitmap_ppn, result);
+		return false;
+	}
+	return true;
+}
+
+/*
+ * Executes or schedules the handlers for a given notify index.
+ */
+static void dbell_fire_entries(u32 notify_idx)
+{
+	u32 bucket = VMCI_DOORBELL_HASH(notify_idx);
+	struct dbell_entry *dbell;
+
+	spin_lock_bh(&vmci_doorbell_it.lock);
+
+	hlist_for_each_entry(dbell, &vmci_doorbell_it.entries[bucket], node) {
+		if (dbell->idx == notify_idx &&
+		    atomic_read(&dbell->active) == 1) {
+			if (dbell->run_delayed) {
+				vmci_resource_get(&dbell->resource);
+				schedule_work(&dbell->work);
+			} else {
+				dbell->notify_cb(dbell->client_data);
+			}
+		}
+	}
+
+	spin_unlock_bh(&vmci_doorbell_it.lock);
+}
+
+/*
+ * Scans the notification bitmap, collects pending notifications,
+ * resets the bitmap and invokes appropriate callbacks.
+ */
+void vmci_dbell_scan_notification_entries(u8 *bitmap)
+{
+	u32 idx;
+
+	for (idx = 0; idx < max_notify_idx; idx++) {
+		if (bitmap[idx] & 0x1) {
+			bitmap[idx] &= ~1;
+			dbell_fire_entries(idx);
+		}
+	}
+}
+
+/*
+ * vmci_doorbell_create() - Creates a doorbell
+ * @handle:	A handle used to track the resource.  Can be invalid.
+ * @flags:	Flag that determines context of callback.
+ * @priv_flags:	Privileges flags.
+ * @notify_cb:	The callback to be invoked when the doorbell fires.
+ * @client_data:	A parameter to be passed to the callback.
+ *
+ * Creates a doorbell with the given callback. If the handle is
+ * VMCI_INVALID_HANDLE, a free handle will be assigned, if
+ * possible. The callback can be run immediately (potentially with
+ * locks held - the default) or delayed (in a kernel thread) by
+ * specifying the flag VMCI_FLAG_DELAYED_CB. If delayed execution
+ * is selected, a given callback may not be run if the kernel is
+ * unable to allocate memory for the delayed execution (highly
+ * unlikely).
+ */
+int vmci_doorbell_create(struct vmci_handle *handle,
+			 u32 flags,
+			 u32 priv_flags,
+			 vmci_callback notify_cb, void *client_data)
+{
+	struct dbell_entry *entry;
+	struct vmci_handle new_handle;
+	int result;
+
+	if (!handle || !notify_cb || flags & ~VMCI_FLAG_DELAYED_CB ||
+	    priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (entry == NULL) {
+		pr_warn("Failed allocating memory for doorbell entry\n");
+		return VMCI_ERROR_NO_MEM;
+	}
+
+	if (vmci_handle_is_invalid(*handle)) {
+		u32 context_id = vmci_get_context_id();
+
+		/* Let resource code allocate a free ID for us */
+		new_handle = vmci_make_handle(context_id, VMCI_INVALID_ID);
+	} else {
+		bool valid_context = false;
+
+		/*
+		 * Validate the handle. We must do both of the checks below
+		 * because we can be acting as both a host and a guest at the
+		 * same time. We always allow the host context ID, since the
+		 * host functionality is in practice always there with the
+		 * unified driver.
+		 */
+		if (handle->context == VMCI_HOST_CONTEXT_ID ||
+		    (vmci_guest_code_active() &&
+		     vmci_get_context_id() == handle->context)) {
+			valid_context = true;
+		}
+
+		if (!valid_context || handle->resource == VMCI_INVALID_ID) {
+			pr_devel("Invalid argument (handle=0x%x:0x%x)\n",
+				 handle->context, handle->resource);
+			result = VMCI_ERROR_INVALID_ARGS;
+			goto free_mem;
+		}
+
+		new_handle = *handle;
+	}
+
+	entry->idx = 0;
+	INIT_HLIST_NODE(&entry->node);
+	entry->priv_flags = priv_flags;
+	INIT_WORK(&entry->work, dbell_delayed_dispatch);
+	entry->run_delayed = flags & VMCI_FLAG_DELAYED_CB;
+	entry->notify_cb = notify_cb;
+	entry->client_data = client_data;
+	atomic_set(&entry->active, 0);
+
+	result = vmci_resource_add(&entry->resource,
+				   VMCI_RESOURCE_TYPE_DOORBELL,
+				   new_handle);
+	if (result != VMCI_SUCCESS) {
+		pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d\n",
+			new_handle.context, new_handle.resource, result);
+		goto free_mem;
+	}
+
+	new_handle = vmci_resource_handle(&entry->resource);
+	if (vmci_guest_code_active()) {
+		dbell_index_table_add(entry);
+		result = dbell_link(new_handle, entry->idx);
+		if (VMCI_SUCCESS != result)
+			goto destroy_resource;
+
+		atomic_set(&entry->active, 1);
+	}
+
+	*handle = new_handle;
+
+	return result;
+
+ destroy_resource:
+	dbell_index_table_remove(entry);
+	vmci_resource_remove(&entry->resource);
+ free_mem:
+	kfree(entry);
+	return result;
+}
+EXPORT_SYMBOL_GPL(vmci_doorbell_create);
+
+/*
+ * vmci_doorbell_destroy() - Destroy a doorbell.
+ * @handle: The handle tracking the resource.
+ *
+ * Destroys a doorbell previously created with vmci_doorbell_create().
+ * This operation may block waiting for a callback to finish.
+ */
+int vmci_doorbell_destroy(struct vmci_handle handle)
+{
+	struct dbell_entry *entry;
+	struct vmci_resource *resource;
+
+	if (vmci_handle_is_invalid(handle))
+		return VMCI_ERROR_INVALID_ARGS;
+
+	resource = vmci_resource_by_handle(handle,
+					   VMCI_RESOURCE_TYPE_DOORBELL);
+	if (!resource) {
+		pr_devel("Failed to destroy doorbell (handle=0x%x:0x%x)\n",
+			 handle.context, handle.resource);
+		return VMCI_ERROR_NOT_FOUND;
+	}
+
+	entry = container_of(resource, struct dbell_entry, resource);
+
+	if (vmci_guest_code_active()) {
+		int result;
+
+		dbell_index_table_remove(entry);
+
+		result = dbell_unlink(handle);
+		if (VMCI_SUCCESS != result) {
+
+			/*
+			 * The only reason this should fail would be
+			 * an inconsistency between guest and
+			 * hypervisor state, where the guest believes
+			 * it has an active registration whereas the
+			 * hypervisor doesn't. One case where this may
+			 * happen is if a doorbell is unregistered
+			 * following a hibernation at a time where the
+			 * doorbell state hasn't been restored on the
+			 * hypervisor side yet. Since the handle has
+			 * now been removed in the guest, we just
+			 * print a warning and return success.
+			 */
+			pr_devel("Unlink of doorbell (handle=0x%x:0x%x) unknown by hypervisor (error=%d)\n",
+				 handle.context, handle.resource, result);
+		}
+	}
+
+	/*
+	 * Now remove the resource from the table.  It might still be in use
+	 * after this, in a callback or still on the delayed work queue.
+	 */
+	vmci_resource_put(&entry->resource);
+	vmci_resource_remove(&entry->resource);
+
+	kfree(entry);
+
+	return VMCI_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(vmci_doorbell_destroy);
+
+/*
+ * vmci_doorbell_notify() - Ring the doorbell (and hide in the bushes).
+ * @dst:        The handle identifying the doorbell resource.
+ * @priv_flags: Privilege flags.
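+ *
+ * An illustrative call sequence (editor's sketch, not taken from this
+ * driver; the callback and variable names are hypothetical):
+ *
+ *	static void my_db_fired(void *data)
+ *	{
+ *		pr_info("doorbell rang\n");
+ *	}
+ *
+ *	struct vmci_handle db = VMCI_INVALID_HANDLE;
+ *
+ *	if (vmci_doorbell_create(&db, VMCI_FLAG_DELAYED_CB,
+ *				 VMCI_NO_PRIVILEGE_FLAGS,
+ *				 my_db_fired, NULL) == VMCI_SUCCESS) {
+ *		// with VMCI_FLAG_DELAYED_CB the callback runs from a
+ *		// workqueue rather than in interrupt/atomic context
+ *		vmci_doorbell_notify(db, VMCI_NO_PRIVILEGE_FLAGS);
+ *		vmci_doorbell_destroy(db);
+ *	}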
+ * + * Generates a notification on the doorbell identified by the + * handle. For host side generation of notifications, the caller + * can specify what the privilege of the calling side is. + */ +int vmci_doorbell_notify(struct vmci_handle dst, u32 priv_flags) +{ + int retval; + enum vmci_route route; + struct vmci_handle src; + + if (vmci_handle_is_invalid(dst) || + (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS)) + return VMCI_ERROR_INVALID_ARGS; + + src = VMCI_INVALID_HANDLE; + retval = vmci_route(&src, &dst, false, &route); + if (retval < VMCI_SUCCESS) + return retval; + + if (VMCI_ROUTE_AS_HOST == route) + return vmci_ctx_notify_dbell(VMCI_HOST_CONTEXT_ID, + dst, priv_flags); + + if (VMCI_ROUTE_AS_GUEST == route) + return dbell_notify_as_guest(dst, priv_flags); + + pr_warn("Unknown route (%d) for doorbell\n", route); + return VMCI_ERROR_DST_UNREACHABLE; +} +EXPORT_SYMBOL_GPL(vmci_doorbell_notify); diff --git a/kernel/drivers/misc/vmw_vmci/vmci_doorbell.h b/kernel/drivers/misc/vmw_vmci/vmci_doorbell.h new file mode 100644 index 000000000..e4c0b1748 --- /dev/null +++ b/kernel/drivers/misc/vmw_vmci/vmci_doorbell.h @@ -0,0 +1,51 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#ifndef VMCI_DOORBELL_H +#define VMCI_DOORBELL_H + +#include <linux/vmw_vmci_defs.h> +#include <linux/types.h> + +#include "vmci_driver.h" + +/* + * VMCINotifyResourceInfo: Used to create and destroy doorbells, and + * generate a notification for a doorbell or queue pair. + */ +struct vmci_dbell_notify_resource_info { + struct vmci_handle handle; + u16 resource; + u16 action; + s32 result; +}; + +/* + * Structure used for checkpointing the doorbell mappings. It is + * written to the checkpoint as is, so changing this structure will + * break checkpoint compatibility. + */ +struct dbell_cpt_state { + struct vmci_handle handle; + u64 bitmap_idx; +}; + +int vmci_dbell_host_context_notify(u32 src_cid, struct vmci_handle handle); +int vmci_dbell_get_priv_flags(struct vmci_handle handle, u32 *priv_flags); + +bool vmci_dbell_register_notification_bitmap(u32 bitmap_ppn); +void vmci_dbell_scan_notification_entries(u8 *bitmap); + +#endif /* VMCI_DOORBELL_H */ diff --git a/kernel/drivers/misc/vmw_vmci/vmci_driver.c b/kernel/drivers/misc/vmw_vmci/vmci_driver.c new file mode 100644 index 000000000..b823f9a6e --- /dev/null +++ b/kernel/drivers/misc/vmw_vmci/vmci_driver.c @@ -0,0 +1,117 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/atomic.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include "vmci_driver.h"
+#include "vmci_event.h"
+
+static bool vmci_disable_host;
+module_param_named(disable_host, vmci_disable_host, bool, 0);
+MODULE_PARM_DESC(disable_host,
+		 "Disable driver host personality (default=enabled)");
+
+static bool vmci_disable_guest;
+module_param_named(disable_guest, vmci_disable_guest, bool, 0);
+MODULE_PARM_DESC(disable_guest,
+		 "Disable driver guest personality (default=enabled)");
+
+static bool vmci_guest_personality_initialized;
+static bool vmci_host_personality_initialized;
+
+/*
+ * vmci_get_context_id() - Gets the current context ID.
+ *
+ * Returns the current context ID: the VM context ID when the guest
+ * personality is active, the host context ID when only the host
+ * personality is active, and VMCI_INVALID_ID when neither is.
+ */
+u32 vmci_get_context_id(void)
+{
+	if (vmci_guest_code_active())
+		return vmci_get_vm_context_id();
+	else if (vmci_host_code_active())
+		return VMCI_HOST_CONTEXT_ID;
+
+	return VMCI_INVALID_ID;
+}
+EXPORT_SYMBOL_GPL(vmci_get_context_id);
+
+static int __init vmci_drv_init(void)
+{
+	int vmci_err;
+	int error;
+
+	vmci_err = vmci_event_init();
+	if (vmci_err < VMCI_SUCCESS) {
+		pr_err("Failed to initialize VMCIEvent (result=%d)\n",
+		       vmci_err);
+		return -EINVAL;
+	}
+
+	if (!vmci_disable_guest) {
+		error = vmci_guest_init();
+		if (error) {
+			pr_warn("Failed to initialize guest personality (err=%d)\n",
+				error);
+		} else {
+			vmci_guest_personality_initialized = true;
+			pr_info("Guest personality initialized and is %s\n",
+				vmci_guest_code_active() ?
+				"active" : "inactive");
+		}
+	}
+
+	if (!vmci_disable_host) {
+		error = vmci_host_init();
+		if (error) {
+			pr_warn("Unable to initialize host personality (err=%d)\n",
+				error);
+		} else {
+			vmci_host_personality_initialized = true;
+			pr_info("Initialized host personality\n");
+		}
+	}
+
+	if (!vmci_guest_personality_initialized &&
+	    !vmci_host_personality_initialized) {
+		vmci_event_exit();
+		return -ENODEV;
+	}
+
+	return 0;
+}
+module_init(vmci_drv_init);
+
+static void __exit vmci_drv_exit(void)
+{
+	if (vmci_guest_personality_initialized)
+		vmci_guest_exit();
+
+	if (vmci_host_personality_initialized)
+		vmci_host_exit();
+
+	vmci_event_exit();
+}
+module_exit(vmci_drv_exit);
+
+MODULE_AUTHOR("VMware, Inc.");
+MODULE_DESCRIPTION("VMware Virtual Machine Communication Interface.");
+MODULE_VERSION("1.1.3.0-k");
+MODULE_LICENSE("GPL v2");
diff --git a/kernel/drivers/misc/vmw_vmci/vmci_driver.h b/kernel/drivers/misc/vmw_vmci/vmci_driver.h
new file mode 100644
index 000000000..cee9e977d
--- /dev/null
+++ b/kernel/drivers/misc/vmw_vmci/vmci_driver.h
@@ -0,0 +1,57 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */ + +#ifndef _VMCI_DRIVER_H_ +#define _VMCI_DRIVER_H_ + +#include <linux/vmw_vmci_defs.h> +#include <linux/wait.h> + +#include "vmci_queue_pair.h" +#include "vmci_context.h" + +enum vmci_obj_type { + VMCIOBJ_VMX_VM = 10, + VMCIOBJ_CONTEXT, + VMCIOBJ_SOCKET, + VMCIOBJ_NOT_SET, +}; + +/* For storing VMCI structures in file handles. */ +struct vmci_obj { + void *ptr; + enum vmci_obj_type type; +}; + +/* + * Needed by other components of this module. It's okay to have one global + * instance of this because there can only ever be one VMCI device. Our + * virtual hardware enforces this. + */ +extern struct pci_dev *vmci_pdev; + +u32 vmci_get_context_id(void); +int vmci_send_datagram(struct vmci_datagram *dg); + +int vmci_host_init(void); +void vmci_host_exit(void); +bool vmci_host_code_active(void); + +int vmci_guest_init(void); +void vmci_guest_exit(void); +bool vmci_guest_code_active(void); +u32 vmci_get_vm_context_id(void); + +#endif /* _VMCI_DRIVER_H_ */ diff --git a/kernel/drivers/misc/vmw_vmci/vmci_event.c b/kernel/drivers/misc/vmw_vmci/vmci_event.c new file mode 100644 index 000000000..8449516d6 --- /dev/null +++ b/kernel/drivers/misc/vmw_vmci/vmci_event.c @@ -0,0 +1,224 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include <linux/vmw_vmci_defs.h> +#include <linux/vmw_vmci_api.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/slab.h> + +#include "vmci_driver.h" +#include "vmci_event.h" + +#define EVENT_MAGIC 0xEABE0000 +#define VMCI_EVENT_MAX_ATTEMPTS 10 + +struct vmci_subscription { + u32 id; + u32 event; + vmci_event_cb callback; + void *callback_data; + struct list_head node; /* on one of subscriber lists */ +}; + +static struct list_head subscriber_array[VMCI_EVENT_MAX]; +static DEFINE_MUTEX(subscriber_mutex); + +int __init vmci_event_init(void) +{ + int i; + + for (i = 0; i < VMCI_EVENT_MAX; i++) + INIT_LIST_HEAD(&subscriber_array[i]); + + return VMCI_SUCCESS; +} + +void vmci_event_exit(void) +{ + int e; + + /* We free all memory at exit. */ + for (e = 0; e < VMCI_EVENT_MAX; e++) { + struct vmci_subscription *cur, *p2; + list_for_each_entry_safe(cur, p2, &subscriber_array[e], node) { + + /* + * We should never get here because all events + * should have been unregistered before we try + * to unload the driver module. + */ + pr_warn("Unexpected free events occurring\n"); + list_del(&cur->node); + kfree(cur); + } + } +} + +/* + * Find entry. Assumes subscriber_mutex is held. + */ +static struct vmci_subscription *event_find(u32 sub_id) +{ + int e; + + for (e = 0; e < VMCI_EVENT_MAX; e++) { + struct vmci_subscription *cur; + list_for_each_entry(cur, &subscriber_array[e], node) { + if (cur->id == sub_id) + return cur; + } + } + return NULL; +} + +/* + * Actually delivers the events to the subscribers. + * The callback function for each subscriber is invoked. 
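+ * Runs under rcu_read_lock(): subscribers are added with
+ * list_add_rcu() and removed with list_del_rcu() followed by a
+ * synchronize_rcu() before freeing, so a subscription is never freed
+ * while its callback is running. Callbacks therefore must not sleep.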
+ */
+static void event_deliver(struct vmci_event_msg *event_msg)
+{
+	struct vmci_subscription *cur;
+	struct list_head *subscriber_list;
+
+	rcu_read_lock();
+	subscriber_list = &subscriber_array[event_msg->event_data.event];
+	list_for_each_entry_rcu(cur, subscriber_list, node) {
+		cur->callback(cur->id, &event_msg->event_data,
+			      cur->callback_data);
+	}
+	rcu_read_unlock();
+}
+
+/*
+ * Dispatcher for the VMCI_EVENT_RECEIVE datagrams. Calls all
+ * subscribers for the given event.
+ */
+int vmci_event_dispatch(struct vmci_datagram *msg)
+{
+	struct vmci_event_msg *event_msg = (struct vmci_event_msg *)msg;
+
+	if (msg->payload_size < sizeof(u32) ||
+	    msg->payload_size > sizeof(struct vmci_event_data_max))
+		return VMCI_ERROR_INVALID_ARGS;
+
+	if (!VMCI_EVENT_VALID(event_msg->event_data.event))
+		return VMCI_ERROR_EVENT_UNKNOWN;
+
+	event_deliver(event_msg);
+	return VMCI_SUCCESS;
+}
+
+/*
+ * vmci_event_subscribe() - Subscribe to a given event.
+ * @event:               The event to subscribe to.
+ * @callback:            The callback to invoke upon the event.
+ * @callback_data:       Data to pass to the callback.
+ * @new_subscription_id: ID used to track the subscription.  Used with
+ *                       vmci_event_unsubscribe()
+ *
+ * Subscribes to the provided event. The callback specified will be
+ * fired from an RCU critical section and therefore must not sleep.
+ */
+int vmci_event_subscribe(u32 event,
+			 vmci_event_cb callback,
+			 void *callback_data,
+			 u32 *new_subscription_id)
+{
+	struct vmci_subscription *sub;
+	int attempts;
+	int retval;
+	bool have_new_id = false;
+
+	if (!new_subscription_id) {
+		pr_devel("%s: Invalid subscription (NULL)\n", __func__);
+		return VMCI_ERROR_INVALID_ARGS;
+	}
+
+	if (!VMCI_EVENT_VALID(event) || !callback) {
+		pr_devel("%s: Failed to subscribe to event (type=%d) (callback=%p) (data=%p)\n",
+			 __func__, event, callback, callback_data);
+		return VMCI_ERROR_INVALID_ARGS;
+	}
+
+	sub = kzalloc(sizeof(*sub), GFP_KERNEL);
+	if (!sub)
+		return VMCI_ERROR_NO_MEM;
+
+	sub->id = VMCI_EVENT_MAX;
+	sub->event = event;
+	sub->callback = callback;
+	sub->callback_data = callback_data;
+	INIT_LIST_HEAD(&sub->node);
+
+	mutex_lock(&subscriber_mutex);
+
+	/* Creation of a new event subscription is always allowed. */
+	for (attempts = 0; attempts < VMCI_EVENT_MAX_ATTEMPTS; attempts++) {
+		static u32 subscription_id;
+		/*
+		 * We try to get an ID a couple of times before
+		 * claiming we are out of resources.
+		 */
+
+		/* Test for duplicate id. */
+		if (!event_find(++subscription_id)) {
+			sub->id = subscription_id;
+			have_new_id = true;
+			break;
+		}
+	}
+
+	if (have_new_id) {
+		list_add_rcu(&sub->node, &subscriber_array[event]);
+		retval = VMCI_SUCCESS;
+	} else {
+		retval = VMCI_ERROR_NO_RESOURCES;
+	}
+
+	mutex_unlock(&subscriber_mutex);
+
+	if (retval != VMCI_SUCCESS) {
+		/* Don't leak the subscription if no free ID was found. */
+		kfree(sub);
+		return retval;
+	}
+
+	*new_subscription_id = sub->id;
+	return retval;
+}
+EXPORT_SYMBOL_GPL(vmci_event_subscribe);
+
+/*
+ * vmci_event_unsubscribe() - unsubscribe from an event.
+ * @sub_id: A subscription ID as provided by vmci_event_subscribe()
+ *
+ * Unsubscribes from the given event. Removes the subscription from
+ * the list and frees it, waiting for an RCU grace period first so
+ * that any callback still in flight has finished before the free.
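+ *
+ * Example pairing with vmci_event_subscribe() (editor's sketch; the
+ * callback and variable names are illustrative, not from this driver):
+ *
+ *	static void on_ctx_update(u32 sub_id,
+ *				  const struct vmci_event_data *ed,
+ *				  void *data)
+ *	{
+ *		// runs under rcu_read_lock(), must not sleep
+ *	}
+ *
+ *	u32 sub;
+ *
+ *	if (vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE, on_ctx_update,
+ *				 NULL, &sub) == VMCI_SUCCESS)
+ *		vmci_event_unsubscribe(sub);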
+ */ +int vmci_event_unsubscribe(u32 sub_id) +{ + struct vmci_subscription *s; + + mutex_lock(&subscriber_mutex); + s = event_find(sub_id); + if (s) + list_del_rcu(&s->node); + mutex_unlock(&subscriber_mutex); + + if (!s) + return VMCI_ERROR_NOT_FOUND; + + synchronize_rcu(); + kfree(s); + + return VMCI_SUCCESS; +} +EXPORT_SYMBOL_GPL(vmci_event_unsubscribe); diff --git a/kernel/drivers/misc/vmw_vmci/vmci_event.h b/kernel/drivers/misc/vmw_vmci/vmci_event.h new file mode 100644 index 000000000..7df9b1c0a --- /dev/null +++ b/kernel/drivers/misc/vmw_vmci/vmci_event.h @@ -0,0 +1,25 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#ifndef __VMCI_EVENT_H__ +#define __VMCI_EVENT_H__ + +#include <linux/vmw_vmci_api.h> + +int vmci_event_init(void); +void vmci_event_exit(void); +int vmci_event_dispatch(struct vmci_datagram *msg); + +#endif /*__VMCI_EVENT_H__ */ diff --git a/kernel/drivers/misc/vmw_vmci/vmci_guest.c b/kernel/drivers/misc/vmw_vmci/vmci_guest.c new file mode 100644 index 000000000..189b32519 --- /dev/null +++ b/kernel/drivers/misc/vmw_vmci/vmci_guest.c @@ -0,0 +1,771 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/moduleparam.h>
+#include <linux/interrupt.h>
+#include <linux/highmem.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/io.h>
+#include <linux/vmalloc.h>
+
+#include "vmci_datagram.h"
+#include "vmci_doorbell.h"
+#include "vmci_context.h"
+#include "vmci_driver.h"
+#include "vmci_event.h"
+
+#define PCI_DEVICE_ID_VMWARE_VMCI	0x0740
+
+#define VMCI_UTIL_NUM_RESOURCES 1
+
+static bool vmci_disable_msi;
+module_param_named(disable_msi, vmci_disable_msi, bool, 0);
+MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");
+
+static bool vmci_disable_msix;
+module_param_named(disable_msix, vmci_disable_msix, bool, 0);
+MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");
+
+static u32 ctx_update_sub_id = VMCI_INVALID_ID;
+static u32 vm_context_id = VMCI_INVALID_ID;
+
+struct vmci_guest_device {
+	struct device *dev;	/* PCI device we are attached to */
+	void __iomem *iobase;
+
+	unsigned int irq;
+	unsigned int intr_type;
+	bool exclusive_vectors;
+	struct msix_entry msix_entries[VMCI_MAX_INTRS];
+
+	struct tasklet_struct datagram_tasklet;
+	struct tasklet_struct bm_tasklet;
+
+	void *data_buffer;
+	void *notification_bitmap;
+	dma_addr_t notification_base;
+};
+
+/* vmci_dev singleton device and supporting data */
+struct pci_dev *vmci_pdev;
+static struct vmci_guest_device *vmci_dev_g;
+static DEFINE_SPINLOCK(vmci_dev_spinlock);
+
+static atomic_t vmci_num_guest_devices = ATOMIC_INIT(0);
+
+bool vmci_guest_code_active(void)
+{
+	return atomic_read(&vmci_num_guest_devices) != 0;
+}
+
+u32 vmci_get_vm_context_id(void)
+{
+	if (vm_context_id == VMCI_INVALID_ID) {
+		struct vmci_datagram get_cid_msg;
+		get_cid_msg.dst =
+		    vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+				     VMCI_GET_CONTEXT_ID);
+		get_cid_msg.src = VMCI_ANON_SRC_HANDLE;
+		get_cid_msg.payload_size = 0;
+		vm_context_id = vmci_send_datagram(&get_cid_msg);
+	}
+	return vm_context_id;
+}
+
+/*
+ * VM to hypervisor call mechanism. We use the standard VMware naming
+ * convention since shared code is calling this function as well.
+ */
+int vmci_send_datagram(struct vmci_datagram *dg)
+{
+	unsigned long flags;
+	int result;
+
+	/* Check args. */
+	if (dg == NULL)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	/*
+	 * Need to acquire the spinlock on the device because the datagram
+	 * data may be spread over multiple pages and the monitor may
+	 * interleave device user rpc calls from multiple
+	 * VCPUs. Acquiring the spinlock precludes that
+	 * possibility. We disable interrupts to avoid incoming
+	 * datagrams during a "rep out" and thus possibly re-entering
+	 * this function.
+	 */
+	spin_lock_irqsave(&vmci_dev_spinlock, flags);
+
+	if (vmci_dev_g) {
+		iowrite8_rep(vmci_dev_g->iobase + VMCI_DATA_OUT_ADDR,
+			     dg, VMCI_DG_SIZE(dg));
+		result = ioread32(vmci_dev_g->iobase + VMCI_RESULT_LOW_ADDR);
+	} else {
+		result = VMCI_ERROR_UNAVAILABLE;
+	}
+
+	spin_unlock_irqrestore(&vmci_dev_spinlock, flags);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(vmci_send_datagram);
+
+/*
+ * Gets called with the new context ID when the context is updated
+ * or resumed.
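+ * The new ID arrives as the payload of a VMCI_EVENT_CTX_ID_UPDATE
+ * event datagram; the handler below caches it in vm_context_id.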
+ */
+static void vmci_guest_cid_update(u32 sub_id,
+				  const struct vmci_event_data *event_data,
+				  void *client_data)
+{
+	const struct vmci_event_payld_ctx *ev_payload =
+				vmci_event_data_const_payload(event_data);
+
+	if (sub_id != ctx_update_sub_id) {
+		pr_devel("Invalid subscriber (ID=0x%x)\n", sub_id);
+		return;
+	}
+
+	if (!event_data || ev_payload->context_id == VMCI_INVALID_ID) {
+		pr_devel("Invalid event data\n");
+		return;
+	}
+
+	pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event (type=%d)\n",
+		 vm_context_id, ev_payload->context_id, event_data->event);
+
+	vm_context_id = ev_payload->context_id;
+}
+
+/*
+ * Verify that the host supports the hypercalls we need. If it does not,
+ * try to find fallback hypercalls and use those instead. Returns 0 if
+ * the required hypercalls (or fallback hypercalls) are supported by the
+ * host, an error code otherwise.
+ */
+static int vmci_check_host_caps(struct pci_dev *pdev)
+{
+	bool result;
+	struct vmci_resource_query_msg *msg;
+	u32 msg_size = sizeof(struct vmci_resource_query_hdr) +
+				VMCI_UTIL_NUM_RESOURCES * sizeof(u32);
+	struct vmci_datagram *check_msg;
+
+	check_msg = kmalloc(msg_size, GFP_KERNEL);
+	if (!check_msg) {
+		dev_err(&pdev->dev, "%s: Insufficient memory\n", __func__);
+		return -ENOMEM;
+	}
+
+	check_msg->dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+					  VMCI_RESOURCES_QUERY);
+	check_msg->src = VMCI_ANON_SRC_HANDLE;
+	check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE;
+	msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(check_msg);
+
+	msg->num_resources = VMCI_UTIL_NUM_RESOURCES;
+	msg->resources[0] = VMCI_GET_CONTEXT_ID;
+
+	/* Checks that hypercalls are supported */
+	result = vmci_send_datagram(check_msg) == 0x01;
+	kfree(check_msg);
+
+	dev_dbg(&pdev->dev, "%s: Host capability check: %s\n",
+		__func__, result ? "PASSED" : "FAILED");
+
+	/* We need the vector. There are no fallbacks. */
+	return result ? 0 : -ENXIO;
+}
+
+/*
+ * Reads datagrams from the data in port and dispatches them. We
+ * always start reading datagrams into only the first page of the
+ * datagram buffer. If the datagrams don't fit into one page, we
+ * use the maximum datagram buffer size for the remainder of the
+ * invocation. This is a simple heuristic for not penalizing
+ * small datagrams.
+ *
+ * This function assumes that it has exclusive access to the data
+ * in port for the duration of the call.
+ */
+static void vmci_dispatch_dgs(unsigned long data)
+{
+	struct vmci_guest_device *vmci_dev = (struct vmci_guest_device *)data;
+	u8 *dg_in_buffer = vmci_dev->data_buffer;
+	struct vmci_datagram *dg;
+	size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
+	size_t current_dg_in_buffer_size = PAGE_SIZE;
+	size_t remaining_bytes;
+
+	BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);
+
+	ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
+		    vmci_dev->data_buffer, current_dg_in_buffer_size);
+	dg = (struct vmci_datagram *)dg_in_buffer;
+	remaining_bytes = current_dg_in_buffer_size;
+
+	while (dg->dst.resource != VMCI_INVALID_ID ||
+	       remaining_bytes > PAGE_SIZE) {
+		unsigned dg_in_size;
+
+		/*
+		 * When the input buffer spans multiple pages, a datagram can
+		 * start on any page boundary in the buffer.
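+		 * A destination resource of VMCI_INVALID_ID marks the
+		 * end of the datagrams in the current page, so the
+		 * branch below skips ahead to the next page boundary
+		 * and continues scanning there.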
+		 */
+		if (dg->dst.resource == VMCI_INVALID_ID) {
+			dg = (struct vmci_datagram *)roundup(
+				(uintptr_t)dg + 1, PAGE_SIZE);
+			remaining_bytes =
+				(size_t)(dg_in_buffer +
+					 current_dg_in_buffer_size -
+					 (u8 *)dg);
+			continue;
+		}
+
+		dg_in_size = VMCI_DG_SIZE_ALIGNED(dg);
+
+		if (dg_in_size <= dg_in_buffer_size) {
+			int result;
+
+			/*
+			 * If the remaining bytes in the datagram
+			 * buffer don't contain the complete
+			 * datagram, we first make sure we have enough
+			 * room for it and then we read the remainder
+			 * of the datagram and possibly any following
+			 * datagrams.
+			 */
+			if (dg_in_size > remaining_bytes) {
+				if (remaining_bytes !=
+				    current_dg_in_buffer_size) {
+
+					/*
+					 * We move the partial
+					 * datagram to the front and
+					 * read the remainder of the
+					 * datagram and possibly
+					 * following calls into the
+					 * following bytes.
+					 */
+					memmove(dg_in_buffer, dg_in_buffer +
+						current_dg_in_buffer_size -
+						remaining_bytes,
+						remaining_bytes);
+					dg = (struct vmci_datagram *)
+					    dg_in_buffer;
+				}
+
+				if (current_dg_in_buffer_size !=
+				    dg_in_buffer_size)
+					current_dg_in_buffer_size =
+					    dg_in_buffer_size;
+
+				ioread8_rep(vmci_dev->iobase +
+						VMCI_DATA_IN_ADDR,
+					vmci_dev->data_buffer +
+						remaining_bytes,
+					current_dg_in_buffer_size -
+						remaining_bytes);
+			}
+
+			/*
+			 * We special case event datagrams from the
+			 * hypervisor.
+			 */
+			if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
+			    dg->dst.resource == VMCI_EVENT_HANDLER) {
+				result = vmci_event_dispatch(dg);
+			} else {
+				result = vmci_datagram_invoke_guest_handler(dg);
+			}
+			if (result < VMCI_SUCCESS)
+				dev_dbg(vmci_dev->dev,
+					"Datagram with resource (ID=0x%x) failed (err=%d)\n",
+					dg->dst.resource, result);
+
+			/* On to the next datagram. */
+			dg = (struct vmci_datagram *)((u8 *)dg +
+						      dg_in_size);
+		} else {
+			size_t bytes_to_skip;
+
+			/*
+			 * Datagram doesn't fit in datagram buffer of maximal
+			 * size. We drop it.
+			 */
+			dev_dbg(vmci_dev->dev,
+				"Failed to receive datagram (size=%u bytes)\n",
+				dg_in_size);
+
+			bytes_to_skip = dg_in_size - remaining_bytes;
+			if (current_dg_in_buffer_size != dg_in_buffer_size)
+				current_dg_in_buffer_size = dg_in_buffer_size;
+
+			for (;;) {
+				ioread8_rep(vmci_dev->iobase +
+						VMCI_DATA_IN_ADDR,
+					vmci_dev->data_buffer,
+					current_dg_in_buffer_size);
+				if (bytes_to_skip <= current_dg_in_buffer_size)
+					break;
+
+				bytes_to_skip -= current_dg_in_buffer_size;
+			}
+			dg = (struct vmci_datagram *)(dg_in_buffer +
+						      bytes_to_skip);
+		}
+
+		remaining_bytes =
+		    (size_t) (dg_in_buffer + current_dg_in_buffer_size -
+			      (u8 *)dg);
+
+		if (remaining_bytes < VMCI_DG_HEADERSIZE) {
+			/* Get the next batch of datagrams. */
+
+			ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
+				    vmci_dev->data_buffer,
+				    current_dg_in_buffer_size);
+			dg = (struct vmci_datagram *)dg_in_buffer;
+			remaining_bytes = current_dg_in_buffer_size;
+		}
+	}
+}
+
+/*
+ * Scans the notification bitmap for raised flags, clears them
+ * and handles the notifications.
+ */
+static void vmci_process_bitmap(unsigned long data)
+{
+	struct vmci_guest_device *dev = (struct vmci_guest_device *)data;
+
+	if (!dev->notification_bitmap) {
+		dev_dbg(dev->dev, "No bitmap present in %s\n", __func__);
+		return;
+	}
+
+	vmci_dbell_scan_notification_entries(dev->notification_bitmap);
+}
+
+/*
+ * Enable MSI-X. Try exclusive vectors first, then shared vectors.
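+ * With exclusive vectors, datagram and notification-bitmap interrupts
+ * arrive on separate vectors (handled by vmci_interrupt() and
+ * vmci_interrupt_bm() below); with a single shared vector, the ICR
+ * register is read to work out which source fired.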
+ */ +static int vmci_enable_msix(struct pci_dev *pdev, + struct vmci_guest_device *vmci_dev) +{ + int i; + int result; + + for (i = 0; i < VMCI_MAX_INTRS; ++i) { + vmci_dev->msix_entries[i].entry = i; + vmci_dev->msix_entries[i].vector = i; + } + + result = pci_enable_msix_exact(pdev, + vmci_dev->msix_entries, VMCI_MAX_INTRS); + if (result == 0) + vmci_dev->exclusive_vectors = true; + else if (result == -ENOSPC) + result = pci_enable_msix_exact(pdev, vmci_dev->msix_entries, 1); + + return result; +} + +/* + * Interrupt handler for legacy or MSI interrupt, or for first MSI-X + * interrupt (vector VMCI_INTR_DATAGRAM). + */ +static irqreturn_t vmci_interrupt(int irq, void *_dev) +{ + struct vmci_guest_device *dev = _dev; + + /* + * If we are using MSI-X with exclusive vectors then we simply schedule + * the datagram tasklet, since we know the interrupt was meant for us. + * Otherwise we must read the ICR to determine what to do. + */ + + if (dev->intr_type == VMCI_INTR_TYPE_MSIX && dev->exclusive_vectors) { + tasklet_schedule(&dev->datagram_tasklet); + } else { + unsigned int icr; + + /* Acknowledge interrupt and determine what needs doing. */ + icr = ioread32(dev->iobase + VMCI_ICR_ADDR); + if (icr == 0 || icr == ~0) + return IRQ_NONE; + + if (icr & VMCI_ICR_DATAGRAM) { + tasklet_schedule(&dev->datagram_tasklet); + icr &= ~VMCI_ICR_DATAGRAM; + } + + if (icr & VMCI_ICR_NOTIFICATION) { + tasklet_schedule(&dev->bm_tasklet); + icr &= ~VMCI_ICR_NOTIFICATION; + } + + if (icr != 0) + dev_warn(dev->dev, + "Ignoring unknown interrupt cause (%d)\n", + icr); + } + + return IRQ_HANDLED; +} + +/* + * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION, + * which is for the notification bitmap. Will only get called if we are + * using MSI-X with exclusive vectors. + */ +static irqreturn_t vmci_interrupt_bm(int irq, void *_dev) +{ + struct vmci_guest_device *dev = _dev; + + /* For MSI-X we can just assume it was meant for us. */ + tasklet_schedule(&dev->bm_tasklet); + + return IRQ_HANDLED; +} + +/* + * Most of the initialization at module load time is done here. 
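+ * In order: enable and map the PCI device, allocate the datagram
+ * buffer, verify the datagram capability, optionally allocate and
+ * register the notification bitmap, check host capabilities,
+ * subscribe to VMCI_EVENT_CTX_ID_UPDATE, and finally set up
+ * interrupts (MSI-X first, then MSI, then legacy INTx).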
+ */ +static int vmci_guest_probe_device(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct vmci_guest_device *vmci_dev; + void __iomem *iobase; + unsigned int capabilities; + unsigned long cmd; + int vmci_err; + int error; + + dev_dbg(&pdev->dev, "Probing for vmci/PCI guest device\n"); + + error = pcim_enable_device(pdev); + if (error) { + dev_err(&pdev->dev, + "Failed to enable VMCI device: %d\n", error); + return error; + } + + error = pcim_iomap_regions(pdev, 1 << 0, KBUILD_MODNAME); + if (error) { + dev_err(&pdev->dev, "Failed to reserve/map IO regions\n"); + return error; + } + + iobase = pcim_iomap_table(pdev)[0]; + + dev_info(&pdev->dev, "Found VMCI PCI device at %#lx, irq %u\n", + (unsigned long)iobase, pdev->irq); + + vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL); + if (!vmci_dev) { + dev_err(&pdev->dev, + "Can't allocate memory for VMCI device\n"); + return -ENOMEM; + } + + vmci_dev->dev = &pdev->dev; + vmci_dev->intr_type = VMCI_INTR_TYPE_INTX; + vmci_dev->exclusive_vectors = false; + vmci_dev->iobase = iobase; + + tasklet_init(&vmci_dev->datagram_tasklet, + vmci_dispatch_dgs, (unsigned long)vmci_dev); + tasklet_init(&vmci_dev->bm_tasklet, + vmci_process_bitmap, (unsigned long)vmci_dev); + + vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE); + if (!vmci_dev->data_buffer) { + dev_err(&pdev->dev, + "Can't allocate memory for datagram buffer\n"); + return -ENOMEM; + } + + pci_set_master(pdev); /* To enable queue_pair functionality. */ + + /* + * Verify that the VMCI Device supports the capabilities that + * we need. If the device is missing capabilities that we would + * like to use, check for fallback capabilities and use those + * instead (so we can run a new VM on old hosts). Fail the load if + * a required capability is missing and there is no fallback. + * + * Right now, we need datagrams. There are no fallbacks. + */ + capabilities = ioread32(vmci_dev->iobase + VMCI_CAPS_ADDR); + if (!(capabilities & VMCI_CAPS_DATAGRAM)) { + dev_err(&pdev->dev, "Device does not support datagrams\n"); + error = -ENXIO; + goto err_free_data_buffer; + } + + /* + * If the hardware supports notifications, we will use that as + * well. + */ + if (capabilities & VMCI_CAPS_NOTIFICATIONS) { + vmci_dev->notification_bitmap = dma_alloc_coherent( + &pdev->dev, PAGE_SIZE, &vmci_dev->notification_base, + GFP_KERNEL); + if (!vmci_dev->notification_bitmap) { + dev_warn(&pdev->dev, + "Unable to allocate notification bitmap\n"); + } else { + memset(vmci_dev->notification_bitmap, 0, PAGE_SIZE); + capabilities |= VMCI_CAPS_NOTIFICATIONS; + } + } + + dev_info(&pdev->dev, "Using capabilities 0x%x\n", capabilities); + + /* Let the host know which capabilities we intend to use. */ + iowrite32(capabilities, vmci_dev->iobase + VMCI_CAPS_ADDR); + + /* Set up global device so that we can start sending datagrams */ + spin_lock_irq(&vmci_dev_spinlock); + vmci_dev_g = vmci_dev; + vmci_pdev = pdev; + spin_unlock_irq(&vmci_dev_spinlock); + + /* + * Register notification bitmap with device if that capability is + * used. + */ + if (capabilities & VMCI_CAPS_NOTIFICATIONS) { + unsigned long bitmap_ppn = + vmci_dev->notification_base >> PAGE_SHIFT; + if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) { + dev_warn(&pdev->dev, + "VMCI device unable to register notification bitmap with PPN 0x%x\n", + (u32) bitmap_ppn); + error = -ENXIO; + goto err_remove_vmci_dev_g; + } + } + + /* Check host capabilities. 
*/ + error = vmci_check_host_caps(pdev); + if (error) + goto err_remove_bitmap; + + /* Enable device. */ + + /* + * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can + * update the internal context id when needed. + */ + vmci_err = vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE, + vmci_guest_cid_update, NULL, + &ctx_update_sub_id); + if (vmci_err < VMCI_SUCCESS) + dev_warn(&pdev->dev, + "Failed to subscribe to event (type=%d): %d\n", + VMCI_EVENT_CTX_ID_UPDATE, vmci_err); + + /* + * Enable interrupts. Try MSI-X first, then MSI, and then fallback on + * legacy interrupts. + */ + if (!vmci_disable_msix && !vmci_enable_msix(pdev, vmci_dev)) { + vmci_dev->intr_type = VMCI_INTR_TYPE_MSIX; + vmci_dev->irq = vmci_dev->msix_entries[0].vector; + } else if (!vmci_disable_msi && !pci_enable_msi(pdev)) { + vmci_dev->intr_type = VMCI_INTR_TYPE_MSI; + vmci_dev->irq = pdev->irq; + } else { + vmci_dev->intr_type = VMCI_INTR_TYPE_INTX; + vmci_dev->irq = pdev->irq; + } + + /* + * Request IRQ for legacy or MSI interrupts, or for first + * MSI-X vector. + */ + error = request_irq(vmci_dev->irq, vmci_interrupt, IRQF_SHARED, + KBUILD_MODNAME, vmci_dev); + if (error) { + dev_err(&pdev->dev, "Irq %u in use: %d\n", + vmci_dev->irq, error); + goto err_disable_msi; + } + + /* + * For MSI-X with exclusive vectors we need to request an + * interrupt for each vector so that we get a separate + * interrupt handler routine. This allows us to distinguish + * between the vectors. + */ + if (vmci_dev->exclusive_vectors) { + error = request_irq(vmci_dev->msix_entries[1].vector, + vmci_interrupt_bm, 0, KBUILD_MODNAME, + vmci_dev); + if (error) { + dev_err(&pdev->dev, + "Failed to allocate irq %u: %d\n", + vmci_dev->msix_entries[1].vector, error); + goto err_free_irq; + } + } + + dev_dbg(&pdev->dev, "Registered device\n"); + + atomic_inc(&vmci_num_guest_devices); + + /* Enable specific interrupt bits. */ + cmd = VMCI_IMR_DATAGRAM; + if (capabilities & VMCI_CAPS_NOTIFICATIONS) + cmd |= VMCI_IMR_NOTIFICATION; + iowrite32(cmd, vmci_dev->iobase + VMCI_IMR_ADDR); + + /* Enable interrupts. 
*/ + iowrite32(VMCI_CONTROL_INT_ENABLE, + vmci_dev->iobase + VMCI_CONTROL_ADDR); + + pci_set_drvdata(pdev, vmci_dev); + return 0; + +err_free_irq: + free_irq(vmci_dev->irq, vmci_dev); + tasklet_kill(&vmci_dev->datagram_tasklet); + tasklet_kill(&vmci_dev->bm_tasklet); + +err_disable_msi: + if (vmci_dev->intr_type == VMCI_INTR_TYPE_MSIX) + pci_disable_msix(pdev); + else if (vmci_dev->intr_type == VMCI_INTR_TYPE_MSI) + pci_disable_msi(pdev); + + vmci_err = vmci_event_unsubscribe(ctx_update_sub_id); + if (vmci_err < VMCI_SUCCESS) + dev_warn(&pdev->dev, + "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n", + VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err); + +err_remove_bitmap: + if (vmci_dev->notification_bitmap) { + iowrite32(VMCI_CONTROL_RESET, + vmci_dev->iobase + VMCI_CONTROL_ADDR); + dma_free_coherent(&pdev->dev, PAGE_SIZE, + vmci_dev->notification_bitmap, + vmci_dev->notification_base); + } + +err_remove_vmci_dev_g: + spin_lock_irq(&vmci_dev_spinlock); + vmci_pdev = NULL; + vmci_dev_g = NULL; + spin_unlock_irq(&vmci_dev_spinlock); + +err_free_data_buffer: + vfree(vmci_dev->data_buffer); + + /* The rest are managed resources and will be freed by PCI core */ + return error; +} + +static void vmci_guest_remove_device(struct pci_dev *pdev) +{ + struct vmci_guest_device *vmci_dev = pci_get_drvdata(pdev); + int vmci_err; + + dev_dbg(&pdev->dev, "Removing device\n"); + + atomic_dec(&vmci_num_guest_devices); + + vmci_qp_guest_endpoints_exit(); + + vmci_err = vmci_event_unsubscribe(ctx_update_sub_id); + if (vmci_err < VMCI_SUCCESS) + dev_warn(&pdev->dev, + "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n", + VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err); + + spin_lock_irq(&vmci_dev_spinlock); + vmci_dev_g = NULL; + vmci_pdev = NULL; + spin_unlock_irq(&vmci_dev_spinlock); + + dev_dbg(&pdev->dev, "Resetting vmci device\n"); + iowrite32(VMCI_CONTROL_RESET, vmci_dev->iobase + VMCI_CONTROL_ADDR); + + /* + * Free IRQ and then disable MSI/MSI-X as appropriate. For + * MSI-X, we might have multiple vectors, each with their own + * IRQ, which we must free too. + */ + free_irq(vmci_dev->irq, vmci_dev); + if (vmci_dev->intr_type == VMCI_INTR_TYPE_MSIX) { + if (vmci_dev->exclusive_vectors) + free_irq(vmci_dev->msix_entries[1].vector, vmci_dev); + pci_disable_msix(pdev); + } else if (vmci_dev->intr_type == VMCI_INTR_TYPE_MSI) { + pci_disable_msi(pdev); + } + + tasklet_kill(&vmci_dev->datagram_tasklet); + tasklet_kill(&vmci_dev->bm_tasklet); + + if (vmci_dev->notification_bitmap) { + /* + * The device reset above cleared the bitmap state of the + * device, so we can safely free it here. 
+ */ + + dma_free_coherent(&pdev->dev, PAGE_SIZE, + vmci_dev->notification_bitmap, + vmci_dev->notification_base); + } + + vfree(vmci_dev->data_buffer); + + /* The rest are managed resources and will be freed by PCI core */ +} + +static const struct pci_device_id vmci_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI), }, + { 0 }, +}; +MODULE_DEVICE_TABLE(pci, vmci_ids); + +static struct pci_driver vmci_guest_driver = { + .name = KBUILD_MODNAME, + .id_table = vmci_ids, + .probe = vmci_guest_probe_device, + .remove = vmci_guest_remove_device, +}; + +int __init vmci_guest_init(void) +{ + return pci_register_driver(&vmci_guest_driver); +} + +void __exit vmci_guest_exit(void) +{ + pci_unregister_driver(&vmci_guest_driver); +} diff --git a/kernel/drivers/misc/vmw_vmci/vmci_handle_array.c b/kernel/drivers/misc/vmw_vmci/vmci_handle_array.c new file mode 100644 index 000000000..344973a0f --- /dev/null +++ b/kernel/drivers/misc/vmw_vmci/vmci_handle_array.c @@ -0,0 +1,142 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include <linux/slab.h> +#include "vmci_handle_array.h" + +static size_t handle_arr_calc_size(size_t capacity) +{ + return sizeof(struct vmci_handle_arr) + + capacity * sizeof(struct vmci_handle); +} + +struct vmci_handle_arr *vmci_handle_arr_create(size_t capacity) +{ + struct vmci_handle_arr *array; + + if (capacity == 0) + capacity = VMCI_HANDLE_ARRAY_DEFAULT_SIZE; + + array = kmalloc(handle_arr_calc_size(capacity), GFP_ATOMIC); + if (!array) + return NULL; + + array->capacity = capacity; + array->size = 0; + + return array; +} + +void vmci_handle_arr_destroy(struct vmci_handle_arr *array) +{ + kfree(array); +} + +void vmci_handle_arr_append_entry(struct vmci_handle_arr **array_ptr, + struct vmci_handle handle) +{ + struct vmci_handle_arr *array = *array_ptr; + + if (unlikely(array->size >= array->capacity)) { + /* reallocate. */ + struct vmci_handle_arr *new_array; + size_t new_capacity = array->capacity * VMCI_ARR_CAP_MULT; + size_t new_size = handle_arr_calc_size(new_capacity); + + new_array = krealloc(array, new_size, GFP_ATOMIC); + if (!new_array) + return; + + new_array->capacity = new_capacity; + *array_ptr = array = new_array; + } + + array->entries[array->size] = handle; + array->size++; +} + +/* + * Handle that was removed, VMCI_INVALID_HANDLE if entry not found. + */ +struct vmci_handle vmci_handle_arr_remove_entry(struct vmci_handle_arr *array, + struct vmci_handle entry_handle) +{ + struct vmci_handle handle = VMCI_INVALID_HANDLE; + size_t i; + + for (i = 0; i < array->size; i++) { + if (vmci_handle_is_equal(array->entries[i], entry_handle)) { + handle = array->entries[i]; + array->size--; + array->entries[i] = array->entries[array->size]; + array->entries[array->size] = VMCI_INVALID_HANDLE; + break; + } + } + + return handle; +} + +/* + * Handle that was removed, VMCI_INVALID_HANDLE if array was empty. 
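+ *
+ * A typical drain loop (editor's sketch; 'arr' is an illustrative
+ * name, not part of this file):
+ *
+ *	struct vmci_handle h;
+ *
+ *	while (vmci_handle_arr_get_size(arr) > 0) {
+ *		h = vmci_handle_arr_remove_tail(arr);
+ *		// use h; it is never VMCI_INVALID_HANDLE here
+ *	}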
+ */ +struct vmci_handle vmci_handle_arr_remove_tail(struct vmci_handle_arr *array) +{ + struct vmci_handle handle = VMCI_INVALID_HANDLE; + + if (array->size) { + array->size--; + handle = array->entries[array->size]; + array->entries[array->size] = VMCI_INVALID_HANDLE; + } + + return handle; +} + +/* + * Handle at given index, VMCI_INVALID_HANDLE if invalid index. + */ +struct vmci_handle +vmci_handle_arr_get_entry(const struct vmci_handle_arr *array, size_t index) +{ + if (unlikely(index >= array->size)) + return VMCI_INVALID_HANDLE; + + return array->entries[index]; +} + +bool vmci_handle_arr_has_entry(const struct vmci_handle_arr *array, + struct vmci_handle entry_handle) +{ + size_t i; + + for (i = 0; i < array->size; i++) + if (vmci_handle_is_equal(array->entries[i], entry_handle)) + return true; + + return false; +} + +/* + * NULL if the array is empty. Otherwise, a pointer to the array + * of VMCI handles in the handle array. + */ +struct vmci_handle *vmci_handle_arr_get_handles(struct vmci_handle_arr *array) +{ + if (array->size) + return array->entries; + + return NULL; +} diff --git a/kernel/drivers/misc/vmw_vmci/vmci_handle_array.h b/kernel/drivers/misc/vmw_vmci/vmci_handle_array.h new file mode 100644 index 000000000..b5f3a7f98 --- /dev/null +++ b/kernel/drivers/misc/vmw_vmci/vmci_handle_array.h @@ -0,0 +1,52 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#ifndef _VMCI_HANDLE_ARRAY_H_ +#define _VMCI_HANDLE_ARRAY_H_ + +#include <linux/vmw_vmci_defs.h> +#include <linux/types.h> + +#define VMCI_HANDLE_ARRAY_DEFAULT_SIZE 4 +#define VMCI_ARR_CAP_MULT 2 /* Array capacity multiplier */ + +struct vmci_handle_arr { + size_t capacity; + size_t size; + struct vmci_handle entries[]; +}; + +struct vmci_handle_arr *vmci_handle_arr_create(size_t capacity); +void vmci_handle_arr_destroy(struct vmci_handle_arr *array); +void vmci_handle_arr_append_entry(struct vmci_handle_arr **array_ptr, + struct vmci_handle handle); +struct vmci_handle vmci_handle_arr_remove_entry(struct vmci_handle_arr *array, + struct vmci_handle + entry_handle); +struct vmci_handle vmci_handle_arr_remove_tail(struct vmci_handle_arr *array); +struct vmci_handle +vmci_handle_arr_get_entry(const struct vmci_handle_arr *array, size_t index); +bool vmci_handle_arr_has_entry(const struct vmci_handle_arr *array, + struct vmci_handle entry_handle); +struct vmci_handle *vmci_handle_arr_get_handles(struct vmci_handle_arr *array); + +static inline size_t vmci_handle_arr_get_size( + const struct vmci_handle_arr *array) +{ + return array->size; +} + + +#endif /* _VMCI_HANDLE_ARRAY_H_ */ diff --git a/kernel/drivers/misc/vmw_vmci/vmci_host.c b/kernel/drivers/misc/vmw_vmci/vmci_host.c new file mode 100644 index 000000000..a721b5d8a --- /dev/null +++ b/kernel/drivers/misc/vmw_vmci/vmci_host.c @@ -0,0 +1,1046 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include <linux/vmw_vmci_defs.h> +#include <linux/vmw_vmci_api.h> +#include <linux/moduleparam.h> +#include <linux/miscdevice.h> +#include <linux/interrupt.h> +#include <linux/highmem.h> +#include <linux/atomic.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/file.h> +#include <linux/init.h> +#include <linux/poll.h> +#include <linux/pci.h> +#include <linux/smp.h> +#include <linux/fs.h> +#include <linux/io.h> + +#include "vmci_handle_array.h" +#include "vmci_queue_pair.h" +#include "vmci_datagram.h" +#include "vmci_doorbell.h" +#include "vmci_resource.h" +#include "vmci_context.h" +#include "vmci_driver.h" +#include "vmci_event.h" + +#define VMCI_UTIL_NUM_RESOURCES 1 + +enum { + VMCI_NOTIFY_RESOURCE_QUEUE_PAIR = 0, + VMCI_NOTIFY_RESOURCE_DOOR_BELL = 1, +}; + +enum { + VMCI_NOTIFY_RESOURCE_ACTION_NOTIFY = 0, + VMCI_NOTIFY_RESOURCE_ACTION_CREATE = 1, + VMCI_NOTIFY_RESOURCE_ACTION_DESTROY = 2, +}; + +/* + * VMCI driver initialization. This block can also be used to + * pass initial group membership etc. + */ +struct vmci_init_blk { + u32 cid; + u32 flags; +}; + +/* VMCIqueue_pairAllocInfo_VMToVM */ +struct vmci_qp_alloc_info_vmvm { + struct vmci_handle handle; + u32 peer; + u32 flags; + u64 produce_size; + u64 consume_size; + u64 produce_page_file; /* User VA. */ + u64 consume_page_file; /* User VA. */ + u64 produce_page_file_size; /* Size of the file name array. */ + u64 consume_page_file_size; /* Size of the file name array. */ + s32 result; + u32 _pad; +}; + +/* VMCISetNotifyInfo: Used to pass notify flag's address to the host driver. */ +struct vmci_set_notify_info { + u64 notify_uva; + s32 result; + u32 _pad; +}; + +/* + * Per-instance host state + */ +struct vmci_host_dev { + struct vmci_ctx *context; + int user_version; + enum vmci_obj_type ct_type; + struct mutex lock; /* Mutex lock for vmci context access */ +}; + +static struct vmci_ctx *host_context; +static bool vmci_host_device_initialized; +static atomic_t vmci_host_active_users = ATOMIC_INIT(0); + +/* + * Determines whether the VMCI host personality is + * available. Since the core functionality of the host driver is + * always present, all guests could possibly use the host + * personality. However, to minimize the deviation from the + * pre-unified driver state of affairs, we only consider the host + * device active if there is no active guest device or if there + * are VMX'en with active VMCI contexts using the host device. + */ +bool vmci_host_code_active(void) +{ + return vmci_host_device_initialized && + (!vmci_guest_code_active() || + atomic_read(&vmci_host_active_users) > 0); +} + +/* + * Called on open of /dev/vmci. 
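+ * Allocates a per-open vmci_host_dev with ct_type VMCIOBJ_NOT_SET;
+ * the VMCI context itself is only created later, by the
+ * IOCTL_VMCI_INIT_CONTEXT ioctl (see vmci_host_do_init_context()).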
+ */
+static int vmci_host_open(struct inode *inode, struct file *filp)
+{
+	struct vmci_host_dev *vmci_host_dev;
+
+	vmci_host_dev = kzalloc(sizeof(struct vmci_host_dev), GFP_KERNEL);
+	if (vmci_host_dev == NULL)
+		return -ENOMEM;
+
+	vmci_host_dev->ct_type = VMCIOBJ_NOT_SET;
+	mutex_init(&vmci_host_dev->lock);
+	filp->private_data = vmci_host_dev;
+
+	return 0;
+}
+
+/*
+ * Called on close of /dev/vmci, most often when the process
+ * exits.
+ */
+static int vmci_host_close(struct inode *inode, struct file *filp)
+{
+	struct vmci_host_dev *vmci_host_dev = filp->private_data;
+
+	if (vmci_host_dev->ct_type == VMCIOBJ_CONTEXT) {
+		vmci_ctx_destroy(vmci_host_dev->context);
+		vmci_host_dev->context = NULL;
+
+		/*
+		 * The number of active contexts is used to track whether any
+		 * VMX'en are using the host personality. It is incremented
+		 * when a context is created through the
+		 * IOCTL_VMCI_INIT_CONTEXT ioctl.
+		 */
+		atomic_dec(&vmci_host_active_users);
+	}
+	vmci_host_dev->ct_type = VMCIOBJ_NOT_SET;
+
+	kfree(vmci_host_dev);
+	filp->private_data = NULL;
+	return 0;
+}
+
+/*
+ * This is used to wake up the VMX when a VMCI call arrives, or
+ * to wake up select() or poll() at the next clock tick.
+ */
+static unsigned int vmci_host_poll(struct file *filp, poll_table *wait)
+{
+	struct vmci_host_dev *vmci_host_dev = filp->private_data;
+	struct vmci_ctx *context = vmci_host_dev->context;
+	unsigned int mask = 0;
+
+	if (vmci_host_dev->ct_type == VMCIOBJ_CONTEXT) {
+		/* Check for VMCI calls to this VM context. */
+		if (wait)
+			poll_wait(filp, &context->host_context.wait_queue,
+				  wait);
+
+		spin_lock(&context->lock);
+		if (context->pending_datagrams > 0 ||
+		    vmci_handle_arr_get_size(
+				context->pending_doorbell_array) > 0) {
+			mask = POLLIN;
+		}
+		spin_unlock(&context->lock);
+	}
+	return mask;
+}
+
+/*
+ * Copies the handles of a handle array into a user buffer, and
+ * returns the new length in user_buf_size. If the copy to the
+ * user buffer fails, the function still returns VMCI_SUCCESS,
+ * but retval != 0.
+ */
+static int drv_cp_harray_to_user(void __user *user_buf_uva,
+				 u64 *user_buf_size,
+				 struct vmci_handle_arr *handle_array,
+				 int *retval)
+{
+	u32 array_size = 0;
+	struct vmci_handle *handles;
+
+	if (handle_array)
+		array_size = vmci_handle_arr_get_size(handle_array);
+
+	if (array_size * sizeof(*handles) > *user_buf_size)
+		return VMCI_ERROR_MORE_DATA;
+
+	*user_buf_size = array_size * sizeof(*handles);
+	if (*user_buf_size)
+		*retval = copy_to_user(user_buf_uva,
+				       vmci_handle_arr_get_handles
+				       (handle_array), *user_buf_size);
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Sets up a given context for notify to work. Maps the notify
+ * boolean in user VA into kernel space.
+ */
+static int vmci_host_setup_notify(struct vmci_ctx *context,
+				  unsigned long uva)
+{
+	int retval;
+
+	if (context->notify_page) {
+		pr_devel("%s: Notify mechanism is already set up\n", __func__);
+		return VMCI_ERROR_DUPLICATE_ENTRY;
+	}
+
+	/*
+	 * We are using 'bool' internally, but let's make sure we are
+	 * explicit about the size.
+	 */
+	BUILD_BUG_ON(sizeof(bool) != sizeof(u8));
+	if (!access_ok(VERIFY_WRITE, (void __user *)uva, sizeof(u8)))
+		return VMCI_ERROR_GENERIC;
+
+	/*
+	 * Lock the physical page backing the given user VA.
+	 */
+	retval = get_user_pages_fast(uva, 1, 1, &context->notify_page);
+	if (retval != 1) {
+		context->notify_page = NULL;
+		return VMCI_ERROR_GENERIC;
+	}
+
+	/*
+	 * Map the locked page and set up the notify pointer.
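+	 * kmap() gives us the page's kernel address; adding the offset
+	 * of the user VA within the page yields a kernel pointer to
+	 * the very byte the VMX polls, so the boolean is shared
+	 * directly with user space.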
+	 */
+	context->notify = kmap(context->notify_page) + (uva & (PAGE_SIZE - 1));
+	vmci_ctx_check_signal_notify(context);
+
+	return VMCI_SUCCESS;
+}
+
+static int vmci_host_get_version(struct vmci_host_dev *vmci_host_dev,
+				 unsigned int cmd, void __user *uptr)
+{
+	if (cmd == IOCTL_VMCI_VERSION2) {
+		int __user *vptr = uptr;
+		if (get_user(vmci_host_dev->user_version, vptr))
+			return -EFAULT;
+	}
+
+	/*
+	 * The basic logic here is:
+	 *
+	 * If the user sends in a version of 0 tell it our version.
+	 * If the user didn't send in a version, tell it our version.
+	 * If the user sent in an old version, tell it -its- version.
+	 * If the user sent in a newer version, tell it our version.
+	 *
+	 * The rationale behind telling the caller its version is that
+	 * Workstation 6.5 required that the VMX and the VMCI kernel
+	 * module be version synced. All new VMX users will be
+	 * programmed to handle the VMCI kernel module version.
+	 */
+
+	if (vmci_host_dev->user_version > 0 &&
+	    vmci_host_dev->user_version < VMCI_VERSION_HOSTQP) {
+		return vmci_host_dev->user_version;
+	}
+
+	return VMCI_VERSION;
+}
+
+#define vmci_ioctl_err(fmt, ...)	\
+	pr_devel("%s: " fmt, ioctl_name, ##__VA_ARGS__)
+
+static int vmci_host_do_init_context(struct vmci_host_dev *vmci_host_dev,
+				     const char *ioctl_name,
+				     void __user *uptr)
+{
+	struct vmci_init_blk init_block;
+	const struct cred *cred;
+	int retval;
+
+	if (copy_from_user(&init_block, uptr, sizeof(init_block))) {
+		vmci_ioctl_err("error reading init block\n");
+		return -EFAULT;
+	}
+
+	mutex_lock(&vmci_host_dev->lock);
+
+	if (vmci_host_dev->ct_type != VMCIOBJ_NOT_SET) {
+		vmci_ioctl_err("received VMCI init on initialized handle\n");
+		retval = -EINVAL;
+		goto out;
+	}
+
+	if (init_block.flags & ~VMCI_PRIVILEGE_FLAG_RESTRICTED) {
+		vmci_ioctl_err("unsupported VMCI restriction flag\n");
+		retval = -EINVAL;
+		goto out;
+	}
+
+	cred = get_current_cred();
+	vmci_host_dev->context = vmci_ctx_create(init_block.cid,
+						 init_block.flags, 0,
+						 vmci_host_dev->user_version,
+						 cred);
+	put_cred(cred);
+	if (IS_ERR(vmci_host_dev->context)) {
+		retval = PTR_ERR(vmci_host_dev->context);
+		vmci_ioctl_err("error initializing context\n");
+		goto out;
+	}
+
+	/*
+	 * Copy the cid to user level; we do this to allow the VMX
+	 * to enforce its policy on cid generation.
+ */ + init_block.cid = vmci_ctx_get_id(vmci_host_dev->context); + if (copy_to_user(uptr, &init_block, sizeof(init_block))) { + vmci_ctx_destroy(vmci_host_dev->context); + vmci_host_dev->context = NULL; + vmci_ioctl_err("error writing init block\n"); + retval = -EFAULT; + goto out; + } + + vmci_host_dev->ct_type = VMCIOBJ_CONTEXT; + atomic_inc(&vmci_host_active_users); + + retval = 0; + +out: + mutex_unlock(&vmci_host_dev->lock); + return retval; +} + +static int vmci_host_do_send_datagram(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_datagram_snd_rcv_info send_info; + struct vmci_datagram *dg = NULL; + u32 cid; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&send_info, uptr, sizeof(send_info))) + return -EFAULT; + + if (send_info.len > VMCI_MAX_DG_SIZE) { + vmci_ioctl_err("datagram is too big (size=%d)\n", + send_info.len); + return -EINVAL; + } + + if (send_info.len < sizeof(*dg)) { + vmci_ioctl_err("datagram is too small (size=%d)\n", + send_info.len); + return -EINVAL; + } + + dg = kmalloc(send_info.len, GFP_KERNEL); + if (!dg) { + vmci_ioctl_err( + "cannot allocate memory to dispatch datagram\n"); + return -ENOMEM; + } + + if (copy_from_user(dg, (void __user *)(uintptr_t)send_info.addr, + send_info.len)) { + vmci_ioctl_err("error getting datagram\n"); + kfree(dg); + return -EFAULT; + } + + if (VMCI_DG_SIZE(dg) != send_info.len) { + vmci_ioctl_err("datagram size mismatch\n"); + kfree(dg); + return -EINVAL; + } + + pr_devel("Datagram dst (handle=0x%x:0x%x) src (handle=0x%x:0x%x), payload (size=%llu bytes)\n", + dg->dst.context, dg->dst.resource, + dg->src.context, dg->src.resource, + (unsigned long long)dg->payload_size); + + /* Get source context id. */ + cid = vmci_ctx_get_id(vmci_host_dev->context); + send_info.result = vmci_datagram_dispatch(cid, dg, true); + kfree(dg); + + return copy_to_user(uptr, &send_info, sizeof(send_info)) ? -EFAULT : 0; +} + +static int vmci_host_do_receive_datagram(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_datagram_snd_rcv_info recv_info; + struct vmci_datagram *dg = NULL; + int retval; + size_t size; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&recv_info, uptr, sizeof(recv_info))) + return -EFAULT; + + size = recv_info.len; + recv_info.result = vmci_ctx_dequeue_datagram(vmci_host_dev->context, + &size, &dg); + + if (recv_info.result >= VMCI_SUCCESS) { + void __user *ubuf = (void __user *)(uintptr_t)recv_info.addr; + retval = copy_to_user(ubuf, dg, VMCI_DG_SIZE(dg)); + kfree(dg); + if (retval != 0) + return -EFAULT; + } + + return copy_to_user(uptr, &recv_info, sizeof(recv_info)) ? 
-EFAULT : 0; +} + +static int vmci_host_do_alloc_queuepair(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_handle handle; + int vmci_status; + int __user *retptr; + u32 cid; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + cid = vmci_ctx_get_id(vmci_host_dev->context); + + if (vmci_host_dev->user_version < VMCI_VERSION_NOVMVM) { + struct vmci_qp_alloc_info_vmvm alloc_info; + struct vmci_qp_alloc_info_vmvm __user *info = uptr; + + if (copy_from_user(&alloc_info, uptr, sizeof(alloc_info))) + return -EFAULT; + + handle = alloc_info.handle; + retptr = &info->result; + + vmci_status = vmci_qp_broker_alloc(alloc_info.handle, + alloc_info.peer, + alloc_info.flags, + VMCI_NO_PRIVILEGE_FLAGS, + alloc_info.produce_size, + alloc_info.consume_size, + NULL, + vmci_host_dev->context); + + if (vmci_status == VMCI_SUCCESS) + vmci_status = VMCI_SUCCESS_QUEUEPAIR_CREATE; + } else { + struct vmci_qp_alloc_info alloc_info; + struct vmci_qp_alloc_info __user *info = uptr; + struct vmci_qp_page_store page_store; + + if (copy_from_user(&alloc_info, uptr, sizeof(alloc_info))) + return -EFAULT; + + handle = alloc_info.handle; + retptr = &info->result; + + page_store.pages = alloc_info.ppn_va; + page_store.len = alloc_info.num_ppns; + + vmci_status = vmci_qp_broker_alloc(alloc_info.handle, + alloc_info.peer, + alloc_info.flags, + VMCI_NO_PRIVILEGE_FLAGS, + alloc_info.produce_size, + alloc_info.consume_size, + &page_store, + vmci_host_dev->context); + } + + if (put_user(vmci_status, retptr)) { + if (vmci_status >= VMCI_SUCCESS) { + vmci_status = vmci_qp_broker_detach(handle, + vmci_host_dev->context); + } + return -EFAULT; + } + + return 0; +} + +static int vmci_host_do_queuepair_setva(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_qp_set_va_info set_va_info; + struct vmci_qp_set_va_info __user *info = uptr; + s32 result; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (vmci_host_dev->user_version < VMCI_VERSION_NOVMVM) { + vmci_ioctl_err("is not allowed\n"); + return -EINVAL; + } + + if (copy_from_user(&set_va_info, uptr, sizeof(set_va_info))) + return -EFAULT; + + if (set_va_info.va) { + /* + * VMX is passing down a new VA for the queue + * pair mapping. + */ + result = vmci_qp_broker_map(set_va_info.handle, + vmci_host_dev->context, + set_va_info.va); + } else { + /* + * The queue pair is about to be unmapped by + * the VMX. + */ + result = vmci_qp_broker_unmap(set_va_info.handle, + vmci_host_dev->context, 0); + } + + return put_user(result, &info->result) ? -EFAULT : 0; +} + +static int vmci_host_do_queuepair_setpf(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_qp_page_file_info page_file_info; + struct vmci_qp_page_file_info __user *info = uptr; + s32 result; + + if (vmci_host_dev->user_version < VMCI_VERSION_HOSTQP || + vmci_host_dev->user_version >= VMCI_VERSION_NOVMVM) { + vmci_ioctl_err("not supported on this VMX (version=%d)\n", + vmci_host_dev->user_version); + return -EINVAL; + } + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&page_file_info, uptr, sizeof(*info))) + return -EFAULT; + + /* + * Communicate success pre-emptively to the caller. 
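+ * In hypothetical caller terms (not code from this driver), the
+ * contract amounts to:
+ *
+ *	if (ioctl(fd, IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE, &info) == 0 &&
+ *	    info.result >= VMCI_SUCCESS)
+ *		the page store was registered;
+ *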
Note that the + * basic premise is that it is incumbent upon the caller not to look at + * the info.result field until after the ioctl() returns. And then, + * only if the ioctl() result indicates no error. We send up the + * SUCCESS status before calling SetPageStore() store because failing + * to copy up the result code means unwinding the SetPageStore(). + * + * It turns out the logic to unwind a SetPageStore() opens a can of + * worms. For example, if a host had created the queue_pair and a + * guest attaches and SetPageStore() is successful but writing success + * fails, then ... the host has to be stopped from writing (anymore) + * data into the queue_pair. That means an additional test in the + * VMCI_Enqueue() code path. Ugh. + */ + + if (put_user(VMCI_SUCCESS, &info->result)) { + /* + * In this case, we can't write a result field of the + * caller's info block. So, we don't even try to + * SetPageStore(). + */ + return -EFAULT; + } + + result = vmci_qp_broker_set_page_store(page_file_info.handle, + page_file_info.produce_va, + page_file_info.consume_va, + vmci_host_dev->context); + if (result < VMCI_SUCCESS) { + if (put_user(result, &info->result)) { + /* + * Note that in this case the SetPageStore() + * call failed but we were unable to + * communicate that to the caller (because the + * copy_to_user() call failed). So, if we + * simply return an error (in this case + * -EFAULT) then the caller will know that the + * SetPageStore failed even though we couldn't + * put the result code in the result field and + * indicate exactly why it failed. + * + * That says nothing about the issue where we + * were once able to write to the caller's info + * memory and now can't. Something more + * serious is probably going on than the fact + * that SetPageStore() didn't work. + */ + return -EFAULT; + } + } + + return 0; +} + +static int vmci_host_do_qp_detach(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_qp_dtch_info detach_info; + struct vmci_qp_dtch_info __user *info = uptr; + s32 result; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&detach_info, uptr, sizeof(detach_info))) + return -EFAULT; + + result = vmci_qp_broker_detach(detach_info.handle, + vmci_host_dev->context); + if (result == VMCI_SUCCESS && + vmci_host_dev->user_version < VMCI_VERSION_NOVMVM) { + result = VMCI_SUCCESS_LAST_DETACH; + } + + return put_user(result, &info->result) ? -EFAULT : 0; +} + +static int vmci_host_do_ctx_add_notify(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_ctx_info ar_info; + struct vmci_ctx_info __user *info = uptr; + s32 result; + u32 cid; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&ar_info, uptr, sizeof(ar_info))) + return -EFAULT; + + cid = vmci_ctx_get_id(vmci_host_dev->context); + result = vmci_ctx_add_notification(cid, ar_info.remote_cid); + + return put_user(result, &info->result) ? 
-EFAULT : 0; +} + +static int vmci_host_do_ctx_remove_notify(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_ctx_info ar_info; + struct vmci_ctx_info __user *info = uptr; + u32 cid; + int result; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&ar_info, uptr, sizeof(ar_info))) + return -EFAULT; + + cid = vmci_ctx_get_id(vmci_host_dev->context); + result = vmci_ctx_remove_notification(cid, + ar_info.remote_cid); + + return put_user(result, &info->result) ? -EFAULT : 0; +} + +static int vmci_host_do_ctx_get_cpt_state(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_ctx_chkpt_buf_info get_info; + u32 cid; + void *cpt_buf; + int retval; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&get_info, uptr, sizeof(get_info))) + return -EFAULT; + + cid = vmci_ctx_get_id(vmci_host_dev->context); + get_info.result = vmci_ctx_get_chkpt_state(cid, get_info.cpt_type, + &get_info.buf_size, &cpt_buf); + if (get_info.result == VMCI_SUCCESS && get_info.buf_size) { + void __user *ubuf = (void __user *)(uintptr_t)get_info.cpt_buf; + retval = copy_to_user(ubuf, cpt_buf, get_info.buf_size); + kfree(cpt_buf); + + if (retval) + return -EFAULT; + } + + return copy_to_user(uptr, &get_info, sizeof(get_info)) ? -EFAULT : 0; +} + +static int vmci_host_do_ctx_set_cpt_state(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_ctx_chkpt_buf_info set_info; + u32 cid; + void *cpt_buf; + int retval; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&set_info, uptr, sizeof(set_info))) + return -EFAULT; + + cpt_buf = kmalloc(set_info.buf_size, GFP_KERNEL); + if (!cpt_buf) { + vmci_ioctl_err( + "cannot allocate memory to set cpt state (type=%d)\n", + set_info.cpt_type); + return -ENOMEM; + } + + if (copy_from_user(cpt_buf, (void __user *)(uintptr_t)set_info.cpt_buf, + set_info.buf_size)) { + retval = -EFAULT; + goto out; + } + + cid = vmci_ctx_get_id(vmci_host_dev->context); + set_info.result = vmci_ctx_set_chkpt_state(cid, set_info.cpt_type, + set_info.buf_size, cpt_buf); + + retval = copy_to_user(uptr, &set_info, sizeof(set_info)) ? -EFAULT : 0; + +out: + kfree(cpt_buf); + return retval; +} + +static int vmci_host_do_get_context_id(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + u32 __user *u32ptr = uptr; + + return put_user(VMCI_HOST_CONTEXT_ID, u32ptr) ? -EFAULT : 0; +} + +static int vmci_host_do_set_notify(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_set_notify_info notify_info; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(¬ify_info, uptr, sizeof(notify_info))) + return -EFAULT; + + if (notify_info.notify_uva) { + notify_info.result = + vmci_host_setup_notify(vmci_host_dev->context, + notify_info.notify_uva); + } else { + vmci_ctx_unset_notify(vmci_host_dev->context); + notify_info.result = VMCI_SUCCESS; + } + + return copy_to_user(uptr, ¬ify_info, sizeof(notify_info)) ? 
+ -EFAULT : 0; +} + +static int vmci_host_do_notify_resource(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_dbell_notify_resource_info info; + u32 cid; + + if (vmci_host_dev->user_version < VMCI_VERSION_NOTIFY) { + vmci_ioctl_err("invalid for current VMX versions\n"); + return -EINVAL; + } + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&info, uptr, sizeof(info))) + return -EFAULT; + + cid = vmci_ctx_get_id(vmci_host_dev->context); + + switch (info.action) { + case VMCI_NOTIFY_RESOURCE_ACTION_NOTIFY: + if (info.resource == VMCI_NOTIFY_RESOURCE_DOOR_BELL) { + u32 flags = VMCI_NO_PRIVILEGE_FLAGS; + info.result = vmci_ctx_notify_dbell(cid, info.handle, + flags); + } else { + info.result = VMCI_ERROR_UNAVAILABLE; + } + break; + + case VMCI_NOTIFY_RESOURCE_ACTION_CREATE: + info.result = vmci_ctx_dbell_create(cid, info.handle); + break; + + case VMCI_NOTIFY_RESOURCE_ACTION_DESTROY: + info.result = vmci_ctx_dbell_destroy(cid, info.handle); + break; + + default: + vmci_ioctl_err("got unknown action (action=%d)\n", + info.action); + info.result = VMCI_ERROR_INVALID_ARGS; + } + + return copy_to_user(uptr, &info, sizeof(info)) ? -EFAULT : 0; +} + +static int vmci_host_do_recv_notifications(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_ctx_notify_recv_info info; + struct vmci_handle_arr *db_handle_array; + struct vmci_handle_arr *qp_handle_array; + void __user *ubuf; + u32 cid; + int retval = 0; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (vmci_host_dev->user_version < VMCI_VERSION_NOTIFY) { + vmci_ioctl_err("not supported for the current vmx version\n"); + return -EINVAL; + } + + if (copy_from_user(&info, uptr, sizeof(info))) + return -EFAULT; + + if ((info.db_handle_buf_size && !info.db_handle_buf_uva) || + (info.qp_handle_buf_size && !info.qp_handle_buf_uva)) { + return -EINVAL; + } + + cid = vmci_ctx_get_id(vmci_host_dev->context); + + info.result = vmci_ctx_rcv_notifications_get(cid, + &db_handle_array, &qp_handle_array); + if (info.result != VMCI_SUCCESS) + return copy_to_user(uptr, &info, sizeof(info)) ? 
-EFAULT : 0; + + ubuf = (void __user *)(uintptr_t)info.db_handle_buf_uva; + info.result = drv_cp_harray_to_user(ubuf, &info.db_handle_buf_size, + db_handle_array, &retval); + if (info.result == VMCI_SUCCESS && !retval) { + ubuf = (void __user *)(uintptr_t)info.qp_handle_buf_uva; + info.result = drv_cp_harray_to_user(ubuf, + &info.qp_handle_buf_size, + qp_handle_array, &retval); + } + + if (!retval && copy_to_user(uptr, &info, sizeof(info))) + retval = -EFAULT; + + vmci_ctx_rcv_notifications_release(cid, + db_handle_array, qp_handle_array, + info.result == VMCI_SUCCESS && !retval); + + return retval; +} + +static long vmci_host_unlocked_ioctl(struct file *filp, + unsigned int iocmd, unsigned long ioarg) +{ +#define VMCI_DO_IOCTL(ioctl_name, ioctl_fn) do { \ + char *name = __stringify(IOCTL_VMCI_ ## ioctl_name); \ + return vmci_host_do_ ## ioctl_fn( \ + vmci_host_dev, name, uptr); \ + } while (0) + + struct vmci_host_dev *vmci_host_dev = filp->private_data; + void __user *uptr = (void __user *)ioarg; + + switch (iocmd) { + case IOCTL_VMCI_INIT_CONTEXT: + VMCI_DO_IOCTL(INIT_CONTEXT, init_context); + case IOCTL_VMCI_DATAGRAM_SEND: + VMCI_DO_IOCTL(DATAGRAM_SEND, send_datagram); + case IOCTL_VMCI_DATAGRAM_RECEIVE: + VMCI_DO_IOCTL(DATAGRAM_RECEIVE, receive_datagram); + case IOCTL_VMCI_QUEUEPAIR_ALLOC: + VMCI_DO_IOCTL(QUEUEPAIR_ALLOC, alloc_queuepair); + case IOCTL_VMCI_QUEUEPAIR_SETVA: + VMCI_DO_IOCTL(QUEUEPAIR_SETVA, queuepair_setva); + case IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE: + VMCI_DO_IOCTL(QUEUEPAIR_SETPAGEFILE, queuepair_setpf); + case IOCTL_VMCI_QUEUEPAIR_DETACH: + VMCI_DO_IOCTL(QUEUEPAIR_DETACH, qp_detach); + case IOCTL_VMCI_CTX_ADD_NOTIFICATION: + VMCI_DO_IOCTL(CTX_ADD_NOTIFICATION, ctx_add_notify); + case IOCTL_VMCI_CTX_REMOVE_NOTIFICATION: + VMCI_DO_IOCTL(CTX_REMOVE_NOTIFICATION, ctx_remove_notify); + case IOCTL_VMCI_CTX_GET_CPT_STATE: + VMCI_DO_IOCTL(CTX_GET_CPT_STATE, ctx_get_cpt_state); + case IOCTL_VMCI_CTX_SET_CPT_STATE: + VMCI_DO_IOCTL(CTX_SET_CPT_STATE, ctx_set_cpt_state); + case IOCTL_VMCI_GET_CONTEXT_ID: + VMCI_DO_IOCTL(GET_CONTEXT_ID, get_context_id); + case IOCTL_VMCI_SET_NOTIFY: + VMCI_DO_IOCTL(SET_NOTIFY, set_notify); + case IOCTL_VMCI_NOTIFY_RESOURCE: + VMCI_DO_IOCTL(NOTIFY_RESOURCE, notify_resource); + case IOCTL_VMCI_NOTIFICATIONS_RECEIVE: + VMCI_DO_IOCTL(NOTIFICATIONS_RECEIVE, recv_notifications); + + case IOCTL_VMCI_VERSION: + case IOCTL_VMCI_VERSION2: + return vmci_host_get_version(vmci_host_dev, iocmd, uptr); + + default: + pr_devel("%s: Unknown ioctl (iocmd=%d)\n", __func__, iocmd); + return -EINVAL; + } + +#undef VMCI_DO_IOCTL +} + +static const struct file_operations vmuser_fops = { + .owner = THIS_MODULE, + .open = vmci_host_open, + .release = vmci_host_close, + .poll = vmci_host_poll, + .unlocked_ioctl = vmci_host_unlocked_ioctl, + .compat_ioctl = vmci_host_unlocked_ioctl, +}; + +static struct miscdevice vmci_host_miscdev = { + .name = "vmci", + .minor = MISC_DYNAMIC_MINOR, + .fops = &vmuser_fops, +}; + +int __init vmci_host_init(void) +{ + int error; + + host_context = vmci_ctx_create(VMCI_HOST_CONTEXT_ID, + VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS, + -1, VMCI_VERSION, NULL); + if (IS_ERR(host_context)) { + error = PTR_ERR(host_context); + pr_warn("Failed to initialize VMCIContext (error%d)\n", + error); + return error; + } + + error = misc_register(&vmci_host_miscdev); + if (error) { + pr_warn("Module registration error (name=%s, major=%d, minor=%d, err=%d)\n", + vmci_host_miscdev.name, + MISC_MAJOR, vmci_host_miscdev.minor, + error); + pr_warn("Unable to initialize 
host personality\n");
+		vmci_ctx_destroy(host_context);
+		return error;
+	}
+
+	pr_info("VMCI host device registered (name=%s, major=%d, minor=%d)\n",
+		vmci_host_miscdev.name, MISC_MAJOR, vmci_host_miscdev.minor);
+
+	vmci_host_device_initialized = true;
+	return 0;
+}
+
+void __exit vmci_host_exit(void)
+{
+	int error;
+
+	vmci_host_device_initialized = false;
+
+	error = misc_deregister(&vmci_host_miscdev);
+	if (error)
+		pr_warn("Error unregistering character device: %d\n", error);
+
+	vmci_ctx_destroy(host_context);
+	vmci_qp_broker_exit();
+
+	pr_debug("VMCI host driver module unloaded\n");
+}
diff --git a/kernel/drivers/misc/vmw_vmci/vmci_queue_pair.c b/kernel/drivers/misc/vmw_vmci/vmci_queue_pair.c
new file mode 100644
index 000000000..f42d9c4e4
--- /dev/null
+++ b/kernel/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -0,0 +1,3355 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/highmem.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/pagemap.h>
+#include <linux/pci.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uio.h>
+#include <linux/wait.h>
+#include <linux/vmalloc.h>
+#include <linux/skbuff.h>
+
+#include "vmci_handle_array.h"
+#include "vmci_queue_pair.h"
+#include "vmci_datagram.h"
+#include "vmci_resource.h"
+#include "vmci_context.h"
+#include "vmci_driver.h"
+#include "vmci_event.h"
+#include "vmci_route.h"
+
+/*
+ * In the following, we will distinguish between two kinds of VMX processes -
+ * the ones with versions lower than VMCI_VERSION_NOVMVM that use specialized
+ * VMCI page files in the VMX and support VM to VM communication, and the
+ * newer ones that use the guest memory directly. We will in the following
+ * refer to the older VMX versions as old-style VMX'en, and the newer ones as
+ * new-style VMX'en.
+ *
+ * The state transition diagram is as follows (the VMCIQPB_ prefix has been
+ * removed for readability) - see below for more details on the transitions:
+ *
+ *            --------------  NEW  -------------
+ *            |                                |
+ *           \_/                              \_/
+ *     CREATED_NO_MEM <-----------------> CREATED_MEM
+ *            |    |                           |
+ *            |    o-----------------------o   |
+ *            |    |                       |   |
+ *           \_/  \_/                     \_/ \_/
+ *     ATTACHED_NO_MEM <----------------> ATTACHED_MEM
+ *            |    |                           |
+ *            |    o----------------------o    |
+ *            |    |                      |    |
+ *           \_/  \_/                    \_/  \_/
+ *     SHUTDOWN_NO_MEM <----------------> SHUTDOWN_MEM
+ *            |                                |
+ *            |                                |
+ *            -------------> gone <-------------
+ *
+ * In more detail. When a VMCI queue pair is first created, it will be in the
+ * VMCIQPB_NEW state. It will then move into one of the following states:
+ *
+ * - VMCIQPB_CREATED_NO_MEM: this state indicates that either:
+ *
+ *     - the create was performed by a host endpoint, in which case there is
+ *       no backing memory yet.
+ *
+ *     - the create was initiated by an old-style VMX, that uses
+ *       vmci_qp_broker_set_page_store to specify the UVAs of the queue pair at
+ *       a later point in time.
This state can be distinguished from the one + * above by the context ID of the creator. A host side is not allowed to + * attach until the page store has been set. + * + * - VMCIQPB_CREATED_MEM: this state is the result when the queue pair + * is created by a VMX using the queue pair device backend that + * sets the UVAs of the queue pair immediately and stores the + * information for later attachers. At this point, it is ready for + * the host side to attach to it. + * + * Once the queue pair is in one of the created states (with the exception of + * the case mentioned for older VMX'en above), it is possible to attach to the + * queue pair. Again we have two new states possible: + * + * - VMCIQPB_ATTACHED_MEM: this state can be reached through the following + * paths: + * + * - from VMCIQPB_CREATED_NO_MEM when a new-style VMX allocates a queue + * pair, and attaches to a queue pair previously created by the host side. + * + * - from VMCIQPB_CREATED_MEM when the host side attaches to a queue pair + * already created by a guest. + * + * - from VMCIQPB_ATTACHED_NO_MEM, when an old-style VMX calls + * vmci_qp_broker_set_page_store (see below). + * + * - VMCIQPB_ATTACHED_NO_MEM: If the queue pair already was in the + * VMCIQPB_CREATED_NO_MEM due to a host side create, an old-style VMX will + * bring the queue pair into this state. Once vmci_qp_broker_set_page_store + * is called to register the user memory, the VMCIQPB_ATTACH_MEM state + * will be entered. + * + * From the attached queue pair, the queue pair can enter the shutdown states + * when either side of the queue pair detaches. If the guest side detaches + * first, the queue pair will enter the VMCIQPB_SHUTDOWN_NO_MEM state, where + * the content of the queue pair will no longer be available. If the host + * side detaches first, the queue pair will either enter the + * VMCIQPB_SHUTDOWN_MEM, if the guest memory is currently mapped, or + * VMCIQPB_SHUTDOWN_NO_MEM, if the guest memory is not mapped + * (e.g., the host detaches while a guest is stunned). + * + * New-style VMX'en will also unmap guest memory, if the guest is + * quiesced, e.g., during a snapshot operation. In that case, the guest + * memory will no longer be available, and the queue pair will transition from + * *_MEM state to a *_NO_MEM state. The VMX may later map the memory once more, + * in which case the queue pair will transition from the *_NO_MEM state at that + * point back to the *_MEM state. Note that the *_NO_MEM state may have changed, + * since the peer may have either attached or detached in the meantime. The + * values are laid out such that ++ on a state will move from a *_NO_MEM to a + * *_MEM state, and vice versa. + */ + +/* + * VMCIMemcpy{To,From}QueueFunc() prototypes. Functions of these + * types are passed around to enqueue and dequeue routines. Note that + * often the functions passed are simply wrappers around memcpy + * itself. + * + * Note: In order for the memcpy typedefs to be compatible with the VMKernel, + * there's an unused last parameter for the hosted side. In + * ESX, that parameter holds a buffer type. + */ +typedef int vmci_memcpy_to_queue_func(struct vmci_queue *queue, + u64 queue_offset, const void *src, + size_t src_offset, size_t size); +typedef int vmci_memcpy_from_queue_func(void *dest, size_t dest_offset, + const struct vmci_queue *queue, + u64 queue_offset, size_t size); + +/* The Kernel specific component of the struct vmci_queue structure. */ +struct vmci_queue_kern_if { + struct mutex __mutex; /* Protects the queue. 
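+			       * Host side only; guest queues need no
+			       * such lock (see qp_init_queue_mutex()).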
*/ + struct mutex *mutex; /* Shared by producer and consumer queues. */ + size_t num_pages; /* Number of pages incl. header. */ + bool host; /* Host or guest? */ + union { + struct { + dma_addr_t *pas; + void **vas; + } g; /* Used by the guest. */ + struct { + struct page **page; + struct page **header_page; + } h; /* Used by the host. */ + } u; +}; + +/* + * This structure is opaque to the clients. + */ +struct vmci_qp { + struct vmci_handle handle; + struct vmci_queue *produce_q; + struct vmci_queue *consume_q; + u64 produce_q_size; + u64 consume_q_size; + u32 peer; + u32 flags; + u32 priv_flags; + bool guest_endpoint; + unsigned int blocked; + unsigned int generation; + wait_queue_head_t event; +}; + +enum qp_broker_state { + VMCIQPB_NEW, + VMCIQPB_CREATED_NO_MEM, + VMCIQPB_CREATED_MEM, + VMCIQPB_ATTACHED_NO_MEM, + VMCIQPB_ATTACHED_MEM, + VMCIQPB_SHUTDOWN_NO_MEM, + VMCIQPB_SHUTDOWN_MEM, + VMCIQPB_GONE +}; + +#define QPBROKERSTATE_HAS_MEM(_qpb) (_qpb->state == VMCIQPB_CREATED_MEM || \ + _qpb->state == VMCIQPB_ATTACHED_MEM || \ + _qpb->state == VMCIQPB_SHUTDOWN_MEM) + +/* + * In the queue pair broker, we always use the guest point of view for + * the produce and consume queue values and references, e.g., the + * produce queue size stored is the guests produce queue size. The + * host endpoint will need to swap these around. The only exception is + * the local queue pairs on the host, in which case the host endpoint + * that creates the queue pair will have the right orientation, and + * the attaching host endpoint will need to swap. + */ +struct qp_entry { + struct list_head list_item; + struct vmci_handle handle; + u32 peer; + u32 flags; + u64 produce_size; + u64 consume_size; + u32 ref_count; +}; + +struct qp_broker_entry { + struct vmci_resource resource; + struct qp_entry qp; + u32 create_id; + u32 attach_id; + enum qp_broker_state state; + bool require_trusted_attach; + bool created_by_trusted; + bool vmci_page_files; /* Created by VMX using VMCI page files */ + struct vmci_queue *produce_q; + struct vmci_queue *consume_q; + struct vmci_queue_header saved_produce_q; + struct vmci_queue_header saved_consume_q; + vmci_event_release_cb wakeup_cb; + void *client_data; + void *local_mem; /* Kernel memory for local queue pair */ +}; + +struct qp_guest_endpoint { + struct vmci_resource resource; + struct qp_entry qp; + u64 num_ppns; + void *produce_q; + void *consume_q; + struct ppn_set ppn_set; +}; + +struct qp_list { + struct list_head head; + struct mutex mutex; /* Protect queue list. */ +}; + +static struct qp_list qp_broker_list = { + .head = LIST_HEAD_INIT(qp_broker_list.head), + .mutex = __MUTEX_INITIALIZER(qp_broker_list.mutex), +}; + +static struct qp_list qp_guest_endpoints = { + .head = LIST_HEAD_INIT(qp_guest_endpoints.head), + .mutex = __MUTEX_INITIALIZER(qp_guest_endpoints.mutex), +}; + +#define INVALID_VMCI_GUEST_MEM_ID 0 +#define QPE_NUM_PAGES(_QPE) ((u32) \ + (DIV_ROUND_UP(_QPE.produce_size, PAGE_SIZE) + \ + DIV_ROUND_UP(_QPE.consume_size, PAGE_SIZE) + 2)) + + +/* + * Frees kernel VA space for a given queue and its queue header, and + * frees physical data pages. + */ +static void qp_free_queue(void *q, u64 size) +{ + struct vmci_queue *queue = q; + + if (queue) { + u64 i; + + /* Given size does not include header, so add in a page here. 
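+		 * For example, with 4 KiB pages, freeing a 2 KiB queue
+		 * walks DIV_ROUND_UP(2048, 4096) + 1 = 2 pages: one
+		 * data page plus the header page.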
*/ + for (i = 0; i < DIV_ROUND_UP(size, PAGE_SIZE) + 1; i++) { + dma_free_coherent(&vmci_pdev->dev, PAGE_SIZE, + queue->kernel_if->u.g.vas[i], + queue->kernel_if->u.g.pas[i]); + } + + vfree(queue); + } +} + +/* + * Allocates kernel queue pages of specified size with IOMMU mappings, + * plus space for the queue structure/kernel interface and the queue + * header. + */ +static void *qp_alloc_queue(u64 size, u32 flags) +{ + u64 i; + struct vmci_queue *queue; + size_t pas_size; + size_t vas_size; + size_t queue_size = sizeof(*queue) + sizeof(*queue->kernel_if); + const u64 num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1; + + if (num_pages > + (SIZE_MAX - queue_size) / + (sizeof(*queue->kernel_if->u.g.pas) + + sizeof(*queue->kernel_if->u.g.vas))) + return NULL; + + pas_size = num_pages * sizeof(*queue->kernel_if->u.g.pas); + vas_size = num_pages * sizeof(*queue->kernel_if->u.g.vas); + queue_size += pas_size + vas_size; + + queue = vmalloc(queue_size); + if (!queue) + return NULL; + + queue->q_header = NULL; + queue->saved_header = NULL; + queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1); + queue->kernel_if->mutex = NULL; + queue->kernel_if->num_pages = num_pages; + queue->kernel_if->u.g.pas = (dma_addr_t *)(queue->kernel_if + 1); + queue->kernel_if->u.g.vas = + (void **)((u8 *)queue->kernel_if->u.g.pas + pas_size); + queue->kernel_if->host = false; + + for (i = 0; i < num_pages; i++) { + queue->kernel_if->u.g.vas[i] = + dma_alloc_coherent(&vmci_pdev->dev, PAGE_SIZE, + &queue->kernel_if->u.g.pas[i], + GFP_KERNEL); + if (!queue->kernel_if->u.g.vas[i]) { + /* Size excl. the header. */ + qp_free_queue(queue, i * PAGE_SIZE); + return NULL; + } + } + + /* Queue header is the first page. */ + queue->q_header = queue->kernel_if->u.g.vas[0]; + + return queue; +} + +/* + * Copies from a given buffer or iovector to a VMCI Queue. Uses + * kmap()/kunmap() to dynamically map/unmap required portions of the queue + * by traversing the offset -> page translation structure for the queue. + * Assumes that offset + size does not wrap around in the queue. + */ +static int __qp_memcpy_to_queue(struct vmci_queue *queue, + u64 queue_offset, + const void *src, + size_t size, + bool is_iovec) +{ + struct vmci_queue_kern_if *kernel_if = queue->kernel_if; + size_t bytes_copied = 0; + + while (bytes_copied < size) { + const u64 page_index = + (queue_offset + bytes_copied) / PAGE_SIZE; + const size_t page_offset = + (queue_offset + bytes_copied) & (PAGE_SIZE - 1); + void *va; + size_t to_copy; + + if (kernel_if->host) + va = kmap(kernel_if->u.h.page[page_index]); + else + va = kernel_if->u.g.vas[page_index + 1]; + /* Skip header. */ + + if (size - bytes_copied > PAGE_SIZE - page_offset) + /* Enough payload to fill up from this page. */ + to_copy = PAGE_SIZE - page_offset; + else + to_copy = size - bytes_copied; + + if (is_iovec) { + struct msghdr *msg = (struct msghdr *)src; + int err; + + /* The iovec will track bytes_copied internally. */ + err = memcpy_from_msg((u8 *)va + page_offset, + msg, to_copy); + if (err != 0) { + if (kernel_if->host) + kunmap(kernel_if->u.h.page[page_index]); + return VMCI_ERROR_INVALID_ARGS; + } + } else { + memcpy((u8 *)va + page_offset, + (u8 *)src + bytes_copied, to_copy); + } + + bytes_copied += to_copy; + if (kernel_if->host) + kunmap(kernel_if->u.h.page[page_index]); + } + + return VMCI_SUCCESS; +} + +/* + * Copies to a given buffer or iovector from a VMCI Queue. 
Uses + * kmap()/kunmap() to dynamically map/unmap required portions of the queue + * by traversing the offset -> page translation structure for the queue. + * Assumes that offset + size does not wrap around in the queue. + */ +static int __qp_memcpy_from_queue(void *dest, + const struct vmci_queue *queue, + u64 queue_offset, + size_t size, + bool is_iovec) +{ + struct vmci_queue_kern_if *kernel_if = queue->kernel_if; + size_t bytes_copied = 0; + + while (bytes_copied < size) { + const u64 page_index = + (queue_offset + bytes_copied) / PAGE_SIZE; + const size_t page_offset = + (queue_offset + bytes_copied) & (PAGE_SIZE - 1); + void *va; + size_t to_copy; + + if (kernel_if->host) + va = kmap(kernel_if->u.h.page[page_index]); + else + va = kernel_if->u.g.vas[page_index + 1]; + /* Skip header. */ + + if (size - bytes_copied > PAGE_SIZE - page_offset) + /* Enough payload to fill up this page. */ + to_copy = PAGE_SIZE - page_offset; + else + to_copy = size - bytes_copied; + + if (is_iovec) { + struct msghdr *msg = dest; + int err; + + /* The iovec will track bytes_copied internally. */ + err = memcpy_to_msg(msg, (u8 *)va + page_offset, + to_copy); + if (err != 0) { + if (kernel_if->host) + kunmap(kernel_if->u.h.page[page_index]); + return VMCI_ERROR_INVALID_ARGS; + } + } else { + memcpy((u8 *)dest + bytes_copied, + (u8 *)va + page_offset, to_copy); + } + + bytes_copied += to_copy; + if (kernel_if->host) + kunmap(kernel_if->u.h.page[page_index]); + } + + return VMCI_SUCCESS; +} + +/* + * Allocates two list of PPNs --- one for the pages in the produce queue, + * and the other for the pages in the consume queue. Intializes the list + * of PPNs with the page frame numbers of the KVA for the two queues (and + * the queue headers). + */ +static int qp_alloc_ppn_set(void *prod_q, + u64 num_produce_pages, + void *cons_q, + u64 num_consume_pages, struct ppn_set *ppn_set) +{ + u32 *produce_ppns; + u32 *consume_ppns; + struct vmci_queue *produce_q = prod_q; + struct vmci_queue *consume_q = cons_q; + u64 i; + + if (!produce_q || !num_produce_pages || !consume_q || + !num_consume_pages || !ppn_set) + return VMCI_ERROR_INVALID_ARGS; + + if (ppn_set->initialized) + return VMCI_ERROR_ALREADY_EXISTS; + + produce_ppns = + kmalloc(num_produce_pages * sizeof(*produce_ppns), GFP_KERNEL); + if (!produce_ppns) + return VMCI_ERROR_NO_MEM; + + consume_ppns = + kmalloc(num_consume_pages * sizeof(*consume_ppns), GFP_KERNEL); + if (!consume_ppns) { + kfree(produce_ppns); + return VMCI_ERROR_NO_MEM; + } + + for (i = 0; i < num_produce_pages; i++) { + unsigned long pfn; + + produce_ppns[i] = + produce_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT; + pfn = produce_ppns[i]; + + /* Fail allocation if PFN isn't supported by hypervisor. */ + if (sizeof(pfn) > sizeof(*produce_ppns) + && pfn != produce_ppns[i]) + goto ppn_error; + } + + for (i = 0; i < num_consume_pages; i++) { + unsigned long pfn; + + consume_ppns[i] = + consume_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT; + pfn = consume_ppns[i]; + + /* Fail allocation if PFN isn't supported by hypervisor. */ + if (sizeof(pfn) > sizeof(*consume_ppns) + && pfn != consume_ppns[i]) + goto ppn_error; + } + + ppn_set->num_produce_pages = num_produce_pages; + ppn_set->num_consume_pages = num_consume_pages; + ppn_set->produce_ppns = produce_ppns; + ppn_set->consume_ppns = consume_ppns; + ppn_set->initialized = true; + return VMCI_SUCCESS; + + ppn_error: + kfree(produce_ppns); + kfree(consume_ppns); + return VMCI_ERROR_INVALID_ARGS; +} + +/* + * Frees the two list of PPNs for a queue pair. 
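+ * (A note on the allocation path above: PPNs are stored as u32, so on
+ * a 64-bit host a page frame number above 0xffffffff would be silently
+ * truncated; that is what the pfn != produce_ppns[i] comparison
+ * catches before the set is handed to the hypervisor.)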
+ */ +static void qp_free_ppn_set(struct ppn_set *ppn_set) +{ + if (ppn_set->initialized) { + /* Do not call these functions on NULL inputs. */ + kfree(ppn_set->produce_ppns); + kfree(ppn_set->consume_ppns); + } + memset(ppn_set, 0, sizeof(*ppn_set)); +} + +/* + * Populates the list of PPNs in the hypercall structure with the PPNS + * of the produce queue and the consume queue. + */ +static int qp_populate_ppn_set(u8 *call_buf, const struct ppn_set *ppn_set) +{ + memcpy(call_buf, ppn_set->produce_ppns, + ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns)); + memcpy(call_buf + + ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns), + ppn_set->consume_ppns, + ppn_set->num_consume_pages * sizeof(*ppn_set->consume_ppns)); + + return VMCI_SUCCESS; +} + +static int qp_memcpy_to_queue(struct vmci_queue *queue, + u64 queue_offset, + const void *src, size_t src_offset, size_t size) +{ + return __qp_memcpy_to_queue(queue, queue_offset, + (u8 *)src + src_offset, size, false); +} + +static int qp_memcpy_from_queue(void *dest, + size_t dest_offset, + const struct vmci_queue *queue, + u64 queue_offset, size_t size) +{ + return __qp_memcpy_from_queue((u8 *)dest + dest_offset, + queue, queue_offset, size, false); +} + +/* + * Copies from a given iovec from a VMCI Queue. + */ +static int qp_memcpy_to_queue_iov(struct vmci_queue *queue, + u64 queue_offset, + const void *msg, + size_t src_offset, size_t size) +{ + + /* + * We ignore src_offset because src is really a struct iovec * and will + * maintain offset internally. + */ + return __qp_memcpy_to_queue(queue, queue_offset, msg, size, true); +} + +/* + * Copies to a given iovec from a VMCI Queue. + */ +static int qp_memcpy_from_queue_iov(void *dest, + size_t dest_offset, + const struct vmci_queue *queue, + u64 queue_offset, size_t size) +{ + /* + * We ignore dest_offset because dest is really a struct iovec * and + * will maintain offset internally. + */ + return __qp_memcpy_from_queue(dest, queue, queue_offset, size, true); +} + +/* + * Allocates kernel VA space of specified size plus space for the queue + * and kernel interface. This is different from the guest queue allocator, + * because we do not allocate our own queue header/data pages here but + * share those of the guest. + */ +static struct vmci_queue *qp_host_alloc_queue(u64 size) +{ + struct vmci_queue *queue; + size_t queue_page_size; + const u64 num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1; + const size_t queue_size = sizeof(*queue) + sizeof(*(queue->kernel_if)); + + if (num_pages > (SIZE_MAX - queue_size) / + sizeof(*queue->kernel_if->u.h.page)) + return NULL; + + queue_page_size = num_pages * sizeof(*queue->kernel_if->u.h.page); + + queue = kzalloc(queue_size + queue_page_size, GFP_KERNEL); + if (queue) { + queue->q_header = NULL; + queue->saved_header = NULL; + queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1); + queue->kernel_if->host = true; + queue->kernel_if->mutex = NULL; + queue->kernel_if->num_pages = num_pages; + queue->kernel_if->u.h.header_page = + (struct page **)((u8 *)queue + queue_size); + queue->kernel_if->u.h.page = + &queue->kernel_if->u.h.header_page[1]; + } + + return queue; +} + +/* + * Frees kernel memory for a given queue (header plus translation + * structure). + */ +static void qp_host_free_queue(struct vmci_queue *queue, u64 queue_size) +{ + kfree(queue); +} + +/* + * Initialize the mutex for the pair of queues. This mutex is used to + * protect the q_header and the buffer from changing out from under any + * users of either queue. 
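+ * Both kernel_if->mutex pointers are aimed at the producer's __mutex
+ * (see qp_init_queue_mutex() below), so a host-side caller serializes
+ * a whole transfer with, e.g.:
+ *
+ *	qp_acquire_queue_mutex(produce_q);  (same lock as consume_q)
+ *	... copy payload, update queue headers ...
+ *	qp_release_queue_mutex(produce_q);
+ *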
Of course, it's only any good if the mutexes + * are actually acquired. Queue structure must lie on non-paged memory + * or we cannot guarantee access to the mutex. + */ +static void qp_init_queue_mutex(struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + /* + * Only the host queue has shared state - the guest queues do not + * need to synchronize access using a queue mutex. + */ + + if (produce_q->kernel_if->host) { + produce_q->kernel_if->mutex = &produce_q->kernel_if->__mutex; + consume_q->kernel_if->mutex = &produce_q->kernel_if->__mutex; + mutex_init(produce_q->kernel_if->mutex); + } +} + +/* + * Cleans up the mutex for the pair of queues. + */ +static void qp_cleanup_queue_mutex(struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + if (produce_q->kernel_if->host) { + produce_q->kernel_if->mutex = NULL; + consume_q->kernel_if->mutex = NULL; + } +} + +/* + * Acquire the mutex for the queue. Note that the produce_q and + * the consume_q share a mutex. So, only one of the two need to + * be passed in to this routine. Either will work just fine. + */ +static void qp_acquire_queue_mutex(struct vmci_queue *queue) +{ + if (queue->kernel_if->host) + mutex_lock(queue->kernel_if->mutex); +} + +/* + * Release the mutex for the queue. Note that the produce_q and + * the consume_q share a mutex. So, only one of the two need to + * be passed in to this routine. Either will work just fine. + */ +static void qp_release_queue_mutex(struct vmci_queue *queue) +{ + if (queue->kernel_if->host) + mutex_unlock(queue->kernel_if->mutex); +} + +/* + * Helper function to release pages in the PageStoreAttachInfo + * previously obtained using get_user_pages. + */ +static void qp_release_pages(struct page **pages, + u64 num_pages, bool dirty) +{ + int i; + + for (i = 0; i < num_pages; i++) { + if (dirty) + set_page_dirty(pages[i]); + + page_cache_release(pages[i]); + pages[i] = NULL; + } +} + +/* + * Lock the user pages referenced by the {produce,consume}Buffer + * struct into memory and populate the {produce,consume}Pages + * arrays in the attach structure with them. + */ +static int qp_host_get_user_memory(u64 produce_uva, + u64 consume_uva, + struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + int retval; + int err = VMCI_SUCCESS; + + retval = get_user_pages_fast((uintptr_t) produce_uva, + produce_q->kernel_if->num_pages, 1, + produce_q->kernel_if->u.h.header_page); + if (retval < produce_q->kernel_if->num_pages) { + pr_debug("get_user_pages_fast(produce) failed (retval=%d)", + retval); + qp_release_pages(produce_q->kernel_if->u.h.header_page, + retval, false); + err = VMCI_ERROR_NO_MEM; + goto out; + } + + retval = get_user_pages_fast((uintptr_t) consume_uva, + consume_q->kernel_if->num_pages, 1, + consume_q->kernel_if->u.h.header_page); + if (retval < consume_q->kernel_if->num_pages) { + pr_debug("get_user_pages_fast(consume) failed (retval=%d)", + retval); + qp_release_pages(consume_q->kernel_if->u.h.header_page, + retval, false); + qp_release_pages(produce_q->kernel_if->u.h.header_page, + produce_q->kernel_if->num_pages, false); + err = VMCI_ERROR_NO_MEM; + } + + out: + return err; +} + +/* + * Registers the specification of the user pages used for backing a queue + * pair. Enough information to map in pages is stored in the OS specific + * part of the struct vmci_queue structure. 
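+ *
+ * The split below is plain arithmetic: the VMX hands down one
+ * contiguous range, whose first num_pages * PAGE_SIZE bytes (header
+ * included) back the produce queue and whose remainder backs the
+ * consume queue.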
+ */ +static int qp_host_register_user_memory(struct vmci_qp_page_store *page_store, + struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + u64 produce_uva; + u64 consume_uva; + + /* + * The new style and the old style mapping only differs in + * that we either get a single or two UVAs, so we split the + * single UVA range at the appropriate spot. + */ + produce_uva = page_store->pages; + consume_uva = page_store->pages + + produce_q->kernel_if->num_pages * PAGE_SIZE; + return qp_host_get_user_memory(produce_uva, consume_uva, produce_q, + consume_q); +} + +/* + * Releases and removes the references to user pages stored in the attach + * struct. Pages are released from the page cache and may become + * swappable again. + */ +static void qp_host_unregister_user_memory(struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + qp_release_pages(produce_q->kernel_if->u.h.header_page, + produce_q->kernel_if->num_pages, true); + memset(produce_q->kernel_if->u.h.header_page, 0, + sizeof(*produce_q->kernel_if->u.h.header_page) * + produce_q->kernel_if->num_pages); + qp_release_pages(consume_q->kernel_if->u.h.header_page, + consume_q->kernel_if->num_pages, true); + memset(consume_q->kernel_if->u.h.header_page, 0, + sizeof(*consume_q->kernel_if->u.h.header_page) * + consume_q->kernel_if->num_pages); +} + +/* + * Once qp_host_register_user_memory has been performed on a + * queue, the queue pair headers can be mapped into the + * kernel. Once mapped, they must be unmapped with + * qp_host_unmap_queues prior to calling + * qp_host_unregister_user_memory. + * Pages are pinned. + */ +static int qp_host_map_queues(struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + int result; + + if (!produce_q->q_header || !consume_q->q_header) { + struct page *headers[2]; + + if (produce_q->q_header != consume_q->q_header) + return VMCI_ERROR_QUEUEPAIR_MISMATCH; + + if (produce_q->kernel_if->u.h.header_page == NULL || + *produce_q->kernel_if->u.h.header_page == NULL) + return VMCI_ERROR_UNAVAILABLE; + + headers[0] = *produce_q->kernel_if->u.h.header_page; + headers[1] = *consume_q->kernel_if->u.h.header_page; + + produce_q->q_header = vmap(headers, 2, VM_MAP, PAGE_KERNEL); + if (produce_q->q_header != NULL) { + consume_q->q_header = + (struct vmci_queue_header *)((u8 *) + produce_q->q_header + + PAGE_SIZE); + result = VMCI_SUCCESS; + } else { + pr_warn("vmap failed\n"); + result = VMCI_ERROR_NO_MEM; + } + } else { + result = VMCI_SUCCESS; + } + + return result; +} + +/* + * Unmaps previously mapped queue pair headers from the kernel. + * Pages are unpinned. + */ +static int qp_host_unmap_queues(u32 gid, + struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + if (produce_q->q_header) { + if (produce_q->q_header < consume_q->q_header) + vunmap(produce_q->q_header); + else + vunmap(consume_q->q_header); + + produce_q->q_header = NULL; + consume_q->q_header = NULL; + } + + return VMCI_SUCCESS; +} + +/* + * Finds the entry in the list corresponding to a given handle. Assumes + * that the list is locked. + */ +static struct qp_entry *qp_list_find(struct qp_list *qp_list, + struct vmci_handle handle) +{ + struct qp_entry *entry; + + if (vmci_handle_is_invalid(handle)) + return NULL; + + list_for_each_entry(entry, &qp_list->head, list_item) { + if (vmci_handle_is_equal(entry->handle, handle)) + return entry; + } + + return NULL; +} + +/* + * Finds the entry in the list corresponding to a given handle. 
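+ *
+ * Callers are expected to hold the owning list mutex
+ * (qp_guest_endpoints.mutex here, qp_broker_list.mutex for the broker
+ * variant below) across both the lookup and any use of the returned
+ * entry, since a concurrent detach may otherwise free it.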
+ */ +static struct qp_guest_endpoint * +qp_guest_handle_to_entry(struct vmci_handle handle) +{ + struct qp_guest_endpoint *entry; + struct qp_entry *qp = qp_list_find(&qp_guest_endpoints, handle); + + entry = qp ? container_of( + qp, struct qp_guest_endpoint, qp) : NULL; + return entry; +} + +/* + * Finds the entry in the list corresponding to a given handle. + */ +static struct qp_broker_entry * +qp_broker_handle_to_entry(struct vmci_handle handle) +{ + struct qp_broker_entry *entry; + struct qp_entry *qp = qp_list_find(&qp_broker_list, handle); + + entry = qp ? container_of( + qp, struct qp_broker_entry, qp) : NULL; + return entry; +} + +/* + * Dispatches a queue pair event message directly into the local event + * queue. + */ +static int qp_notify_peer_local(bool attach, struct vmci_handle handle) +{ + u32 context_id = vmci_get_context_id(); + struct vmci_event_qp ev; + + ev.msg.hdr.dst = vmci_make_handle(context_id, VMCI_EVENT_HANDLER); + ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_CONTEXT_RESOURCE_ID); + ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr); + ev.msg.event_data.event = + attach ? VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH; + ev.payload.peer_id = context_id; + ev.payload.handle = handle; + + return vmci_event_dispatch(&ev.msg.hdr); +} + +/* + * Allocates and initializes a qp_guest_endpoint structure. + * Allocates a queue_pair rid (and handle) iff the given entry has + * an invalid handle. 0 through VMCI_RESERVED_RESOURCE_ID_MAX + * are reserved handles. Assumes that the QP list mutex is held + * by the caller. + */ +static struct qp_guest_endpoint * +qp_guest_endpoint_create(struct vmci_handle handle, + u32 peer, + u32 flags, + u64 produce_size, + u64 consume_size, + void *produce_q, + void *consume_q) +{ + int result; + struct qp_guest_endpoint *entry; + /* One page each for the queue headers. */ + const u64 num_ppns = DIV_ROUND_UP(produce_size, PAGE_SIZE) + + DIV_ROUND_UP(consume_size, PAGE_SIZE) + 2; + + if (vmci_handle_is_invalid(handle)) { + u32 context_id = vmci_get_context_id(); + + handle = vmci_make_handle(context_id, VMCI_INVALID_ID); + } + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (entry) { + entry->qp.peer = peer; + entry->qp.flags = flags; + entry->qp.produce_size = produce_size; + entry->qp.consume_size = consume_size; + entry->qp.ref_count = 0; + entry->num_ppns = num_ppns; + entry->produce_q = produce_q; + entry->consume_q = consume_q; + INIT_LIST_HEAD(&entry->qp.list_item); + + /* Add resource obj */ + result = vmci_resource_add(&entry->resource, + VMCI_RESOURCE_TYPE_QPAIR_GUEST, + handle); + entry->qp.handle = vmci_resource_handle(&entry->resource); + if ((result != VMCI_SUCCESS) || + qp_list_find(&qp_guest_endpoints, entry->qp.handle)) { + pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d", + handle.context, handle.resource, result); + kfree(entry); + entry = NULL; + } + } + return entry; +} + +/* + * Frees a qp_guest_endpoint structure. + */ +static void qp_guest_endpoint_destroy(struct qp_guest_endpoint *entry) +{ + qp_free_ppn_set(&entry->ppn_set); + qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q); + qp_free_queue(entry->produce_q, entry->qp.produce_size); + qp_free_queue(entry->consume_q, entry->qp.consume_size); + /* Unlink from resource hash table and free callback */ + vmci_resource_remove(&entry->resource); + + kfree(entry); +} + +/* + * Helper to make a queue_pairAlloc hypercall when the driver is + * supporting a guest device. 
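+ *
+ * The datagram payload is the fixed message immediately followed by
+ * the produce and consume PPN arrays, so its size works out to
+ *
+ *	sizeof(*alloc_msg) + num_ppns * sizeof(u32)
+ *
+ * as computed below.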
+ */ +static int qp_alloc_hypercall(const struct qp_guest_endpoint *entry) +{ + struct vmci_qp_alloc_msg *alloc_msg; + size_t msg_size; + int result; + + if (!entry || entry->num_ppns <= 2) + return VMCI_ERROR_INVALID_ARGS; + + msg_size = sizeof(*alloc_msg) + + (size_t) entry->num_ppns * sizeof(u32); + alloc_msg = kmalloc(msg_size, GFP_KERNEL); + if (!alloc_msg) + return VMCI_ERROR_NO_MEM; + + alloc_msg->hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_QUEUEPAIR_ALLOC); + alloc_msg->hdr.src = VMCI_ANON_SRC_HANDLE; + alloc_msg->hdr.payload_size = msg_size - VMCI_DG_HEADERSIZE; + alloc_msg->handle = entry->qp.handle; + alloc_msg->peer = entry->qp.peer; + alloc_msg->flags = entry->qp.flags; + alloc_msg->produce_size = entry->qp.produce_size; + alloc_msg->consume_size = entry->qp.consume_size; + alloc_msg->num_ppns = entry->num_ppns; + + result = qp_populate_ppn_set((u8 *)alloc_msg + sizeof(*alloc_msg), + &entry->ppn_set); + if (result == VMCI_SUCCESS) + result = vmci_send_datagram(&alloc_msg->hdr); + + kfree(alloc_msg); + + return result; +} + +/* + * Helper to make a queue_pairDetach hypercall when the driver is + * supporting a guest device. + */ +static int qp_detatch_hypercall(struct vmci_handle handle) +{ + struct vmci_qp_detach_msg detach_msg; + + detach_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_QUEUEPAIR_DETACH); + detach_msg.hdr.src = VMCI_ANON_SRC_HANDLE; + detach_msg.hdr.payload_size = sizeof(handle); + detach_msg.handle = handle; + + return vmci_send_datagram(&detach_msg.hdr); +} + +/* + * Adds the given entry to the list. Assumes that the list is locked. + */ +static void qp_list_add_entry(struct qp_list *qp_list, struct qp_entry *entry) +{ + if (entry) + list_add(&entry->list_item, &qp_list->head); +} + +/* + * Removes the given entry from the list. Assumes that the list is locked. + */ +static void qp_list_remove_entry(struct qp_list *qp_list, + struct qp_entry *entry) +{ + if (entry) + list_del(&entry->list_item); +} + +/* + * Helper for VMCI queue_pair detach interface. Frees the physical + * pages for the queue pair. + */ +static int qp_detatch_guest_work(struct vmci_handle handle) +{ + int result; + struct qp_guest_endpoint *entry; + u32 ref_count = ~0; /* To avoid compiler warning below */ + + mutex_lock(&qp_guest_endpoints.mutex); + + entry = qp_guest_handle_to_entry(handle); + if (!entry) { + mutex_unlock(&qp_guest_endpoints.mutex); + return VMCI_ERROR_NOT_FOUND; + } + + if (entry->qp.flags & VMCI_QPFLAG_LOCAL) { + result = VMCI_SUCCESS; + + if (entry->qp.ref_count > 1) { + result = qp_notify_peer_local(false, handle); + /* + * We can fail to notify a local queuepair + * because we can't allocate. We still want + * to release the entry if that happens, so + * don't bail out yet. + */ + } + } else { + result = qp_detatch_hypercall(handle); + if (result < VMCI_SUCCESS) { + /* + * We failed to notify a non-local queuepair. + * That other queuepair might still be + * accessing the shared memory, so don't + * release the entry yet. It will get cleaned + * up by VMCIqueue_pair_Exit() if necessary + * (assuming we are going away, otherwise why + * did this fail?). + */ + + mutex_unlock(&qp_guest_endpoints.mutex); + return result; + } + } + + /* + * If we get here then we either failed to notify a local queuepair, or + * we succeeded in all cases. Release the entry if required. 
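+	 *
+	 * For example, a local (loopback) pair that has been both
+	 * created and attached to within this context holds
+	 * ref_count == 2: the first detach merely drops the count, and
+	 * only the second removes the entry and destroys it.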
+ */ + + entry->qp.ref_count--; + if (entry->qp.ref_count == 0) + qp_list_remove_entry(&qp_guest_endpoints, &entry->qp); + + /* If we didn't remove the entry, this could change once we unlock. */ + if (entry) + ref_count = entry->qp.ref_count; + + mutex_unlock(&qp_guest_endpoints.mutex); + + if (ref_count == 0) + qp_guest_endpoint_destroy(entry); + + return result; +} + +/* + * This functions handles the actual allocation of a VMCI queue + * pair guest endpoint. Allocates physical pages for the queue + * pair. It makes OS dependent calls through generic wrappers. + */ +static int qp_alloc_guest_work(struct vmci_handle *handle, + struct vmci_queue **produce_q, + u64 produce_size, + struct vmci_queue **consume_q, + u64 consume_size, + u32 peer, + u32 flags, + u32 priv_flags) +{ + const u64 num_produce_pages = + DIV_ROUND_UP(produce_size, PAGE_SIZE) + 1; + const u64 num_consume_pages = + DIV_ROUND_UP(consume_size, PAGE_SIZE) + 1; + void *my_produce_q = NULL; + void *my_consume_q = NULL; + int result; + struct qp_guest_endpoint *queue_pair_entry = NULL; + + if (priv_flags != VMCI_NO_PRIVILEGE_FLAGS) + return VMCI_ERROR_NO_ACCESS; + + mutex_lock(&qp_guest_endpoints.mutex); + + queue_pair_entry = qp_guest_handle_to_entry(*handle); + if (queue_pair_entry) { + if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) { + /* Local attach case. */ + if (queue_pair_entry->qp.ref_count > 1) { + pr_devel("Error attempting to attach more than once\n"); + result = VMCI_ERROR_UNAVAILABLE; + goto error_keep_entry; + } + + if (queue_pair_entry->qp.produce_size != consume_size || + queue_pair_entry->qp.consume_size != + produce_size || + queue_pair_entry->qp.flags != + (flags & ~VMCI_QPFLAG_ATTACH_ONLY)) { + pr_devel("Error mismatched queue pair in local attach\n"); + result = VMCI_ERROR_QUEUEPAIR_MISMATCH; + goto error_keep_entry; + } + + /* + * Do a local attach. We swap the consume and + * produce queues for the attacher and deliver + * an attach event. + */ + result = qp_notify_peer_local(true, *handle); + if (result < VMCI_SUCCESS) + goto error_keep_entry; + + my_produce_q = queue_pair_entry->consume_q; + my_consume_q = queue_pair_entry->produce_q; + goto out; + } + + result = VMCI_ERROR_ALREADY_EXISTS; + goto error_keep_entry; + } + + my_produce_q = qp_alloc_queue(produce_size, flags); + if (!my_produce_q) { + pr_warn("Error allocating pages for produce queue\n"); + result = VMCI_ERROR_NO_MEM; + goto error; + } + + my_consume_q = qp_alloc_queue(consume_size, flags); + if (!my_consume_q) { + pr_warn("Error allocating pages for consume queue\n"); + result = VMCI_ERROR_NO_MEM; + goto error; + } + + queue_pair_entry = qp_guest_endpoint_create(*handle, peer, flags, + produce_size, consume_size, + my_produce_q, my_consume_q); + if (!queue_pair_entry) { + pr_warn("Error allocating memory in %s\n", __func__); + result = VMCI_ERROR_NO_MEM; + goto error; + } + + result = qp_alloc_ppn_set(my_produce_q, num_produce_pages, my_consume_q, + num_consume_pages, + &queue_pair_entry->ppn_set); + if (result < VMCI_SUCCESS) { + pr_warn("qp_alloc_ppn_set failed\n"); + goto error; + } + + /* + * It's only necessary to notify the host if this queue pair will be + * attached to from another context. + */ + if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) { + /* Local create case. */ + u32 context_id = vmci_get_context_id(); + + /* + * Enforce similar checks on local queue pairs as we + * do for regular ones. 
The handle's context must + * match the creator or attacher context id (here they + * are both the current context id) and the + * attach-only flag cannot exist during create. We + * also ensure specified peer is this context or an + * invalid one. + */ + if (queue_pair_entry->qp.handle.context != context_id || + (queue_pair_entry->qp.peer != VMCI_INVALID_ID && + queue_pair_entry->qp.peer != context_id)) { + result = VMCI_ERROR_NO_ACCESS; + goto error; + } + + if (queue_pair_entry->qp.flags & VMCI_QPFLAG_ATTACH_ONLY) { + result = VMCI_ERROR_NOT_FOUND; + goto error; + } + } else { + result = qp_alloc_hypercall(queue_pair_entry); + if (result < VMCI_SUCCESS) { + pr_warn("qp_alloc_hypercall result = %d\n", result); + goto error; + } + } + + qp_init_queue_mutex((struct vmci_queue *)my_produce_q, + (struct vmci_queue *)my_consume_q); + + qp_list_add_entry(&qp_guest_endpoints, &queue_pair_entry->qp); + + out: + queue_pair_entry->qp.ref_count++; + *handle = queue_pair_entry->qp.handle; + *produce_q = (struct vmci_queue *)my_produce_q; + *consume_q = (struct vmci_queue *)my_consume_q; + + /* + * We should initialize the queue pair header pages on a local + * queue pair create. For non-local queue pairs, the + * hypervisor initializes the header pages in the create step. + */ + if ((queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) && + queue_pair_entry->qp.ref_count == 1) { + vmci_q_header_init((*produce_q)->q_header, *handle); + vmci_q_header_init((*consume_q)->q_header, *handle); + } + + mutex_unlock(&qp_guest_endpoints.mutex); + + return VMCI_SUCCESS; + + error: + mutex_unlock(&qp_guest_endpoints.mutex); + if (queue_pair_entry) { + /* The queues will be freed inside the destroy routine. */ + qp_guest_endpoint_destroy(queue_pair_entry); + } else { + qp_free_queue(my_produce_q, produce_size); + qp_free_queue(my_consume_q, consume_size); + } + return result; + + error_keep_entry: + /* This path should only be used when an existing entry was found. */ + mutex_unlock(&qp_guest_endpoints.mutex); + return result; +} + +/* + * The first endpoint issuing a queue pair allocation will create the state + * of the queue pair in the queue pair broker. + * + * If the creator is a guest, it will associate a VMX virtual address range + * with the queue pair as specified by the page_store. For compatibility with + * older VMX'en, that would use a separate step to set the VMX virtual + * address range, the virtual address range can be registered later using + * vmci_qp_broker_set_page_store. In that case, a page_store of NULL should be + * used. + * + * If the creator is the host, a page_store of NULL should be used as well, + * since the host is not able to supply a page store for the queue pair. + * + * For older VMX and host callers, the queue pair will be created in the + * VMCIQPB_CREATED_NO_MEM state, and for current VMX callers, it will be + * created in VMCOQPB_CREATED_MEM state. + */ +static int qp_broker_create(struct vmci_handle handle, + u32 peer, + u32 flags, + u32 priv_flags, + u64 produce_size, + u64 consume_size, + struct vmci_qp_page_store *page_store, + struct vmci_ctx *context, + vmci_event_release_cb wakeup_cb, + void *client_data, struct qp_broker_entry **ent) +{ + struct qp_broker_entry *entry = NULL; + const u32 context_id = vmci_ctx_get_id(context); + bool is_local = flags & VMCI_QPFLAG_LOCAL; + int result; + u64 guest_produce_size; + u64 guest_consume_size; + + /* Do not create if the caller asked not to. 
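+	 * (VMCI_QPFLAG_ATTACH_ONLY means the caller only wants to
+	 * attach to an existing pair, so a missing pair is reported as
+	 * VMCI_ERROR_NOT_FOUND below rather than created.)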
*/ + if (flags & VMCI_QPFLAG_ATTACH_ONLY) + return VMCI_ERROR_NOT_FOUND; + + /* + * Creator's context ID should match handle's context ID or the creator + * must allow the context in handle's context ID as the "peer". + */ + if (handle.context != context_id && handle.context != peer) + return VMCI_ERROR_NO_ACCESS; + + if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(peer)) + return VMCI_ERROR_DST_UNREACHABLE; + + /* + * Creator's context ID for local queue pairs should match the + * peer, if a peer is specified. + */ + if (is_local && peer != VMCI_INVALID_ID && context_id != peer) + return VMCI_ERROR_NO_ACCESS; + + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); + if (!entry) + return VMCI_ERROR_NO_MEM; + + if (vmci_ctx_get_id(context) == VMCI_HOST_CONTEXT_ID && !is_local) { + /* + * The queue pair broker entry stores values from the guest + * point of view, so a creating host side endpoint should swap + * produce and consume values -- unless it is a local queue + * pair, in which case no swapping is necessary, since the local + * attacher will swap queues. + */ + + guest_produce_size = consume_size; + guest_consume_size = produce_size; + } else { + guest_produce_size = produce_size; + guest_consume_size = consume_size; + } + + entry->qp.handle = handle; + entry->qp.peer = peer; + entry->qp.flags = flags; + entry->qp.produce_size = guest_produce_size; + entry->qp.consume_size = guest_consume_size; + entry->qp.ref_count = 1; + entry->create_id = context_id; + entry->attach_id = VMCI_INVALID_ID; + entry->state = VMCIQPB_NEW; + entry->require_trusted_attach = + !!(context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED); + entry->created_by_trusted = + !!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED); + entry->vmci_page_files = false; + entry->wakeup_cb = wakeup_cb; + entry->client_data = client_data; + entry->produce_q = qp_host_alloc_queue(guest_produce_size); + if (entry->produce_q == NULL) { + result = VMCI_ERROR_NO_MEM; + goto error; + } + entry->consume_q = qp_host_alloc_queue(guest_consume_size); + if (entry->consume_q == NULL) { + result = VMCI_ERROR_NO_MEM; + goto error; + } + + qp_init_queue_mutex(entry->produce_q, entry->consume_q); + + INIT_LIST_HEAD(&entry->qp.list_item); + + if (is_local) { + u8 *tmp; + + entry->local_mem = kcalloc(QPE_NUM_PAGES(entry->qp), + PAGE_SIZE, GFP_KERNEL); + if (entry->local_mem == NULL) { + result = VMCI_ERROR_NO_MEM; + goto error; + } + entry->state = VMCIQPB_CREATED_MEM; + entry->produce_q->q_header = entry->local_mem; + tmp = (u8 *)entry->local_mem + PAGE_SIZE * + (DIV_ROUND_UP(entry->qp.produce_size, PAGE_SIZE) + 1); + entry->consume_q->q_header = (struct vmci_queue_header *)tmp; + } else if (page_store) { + /* + * The VMX already initialized the queue pair headers, so no + * need for the kernel side to do that. + */ + result = qp_host_register_user_memory(page_store, + entry->produce_q, + entry->consume_q); + if (result < VMCI_SUCCESS) + goto error; + + entry->state = VMCIQPB_CREATED_MEM; + } else { + /* + * A create without a page_store may be either a host + * side create (in which case we are waiting for the + * guest side to supply the memory) or an old style + * queue pair create (in which case we will expect a + * set page store call as the next step). 
+ */
+ entry->state = VMCIQPB_CREATED_NO_MEM;
+ }
+
+ qp_list_add_entry(&qp_broker_list, &entry->qp);
+ if (ent != NULL)
+ *ent = entry;
+
+ /* Add to resource obj */
+ result = vmci_resource_add(&entry->resource,
+ VMCI_RESOURCE_TYPE_QPAIR_HOST,
+ handle);
+ if (result != VMCI_SUCCESS) {
+ pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d",
+ handle.context, handle.resource, result);
+ goto error;
+ }
+
+ entry->qp.handle = vmci_resource_handle(&entry->resource);
+ if (is_local) {
+ vmci_q_header_init(entry->produce_q->q_header,
+ entry->qp.handle);
+ vmci_q_header_init(entry->consume_q->q_header,
+ entry->qp.handle);
+ }
+
+ vmci_ctx_qp_create(context, entry->qp.handle);
+
+ return VMCI_SUCCESS;
+
+ error:
+ if (entry != NULL) {
+ qp_host_free_queue(entry->produce_q, guest_produce_size);
+ qp_host_free_queue(entry->consume_q, guest_consume_size);
+ kfree(entry);
+ }
+
+ return result;
+}
+
+/*
+ * Enqueues an event datagram to notify the peer VM attached to
+ * the given queue pair handle about an attach/detach event by the
+ * given VM. Returns the payload size of the datagram enqueued on
+ * success, error code otherwise.
+ */
+static int qp_notify_peer(bool attach,
+ struct vmci_handle handle,
+ u32 my_id,
+ u32 peer_id)
+{
+ int rv;
+ struct vmci_event_qp ev;
+
+ if (vmci_handle_is_invalid(handle) || my_id == VMCI_INVALID_ID ||
+ peer_id == VMCI_INVALID_ID)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ /*
+ * In vmci_ctx_enqueue_datagram() we enforce the upper limit on
+ * the number of pending events from the hypervisor to a given VM;
+ * otherwise a rogue VM could do an arbitrary number of attach
+ * and detach operations causing memory pressure in the host
+ * kernel.
+ */
+
+ ev.msg.hdr.dst = vmci_make_handle(peer_id, VMCI_EVENT_HANDLER);
+ ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_CONTEXT_RESOURCE_ID);
+ ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
+ ev.msg.event_data.event = attach ?
+ VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH;
+ ev.payload.handle = handle;
+ ev.payload.peer_id = my_id;
+
+ rv = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID,
+ &ev.msg.hdr, false);
+ if (rv < VMCI_SUCCESS)
+ pr_warn("Failed to enqueue queue_pair %s event datagram for context (ID=0x%x)\n",
+ attach ? "ATTACH" : "DETACH", peer_id);
+
+ return rv;
+}
+
+/*
+ * The second endpoint issuing a queue pair allocation will attach to
+ * the queue pair registered with the queue pair broker.
+ *
+ * If the attacher is a guest, it will associate a VMX virtual address
+ * range with the queue pair as specified by the page_store. At this
+ * point, the already attached host endpoint may start using the queue
+ * pair, and an attach event is sent to it. For compatibility with
+ * older VMX'en that used a separate step to set the VMX virtual
+ * address range, the virtual address range can be registered later
+ * using vmci_qp_broker_set_page_store. In that case, a page_store of
+ * NULL should be used, and the attach event will be generated once
+ * the actual page store has been set.
+ *
+ * If the attacher is the host, a page_store of NULL should be used as
+ * well, since the page store information is already set by the guest.
+ *
+ * For new VMX and host callers, the queue pair will be moved to the
+ * VMCIQPB_ATTACHED_MEM state, and for older VMX callers, it will be
+ * moved to the VMCIQPB_ATTACHED_NO_MEM state. 
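+ *
+ * A compact summary of the transitions described above, as enforced
+ * by the checks in qp_broker_attach() below:
+ *
+ *	VMCIQPB_CREATED_MEM    + host attach               -> VMCIQPB_ATTACHED_MEM
+ *	VMCIQPB_CREATED_NO_MEM + guest attach + page store -> VMCIQPB_ATTACHED_MEM
+ *	VMCIQPB_CREATED_NO_MEM + guest attach, no pages    -> VMCIQPB_ATTACHED_NO_MEM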
+ */ +static int qp_broker_attach(struct qp_broker_entry *entry, + u32 peer, + u32 flags, + u32 priv_flags, + u64 produce_size, + u64 consume_size, + struct vmci_qp_page_store *page_store, + struct vmci_ctx *context, + vmci_event_release_cb wakeup_cb, + void *client_data, + struct qp_broker_entry **ent) +{ + const u32 context_id = vmci_ctx_get_id(context); + bool is_local = flags & VMCI_QPFLAG_LOCAL; + int result; + + if (entry->state != VMCIQPB_CREATED_NO_MEM && + entry->state != VMCIQPB_CREATED_MEM) + return VMCI_ERROR_UNAVAILABLE; + + if (is_local) { + if (!(entry->qp.flags & VMCI_QPFLAG_LOCAL) || + context_id != entry->create_id) { + return VMCI_ERROR_INVALID_ARGS; + } + } else if (context_id == entry->create_id || + context_id == entry->attach_id) { + return VMCI_ERROR_ALREADY_EXISTS; + } + + if (VMCI_CONTEXT_IS_VM(context_id) && + VMCI_CONTEXT_IS_VM(entry->create_id)) + return VMCI_ERROR_DST_UNREACHABLE; + + /* + * If we are attaching from a restricted context then the queuepair + * must have been created by a trusted endpoint. + */ + if ((context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) && + !entry->created_by_trusted) + return VMCI_ERROR_NO_ACCESS; + + /* + * If we are attaching to a queuepair that was created by a restricted + * context then we must be trusted. + */ + if (entry->require_trusted_attach && + (!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED))) + return VMCI_ERROR_NO_ACCESS; + + /* + * If the creator specifies VMCI_INVALID_ID in "peer" field, access + * control check is not performed. + */ + if (entry->qp.peer != VMCI_INVALID_ID && entry->qp.peer != context_id) + return VMCI_ERROR_NO_ACCESS; + + if (entry->create_id == VMCI_HOST_CONTEXT_ID) { + /* + * Do not attach if the caller doesn't support Host Queue Pairs + * and a host created this queue pair. + */ + + if (!vmci_ctx_supports_host_qp(context)) + return VMCI_ERROR_INVALID_RESOURCE; + + } else if (context_id == VMCI_HOST_CONTEXT_ID) { + struct vmci_ctx *create_context; + bool supports_host_qp; + + /* + * Do not attach a host to a user created queue pair if that + * user doesn't support host queue pair end points. + */ + + create_context = vmci_ctx_get(entry->create_id); + supports_host_qp = vmci_ctx_supports_host_qp(create_context); + vmci_ctx_put(create_context); + + if (!supports_host_qp) + return VMCI_ERROR_INVALID_RESOURCE; + } + + if ((entry->qp.flags & ~VMCI_QP_ASYMM) != (flags & ~VMCI_QP_ASYMM_PEER)) + return VMCI_ERROR_QUEUEPAIR_MISMATCH; + + if (context_id != VMCI_HOST_CONTEXT_ID) { + /* + * The queue pair broker entry stores values from the guest + * point of view, so an attaching guest should match the values + * stored in the entry. + */ + + if (entry->qp.produce_size != produce_size || + entry->qp.consume_size != consume_size) { + return VMCI_ERROR_QUEUEPAIR_MISMATCH; + } + } else if (entry->qp.produce_size != consume_size || + entry->qp.consume_size != produce_size) { + return VMCI_ERROR_QUEUEPAIR_MISMATCH; + } + + if (context_id != VMCI_HOST_CONTEXT_ID) { + /* + * If a guest attached to a queue pair, it will supply + * the backing memory. If this is a pre NOVMVM vmx, + * the backing memory will be supplied by calling + * vmci_qp_broker_set_page_store() following the + * return of the vmci_qp_broker_alloc() call. If it is + * a vmx of version NOVMVM or later, the page store + * must be supplied as part of the + * vmci_qp_broker_alloc call. Under all circumstances + * must the initially created queue pair not have any + * memory associated with it already. 
+ */ + + if (entry->state != VMCIQPB_CREATED_NO_MEM) + return VMCI_ERROR_INVALID_ARGS; + + if (page_store != NULL) { + /* + * Patch up host state to point to guest + * supplied memory. The VMX already + * initialized the queue pair headers, so no + * need for the kernel side to do that. + */ + + result = qp_host_register_user_memory(page_store, + entry->produce_q, + entry->consume_q); + if (result < VMCI_SUCCESS) + return result; + + entry->state = VMCIQPB_ATTACHED_MEM; + } else { + entry->state = VMCIQPB_ATTACHED_NO_MEM; + } + } else if (entry->state == VMCIQPB_CREATED_NO_MEM) { + /* + * The host side is attempting to attach to a queue + * pair that doesn't have any memory associated with + * it. This must be a pre NOVMVM vmx that hasn't set + * the page store information yet, or a quiesced VM. + */ + + return VMCI_ERROR_UNAVAILABLE; + } else { + /* The host side has successfully attached to a queue pair. */ + entry->state = VMCIQPB_ATTACHED_MEM; + } + + if (entry->state == VMCIQPB_ATTACHED_MEM) { + result = + qp_notify_peer(true, entry->qp.handle, context_id, + entry->create_id); + if (result < VMCI_SUCCESS) + pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n", + entry->create_id, entry->qp.handle.context, + entry->qp.handle.resource); + } + + entry->attach_id = context_id; + entry->qp.ref_count++; + if (wakeup_cb) { + entry->wakeup_cb = wakeup_cb; + entry->client_data = client_data; + } + + /* + * When attaching to local queue pairs, the context already has + * an entry tracking the queue pair, so don't add another one. + */ + if (!is_local) + vmci_ctx_qp_create(context, entry->qp.handle); + + if (ent != NULL) + *ent = entry; + + return VMCI_SUCCESS; +} + +/* + * queue_pair_Alloc for use when setting up queue pair endpoints + * on the host. + */ +static int qp_broker_alloc(struct vmci_handle handle, + u32 peer, + u32 flags, + u32 priv_flags, + u64 produce_size, + u64 consume_size, + struct vmci_qp_page_store *page_store, + struct vmci_ctx *context, + vmci_event_release_cb wakeup_cb, + void *client_data, + struct qp_broker_entry **ent, + bool *swap) +{ + const u32 context_id = vmci_ctx_get_id(context); + bool create; + struct qp_broker_entry *entry = NULL; + bool is_local = flags & VMCI_QPFLAG_LOCAL; + int result; + + if (vmci_handle_is_invalid(handle) || + (flags & ~VMCI_QP_ALL_FLAGS) || is_local || + !(produce_size || consume_size) || + !context || context_id == VMCI_INVALID_ID || + handle.context == VMCI_INVALID_ID) { + return VMCI_ERROR_INVALID_ARGS; + } + + if (page_store && !VMCI_QP_PAGESTORE_IS_WELLFORMED(page_store)) + return VMCI_ERROR_INVALID_ARGS; + + /* + * In the initial argument check, we ensure that non-vmkernel hosts + * are not allowed to create local queue pairs. 
+ */ + + mutex_lock(&qp_broker_list.mutex); + + if (!is_local && vmci_ctx_qp_exists(context, handle)) { + pr_devel("Context (ID=0x%x) already attached to queue pair (handle=0x%x:0x%x)\n", + context_id, handle.context, handle.resource); + mutex_unlock(&qp_broker_list.mutex); + return VMCI_ERROR_ALREADY_EXISTS; + } + + if (handle.resource != VMCI_INVALID_ID) + entry = qp_broker_handle_to_entry(handle); + + if (!entry) { + create = true; + result = + qp_broker_create(handle, peer, flags, priv_flags, + produce_size, consume_size, page_store, + context, wakeup_cb, client_data, ent); + } else { + create = false; + result = + qp_broker_attach(entry, peer, flags, priv_flags, + produce_size, consume_size, page_store, + context, wakeup_cb, client_data, ent); + } + + mutex_unlock(&qp_broker_list.mutex); + + if (swap) + *swap = (context_id == VMCI_HOST_CONTEXT_ID) && + !(create && is_local); + + return result; +} + +/* + * This function implements the kernel API for allocating a queue + * pair. + */ +static int qp_alloc_host_work(struct vmci_handle *handle, + struct vmci_queue **produce_q, + u64 produce_size, + struct vmci_queue **consume_q, + u64 consume_size, + u32 peer, + u32 flags, + u32 priv_flags, + vmci_event_release_cb wakeup_cb, + void *client_data) +{ + struct vmci_handle new_handle; + struct vmci_ctx *context; + struct qp_broker_entry *entry; + int result; + bool swap; + + if (vmci_handle_is_invalid(*handle)) { + new_handle = vmci_make_handle( + VMCI_HOST_CONTEXT_ID, VMCI_INVALID_ID); + } else + new_handle = *handle; + + context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID); + entry = NULL; + result = + qp_broker_alloc(new_handle, peer, flags, priv_flags, + produce_size, consume_size, NULL, context, + wakeup_cb, client_data, &entry, &swap); + if (result == VMCI_SUCCESS) { + if (swap) { + /* + * If this is a local queue pair, the attacher + * will swap around produce and consume + * queues. + */ + + *produce_q = entry->consume_q; + *consume_q = entry->produce_q; + } else { + *produce_q = entry->produce_q; + *consume_q = entry->consume_q; + } + + *handle = vmci_resource_handle(&entry->resource); + } else { + *handle = VMCI_INVALID_HANDLE; + pr_devel("queue pair broker failed to alloc (result=%d)\n", + result); + } + vmci_ctx_put(context); + return result; +} + +/* + * Allocates a VMCI queue_pair. Only checks validity of input + * arguments. The real work is done in the host or guest + * specific function. + */ +int vmci_qp_alloc(struct vmci_handle *handle, + struct vmci_queue **produce_q, + u64 produce_size, + struct vmci_queue **consume_q, + u64 consume_size, + u32 peer, + u32 flags, + u32 priv_flags, + bool guest_endpoint, + vmci_event_release_cb wakeup_cb, + void *client_data) +{ + if (!handle || !produce_q || !consume_q || + (!produce_size && !consume_size) || (flags & ~VMCI_QP_ALL_FLAGS)) + return VMCI_ERROR_INVALID_ARGS; + + if (guest_endpoint) { + return qp_alloc_guest_work(handle, produce_q, + produce_size, consume_q, + consume_size, peer, + flags, priv_flags); + } else { + return qp_alloc_host_work(handle, produce_q, + produce_size, consume_q, + consume_size, peer, flags, + priv_flags, wakeup_cb, client_data); + } +} + +/* + * This function implements the host kernel API for detaching from + * a queue pair. 
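+ *
+ * A minimal sketch of the call shape (handle is illustrative, e.g.
+ * one previously set up through vmci_qp_alloc()):
+ *
+ *	int result = qp_detatch_host_work(handle);
+ *
+ *	if (result < VMCI_SUCCESS)
+ *		pr_devel("detach failed: %d\n", result);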
+ */
+static int qp_detatch_host_work(struct vmci_handle handle)
+{
+ int result;
+ struct vmci_ctx *context;
+
+ context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID);
+
+ result = vmci_qp_broker_detach(handle, context);
+
+ vmci_ctx_put(context);
+ return result;
+}
+
+/*
+ * Detaches from a VMCI queue_pair. Only checks validity of the input
+ * argument. The real work is done in the host or guest specific function.
+ */
+static int qp_detatch(struct vmci_handle handle, bool guest_endpoint)
+{
+ if (vmci_handle_is_invalid(handle))
+ return VMCI_ERROR_INVALID_ARGS;
+
+ if (guest_endpoint)
+ return qp_detatch_guest_work(handle);
+ else
+ return qp_detatch_host_work(handle);
+}
+
+/*
+ * Returns the entry from the head of the list. Assumes that the list is
+ * locked.
+ */
+static struct qp_entry *qp_list_get_head(struct qp_list *qp_list)
+{
+ if (!list_empty(&qp_list->head)) {
+ struct qp_entry *entry =
+ list_first_entry(&qp_list->head, struct qp_entry,
+ list_item);
+ return entry;
+ }
+
+ return NULL;
+}
+
+void vmci_qp_broker_exit(void)
+{
+ struct qp_entry *entry;
+ struct qp_broker_entry *be;
+
+ mutex_lock(&qp_broker_list.mutex);
+
+ while ((entry = qp_list_get_head(&qp_broker_list))) {
+ be = (struct qp_broker_entry *)entry;
+
+ qp_list_remove_entry(&qp_broker_list, entry);
+ kfree(be);
+ }
+
+ mutex_unlock(&qp_broker_list.mutex);
+}
+
+/*
+ * Requests that a queue pair be allocated with the VMCI queue
+ * pair broker. Allocates a queue pair entry if one does not
+ * exist. Attaches to one if it exists, and retrieves the page
+ * files backing that queue_pair. Assumes that the queue pair
+ * broker lock is held.
+ */
+int vmci_qp_broker_alloc(struct vmci_handle handle,
+ u32 peer,
+ u32 flags,
+ u32 priv_flags,
+ u64 produce_size,
+ u64 consume_size,
+ struct vmci_qp_page_store *page_store,
+ struct vmci_ctx *context)
+{
+ return qp_broker_alloc(handle, peer, flags, priv_flags,
+ produce_size, consume_size,
+ page_store, context, NULL, NULL, NULL, NULL);
+}
+
+/*
+ * VMX'en with versions lower than VMCI_VERSION_NOVMVM use a separate
+ * step to add the UVAs of the VMX mapping of the queue pair. This function
+ * provides backwards compatibility with such VMX'en, and takes care of
+ * registering the page store for a queue pair previously allocated by the
+ * VMX during create or attach. This function will move the queue pair state
+ * either from VMCIQPB_CREATED_NO_MEM to VMCIQPB_CREATED_MEM or from
+ * VMCIQPB_ATTACHED_NO_MEM to VMCIQPB_ATTACHED_MEM. If moving to the
+ * attached state with memory, the queue pair is ready to be used by the
+ * host peer, and an attached event will be generated.
+ *
+ * Assumes that the queue pair broker lock is held.
+ *
+ * This function is only used by the hosted platform, since there is no
+ * issue with backwards compatibility for vmkernel.
+ */
+int vmci_qp_broker_set_page_store(struct vmci_handle handle,
+ u64 produce_uva,
+ u64 consume_uva,
+ struct vmci_ctx *context)
+{
+ struct qp_broker_entry *entry;
+ int result;
+ const u32 context_id = vmci_ctx_get_id(context);
+
+ if (vmci_handle_is_invalid(handle) || !context ||
+ context_id == VMCI_INVALID_ID)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ /*
+ * We only support guest to host queue pairs, so the VMX must
+ * supply UVAs for the mapped page files. 
+ */ + + if (produce_uva == 0 || consume_uva == 0) + return VMCI_ERROR_INVALID_ARGS; + + mutex_lock(&qp_broker_list.mutex); + + if (!vmci_ctx_qp_exists(context, handle)) { + pr_warn("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n", + context_id, handle.context, handle.resource); + result = VMCI_ERROR_NOT_FOUND; + goto out; + } + + entry = qp_broker_handle_to_entry(handle); + if (!entry) { + result = VMCI_ERROR_NOT_FOUND; + goto out; + } + + /* + * If I'm the owner then I can set the page store. + * + * Or, if a host created the queue_pair and I'm the attached peer + * then I can set the page store. + */ + if (entry->create_id != context_id && + (entry->create_id != VMCI_HOST_CONTEXT_ID || + entry->attach_id != context_id)) { + result = VMCI_ERROR_QUEUEPAIR_NOTOWNER; + goto out; + } + + if (entry->state != VMCIQPB_CREATED_NO_MEM && + entry->state != VMCIQPB_ATTACHED_NO_MEM) { + result = VMCI_ERROR_UNAVAILABLE; + goto out; + } + + result = qp_host_get_user_memory(produce_uva, consume_uva, + entry->produce_q, entry->consume_q); + if (result < VMCI_SUCCESS) + goto out; + + result = qp_host_map_queues(entry->produce_q, entry->consume_q); + if (result < VMCI_SUCCESS) { + qp_host_unregister_user_memory(entry->produce_q, + entry->consume_q); + goto out; + } + + if (entry->state == VMCIQPB_CREATED_NO_MEM) + entry->state = VMCIQPB_CREATED_MEM; + else + entry->state = VMCIQPB_ATTACHED_MEM; + + entry->vmci_page_files = true; + + if (entry->state == VMCIQPB_ATTACHED_MEM) { + result = + qp_notify_peer(true, handle, context_id, entry->create_id); + if (result < VMCI_SUCCESS) { + pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n", + entry->create_id, entry->qp.handle.context, + entry->qp.handle.resource); + } + } + + result = VMCI_SUCCESS; + out: + mutex_unlock(&qp_broker_list.mutex); + return result; +} + +/* + * Resets saved queue headers for the given QP broker + * entry. Should be used when guest memory becomes available + * again, or the guest detaches. + */ +static void qp_reset_saved_headers(struct qp_broker_entry *entry) +{ + entry->produce_q->saved_header = NULL; + entry->consume_q->saved_header = NULL; +} + +/* + * The main entry point for detaching from a queue pair registered with the + * queue pair broker. If more than one endpoint is attached to the queue + * pair, the first endpoint will mainly decrement a reference count and + * generate a notification to its peer. The last endpoint will clean up + * the queue pair state registered with the broker. + * + * When a guest endpoint detaches, it will unmap and unregister the guest + * memory backing the queue pair. If the host is still attached, it will + * no longer be able to access the queue pair content. + * + * If the queue pair is already in a state where there is no memory + * registered for the queue pair (any *_NO_MEM state), it will transition to + * the VMCIQPB_SHUTDOWN_NO_MEM state. This will also happen, if a guest + * endpoint is the first of two endpoints to detach. If the host endpoint is + * the first out of two to detach, the queue pair will move to the + * VMCIQPB_SHUTDOWN_MEM state. 
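+ *
+ * In short:
+ *
+ *	any *_NO_MEM state, or guest detaches first -> VMCIQPB_SHUTDOWN_NO_MEM
+ *	host detaches first with memory registered  -> VMCIQPB_SHUTDOWN_MEM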
+ */
+int vmci_qp_broker_detach(struct vmci_handle handle, struct vmci_ctx *context)
+{
+ struct qp_broker_entry *entry;
+ const u32 context_id = vmci_ctx_get_id(context);
+ u32 peer_id;
+ bool is_local = false;
+ int result;
+
+ if (vmci_handle_is_invalid(handle) || !context ||
+ context_id == VMCI_INVALID_ID) {
+ return VMCI_ERROR_INVALID_ARGS;
+ }
+
+ mutex_lock(&qp_broker_list.mutex);
+
+ if (!vmci_ctx_qp_exists(context, handle)) {
+ pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
+ context_id, handle.context, handle.resource);
+ result = VMCI_ERROR_NOT_FOUND;
+ goto out;
+ }
+
+ entry = qp_broker_handle_to_entry(handle);
+ if (!entry) {
+ pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n",
+ context_id, handle.context, handle.resource);
+ result = VMCI_ERROR_NOT_FOUND;
+ goto out;
+ }
+
+ if (context_id != entry->create_id && context_id != entry->attach_id) {
+ result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
+ goto out;
+ }
+
+ if (context_id == entry->create_id) {
+ peer_id = entry->attach_id;
+ entry->create_id = VMCI_INVALID_ID;
+ } else {
+ peer_id = entry->create_id;
+ entry->attach_id = VMCI_INVALID_ID;
+ }
+ entry->qp.ref_count--;
+
+ is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL;
+
+ if (context_id != VMCI_HOST_CONTEXT_ID) {
+ bool headers_mapped;
+
+ /*
+ * Pre NOVMVM vmx'en may detach from a queue pair
+ * before setting the page store, and in that case
+ * there is no user memory to detach from. Also, more
+ * recent VMX'en may detach from a queue pair in the
+ * quiesced state.
+ */
+
+ qp_acquire_queue_mutex(entry->produce_q);
+ headers_mapped = entry->produce_q->q_header ||
+ entry->consume_q->q_header;
+ if (QPBROKERSTATE_HAS_MEM(entry)) {
+ result =
+ qp_host_unmap_queues(INVALID_VMCI_GUEST_MEM_ID,
+ entry->produce_q,
+ entry->consume_q);
+ if (result < VMCI_SUCCESS)
+ pr_warn("Failed to unmap queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n",
+ handle.context, handle.resource,
+ result);
+
+ qp_host_unregister_user_memory(entry->produce_q,
+ entry->consume_q);
+
+ }
+
+ if (!headers_mapped)
+ qp_reset_saved_headers(entry);
+
+ qp_release_queue_mutex(entry->produce_q);
+
+ if (!headers_mapped && entry->wakeup_cb)
+ entry->wakeup_cb(entry->client_data);
+
+ } else {
+ if (entry->wakeup_cb) {
+ entry->wakeup_cb = NULL;
+ entry->client_data = NULL;
+ }
+ }
+
+ if (entry->qp.ref_count == 0) {
+ qp_list_remove_entry(&qp_broker_list, &entry->qp);
+
+ if (is_local)
+ kfree(entry->local_mem);
+
+ qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q);
+ qp_host_free_queue(entry->produce_q, entry->qp.produce_size);
+ qp_host_free_queue(entry->consume_q, entry->qp.consume_size);
+ /* Unlink from resource hash table and free callback */
+ vmci_resource_remove(&entry->resource);
+
+ kfree(entry);
+
+ vmci_ctx_qp_destroy(context, handle);
+ } else {
+ qp_notify_peer(false, handle, context_id, peer_id);
+ if (context_id == VMCI_HOST_CONTEXT_ID &&
+ QPBROKERSTATE_HAS_MEM(entry)) {
+ entry->state = VMCIQPB_SHUTDOWN_MEM;
+ } else {
+ entry->state = VMCIQPB_SHUTDOWN_NO_MEM;
+ }
+
+ if (!is_local)
+ vmci_ctx_qp_destroy(context, handle);
+
+ }
+ result = VMCI_SUCCESS;
+ out:
+ mutex_unlock(&qp_broker_list.mutex);
+ return result;
+}
+
+/*
+ * Establishes the necessary mappings for a queue pair given a
+ * reference to the queue pair guest memory. 
This is usually
+ * called when a guest is unquiesced and the VMX is allowed to
+ * map guest memory once again.
+ */
+int vmci_qp_broker_map(struct vmci_handle handle,
+ struct vmci_ctx *context,
+ u64 guest_mem)
+{
+ struct qp_broker_entry *entry;
+ const u32 context_id = vmci_ctx_get_id(context);
+ bool is_local = false;
+ int result;
+
+ if (vmci_handle_is_invalid(handle) || !context ||
+ context_id == VMCI_INVALID_ID)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ mutex_lock(&qp_broker_list.mutex);
+
+ if (!vmci_ctx_qp_exists(context, handle)) {
+ pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
+ context_id, handle.context, handle.resource);
+ result = VMCI_ERROR_NOT_FOUND;
+ goto out;
+ }
+
+ entry = qp_broker_handle_to_entry(handle);
+ if (!entry) {
+ pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n",
+ context_id, handle.context, handle.resource);
+ result = VMCI_ERROR_NOT_FOUND;
+ goto out;
+ }
+
+ if (context_id != entry->create_id && context_id != entry->attach_id) {
+ result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
+ goto out;
+ }
+
+ is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL;
+ result = VMCI_SUCCESS;
+
+ if (context_id != VMCI_HOST_CONTEXT_ID) {
+ struct vmci_qp_page_store page_store;
+
+ page_store.pages = guest_mem;
+ page_store.len = QPE_NUM_PAGES(entry->qp);
+
+ qp_acquire_queue_mutex(entry->produce_q);
+ qp_reset_saved_headers(entry);
+ result =
+ qp_host_register_user_memory(&page_store,
+ entry->produce_q,
+ entry->consume_q);
+ qp_release_queue_mutex(entry->produce_q);
+ if (result == VMCI_SUCCESS) {
+ /* Move state from *_NO_MEM to *_MEM */
+
+ entry->state++;
+
+ if (entry->wakeup_cb)
+ entry->wakeup_cb(entry->client_data);
+ }
+ }
+
+ out:
+ mutex_unlock(&qp_broker_list.mutex);
+ return result;
+}
+
+/*
+ * Saves a snapshot of the queue headers for the given QP broker
+ * entry. Should be used when guest memory is unmapped.
+ * Results:
+ * VMCI_SUCCESS on success, appropriate error code if guest memory
+ * can't be accessed.
+ */
+static int qp_save_headers(struct qp_broker_entry *entry)
+{
+ int result;
+
+ if (entry->produce_q->saved_header != NULL &&
+ entry->consume_q->saved_header != NULL) {
+ /*
+ * If the headers have already been saved, we don't need to do
+ * it again, and we don't want to map in the headers
+ * unnecessarily.
+ */
+
+ return VMCI_SUCCESS;
+ }
+
+ if (NULL == entry->produce_q->q_header ||
+ NULL == entry->consume_q->q_header) {
+ result = qp_host_map_queues(entry->produce_q, entry->consume_q);
+ if (result < VMCI_SUCCESS)
+ return result;
+ }
+
+ memcpy(&entry->saved_produce_q, entry->produce_q->q_header,
+ sizeof(entry->saved_produce_q));
+ entry->produce_q->saved_header = &entry->saved_produce_q;
+ memcpy(&entry->saved_consume_q, entry->consume_q->q_header,
+ sizeof(entry->saved_consume_q));
+ entry->consume_q->saved_header = &entry->saved_consume_q;
+
+ return VMCI_SUCCESS;
+}
+
+/*
+ * Removes all references to the guest memory of a given queue pair, and
+ * will move the queue pair from state *_MEM to *_NO_MEM. It is usually
+ * called when a VM is being quiesced where access to guest memory should
+ * be avoided. 
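+ *
+ * A hypothetical quiesce/resume sequence (sketch; gid and guest_mem
+ * are illustrative values). On quiesce:
+ *
+ *	vmci_qp_broker_unmap(handle, context, gid);
+ *
+ * and once the VMX may map guest memory again:
+ *
+ *	vmci_qp_broker_map(handle, context, guest_mem);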
+ */ +int vmci_qp_broker_unmap(struct vmci_handle handle, + struct vmci_ctx *context, + u32 gid) +{ + struct qp_broker_entry *entry; + const u32 context_id = vmci_ctx_get_id(context); + bool is_local = false; + int result; + + if (vmci_handle_is_invalid(handle) || !context || + context_id == VMCI_INVALID_ID) + return VMCI_ERROR_INVALID_ARGS; + + mutex_lock(&qp_broker_list.mutex); + + if (!vmci_ctx_qp_exists(context, handle)) { + pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n", + context_id, handle.context, handle.resource); + result = VMCI_ERROR_NOT_FOUND; + goto out; + } + + entry = qp_broker_handle_to_entry(handle); + if (!entry) { + pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n", + context_id, handle.context, handle.resource); + result = VMCI_ERROR_NOT_FOUND; + goto out; + } + + if (context_id != entry->create_id && context_id != entry->attach_id) { + result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED; + goto out; + } + + is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL; + + if (context_id != VMCI_HOST_CONTEXT_ID) { + qp_acquire_queue_mutex(entry->produce_q); + result = qp_save_headers(entry); + if (result < VMCI_SUCCESS) + pr_warn("Failed to save queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n", + handle.context, handle.resource, result); + + qp_host_unmap_queues(gid, entry->produce_q, entry->consume_q); + + /* + * On hosted, when we unmap queue pairs, the VMX will also + * unmap the guest memory, so we invalidate the previously + * registered memory. If the queue pair is mapped again at a + * later point in time, we will need to reregister the user + * memory with a possibly new user VA. + */ + qp_host_unregister_user_memory(entry->produce_q, + entry->consume_q); + + /* + * Move state from *_MEM to *_NO_MEM. + */ + entry->state--; + + qp_release_queue_mutex(entry->produce_q); + } + + result = VMCI_SUCCESS; + + out: + mutex_unlock(&qp_broker_list.mutex); + return result; +} + +/* + * Destroys all guest queue pair endpoints. If active guest queue + * pairs still exist, hypercalls to attempt detach from these + * queue pairs will be made. Any failure to detach is silently + * ignored. + */ +void vmci_qp_guest_endpoints_exit(void) +{ + struct qp_entry *entry; + struct qp_guest_endpoint *ep; + + mutex_lock(&qp_guest_endpoints.mutex); + + while ((entry = qp_list_get_head(&qp_guest_endpoints))) { + ep = (struct qp_guest_endpoint *)entry; + + /* Don't make a hypercall for local queue_pairs. */ + if (!(entry->flags & VMCI_QPFLAG_LOCAL)) + qp_detatch_hypercall(entry->handle); + + /* We cannot fail the exit, so let's reset ref_count. */ + entry->ref_count = 0; + qp_list_remove_entry(&qp_guest_endpoints, entry); + + qp_guest_endpoint_destroy(ep); + } + + mutex_unlock(&qp_guest_endpoints.mutex); +} + +/* + * Helper routine that will lock the queue pair before subsequent + * operations. + * Note: Non-blocking on the host side is currently only implemented in ESX. + * Since non-blocking isn't yet implemented on the host personality we + * have no reason to acquire a spin lock. So to avoid the use of an + * unnecessary lock only acquire the mutex if we can block. + */ +static void qp_lock(const struct vmci_qp *qpair) +{ + qp_acquire_queue_mutex(qpair->produce_q); +} + +/* + * Helper routine that unlocks the queue pair after calling + * qp_lock. 
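+ *
+ * The two helpers bracket every queue access in this file, e.g.:
+ *
+ *	qp_lock(qpair);
+ *	result = qp_enqueue_locked(qpair->produce_q, qpair->consume_q,
+ *				   qpair->produce_q_size, buf, buf_size,
+ *				   qp_memcpy_to_queue);
+ *	qp_unlock(qpair);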
+ */
+static void qp_unlock(const struct vmci_qp *qpair)
+{
+ qp_release_queue_mutex(qpair->produce_q);
+}
+
+/*
+ * The queue headers may not be mapped at all times. If a queue is
+ * currently not mapped, an attempt is made to map it.
+ */
+static int qp_map_queue_headers(struct vmci_queue *produce_q,
+ struct vmci_queue *consume_q)
+{
+ int result;
+
+ if (NULL == produce_q->q_header || NULL == consume_q->q_header) {
+ result = qp_host_map_queues(produce_q, consume_q);
+ if (result < VMCI_SUCCESS)
+ return (produce_q->saved_header &&
+ consume_q->saved_header) ?
+ VMCI_ERROR_QUEUEPAIR_NOT_READY :
+ VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
+ }
+
+ return VMCI_SUCCESS;
+}
+
+/*
+ * Helper routine that will retrieve the produce and consume
+ * headers of a given queue pair. If the guest memory of the
+ * queue pair is currently not available, the saved queue headers
+ * will be returned, if these are available.
+ */
+static int qp_get_queue_headers(const struct vmci_qp *qpair,
+ struct vmci_queue_header **produce_q_header,
+ struct vmci_queue_header **consume_q_header)
+{
+ int result;
+
+ result = qp_map_queue_headers(qpair->produce_q, qpair->consume_q);
+ if (result == VMCI_SUCCESS) {
+ *produce_q_header = qpair->produce_q->q_header;
+ *consume_q_header = qpair->consume_q->q_header;
+ } else if (qpair->produce_q->saved_header &&
+ qpair->consume_q->saved_header) {
+ *produce_q_header = qpair->produce_q->saved_header;
+ *consume_q_header = qpair->consume_q->saved_header;
+ result = VMCI_SUCCESS;
+ }
+
+ return result;
+}
+
+/*
+ * Callback from the VMCI queue pair broker indicating that a queue
+ * pair that was previously not ready is now either ready or
+ * gone forever.
+ */
+static int qp_wakeup_cb(void *client_data)
+{
+ struct vmci_qp *qpair = (struct vmci_qp *)client_data;
+
+ qp_lock(qpair);
+ while (qpair->blocked > 0) {
+ qpair->blocked--;
+ qpair->generation++;
+ wake_up(&qpair->event);
+ }
+ qp_unlock(qpair);
+
+ return VMCI_SUCCESS;
+}
+
+/*
+ * Makes the calling thread wait for the queue pair to become
+ * ready for host side access. Returns true when the thread is
+ * woken up after a queue pair state change, false otherwise.
+ */
+static bool qp_wait_for_ready_queue(struct vmci_qp *qpair)
+{
+ unsigned int generation;
+
+ qpair->blocked++;
+ generation = qpair->generation;
+ qp_unlock(qpair);
+ wait_event(qpair->event, generation != qpair->generation);
+ qp_lock(qpair);
+
+ return true;
+}
+
+/*
+ * Enqueues a given buffer to the produce queue using the provided
+ * function. As many bytes as possible (space available in the queue)
+ * are enqueued. Assumes the queue->mutex has been acquired. Returns
+ * VMCI_ERROR_QUEUEPAIR_NOSPACE if no space was available to enqueue
+ * data, VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the
+ * queue (as defined by the queue size), VMCI_ERROR_INVALID_ARGS, if
+ * an error occurred when accessing the buffer,
+ * VMCI_ERROR_QUEUEPAIR_NOTATTACHED, if the queue pair pages aren't
+ * available. Otherwise, the number of bytes written to the queue is
+ * returned. Updates the tail pointer of the produce queue. 
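+ *
+ * Worked example of the wrap-around branch (illustrative numbers):
+ * with produce_q_size = 4096, tail = 4000 and a 200 byte buffer, the
+ * first copy covers queue bytes 4000..4095 (tmp = 96), the second
+ * copies the remaining 104 bytes to offset 0, and the tail becomes
+ * (4000 + 200) mod 4096 = 104.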
+ */
+static ssize_t qp_enqueue_locked(struct vmci_queue *produce_q,
+ struct vmci_queue *consume_q,
+ const u64 produce_q_size,
+ const void *buf,
+ size_t buf_size,
+ vmci_memcpy_to_queue_func memcpy_to_queue)
+{
+ s64 free_space;
+ u64 tail;
+ size_t written;
+ ssize_t result;
+
+ result = qp_map_queue_headers(produce_q, consume_q);
+ if (unlikely(result != VMCI_SUCCESS))
+ return result;
+
+ free_space = vmci_q_header_free_space(produce_q->q_header,
+ consume_q->q_header,
+ produce_q_size);
+ if (free_space == 0)
+ return VMCI_ERROR_QUEUEPAIR_NOSPACE;
+
+ if (free_space < VMCI_SUCCESS)
+ return (ssize_t) free_space;
+
+ written = (size_t) (free_space > buf_size ? buf_size : free_space);
+ tail = vmci_q_header_producer_tail(produce_q->q_header);
+ if (likely(tail + written < produce_q_size)) {
+ result = memcpy_to_queue(produce_q, tail, buf, 0, written);
+ } else {
+ /* Tail pointer wraps around. */
+
+ const size_t tmp = (size_t) (produce_q_size - tail);
+
+ result = memcpy_to_queue(produce_q, tail, buf, 0, tmp);
+ if (result >= VMCI_SUCCESS)
+ result = memcpy_to_queue(produce_q, 0, buf, tmp,
+ written - tmp);
+ }
+
+ if (result < VMCI_SUCCESS)
+ return result;
+
+ vmci_q_header_add_producer_tail(produce_q->q_header, written,
+ produce_q_size);
+ return written;
+}
+
+/*
+ * Dequeues data (if available) from the given consume queue. Writes data
+ * to the user provided buffer using the provided function.
+ * Assumes the queue->mutex has been acquired.
+ * Results:
+ * VMCI_ERROR_QUEUEPAIR_NODATA if no data was available to dequeue.
+ * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue
+ * (as defined by the queue size).
+ * VMCI_ERROR_INVALID_ARGS, if an error occurred when accessing the buffer.
+ * Otherwise the number of bytes dequeued is returned.
+ * Side effects:
+ * Updates the head pointer of the consume queue.
+ */
+static ssize_t qp_dequeue_locked(struct vmci_queue *produce_q,
+ struct vmci_queue *consume_q,
+ const u64 consume_q_size,
+ void *buf,
+ size_t buf_size,
+ vmci_memcpy_from_queue_func memcpy_from_queue,
+ bool update_consumer)
+{
+ s64 buf_ready;
+ u64 head;
+ size_t read;
+ ssize_t result;
+
+ result = qp_map_queue_headers(produce_q, consume_q);
+ if (unlikely(result != VMCI_SUCCESS))
+ return result;
+
+ buf_ready = vmci_q_header_buf_ready(consume_q->q_header,
+ produce_q->q_header,
+ consume_q_size);
+ if (buf_ready == 0)
+ return VMCI_ERROR_QUEUEPAIR_NODATA;
+
+ if (buf_ready < VMCI_SUCCESS)
+ return (ssize_t) buf_ready;
+
+ read = (size_t) (buf_ready > buf_size ? buf_size : buf_ready);
+ head = vmci_q_header_consumer_head(produce_q->q_header);
+ if (likely(head + read < consume_q_size)) {
+ result = memcpy_from_queue(buf, 0, consume_q, head, read);
+ } else {
+ /* Head pointer wraps around. */
+
+ const size_t tmp = (size_t) (consume_q_size - head);
+
+ result = memcpy_from_queue(buf, 0, consume_q, head, tmp);
+ if (result >= VMCI_SUCCESS)
+ result = memcpy_from_queue(buf, tmp, consume_q, 0,
+ read - tmp);
+
+ }
+
+ if (result < VMCI_SUCCESS)
+ return result;
+
+ if (update_consumer)
+ vmci_q_header_add_consumer_head(produce_q->q_header,
+ read, consume_q_size);
+
+ return read;
+}
+
+/*
+ * vmci_qpair_alloc() - Allocates a queue pair.
+ * @qpair: Pointer for the new vmci_qp struct.
+ * @handle: Handle to track the resource.
+ * @produce_qsize: Desired size of the producer queue.
+ * @consume_qsize: Desired size of the consumer queue.
+ * @peer: ContextID of the peer.
+ * @flags: VMCI flags.
+ * @priv_flags: VMCI privilege flags. 
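+ *
+ * A hypothetical allocation (sketch only; peer_cid and the one-page
+ * queue sizes are illustrative values):
+ *
+ *	struct vmci_qp *qpair;
+ *	struct vmci_handle handle = VMCI_INVALID_HANDLE;
+ *	int rv;
+ *
+ *	rv = vmci_qpair_alloc(&qpair, &handle, PAGE_SIZE, PAGE_SIZE,
+ *			      peer_cid, 0, VMCI_NO_PRIVILEGE_FLAGS);
+ *	if (rv < VMCI_SUCCESS)
+ *		return rv;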
+ *
+ * This is the client interface for allocating the memory for a
+ * vmci_qp structure and then attaching to the underlying
+ * queue. If an error occurs allocating the memory for the
+ * vmci_qp structure, no attempt is made to attach. If an
+ * error occurs attaching, then the structure is freed.
+ */
+int vmci_qpair_alloc(struct vmci_qp **qpair,
+ struct vmci_handle *handle,
+ u64 produce_qsize,
+ u64 consume_qsize,
+ u32 peer,
+ u32 flags,
+ u32 priv_flags)
+{
+ struct vmci_qp *my_qpair;
+ int retval;
+ struct vmci_handle src = VMCI_INVALID_HANDLE;
+ struct vmci_handle dst = vmci_make_handle(peer, VMCI_INVALID_ID);
+ enum vmci_route route;
+ vmci_event_release_cb wakeup_cb;
+ void *client_data;
+
+ /*
+ * Restrict the size of a queuepair. The device already
+ * enforces a limit on the total amount of memory that can be
+ * allocated to queuepairs for a guest. However, we try to
+ * allocate this memory before we make the queuepair
+ * allocation hypercall. On Linux, we allocate each page
+ * separately, which means rather than fail, the guest will
+ * thrash while it tries to allocate, and will become
+ * increasingly unresponsive to the point where it appears to
+ * be hung. So we place a limit on the size of an individual
+ * queuepair here, and leave the device to enforce the
+ * restriction on total queuepair memory. (Note that this
+ * doesn't prevent all cases; a user with only this much
+ * physical memory could still get into trouble.) The error
+ * used by the device is NO_RESOURCES, so use that here too.
+ */
+
+ if (produce_qsize + consume_qsize < max(produce_qsize, consume_qsize) ||
+ produce_qsize + consume_qsize > VMCI_MAX_GUEST_QP_MEMORY)
+ return VMCI_ERROR_NO_RESOURCES;
+
+ retval = vmci_route(&src, &dst, false, &route);
+ if (retval < VMCI_SUCCESS)
+ route = vmci_guest_code_active() ?
+ VMCI_ROUTE_AS_GUEST : VMCI_ROUTE_AS_HOST;
+
+ if (flags & (VMCI_QPFLAG_NONBLOCK | VMCI_QPFLAG_PINNED)) {
+ pr_devel("NONBLOCK OR PINNED set");
+ return VMCI_ERROR_INVALID_ARGS;
+ }
+
+ my_qpair = kzalloc(sizeof(*my_qpair), GFP_KERNEL);
+ if (!my_qpair)
+ return VMCI_ERROR_NO_MEM;
+
+ my_qpair->produce_q_size = produce_qsize;
+ my_qpair->consume_q_size = consume_qsize;
+ my_qpair->peer = peer;
+ my_qpair->flags = flags;
+ my_qpair->priv_flags = priv_flags;
+
+ wakeup_cb = NULL;
+ client_data = NULL;
+
+ if (VMCI_ROUTE_AS_HOST == route) {
+ my_qpair->guest_endpoint = false;
+ if (!(flags & VMCI_QPFLAG_LOCAL)) {
+ my_qpair->blocked = 0;
+ my_qpair->generation = 0;
+ init_waitqueue_head(&my_qpair->event);
+ wakeup_cb = qp_wakeup_cb;
+ client_data = (void *)my_qpair;
+ }
+ } else {
+ my_qpair->guest_endpoint = true;
+ }
+
+ retval = vmci_qp_alloc(handle,
+ &my_qpair->produce_q,
+ my_qpair->produce_q_size,
+ &my_qpair->consume_q,
+ my_qpair->consume_q_size,
+ my_qpair->peer,
+ my_qpair->flags,
+ my_qpair->priv_flags,
+ my_qpair->guest_endpoint,
+ wakeup_cb, client_data);
+
+ if (retval < VMCI_SUCCESS) {
+ kfree(my_qpair);
+ return retval;
+ }
+
+ *qpair = my_qpair;
+ my_qpair->handle = *handle;
+
+ return retval;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_alloc);
+
+/*
+ * vmci_qpair_detach() - Detaches the client from a queue pair.
+ * @qpair: Reference of a pointer to the qpair struct.
+ *
+ * This is the client interface for detaching from a VMCIQPair.
+ * Note that this routine will free the memory allocated for the
+ * vmci_qp structure too. 
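+ *
+ * Typical teardown for a pair created with vmci_qpair_alloc() (sketch;
+ * *qpair is NULL again afterwards, whatever the result):
+ *
+ *	result = vmci_qpair_detach(&qpair);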
+ */
+int vmci_qpair_detach(struct vmci_qp **qpair)
+{
+ int result;
+ struct vmci_qp *old_qpair;
+
+ if (!qpair || !(*qpair))
+ return VMCI_ERROR_INVALID_ARGS;
+
+ old_qpair = *qpair;
+ result = qp_detatch(old_qpair->handle, old_qpair->guest_endpoint);
+
+ /*
+ * The guest can fail to detach for a number of reasons, and
+ * if it does so, it will clean up the entry (if there is one).
+ * The host can fail too, but it won't clean up the entry
+ * immediately; it will do that later when the context is
+ * freed. Either way, we need to release the qpair struct
+ * here; there isn't much the caller can do, and we don't want
+ * to leak.
+ */
+
+ memset(old_qpair, 0, sizeof(*old_qpair));
+ old_qpair->handle = VMCI_INVALID_HANDLE;
+ old_qpair->peer = VMCI_INVALID_ID;
+ kfree(old_qpair);
+ *qpair = NULL;
+
+ return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_detach);
+
+/*
+ * vmci_qpair_get_produce_indexes() - Retrieves the indexes of the producer.
+ * @qpair: Pointer to the queue pair struct.
+ * @producer_tail: Reference used for storing producer tail index.
+ * @consumer_head: Reference used for storing the consumer head index.
+ *
+ * This is the client interface for getting the current indexes of the
+ * QPair from the point of view of the caller as the producer.
+ */
+int vmci_qpair_get_produce_indexes(const struct vmci_qp *qpair,
+ u64 *producer_tail,
+ u64 *consumer_head)
+{
+ struct vmci_queue_header *produce_q_header;
+ struct vmci_queue_header *consume_q_header;
+ int result;
+
+ if (!qpair)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ qp_lock(qpair);
+ result =
+ qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
+ if (result == VMCI_SUCCESS)
+ vmci_q_header_get_pointers(produce_q_header, consume_q_header,
+ producer_tail, consumer_head);
+ qp_unlock(qpair);
+
+ if (result == VMCI_SUCCESS &&
+ ((producer_tail && *producer_tail >= qpair->produce_q_size) ||
+ (consumer_head && *consumer_head >= qpair->produce_q_size)))
+ return VMCI_ERROR_INVALID_SIZE;
+
+ return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_get_produce_indexes);
+
+/*
+ * vmci_qpair_get_consume_indexes() - Retrieves the indexes of the consumer.
+ * @qpair: Pointer to the queue pair struct.
+ * @consumer_tail: Reference used for storing consumer tail index.
+ * @producer_head: Reference used for storing the producer head index.
+ *
+ * This is the client interface for getting the current indexes of the
+ * QPair from the point of view of the caller as the consumer.
+ */
+int vmci_qpair_get_consume_indexes(const struct vmci_qp *qpair,
+ u64 *consumer_tail,
+ u64 *producer_head)
+{
+ struct vmci_queue_header *produce_q_header;
+ struct vmci_queue_header *consume_q_header;
+ int result;
+
+ if (!qpair)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ qp_lock(qpair);
+ result =
+ qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
+ if (result == VMCI_SUCCESS)
+ vmci_q_header_get_pointers(consume_q_header, produce_q_header,
+ consumer_tail, producer_head);
+ qp_unlock(qpair);
+
+ if (result == VMCI_SUCCESS &&
+ ((consumer_tail && *consumer_tail >= qpair->consume_q_size) ||
+ (producer_head && *producer_head >= qpair->consume_q_size)))
+ return VMCI_ERROR_INVALID_SIZE;
+
+ return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_get_consume_indexes);
+
+/*
+ * vmci_qpair_produce_free_space() - Retrieves free space in producer queue.
+ * @qpair: Pointer to the queue pair struct. 
+ * + * This is the client interface for getting the amount of free + * space in the QPair from the point of the view of the caller as + * the producer which is the common case. Returns < 0 if err, else + * available bytes into which data can be enqueued if > 0. + */ +s64 vmci_qpair_produce_free_space(const struct vmci_qp *qpair) +{ + struct vmci_queue_header *produce_q_header; + struct vmci_queue_header *consume_q_header; + s64 result; + + if (!qpair) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + result = + qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header); + if (result == VMCI_SUCCESS) + result = vmci_q_header_free_space(produce_q_header, + consume_q_header, + qpair->produce_q_size); + else + result = 0; + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_produce_free_space); + +/* + * vmci_qpair_consume_free_space() - Retrieves free space in consumer queue. + * @qpair: Pointer to the queue pair struct. + * + * This is the client interface for getting the amount of free + * space in the QPair from the point of the view of the caller as + * the consumer which is not the common case. Returns < 0 if err, else + * available bytes into which data can be enqueued if > 0. + */ +s64 vmci_qpair_consume_free_space(const struct vmci_qp *qpair) +{ + struct vmci_queue_header *produce_q_header; + struct vmci_queue_header *consume_q_header; + s64 result; + + if (!qpair) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + result = + qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header); + if (result == VMCI_SUCCESS) + result = vmci_q_header_free_space(consume_q_header, + produce_q_header, + qpair->consume_q_size); + else + result = 0; + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_consume_free_space); + +/* + * vmci_qpair_produce_buf_ready() - Gets bytes ready to read from + * producer queue. + * @qpair: Pointer to the queue pair struct. + * + * This is the client interface for getting the amount of + * enqueued data in the QPair from the point of the view of the + * caller as the producer which is not the common case. Returns < 0 if err, + * else available bytes that may be read. + */ +s64 vmci_qpair_produce_buf_ready(const struct vmci_qp *qpair) +{ + struct vmci_queue_header *produce_q_header; + struct vmci_queue_header *consume_q_header; + s64 result; + + if (!qpair) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + result = + qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header); + if (result == VMCI_SUCCESS) + result = vmci_q_header_buf_ready(produce_q_header, + consume_q_header, + qpair->produce_q_size); + else + result = 0; + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_produce_buf_ready); + +/* + * vmci_qpair_consume_buf_ready() - Gets bytes ready to read from + * consumer queue. + * @qpair: Pointer to the queue pair struct. + * + * This is the client interface for getting the amount of + * enqueued data in the QPair from the point of the view of the + * caller as the consumer which is the normal case. Returns < 0 if err, + * else available bytes that may be read. 
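+ *
+ * A common polling pattern (sketch; struct my_msg is a hypothetical
+ * fixed-size message type):
+ *
+ *	s64 ready = vmci_qpair_consume_buf_ready(qpair);
+ *
+ *	if (ready >= (s64)sizeof(struct my_msg))
+ *		vmci_qpair_dequeue(qpair, &msg, sizeof(msg), 0);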
+ */ +s64 vmci_qpair_consume_buf_ready(const struct vmci_qp *qpair) +{ + struct vmci_queue_header *produce_q_header; + struct vmci_queue_header *consume_q_header; + s64 result; + + if (!qpair) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + result = + qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header); + if (result == VMCI_SUCCESS) + result = vmci_q_header_buf_ready(consume_q_header, + produce_q_header, + qpair->consume_q_size); + else + result = 0; + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_consume_buf_ready); + +/* + * vmci_qpair_enqueue() - Throw data on the queue. + * @qpair: Pointer to the queue pair struct. + * @buf: Pointer to buffer containing data + * @buf_size: Length of buffer. + * @buf_type: Buffer type (Unused). + * + * This is the client interface for enqueueing data into the queue. + * Returns number of bytes enqueued or < 0 on error. + */ +ssize_t vmci_qpair_enqueue(struct vmci_qp *qpair, + const void *buf, + size_t buf_size, + int buf_type) +{ + ssize_t result; + + if (!qpair || !buf) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + + do { + result = qp_enqueue_locked(qpair->produce_q, + qpair->consume_q, + qpair->produce_q_size, + buf, buf_size, + qp_memcpy_to_queue); + + if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && + !qp_wait_for_ready_queue(qpair)) + result = VMCI_ERROR_WOULD_BLOCK; + + } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY); + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_enqueue); + +/* + * vmci_qpair_dequeue() - Get data from the queue. + * @qpair: Pointer to the queue pair struct. + * @buf: Pointer to buffer for the data + * @buf_size: Length of buffer. + * @buf_type: Buffer type (Unused). + * + * This is the client interface for dequeueing data from the queue. + * Returns number of bytes dequeued or < 0 on error. + */ +ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair, + void *buf, + size_t buf_size, + int buf_type) +{ + ssize_t result; + + if (!qpair || !buf) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + + do { + result = qp_dequeue_locked(qpair->produce_q, + qpair->consume_q, + qpair->consume_q_size, + buf, buf_size, + qp_memcpy_from_queue, true); + + if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && + !qp_wait_for_ready_queue(qpair)) + result = VMCI_ERROR_WOULD_BLOCK; + + } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY); + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_dequeue); + +/* + * vmci_qpair_peek() - Peek at the data in the queue. + * @qpair: Pointer to the queue pair struct. + * @buf: Pointer to buffer for the data + * @buf_size: Length of buffer. + * @buf_type: Buffer type (Unused on Linux). + * + * This is the client interface for peeking into a queue. (I.e., + * copy data from the queue without updating the head pointer.) + * Returns number of bytes dequeued or < 0 on error. 
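+ *
+ * For instance (sketch; hdr and buf are hypothetical), a caller can
+ * inspect a fixed-size header and only consume the message once it
+ * has fully arrived:
+ *
+ *	if (vmci_qpair_peek(qpair, &hdr, sizeof(hdr), 0) == sizeof(hdr) &&
+ *	    vmci_qpair_consume_buf_ready(qpair) >= sizeof(hdr) + hdr.len)
+ *		vmci_qpair_dequeue(qpair, buf, sizeof(hdr) + hdr.len, 0);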
+ */
+ssize_t vmci_qpair_peek(struct vmci_qp *qpair,
+ void *buf,
+ size_t buf_size,
+ int buf_type)
+{
+ ssize_t result;
+
+ if (!qpair || !buf)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ qp_lock(qpair);
+
+ do {
+ result = qp_dequeue_locked(qpair->produce_q,
+ qpair->consume_q,
+ qpair->consume_q_size,
+ buf, buf_size,
+ qp_memcpy_from_queue, false);
+
+ if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
+ !qp_wait_for_ready_queue(qpair))
+ result = VMCI_ERROR_WOULD_BLOCK;
+
+ } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
+
+ qp_unlock(qpair);
+
+ return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_peek);
+
+/*
+ * vmci_qpair_enquev() - Throw data on the queue using iov.
+ * @qpair: Pointer to the queue pair struct.
+ * @msg: Pointer to the msghdr describing the data to enqueue.
+ * @iov_size: Length of buffer.
+ * @buf_type: Buffer type (Unused).
+ *
+ * This is the client interface for enqueueing data into the queue.
+ * This function uses IO vectors to handle the work. Returns number
+ * of bytes enqueued or < 0 on error.
+ */
+ssize_t vmci_qpair_enquev(struct vmci_qp *qpair,
+ struct msghdr *msg,
+ size_t iov_size,
+ int buf_type)
+{
+ ssize_t result;
+
+ if (!qpair)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ qp_lock(qpair);
+
+ do {
+ result = qp_enqueue_locked(qpair->produce_q,
+ qpair->consume_q,
+ qpair->produce_q_size,
+ msg, iov_size,
+ qp_memcpy_to_queue_iov);
+
+ if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
+ !qp_wait_for_ready_queue(qpair))
+ result = VMCI_ERROR_WOULD_BLOCK;
+
+ } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
+
+ qp_unlock(qpair);
+
+ return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_enquev);
+
+/*
+ * vmci_qpair_dequev() - Get data from the queue using iov.
+ * @qpair: Pointer to the queue pair struct.
+ * @msg: Pointer to the msghdr that will receive the data.
+ * @iov_size: Length of buffer.
+ * @buf_type: Buffer type (Unused).
+ *
+ * This is the client interface for dequeueing data from the queue.
+ * This function uses IO vectors to handle the work. Returns number
+ * of bytes dequeued or < 0 on error.
+ */
+ssize_t vmci_qpair_dequev(struct vmci_qp *qpair,
+ struct msghdr *msg,
+ size_t iov_size,
+ int buf_type)
+{
+ ssize_t result;
+
+ if (!qpair)
+ return VMCI_ERROR_INVALID_ARGS;
+
+ qp_lock(qpair);
+
+ do {
+ result = qp_dequeue_locked(qpair->produce_q,
+ qpair->consume_q,
+ qpair->consume_q_size,
+ msg, iov_size,
+ qp_memcpy_from_queue_iov,
+ true);
+
+ if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
+ !qp_wait_for_ready_queue(qpair))
+ result = VMCI_ERROR_WOULD_BLOCK;
+
+ } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
+
+ qp_unlock(qpair);
+
+ return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_dequev);
+
+/*
+ * vmci_qpair_peekv() - Peek at the data in the queue using iov.
+ * @qpair: Pointer to the queue pair struct.
+ * @msg: Pointer to the msghdr that will receive the peeked data.
+ * @iov_size: Length of buffer.
+ * @buf_type: Buffer type (Unused on Linux).
+ *
+ * This is the client interface for peeking into a queue. (I.e.,
+ * copy data from the queue without updating the head pointer.)
+ * This function uses IO vectors to handle the work. Returns number
+ * of bytes peeked or < 0 on error. 
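+ *
+ * Building the msghdr for the *v variants is left to the caller; a
+ * sketch (the iov_iter setup helpers differ between kernel versions,
+ * so treat this as illustrative only):
+ *
+ *	struct kvec vec = { .iov_base = data, .iov_len = len };
+ *	struct msghdr msg = { };
+ *
+ *	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, len);
+ *	result = vmci_qpair_peekv(qpair, &msg, len, 0);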
+ */ +ssize_t vmci_qpair_peekv(struct vmci_qp *qpair, + struct msghdr *msg, + size_t iov_size, + int buf_type) +{ + ssize_t result; + + if (!qpair) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + + do { + result = qp_dequeue_locked(qpair->produce_q, + qpair->consume_q, + qpair->consume_q_size, + msg, iov_size, + qp_memcpy_from_queue_iov, + false); + + if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && + !qp_wait_for_ready_queue(qpair)) + result = VMCI_ERROR_WOULD_BLOCK; + + } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY); + + qp_unlock(qpair); + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_peekv); diff --git a/kernel/drivers/misc/vmw_vmci/vmci_queue_pair.h b/kernel/drivers/misc/vmw_vmci/vmci_queue_pair.h new file mode 100644 index 000000000..ed177f04e --- /dev/null +++ b/kernel/drivers/misc/vmw_vmci/vmci_queue_pair.h @@ -0,0 +1,173 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#ifndef _VMCI_QUEUE_PAIR_H_ +#define _VMCI_QUEUE_PAIR_H_ + +#include <linux/vmw_vmci_defs.h> +#include <linux/types.h> + +#include "vmci_context.h" + +/* Callback needed for correctly waiting on events. */ +typedef int (*vmci_event_release_cb) (void *client_data); + +/* Guest device port I/O. */ +struct ppn_set { + u64 num_produce_pages; + u64 num_consume_pages; + u32 *produce_ppns; + u32 *consume_ppns; + bool initialized; +}; + +/* VMCIqueue_pairAllocInfo */ +struct vmci_qp_alloc_info { + struct vmci_handle handle; + u32 peer; + u32 flags; + u64 produce_size; + u64 consume_size; + u64 ppn_va; /* Start VA of queue pair PPNs. */ + u64 num_ppns; + s32 result; + u32 version; +}; + +/* VMCIqueue_pairSetVAInfo */ +struct vmci_qp_set_va_info { + struct vmci_handle handle; + u64 va; /* Start VA of queue pair PPNs. */ + u64 num_ppns; + u32 version; + s32 result; +}; + +/* + * For backwards compatibility, here is a version of the + * VMCIqueue_pairPageFileInfo before host support end-points was added. + * Note that the current version of that structure requires VMX to + * pass down the VA of the mapped file. Before host support was added + * there was nothing of the sort. So, when the driver sees the ioctl + * with a parameter that is the sizeof + * VMCIqueue_pairPageFileInfo_NoHostQP then it can infer that the version + * of VMX running can't attach to host end points because it doesn't + * provide the VA of the mapped files. + * + * The Linux driver doesn't get an indication of the size of the + * structure passed down from user space. So, to fix a long standing + * but unfiled bug, the _pad field has been renamed to version. + * Existing versions of VMX always initialize the PageFileInfo + * structure so that _pad, er, version is set to 0. + * + * A version value of 1 indicates that the size of the structure has + * been increased to include two UVA's: produce_uva and consume_uva. + * These UVA's are of the mmap()'d queue contents backing files. 
diff --git a/kernel/drivers/misc/vmw_vmci/vmci_queue_pair.h b/kernel/drivers/misc/vmw_vmci/vmci_queue_pair.h
new file mode 100644
index 000000000..ed177f04e
--- /dev/null
+++ b/kernel/drivers/misc/vmw_vmci/vmci_queue_pair.h
@@ -0,0 +1,173 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _VMCI_QUEUE_PAIR_H_
+#define _VMCI_QUEUE_PAIR_H_
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/types.h>
+
+#include "vmci_context.h"
+
+/* Callback needed for correctly waiting on events. */
+typedef int (*vmci_event_release_cb) (void *client_data);
+
+/* Guest device port I/O. */
+struct ppn_set {
+        u64 num_produce_pages;
+        u64 num_consume_pages;
+        u32 *produce_ppns;
+        u32 *consume_ppns;
+        bool initialized;
+};
+
+/* VMCIqueue_pairAllocInfo */
+struct vmci_qp_alloc_info {
+        struct vmci_handle handle;
+        u32 peer;
+        u32 flags;
+        u64 produce_size;
+        u64 consume_size;
+        u64 ppn_va;     /* Start VA of queue pair PPNs. */
+        u64 num_ppns;
+        s32 result;
+        u32 version;
+};
+
+/* VMCIqueue_pairSetVAInfo */
+struct vmci_qp_set_va_info {
+        struct vmci_handle handle;
+        u64 va;         /* Start VA of queue pair PPNs. */
+        u64 num_ppns;
+        u32 version;
+        s32 result;
+};
+
+/*
+ * For backwards compatibility, here is a version of the
+ * VMCIqueue_pairPageFileInfo from before support for host end points
+ * was added.  Note that the current version of that structure requires
+ * VMX to pass down the VA of the mapped file.  Before host support was
+ * added there was nothing of the sort.  So, when the driver sees the
+ * ioctl with a parameter that is the sizeof
+ * VMCIqueue_pairPageFileInfo_NoHostQP, it can infer that the version
+ * of VMX running can't attach to host end points because it doesn't
+ * provide the VA of the mapped files.
+ *
+ * The Linux driver doesn't get an indication of the size of the
+ * structure passed down from user space.  So, to fix a long-standing
+ * but unfiled bug, the _pad field has been renamed to version.
+ * Existing versions of VMX always initialize the PageFileInfo
+ * structure so that _pad, er, version is set to 0.
+ *
+ * A version value of 1 indicates that the size of the structure has
+ * been increased to include two UVAs: produce_uva and consume_uva.
+ * These UVAs are of the mmap()'d backing files for the queue contents.
+ *
+ * In addition, if VMX gets an error when sending down the
+ * VMCIqueue_pairPageFileInfo structure, it will try again with the
+ * _NoHostQP version of the structure to see if an older VMCI kernel
+ * module is running.
+ */
+
+/* VMCIqueue_pairPageFileInfo */
+struct vmci_qp_page_file_info {
+        struct vmci_handle handle;
+        u64 produce_page_file;        /* User VA. */
+        u64 consume_page_file;        /* User VA. */
+        u64 produce_page_file_size;   /* Size of the file name array. */
+        u64 consume_page_file_size;   /* Size of the file name array. */
+        s32 result;
+        u32 version;    /* Was _pad. */
+        u64 produce_va; /* User VA of the mapped file. */
+        u64 consume_va; /* User VA of the mapped file. */
+};
+
+/* vmci queuepair detach info */
+struct vmci_qp_dtch_info {
+        struct vmci_handle handle;
+        s32 result;
+        u32 _pad;
+};
+
+/*
+ * struct vmci_qp_page_store describes how the memory of a given queue pair
+ * is backed.  When the queue pair is between the host and a guest, the
+ * page store consists of references to the guest pages.  On vmkernel,
+ * this is a list of PPNs, and on hosted, it is a user VA where the
+ * queue pair is mapped into the VMX address space.
+ */
+struct vmci_qp_page_store {
+        /* Reference to pages backing the queue pair. */
+        u64 pages;
+        /* Length of pageList/virtual address range (in pages). */
+        u32 len;
+};
+
+/*
+ * This data type contains the information about a queue.
+ * There are two queues (hence, queue pairs) per transaction model between a
+ * pair of end points, A & B.  One queue is used by end point A to transmit
+ * commands and responses to B.  The other queue is used by B to transmit
+ * commands and responses.
+ *
+ * struct vmci_queue_kern_if is a per-OS defined Queue structure.  It contains
+ * either a direct pointer to the linear address of the buffer contents or a
+ * pointer to structures which help the OS locate those data pages.  See
+ * vmciKernelIf.c for each platform for its definition.
+ */
+struct vmci_queue {
+        struct vmci_queue_header *q_header;
+        struct vmci_queue_header *saved_header;
+        struct vmci_queue_kern_if *kernel_if;
+};
+
+/*
+ * Utility function that checks whether the fields of the page
+ * store contain valid values.
+ * Result:
+ * true if the page store is well-formed.  false otherwise.
+ */
+static inline bool
+VMCI_QP_PAGESTORE_IS_WELLFORMED(struct vmci_qp_page_store *page_store)
+{
+        return page_store->len >= 2;
+}
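The len >= 2 floor reflects the minimum footprint of a single queue: one page for struct vmci_queue_header plus at least one data page. The following sketch (not part of the patch; qp_min_page_store_len() is a hypothetical helper) just makes that arithmetic explicit:

/*
 * Illustrative sketch only -- not part of this patch.
 * A queue of queue_size bytes needs one header page plus enough
 * pages to hold the data, so a well-formed page store can never
 * describe fewer than two pages.
 */
static inline u32 qp_min_page_store_len(u64 queue_size)
{
        return 1 + DIV_ROUND_UP(queue_size, PAGE_SIZE);
}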
+
+void vmci_qp_broker_exit(void);
+int vmci_qp_broker_alloc(struct vmci_handle handle, u32 peer,
+                         u32 flags, u32 priv_flags,
+                         u64 produce_size, u64 consume_size,
+                         struct vmci_qp_page_store *page_store,
+                         struct vmci_ctx *context);
+int vmci_qp_broker_set_page_store(struct vmci_handle handle,
+                                  u64 produce_uva, u64 consume_uva,
+                                  struct vmci_ctx *context);
+int vmci_qp_broker_detach(struct vmci_handle handle, struct vmci_ctx *context);
+
+void vmci_qp_guest_endpoints_exit(void);
+
+int vmci_qp_alloc(struct vmci_handle *handle,
+                  struct vmci_queue **produce_q, u64 produce_size,
+                  struct vmci_queue **consume_q, u64 consume_size,
+                  u32 peer, u32 flags, u32 priv_flags,
+                  bool guest_endpoint, vmci_event_release_cb wakeup_cb,
+                  void *client_data);
+int vmci_qp_broker_map(struct vmci_handle handle,
+                       struct vmci_ctx *context, u64 guest_mem);
+int vmci_qp_broker_unmap(struct vmci_handle handle,
+                         struct vmci_ctx *context, u32 gid);
+
+#endif /* _VMCI_QUEUE_PAIR_H_ */
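vmci_qp_alloc() above is the driver-internal entry point that creates an endpoint's two queues. A sketch of a guest-side allocation using only that prototype follows (not part of the patch; the sizes, peer, and flags are arbitrary illustrative values, and the helper name is hypothetical):

/*
 * Illustrative sketch only -- not part of this patch.
 * Allocates a hypothetical PAGE_SIZE/PAGE_SIZE guest queue pair with
 * no wakeup callback; VMCI_INVALID_HANDLE asks for any free handle.
 */
static int example_alloc_guest_qp(struct vmci_handle *handle,
                                  struct vmci_queue **produce_q,
                                  struct vmci_queue **consume_q)
{
        *handle = VMCI_INVALID_HANDLE;

        return vmci_qp_alloc(handle,
                             produce_q, PAGE_SIZE,      /* produce_size */
                             consume_q, PAGE_SIZE,      /* consume_size */
                             VMCI_HOST_CONTEXT_ID,      /* peer */
                             0,                         /* flags */
                             VMCI_NO_PRIVILEGE_FLAGS,   /* priv_flags */
                             true,                      /* guest_endpoint */
                             NULL, NULL);               /* no wakeup_cb */
}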
diff --git a/kernel/drivers/misc/vmw_vmci/vmci_resource.c b/kernel/drivers/misc/vmw_vmci/vmci_resource.c
new file mode 100644
index 000000000..9a53a30de
--- /dev/null
+++ b/kernel/drivers/misc/vmw_vmci/vmci_resource.c
@@ -0,0 +1,227 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/hash.h>
+#include <linux/types.h>
+#include <linux/rculist.h>
+
+#include "vmci_resource.h"
+#include "vmci_driver.h"
+
+
+#define VMCI_RESOURCE_HASH_BITS         7
+#define VMCI_RESOURCE_HASH_BUCKETS      (1 << VMCI_RESOURCE_HASH_BITS)
+
+struct vmci_hash_table {
+        spinlock_t lock;
+        struct hlist_head entries[VMCI_RESOURCE_HASH_BUCKETS];
+};
+
+static struct vmci_hash_table vmci_resource_table = {
+        .lock = __SPIN_LOCK_UNLOCKED(vmci_resource_table.lock),
+};
+
+static unsigned int vmci_resource_hash(struct vmci_handle handle)
+{
+        return hash_32(handle.resource, VMCI_RESOURCE_HASH_BITS);
+}
+
+/*
+ * Gets a resource (if one exists) matching given handle from the hash table.
+ */
+static struct vmci_resource *vmci_resource_lookup(struct vmci_handle handle,
+                                                  enum vmci_resource_type type)
+{
+        struct vmci_resource *r, *resource = NULL;
+        unsigned int idx = vmci_resource_hash(handle);
+
+        rcu_read_lock();
+        hlist_for_each_entry_rcu(r,
+                                 &vmci_resource_table.entries[idx], node) {
+                u32 cid = r->handle.context;
+                u32 rid = r->handle.resource;
+
+                if (r->type == type &&
+                    rid == handle.resource &&
+                    (cid == handle.context || cid == VMCI_INVALID_ID)) {
+                        resource = r;
+                        break;
+                }
+        }
+        rcu_read_unlock();
+
+        return resource;
+}
+
+/*
+ * Find an unused resource ID and return it.  The first
+ * VMCI_RESERVED_RESOURCE_ID_MAX IDs are reserved, so we start from
+ * that value + 1.
+ * Returns a VMCI resource id on success, VMCI_INVALID_ID on failure.
+ */
+static u32 vmci_resource_find_id(u32 context_id,
+                                 enum vmci_resource_type resource_type)
+{
+        static u32 resource_id = VMCI_RESERVED_RESOURCE_ID_MAX + 1;
+        u32 old_rid = resource_id;
+        u32 current_rid;
+
+        /*
+         * Generate a unique resource ID.  Keep on trying until we wrap around
+         * in the RID space.
+         */
+        do {
+                struct vmci_handle handle;
+
+                current_rid = resource_id;
+                resource_id++;
+                if (unlikely(resource_id == VMCI_INVALID_ID)) {
+                        /* Skip the reserved rids. */
+                        resource_id = VMCI_RESERVED_RESOURCE_ID_MAX + 1;
+                }
+
+                handle = vmci_make_handle(context_id, current_rid);
+                if (!vmci_resource_lookup(handle, resource_type))
+                        return current_rid;
+        } while (resource_id != old_rid);
+
+        return VMCI_INVALID_ID;
+}
+
+
+int vmci_resource_add(struct vmci_resource *resource,
+                      enum vmci_resource_type resource_type,
+                      struct vmci_handle handle)
+{
+        unsigned int idx;
+        int result;
+
+        spin_lock(&vmci_resource_table.lock);
+
+        if (handle.resource == VMCI_INVALID_ID) {
+                handle.resource = vmci_resource_find_id(handle.context,
+                                                        resource_type);
+                if (handle.resource == VMCI_INVALID_ID) {
+                        result = VMCI_ERROR_NO_HANDLE;
+                        goto out;
+                }
+        } else if (vmci_resource_lookup(handle, resource_type)) {
+                result = VMCI_ERROR_ALREADY_EXISTS;
+                goto out;
+        }
+
+        resource->handle = handle;
+        resource->type = resource_type;
+        INIT_HLIST_NODE(&resource->node);
+        kref_init(&resource->kref);
+        init_completion(&resource->done);
+
+        idx = vmci_resource_hash(resource->handle);
+        hlist_add_head_rcu(&resource->node, &vmci_resource_table.entries[idx]);
+
+        result = VMCI_SUCCESS;
+
+out:
+        spin_unlock(&vmci_resource_table.lock);
+        return result;
+}
+
+void vmci_resource_remove(struct vmci_resource *resource)
+{
+        struct vmci_handle handle = resource->handle;
+        unsigned int idx = vmci_resource_hash(handle);
+        struct vmci_resource *r;
+
+        /* Remove resource from hash table. */
+        spin_lock(&vmci_resource_table.lock);
+
+        hlist_for_each_entry(r, &vmci_resource_table.entries[idx], node) {
+                if (vmci_handle_is_equal(r->handle, resource->handle)) {
+                        hlist_del_init_rcu(&r->node);
+                        break;
+                }
+        }
+
+        spin_unlock(&vmci_resource_table.lock);
+        synchronize_rcu();
+
+        vmci_resource_put(resource);
+        wait_for_completion(&resource->done);
+}
+
+struct vmci_resource *
+vmci_resource_by_handle(struct vmci_handle resource_handle,
+                        enum vmci_resource_type resource_type)
+{
+        struct vmci_resource *r, *resource = NULL;
+
+        rcu_read_lock();
+
+        r = vmci_resource_lookup(resource_handle, resource_type);
+        if (r &&
+            (resource_type == r->type ||
+             resource_type == VMCI_RESOURCE_TYPE_ANY)) {
+                resource = vmci_resource_get(r);
+        }
+
+        rcu_read_unlock();
+
+        return resource;
+}
+
+/*
+ * Get a reference to given resource.
+ */
+struct vmci_resource *vmci_resource_get(struct vmci_resource *resource)
+{
+        kref_get(&resource->kref);
+
+        return resource;
+}
+
+static void vmci_release_resource(struct kref *kref)
+{
+        struct vmci_resource *resource =
+                container_of(kref, struct vmci_resource, kref);
+
+        /* Verify the resource has been unlinked from the hash table. */
+        WARN_ON(!hlist_unhashed(&resource->node));
+
+        /* Signal that the container of this resource can now be destroyed. */
+        complete(&resource->done);
+}
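vmci_resource_remove() pairs hlist_del_init_rcu() with synchronize_rcu() so that no RCU reader can still observe the node, then drops its own reference and blocks in wait_for_completion() until the final vmci_resource_put() fires the completion above. A sketch of the resulting lifecycle follows (not part of the patch; example_obj and its helpers are hypothetical):

/*
 * Illustrative sketch only -- not part of this patch.
 * Lifecycle: add -> lookups take references -> remove blocks until
 * the last reference is dropped, after which freeing is safe.
 */
struct example_obj {
        struct vmci_resource resource;
        /* ... client state ... */
};

static int example_register(struct example_obj *obj, u32 context_id)
{
        /* VMCI_INVALID_ID asks vmci_resource_find_id() for a free rid. */
        struct vmci_handle handle =
                vmci_make_handle(context_id, VMCI_INVALID_ID);

        return vmci_resource_add(&obj->resource,
                                 VMCI_RESOURCE_TYPE_DOORBELL, handle);
}

static void example_unregister(struct example_obj *obj)
{
        /* Returns only when every outstanding reference is gone. */
        vmci_resource_remove(&obj->resource);
        kfree(obj);
}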
+
+/*
+ * The resource's release function is called only when the last
+ * reference is dropped.  At that point nobody else can increment the
+ * count again (the resource is gone from the resource hash table),
+ * so there is no need for locking here.
+ */
+int vmci_resource_put(struct vmci_resource *resource)
+{
+        /*
+         * We propagate the information back to caller in case it wants to know
+         * whether entry was freed.
+         */
+        return kref_put(&resource->kref, vmci_release_resource) ?
+                VMCI_SUCCESS_ENTRY_DEAD : VMCI_SUCCESS;
+}
+
+struct vmci_handle vmci_resource_handle(struct vmci_resource *resource)
+{
+        return resource->handle;
+}
diff --git a/kernel/drivers/misc/vmw_vmci/vmci_resource.h b/kernel/drivers/misc/vmw_vmci/vmci_resource.h
new file mode 100644
index 000000000..9190cd298
--- /dev/null
+++ b/kernel/drivers/misc/vmw_vmci/vmci_resource.h
@@ -0,0 +1,59 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _VMCI_RESOURCE_H_
+#define _VMCI_RESOURCE_H_
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/types.h>
+
+#include "vmci_context.h"
+
+
+enum vmci_resource_type {
+        VMCI_RESOURCE_TYPE_ANY,
+        VMCI_RESOURCE_TYPE_API,
+        VMCI_RESOURCE_TYPE_GROUP,
+        VMCI_RESOURCE_TYPE_DATAGRAM,
+        VMCI_RESOURCE_TYPE_DOORBELL,
+        VMCI_RESOURCE_TYPE_QPAIR_GUEST,
+        VMCI_RESOURCE_TYPE_QPAIR_HOST
+};
+
+struct vmci_resource {
+        struct vmci_handle handle;
+        enum vmci_resource_type type;
+        struct hlist_node node;
+        struct kref kref;
+        struct completion done;
+};
+
+
+int vmci_resource_add(struct vmci_resource *resource,
+                      enum vmci_resource_type resource_type,
+                      struct vmci_handle handle);
+
+void vmci_resource_remove(struct vmci_resource *resource);
+
+struct vmci_resource *
+vmci_resource_by_handle(struct vmci_handle resource_handle,
+                        enum vmci_resource_type resource_type);
+
+struct vmci_resource *vmci_resource_get(struct vmci_resource *resource);
+int vmci_resource_put(struct vmci_resource *resource);
+
+struct vmci_handle vmci_resource_handle(struct vmci_resource *resource);
+
+#endif /* _VMCI_RESOURCE_H_ */
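Since struct vmci_resource is designed to be embedded in a larger client object, a lookup typically resolves the handle and then recovers the enclosing structure with container_of(). A sketch follows (not part of the patch; it reuses the hypothetical example_obj from the earlier sketch):

/*
 * Illustrative sketch only -- not part of this patch.
 * Resolves a handle to the hypothetical example_obj embedding the
 * vmci_resource, taking a reference the caller must later drop with
 * vmci_resource_put(&obj->resource).
 */
static struct example_obj *example_obj_by_handle(struct vmci_handle handle)
{
        struct vmci_resource *resource;

        resource = vmci_resource_by_handle(handle,
                                           VMCI_RESOURCE_TYPE_DOORBELL);
        if (!resource)
                return NULL;

        return container_of(resource, struct example_obj, resource);
}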
diff --git a/kernel/drivers/misc/vmw_vmci/vmci_route.c b/kernel/drivers/misc/vmw_vmci/vmci_route.c
new file mode 100644
index 000000000..91090658b
--- /dev/null
+++ b/kernel/drivers/misc/vmw_vmci/vmci_route.c
@@ -0,0 +1,226 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+
+#include "vmci_context.h"
+#include "vmci_driver.h"
+#include "vmci_route.h"
+
+/*
+ * Make a routing decision for the given source and destination handles.
+ * This will try to determine the route using the handles and the available
+ * devices.  Will set the source context if it is invalid.
+ */
+int vmci_route(struct vmci_handle *src,
+               const struct vmci_handle *dst,
+               bool from_guest,
+               enum vmci_route *route)
+{
+        bool has_host_device = vmci_host_code_active();
+        bool has_guest_device = vmci_guest_code_active();
+
+        *route = VMCI_ROUTE_NONE;
+
+        /*
+         * "from_guest" is only ever set to true by
+         * IOCTL_VMCI_DATAGRAM_SEND (or by the vmkernel equivalent),
+         * which comes from the VMX, so we know it is coming from a
+         * guest.
+         *
+         * To avoid inconsistencies, test these once.  We will test
+         * them again when we do the actual send to ensure that we do
+         * not touch a non-existent device.
+         */
+
+        /* Must have a valid destination context. */
+        if (VMCI_INVALID_ID == dst->context)
+                return VMCI_ERROR_INVALID_ARGS;
+
+        /* Anywhere to hypervisor. */
+        if (VMCI_HYPERVISOR_CONTEXT_ID == dst->context) {
+
+                /*
+                 * If this message already came from a guest then we
+                 * cannot send it to the hypervisor.  It must come
+                 * from a local client.
+                 */
+                if (from_guest)
+                        return VMCI_ERROR_DST_UNREACHABLE;
+
+                /*
+                 * We must be acting as a guest in order to send to
+                 * the hypervisor.
+                 */
+                if (!has_guest_device)
+                        return VMCI_ERROR_DEVICE_NOT_FOUND;
+
+                /* And we cannot send if the source is the host context. */
+                if (VMCI_HOST_CONTEXT_ID == src->context)
+                        return VMCI_ERROR_INVALID_ARGS;
+
+                /*
+                 * If the client passed the ANON source handle then
+                 * respect it (both context and resource are invalid).
+                 * However, if they passed only an invalid context,
+                 * then they probably mean ANY, in which case we
+                 * should set the real context here before passing it
+                 * down.
+                 */
+                if (VMCI_INVALID_ID == src->context &&
+                    VMCI_INVALID_ID != src->resource)
+                        src->context = vmci_get_context_id();
+
+                /* Send from local client down to the hypervisor. */
+                *route = VMCI_ROUTE_AS_GUEST;
+                return VMCI_SUCCESS;
+        }
+
+        /* Anywhere to local client on host. */
+        if (VMCI_HOST_CONTEXT_ID == dst->context) {
+                /*
+                 * If it is not from a guest but we are acting as a
+                 * guest, then we need to send it down to the host.
+                 * Note that if we are also acting as a host then this
+                 * will prevent us from sending from local client to
+                 * local client, but we accept that restriction as a
+                 * way to remove any ambiguity from the host context.
+                 */
+                if (src->context == VMCI_HYPERVISOR_CONTEXT_ID) {
+                        /*
+                         * If the hypervisor is the source, this is
+                         * host local communication.  The hypervisor
+                         * may send vmci event datagrams to the host
+                         * itself, but it will never send datagrams to
+                         * an "outer host" through the guest device.
+                         */
+                        if (has_host_device) {
+                                *route = VMCI_ROUTE_AS_HOST;
+                                return VMCI_SUCCESS;
+                        } else {
+                                return VMCI_ERROR_DEVICE_NOT_FOUND;
+                        }
+                }
+
+                if (!from_guest && has_guest_device) {
+                        /* If no source context then use the current. */
+                        if (VMCI_INVALID_ID == src->context)
+                                src->context = vmci_get_context_id();
+
+                        /* Send it from local client down to the host. */
+                        *route = VMCI_ROUTE_AS_GUEST;
+                        return VMCI_SUCCESS;
+                }
+
+                /*
+                 * Otherwise we already received it from a guest and
+                 * it is destined for a local client on this host, or
+                 * it is from another local client on this host.  We
+                 * must be acting as a host to service it.
+                 */
+                if (!has_host_device)
+                        return VMCI_ERROR_DEVICE_NOT_FOUND;
+
+                if (VMCI_INVALID_ID == src->context) {
+                        /*
+                         * If it came from a guest then it must have a
+                         * valid context.  Otherwise we can use the
+                         * host context.
+                         */
+                        if (from_guest)
+                                return VMCI_ERROR_INVALID_ARGS;
+
+                        src->context = VMCI_HOST_CONTEXT_ID;
+                }
+
+                /* Route to local client. */
+                *route = VMCI_ROUTE_AS_HOST;
+                return VMCI_SUCCESS;
+        }
+
+        /*
+         * If we are acting as a host then this might be destined for
+         * a guest.
+         */
+        if (has_host_device) {
+                /* It will have a context if it is meant for a guest. */
+                if (vmci_ctx_exists(dst->context)) {
+                        if (VMCI_INVALID_ID == src->context) {
+                                /*
+                                 * If it came from a guest then it
+                                 * must have a valid context.
+                                 * Otherwise we can use the host
+                                 * context.
+                                 */
+                                if (from_guest)
+                                        return VMCI_ERROR_INVALID_ARGS;
+
+                                src->context = VMCI_HOST_CONTEXT_ID;
+                        } else if (VMCI_CONTEXT_IS_VM(src->context) &&
+                                   src->context != dst->context) {
+                                /*
+                                 * VM to VM communication is not
+                                 * allowed.  Since we catch all
+                                 * communication destined for the host
+                                 * above, this must be destined for a
+                                 * VM since there is a valid context.
+                                 */
+                                return VMCI_ERROR_DST_UNREACHABLE;
+                        }
+
+                        /* Pass it up to the guest. */
+                        *route = VMCI_ROUTE_AS_HOST;
+                        return VMCI_SUCCESS;
+                } else if (!has_guest_device) {
+                        /*
+                         * The host is attempting to reach a CID
+                         * without an active context, and we can't
+                         * send it down, since we have no guest
+                         * device.
+                         */
+                        return VMCI_ERROR_DST_UNREACHABLE;
+                }
+        }
+
+        /*
+         * We must be a guest trying to send to another guest, which means
+         * we need to send it down to the host.  We do not filter out VM to
+         * VM communication here, since we want to be able to use the guest
+         * driver on older versions that do support VM to VM communication.
+         */
+        if (!has_guest_device) {
+                /*
+                 * Ending up here means we have neither guest nor host
+                 * device.
+                 */
+                return VMCI_ERROR_DEVICE_NOT_FOUND;
+        }
+
+        /* If no source context then use the current context. */
+        if (VMCI_INVALID_ID == src->context)
+                src->context = vmci_get_context_id();
+
+        /*
+         * Send it from local client down to the host, which will
+         * route it to the other guest for us.
+         */
+        *route = VMCI_ROUTE_AS_GUEST;
+        return VMCI_SUCCESS;
+}
diff --git a/kernel/drivers/misc/vmw_vmci/vmci_route.h b/kernel/drivers/misc/vmw_vmci/vmci_route.h
new file mode 100644
index 000000000..3b30e8241
--- /dev/null
+++ b/kernel/drivers/misc/vmw_vmci/vmci_route.h
@@ -0,0 +1,30 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _VMCI_ROUTE_H_
+#define _VMCI_ROUTE_H_
+
+#include <linux/vmw_vmci_defs.h>
+
+enum vmci_route {
+        VMCI_ROUTE_NONE,
+        VMCI_ROUTE_AS_HOST,
+        VMCI_ROUTE_AS_GUEST,
+};
+
+int vmci_route(struct vmci_handle *src, const struct vmci_handle *dst,
+               bool from_guest, enum vmci_route *route);
+
+#endif /* _VMCI_ROUTE_H_ */
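To make the decision tree in vmci_route() concrete, here is a sketch of how a caller inside the driver might route a host-bound datagram (not part of the patch; the resource id 17 and the helper name are arbitrary illustrative values):

/*
 * Illustrative sketch only -- not part of this patch.
 * Routes a datagram from an unspecified local source to resource 17
 * in the host context.  On a guest, vmci_route() is expected to pick
 * VMCI_ROUTE_AS_GUEST and fill in src.context for us.
 */
static int example_route_to_host(void)
{
        struct vmci_handle src = VMCI_INVALID_HANDLE;
        struct vmci_handle dst = vmci_make_handle(VMCI_HOST_CONTEXT_ID, 17);
        enum vmci_route route;
        int result;

        result = vmci_route(&src, &dst, false /* from_guest */, &route);
        if (result < VMCI_SUCCESS)
                return result;

        /* route is now VMCI_ROUTE_AS_GUEST or VMCI_ROUTE_AS_HOST. */
        return VMCI_SUCCESS;
}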