diff options
876 files changed, 15413 insertions, 7808 deletions
diff --git a/.get_maintainer.ignore b/.get_maintainer.ignore new file mode 100644 index 000000000000..cca6d870f7a5 --- /dev/null +++ b/.get_maintainer.ignore @@ -0,0 +1 @@ +Christoph Hellwig <hch@lst.de> @@ -17,6 +17,7 @@ Aleksey Gorelov <aleksey_gorelov@phoenix.com> Al Viro <viro@ftp.linux.org.uk> Al Viro <viro@zenIV.linux.org.uk> Andreas Herrmann <aherrman@de.ibm.com> +Andrey Ryabinin <ryabinin.a.a@gmail.com> <a.ryabinin@samsung.com> Andrew Morton <akpm@linux-foundation.org> Andrew Vasquez <andrew.vasquez@qlogic.com> Andy Adamson <andros@citi.umich.edu> diff --git a/Documentation/ABI/stable/sysfs-bus-vmbus b/Documentation/ABI/stable/sysfs-bus-vmbus new file mode 100644 index 000000000000..636e938d5e33 --- /dev/null +++ b/Documentation/ABI/stable/sysfs-bus-vmbus @@ -0,0 +1,29 @@ +What: /sys/bus/vmbus/devices/vmbus_*/id +Date: Jul 2009 +KernelVersion: 2.6.31 +Contact: K. Y. Srinivasan <kys@microsoft.com> +Description: The VMBus child_relid of the device's primary channel +Users: tools/hv/lsvmbus + +What: /sys/bus/vmbus/devices/vmbus_*/class_id +Date: Jul 2009 +KernelVersion: 2.6.31 +Contact: K. Y. Srinivasan <kys@microsoft.com> +Description: The VMBus interface type GUID of the device +Users: tools/hv/lsvmbus + +What: /sys/bus/vmbus/devices/vmbus_*/device_id +Date: Jul 2009 +KernelVersion: 2.6.31 +Contact: K. Y. Srinivasan <kys@microsoft.com> +Description: The VMBus interface instance GUID of the device +Users: tools/hv/lsvmbus + +What: /sys/bus/vmbus/devices/vmbus_*/channel_vp_mapping +Date: Jul 2015 +KernelVersion: 4.2.0 +Contact: K. Y. Srinivasan <kys@microsoft.com> +Description: The mapping of which primary/sub channels are bound to which + Virtual Processors. + Format: <channel's child_relid:the bound cpu's number> +Users: tools/hv/lsvmbus diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm3x b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm3x index b4d0b99afffb..d72ca1736ba4 100644 --- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm3x +++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm3x @@ -112,7 +112,7 @@ KernelVersion: 3.19 Contact: Mathieu Poirier <mathieu.poirier@linaro.org> Description: (RW) Mask to apply to all the context ID comparator. -What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/ctxid_val +What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/ctxid_pid Date: November 2014 KernelVersion: 3.19 Contact: Mathieu Poirier <mathieu.poirier@linaro.org> diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x index 2fe2e3dae487..2355ed8ae31f 100644 --- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x +++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x @@ -249,7 +249,7 @@ KernelVersion: 4.01 Contact: Mathieu Poirier <mathieu.poirier@linaro.org> Description: (RW) Select which context ID comparator to work with. -What: /sys/bus/coresight/devices/<memory_map>.etm/ctxid_val +What: /sys/bus/coresight/devices/<memory_map>.etm/ctxid_pid Date: April 2015 KernelVersion: 4.01 Contact: Mathieu Poirier <mathieu.poirier@linaro.org> diff --git a/Documentation/ABI/testing/sysfs-driver-sunxi-sid b/Documentation/ABI/testing/sysfs-driver-sunxi-sid deleted file mode 100644 index ffb9536f6ecc..000000000000 --- a/Documentation/ABI/testing/sysfs-driver-sunxi-sid +++ /dev/null @@ -1,22 +0,0 @@ -What: /sys/devices/*/<our-device>/eeprom -Date: August 2013 -Contact: Oliver Schinagl <oliver@schinagl.nl> -Description: read-only access to the SID (Security-ID) on current - A-series SoC's from Allwinner. Currently supports A10, A10s, A13 - and A20 CPU's. The earlier A1x series of SoCs exports 16 bytes, - whereas the newer A20 SoC exposes 512 bytes split into sections. - Besides the 16 bytes of SID, there's also an SJTAG area, - HDMI-HDCP key and some custom keys. Below a quick overview, for - details see the user manual: - 0x000 128 bit root-key (sun[457]i) - 0x010 128 bit boot-key (sun7i) - 0x020 64 bit security-jtag-key (sun7i) - 0x028 16 bit key configuration (sun7i) - 0x02b 16 bit custom-vendor-key (sun7i) - 0x02c 320 bit low general key (sun7i) - 0x040 32 bit read-control access (sun7i) - 0x064 224 bit low general key (sun7i) - 0x080 2304 bit HDCP-key (sun7i) - 0x1a0 768 bit high general key (sun7i) -Users: any user space application which wants to read the SID on - Allwinner's A-series of CPU's. diff --git a/Documentation/devicetree/bindings/arm/coresight.txt b/Documentation/devicetree/bindings/arm/coresight.txt index 65a6db2271a2..62938eb9697f 100644 --- a/Documentation/devicetree/bindings/arm/coresight.txt +++ b/Documentation/devicetree/bindings/arm/coresight.txt @@ -17,6 +17,7 @@ its hardware characteristcs. - "arm,coresight-tmc", "arm,primecell"; - "arm,coresight-funnel", "arm,primecell"; - "arm,coresight-etm3x", "arm,primecell"; + - "arm,coresight-etm4x", "arm,primecell"; - "qcom,coresight-replicator1x", "arm,primecell"; * reg: physical base address and length of the register diff --git a/Documentation/devicetree/bindings/arm/cpus.txt b/Documentation/devicetree/bindings/arm/cpus.txt index d6b794cef0b8..91e6e5c478d0 100644 --- a/Documentation/devicetree/bindings/arm/cpus.txt +++ b/Documentation/devicetree/bindings/arm/cpus.txt @@ -199,6 +199,7 @@ nodes to be present and contain the properties described below. "qcom,kpss-acc-v1" "qcom,kpss-acc-v2" "rockchip,rk3066-smp" + "ste,dbx500-smp" - cpu-release-addr Usage: required for systems that have an "enable-method" diff --git a/Documentation/devicetree/bindings/dma/apm-xgene-dma.txt b/Documentation/devicetree/bindings/dma/apm-xgene-dma.txt index d3058768b23d..c53e0b08032f 100644 --- a/Documentation/devicetree/bindings/dma/apm-xgene-dma.txt +++ b/Documentation/devicetree/bindings/dma/apm-xgene-dma.txt @@ -35,7 +35,7 @@ Example: device_type = "dma"; reg = <0x0 0x1f270000 0x0 0x10000>, <0x0 0x1f200000 0x0 0x10000>, - <0x0 0x1b008000 0x0 0x2000>, + <0x0 0x1b000000 0x0 0x400000>, <0x0 0x1054a000 0x0 0x100>; interrupts = <0x0 0x82 0x4>, <0x0 0xb8 0x4>, diff --git a/Documentation/devicetree/bindings/extcon/extcon-palmas.txt b/Documentation/devicetree/bindings/extcon/extcon-palmas.txt index 45414bbcd945..f61d5af44a27 100644 --- a/Documentation/devicetree/bindings/extcon/extcon-palmas.txt +++ b/Documentation/devicetree/bindings/extcon/extcon-palmas.txt @@ -10,8 +10,11 @@ Required Properties: Optional Properties: - ti,wakeup : To enable the wakeup comparator in probe - - ti,enable-id-detection: Perform ID detection. + - ti,enable-id-detection: Perform ID detection. If id-gpio is specified + it performs id-detection using GPIO else using OTG core. - ti,enable-vbus-detection: Perform VBUS detection. + - id-gpio: gpio for GPIO ID detection. See gpio binding. + - debounce-delay-ms: debounce delay for GPIO ID pin in milliseconds. palmas-usb { compatible = "ti,twl6035-usb", "ti,palmas-usb"; diff --git a/Documentation/devicetree/bindings/misc/allwinner,sunxi-sid.txt b/Documentation/devicetree/bindings/nvmem/allwinner,sunxi-sid.txt index fabdf64a5737..d543ed3f5363 100644 --- a/Documentation/devicetree/bindings/misc/allwinner,sunxi-sid.txt +++ b/Documentation/devicetree/bindings/nvmem/allwinner,sunxi-sid.txt @@ -4,6 +4,10 @@ Required properties: - compatible: "allwinner,sun4i-a10-sid" or "allwinner,sun7i-a20-sid" - reg: Should contain registers location and length += Data cells = +Are child nodes of qfprom, bindings of which as described in +bindings/nvmem/nvmem.txt + Example for sun4i: sid@01c23800 { compatible = "allwinner,sun4i-a10-sid"; diff --git a/Documentation/devicetree/bindings/nvmem/nvmem.txt b/Documentation/devicetree/bindings/nvmem/nvmem.txt new file mode 100644 index 000000000000..b52bc11e9597 --- /dev/null +++ b/Documentation/devicetree/bindings/nvmem/nvmem.txt @@ -0,0 +1,80 @@ += NVMEM(Non Volatile Memory) Data Device Tree Bindings = + +This binding is intended to represent the location of hardware +configuration data stored in NVMEMs like eeprom, efuses and so on. + +On a significant proportion of boards, the manufacturer has stored +some data on NVMEM, for the OS to be able to retrieve these information +and act upon it. Obviously, the OS has to know about where to retrieve +these data from, and where they are stored on the storage device. + +This document is here to document this. + += Data providers = +Contains bindings specific to provider drivers and data cells as children +of this node. + +Optional properties: + read-only: Mark the provider as read only. + += Data cells = +These are the child nodes of the provider which contain data cell +information like offset and size in nvmem provider. + +Required properties: +reg: specifies the offset in byte within the storage device. + +Optional properties: + +bits: Is pair of bit location and number of bits, which specifies offset + in bit and number of bits within the address range specified by reg property. + Offset takes values from 0-7. + +For example: + + /* Provider */ + qfprom: qfprom@00700000 { + ... + + /* Data cells */ + tsens_calibration: calib@404 { + reg = <0x404 0x10>; + }; + + tsens_calibration_bckp: calib_bckp@504 { + reg = <0x504 0x11>; + bits = <6 128> + }; + + pvs_version: pvs-version@6 { + reg = <0x6 0x2> + bits = <7 2> + }; + + speed_bin: speed-bin@c{ + reg = <0xc 0x1>; + bits = <2 3>; + + }; + ... + }; + += Data consumers = +Are device nodes which consume nvmem data cells/providers. + +Required-properties: +nvmem-cells: list of phandle to the nvmem data cells. +nvmem-cell-names: names for the each nvmem-cells specified. Required if + nvmem-cells is used. + +Optional-properties: +nvmem : list of phandles to nvmem providers. +nvmem-names: names for the each nvmem provider. required if nvmem is used. + +For example: + + tsens { + ... + nvmem-cells = <&tsens_calibration>; + nvmem-cell-names = "calibration"; + }; diff --git a/Documentation/devicetree/bindings/nvmem/qfprom.txt b/Documentation/devicetree/bindings/nvmem/qfprom.txt new file mode 100644 index 000000000000..4ad68b7f5c18 --- /dev/null +++ b/Documentation/devicetree/bindings/nvmem/qfprom.txt @@ -0,0 +1,35 @@ += Qualcomm QFPROM device tree bindings = + +This binding is intended to represent QFPROM which is found in most QCOM SOCs. + +Required properties: +- compatible: should be "qcom,qfprom" +- reg: Should contain registers location and length + += Data cells = +Are child nodes of qfprom, bindings of which as described in +bindings/nvmem/nvmem.txt + +Example: + + qfprom: qfprom@00700000 { + compatible = "qcom,qfprom"; + reg = <0x00700000 0x8000>; + ... + /* Data cells */ + tsens_calibration: calib@404 { + reg = <0x4404 0x10>; + }; + }; + + += Data consumers = +Are device nodes which consume nvmem data cells. + +For example: + + tsens { + ... + nvmem-cells = <&tsens_calibration>; + nvmem-cell-names = "calibration"; + }; diff --git a/Documentation/devicetree/bindings/phy/ti-phy.txt b/Documentation/devicetree/bindings/phy/ti-phy.txt index 305e3df3d9b1..9cf9446eaf2e 100644 --- a/Documentation/devicetree/bindings/phy/ti-phy.txt +++ b/Documentation/devicetree/bindings/phy/ti-phy.txt @@ -82,6 +82,9 @@ Optional properties: - id: If there are multiple instance of the same type, in order to differentiate between each instance "id" can be used (e.g., multi-lane PCIe PHY). If "id" is not provided, it is set to default value of '1'. + - syscon-pllreset: Handle to system control region that contains the + CTRL_CORE_SMA_SW_0 register and register offset to the CTRL_CORE_SMA_SW_0 + register that contains the SATA_PLL_SOFT_RESET bit. Only valid for sata_phy. This is usually a subnode of ocp2scp to which it is connected. @@ -100,3 +103,16 @@ usb3phy@4a084400 { "sysclk", "refclk"; }; + +sata_phy: phy@4A096000 { + compatible = "ti,phy-pipe3-sata"; + reg = <0x4A096000 0x80>, /* phy_rx */ + <0x4A096400 0x64>, /* phy_tx */ + <0x4A096800 0x40>; /* pll_ctrl */ + reg-names = "phy_rx", "phy_tx", "pll_ctrl"; + ctrl-module = <&omap_control_sata>; + clocks = <&sys_clkin1>, <&sata_ref_clk>; + clock-names = "sysclk", "refclk"; + syscon-pllreset = <&scm_conf 0x3fc>; + #phy-cells = <0>; +}; diff --git a/Documentation/devicetree/bindings/power/qcom,coincell-charger.txt b/Documentation/devicetree/bindings/power/qcom,coincell-charger.txt new file mode 100644 index 000000000000..0e6d8754e7ec --- /dev/null +++ b/Documentation/devicetree/bindings/power/qcom,coincell-charger.txt @@ -0,0 +1,48 @@ +Qualcomm Coincell Charger: + +The hardware block controls charging for a coincell or capacitor that is +used to provide power backup for certain features of the power management +IC (PMIC) + +- compatible: + Usage: required + Value type: <string> + Definition: must be: "qcom,pm8941-coincell" + +- reg: + Usage: required + Value type: <u32> + Definition: base address of the coincell charger registers + +- qcom,rset-ohms: + Usage: required + Value type: <u32> + Definition: resistance (in ohms) for current-limiting resistor + must be one of: 800, 1200, 1700, 2100 + +- qcom,vset-millivolts: + Usage: required + Value type: <u32> + Definition: voltage (in millivolts) to apply for charging + must be one of: 2500, 3000, 3100, 3200 + +- qcom,charger-disable: + Usage: optional + Value type: <boolean> + Definition: definining this property disables charging + +This charger is a sub-node of one of the 8941 PMIC blocks, and is specified +as a child node in DTS of that node. See ../mfd/qcom,spmi-pmic.txt and +../mfd/qcom-pm8xxx.txt + +Example: + + pm8941@0 { + coincell@2800 { + compatible = "qcom,pm8941-coincell"; + reg = <0x2800>; + + qcom,rset-ohms = <2100>; + qcom,vset-millivolts = <3000>; + }; + }; diff --git a/Documentation/devicetree/bindings/sound/mt8173-max98090.txt b/Documentation/devicetree/bindings/sound/mt8173-max98090.txt index 829bd26d17f8..519e97c8f1b8 100644 --- a/Documentation/devicetree/bindings/sound/mt8173-max98090.txt +++ b/Documentation/devicetree/bindings/sound/mt8173-max98090.txt @@ -3,11 +3,13 @@ MT8173 with MAX98090 CODEC Required properties: - compatible : "mediatek,mt8173-max98090" - mediatek,audio-codec: the phandle of the MAX98090 audio codec +- mediatek,platform: the phandle of MT8173 ASoC platform Example: sound { compatible = "mediatek,mt8173-max98090"; mediatek,audio-codec = <&max98090>; + mediatek,platform = <&afe>; }; diff --git a/Documentation/devicetree/bindings/sound/mt8173-rt5650-rt5676.txt b/Documentation/devicetree/bindings/sound/mt8173-rt5650-rt5676.txt index 61e98c976bd4..f205ce9e31dd 100644 --- a/Documentation/devicetree/bindings/sound/mt8173-rt5650-rt5676.txt +++ b/Documentation/devicetree/bindings/sound/mt8173-rt5650-rt5676.txt @@ -3,11 +3,13 @@ MT8173 with RT5650 RT5676 CODECS Required properties: - compatible : "mediatek,mt8173-rt5650-rt5676" - mediatek,audio-codec: the phandles of rt5650 and rt5676 codecs +- mediatek,platform: the phandle of MT8173 ASoC platform Example: sound { compatible = "mediatek,mt8173-rt5650-rt5676"; mediatek,audio-codec = <&rt5650 &rt5676>; + mediatek,platform = <&afe>; }; diff --git a/Documentation/devicetree/bindings/spi/spi-ath79.txt b/Documentation/devicetree/bindings/spi/spi-ath79.txt index f1ad9c367532..9c696fa66f81 100644 --- a/Documentation/devicetree/bindings/spi/spi-ath79.txt +++ b/Documentation/devicetree/bindings/spi/spi-ath79.txt @@ -3,7 +3,7 @@ Binding for Qualcomm Atheros AR7xxx/AR9xxx SPI controller Required properties: - compatible: has to be "qca,<soc-type>-spi", "qca,ar7100-spi" as fallback. - reg: Base address and size of the controllers memory area -- clocks: phandle to the AHB clock. +- clocks: phandle of the AHB clock. - clock-names: has to be "ahb". - #address-cells: <1>, as required by generic SPI binding. - #size-cells: <0>, also as required by generic SPI binding. @@ -12,9 +12,9 @@ Child nodes as per the generic SPI binding. Example: - spi@1F000000 { + spi@1f000000 { compatible = "qca,ar9132-spi", "qca,ar7100-spi"; - reg = <0x1F000000 0x10>; + reg = <0x1f000000 0x10>; clocks = <&pll 2>; clock-names = "ahb"; diff --git a/Documentation/hwmon/nct7904 b/Documentation/hwmon/nct7904 index 014f112e2a14..57fffe33ebfc 100644 --- a/Documentation/hwmon/nct7904 +++ b/Documentation/hwmon/nct7904 @@ -35,11 +35,11 @@ temp1_input Local temperature (1/1000 degree, temp[2-9]_input CPU temperatures (1/1000 degree, 0.125 degree resolution) -fan[1-4]_mode R/W, 0/1 for manual or SmartFan mode +pwm[1-4]_enable R/W, 1/2 for manual or SmartFan mode Setting SmartFan mode is supported only if it has been previously configured by BIOS (or configuration EEPROM) -fan[1-4]_pwm R/O in SmartFan mode, R/W in manual control mode +pwm[1-4] R/O in SmartFan mode, R/W in manual control mode The driver checks sensor control registers and does not export the sensors that are not enabled. Anyway, a sensor that is enabled may actually be not diff --git a/Documentation/input/alps.txt b/Documentation/input/alps.txt index c86f2f1ae4f6..1fec1135791d 100644 --- a/Documentation/input/alps.txt +++ b/Documentation/input/alps.txt @@ -119,8 +119,10 @@ ALPS Absolute Mode - Protocol Version 2 byte 5: 0 z6 z5 z4 z3 z2 z1 z0 Protocol Version 2 DualPoint devices send standard PS/2 mouse packets for -the DualPoint Stick. For non interleaved dualpoint devices the pointingstick -buttons get reported separately in the PSM, PSR and PSL bits. +the DualPoint Stick. The M, R and L bits signal the combined status of both +the pointingstick and touchpad buttons, except for Dell dualpoint devices +where the pointingstick buttons get reported separately in the PSM, PSR +and PSL bits. Dualpoint device -- interleaved packet format --------------------------------------------- diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 611c52267d24..141f847c7648 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -124,6 +124,8 @@ Code Seq#(hex) Include File Comments 'H' 00-7F linux/hiddev.h conflict! 'H' 00-0F linux/hidraw.h conflict! 'H' 01 linux/mei.h conflict! +'H' 02 linux/mei.h conflict! +'H' 03 linux/mei.h conflict! 'H' 00-0F sound/asound.h conflict! 'H' 20-40 sound/asound_fm.h conflict! 'H' 80-8F sound/sfnt_info.h conflict! diff --git a/Documentation/misc-devices/mei/mei.txt b/Documentation/misc-devices/mei/mei.txt index 8d47501bba0a..91c1fa34f48b 100644 --- a/Documentation/misc-devices/mei/mei.txt +++ b/Documentation/misc-devices/mei/mei.txt @@ -96,7 +96,7 @@ A code snippet for an application communicating with Intel AMTHI client: IOCTL ===== -The Intel MEI Driver supports the following IOCTL command: +The Intel MEI Driver supports the following IOCTL commands: IOCTL_MEI_CONNECT_CLIENT Connect to firmware Feature (client). usage: @@ -125,6 +125,49 @@ The Intel MEI Driver supports the following IOCTL command: data that can be sent or received. (e.g. if MTU=2K, can send requests up to bytes 2k and received responses up to 2k bytes). + IOCTL_MEI_NOTIFY_SET: enable or disable event notifications + + Usage: + uint32_t enable; + ioctl(fd, IOCTL_MEI_NOTIFY_SET, &enable); + + Inputs: + uint32_t enable = 1; + or + uint32_t enable[disable] = 0; + + Error returns: + EINVAL Wrong IOCTL Number + ENODEV Device is not initialized or the client not connected + ENOMEM Unable to allocate memory to client internal data. + EFAULT Fatal Error (e.g. Unable to access user input data) + EOPNOTSUPP if the device doesn't support the feature + + Notes: + The client must be connected in order to enable notification events + + + IOCTL_MEI_NOTIFY_GET : retrieve event + + Usage: + uint32_t event; + ioctl(fd, IOCTL_MEI_NOTIFY_GET, &event); + + Outputs: + 1 - if an event is pending + 0 - if there is no even pending + + Error returns: + EINVAL Wrong IOCTL Number + ENODEV Device is not initialized or the client not connected + ENOMEM Unable to allocate memory to client internal data. + EFAULT Fatal Error (e.g. Unable to access user input data) + EOPNOTSUPP if the device doesn't support the feature + + Notes: + The client must be connected and event notification has to be enabled + in order to receive an event + Intel ME Applications ===================== diff --git a/Documentation/nvmem/nvmem.txt b/Documentation/nvmem/nvmem.txt new file mode 100644 index 000000000000..dbd40d879239 --- /dev/null +++ b/Documentation/nvmem/nvmem.txt @@ -0,0 +1,152 @@ + NVMEM SUBSYSTEM + Srinivas Kandagatla <srinivas.kandagatla@linaro.org> + +This document explains the NVMEM Framework along with the APIs provided, +and how to use it. + +1. Introduction +=============== +*NVMEM* is the abbreviation for Non Volatile Memory layer. It is used to +retrieve configuration of SOC or Device specific data from non volatile +memories like eeprom, efuses and so on. + +Before this framework existed, NVMEM drivers like eeprom were stored in +drivers/misc, where they all had to duplicate pretty much the same code to +register a sysfs file, allow in-kernel users to access the content of the +devices they were driving, etc. + +This was also a problem as far as other in-kernel users were involved, since +the solutions used were pretty much different from one driver to another, there +was a rather big abstraction leak. + +This framework aims at solve these problems. It also introduces DT +representation for consumer devices to go get the data they require (MAC +Addresses, SoC/Revision ID, part numbers, and so on) from the NVMEMs. This +framework is based on regmap, so that most of the abstraction available in +regmap can be reused, across multiple types of buses. + +NVMEM Providers ++++++++++++++++ + +NVMEM provider refers to an entity that implements methods to initialize, read +and write the non-volatile memory. + +2. Registering/Unregistering the NVMEM provider +=============================================== + +A NVMEM provider can register with NVMEM core by supplying relevant +nvmem configuration to nvmem_register(), on success core would return a valid +nvmem_device pointer. + +nvmem_unregister(nvmem) is used to unregister a previously registered provider. + +For example, a simple qfprom case: + +static struct nvmem_config econfig = { + .name = "qfprom", + .owner = THIS_MODULE, +}; + +static int qfprom_probe(struct platform_device *pdev) +{ + ... + econfig.dev = &pdev->dev; + nvmem = nvmem_register(&econfig); + ... +} + +It is mandatory that the NVMEM provider has a regmap associated with its +struct device. Failure to do would return error code from nvmem_register(). + +NVMEM Consumers ++++++++++++++++ + +NVMEM consumers are the entities which make use of the NVMEM provider to +read from and to NVMEM. + +3. NVMEM cell based consumer APIs +================================= + +NVMEM cells are the data entries/fields in the NVMEM. +The NVMEM framework provides 3 APIs to read/write NVMEM cells. + +struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *name); +struct nvmem_cell *devm_nvmem_cell_get(struct device *dev, const char *name); + +void nvmem_cell_put(struct nvmem_cell *cell); +void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell); + +void *nvmem_cell_read(struct nvmem_cell *cell, ssize_t *len); +int nvmem_cell_write(struct nvmem_cell *cell, void *buf, ssize_t len); + +*nvmem_cell_get() apis will get a reference to nvmem cell for a given id, +and nvmem_cell_read/write() can then read or write to the cell. +Once the usage of the cell is finished the consumer should call *nvmem_cell_put() +to free all the allocation memory for the cell. + +4. Direct NVMEM device based consumer APIs +========================================== + +In some instances it is necessary to directly read/write the NVMEM. +To facilitate such consumers NVMEM framework provides below apis. + +struct nvmem_device *nvmem_device_get(struct device *dev, const char *name); +struct nvmem_device *devm_nvmem_device_get(struct device *dev, + const char *name); +void nvmem_device_put(struct nvmem_device *nvmem); +int nvmem_device_read(struct nvmem_device *nvmem, unsigned int offset, + size_t bytes, void *buf); +int nvmem_device_write(struct nvmem_device *nvmem, unsigned int offset, + size_t bytes, void *buf); +int nvmem_device_cell_read(struct nvmem_device *nvmem, + struct nvmem_cell_info *info, void *buf); +int nvmem_device_cell_write(struct nvmem_device *nvmem, + struct nvmem_cell_info *info, void *buf); + +Before the consumers can read/write NVMEM directly, it should get hold +of nvmem_controller from one of the *nvmem_device_get() api. + +The difference between these apis and cell based apis is that these apis always +take nvmem_device as parameter. + +5. Releasing a reference to the NVMEM +===================================== + +When a consumers no longer needs the NVMEM, it has to release the reference +to the NVMEM it has obtained using the APIs mentioned in the above section. +The NVMEM framework provides 2 APIs to release a reference to the NVMEM. + +void nvmem_cell_put(struct nvmem_cell *cell); +void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell); +void nvmem_device_put(struct nvmem_device *nvmem); +void devm_nvmem_device_put(struct device *dev, struct nvmem_device *nvmem); + +Both these APIs are used to release a reference to the NVMEM and +devm_nvmem_cell_put and devm_nvmem_device_put destroys the devres associated +with this NVMEM. + +Userspace ++++++++++ + +6. Userspace binary interface +============================== + +Userspace can read/write the raw NVMEM file located at +/sys/bus/nvmem/devices/*/nvmem + +ex: + +hexdump /sys/bus/nvmem/devices/qfprom0/nvmem + +0000000 0000 0000 0000 0000 0000 0000 0000 0000 +* +00000a0 db10 2240 0000 e000 0c00 0c00 0000 0c00 +0000000 0000 0000 0000 0000 0000 0000 0000 0000 +... +* +0001000 + +7. DeviceTree Binding +===================== + +See Documentation/devicetree/bindings/nvmem/nvmem.txt diff --git a/Documentation/power/suspend-and-cpuhotplug.txt b/Documentation/power/suspend-and-cpuhotplug.txt index 2850df3bf957..2fc909502db5 100644 --- a/Documentation/power/suspend-and-cpuhotplug.txt +++ b/Documentation/power/suspend-and-cpuhotplug.txt @@ -72,7 +72,7 @@ More details follow: | v Disable regular cpu hotplug - by setting cpu_hotplug_disabled=1 + by increasing cpu_hotplug_disabled | v Release cpu_add_remove_lock @@ -89,7 +89,7 @@ Resuming back is likewise, with the counterparts being (in the order of execution during resume): * enable_nonboot_cpus() which involves: | Acquire cpu_add_remove_lock - | Reset cpu_hotplug_disabled to 0, thereby enabling regular cpu hotplug + | Decrease cpu_hotplug_disabled, thereby enabling regular cpu hotplug | Call _cpu_up() [for all those cpus in the frozen_cpus mask, in a loop] | Release cpu_add_remove_lock v @@ -120,7 +120,7 @@ after the entire cycle is complete (i.e., suspend + resume). Acquire cpu_add_remove_lock | v - If cpu_hotplug_disabled is 1 + If cpu_hotplug_disabled > 0 return gracefully | | diff --git a/Documentation/s390/00-INDEX b/Documentation/s390/00-INDEX index 10c874ebdfe5..9189535f6cd2 100644 --- a/Documentation/s390/00-INDEX +++ b/Documentation/s390/00-INDEX @@ -16,8 +16,6 @@ Debugging390.txt - hints for debugging on s390 systems. driver-model.txt - information on s390 devices and the driver model. -kvm.txt - - ioctl calls to /dev/kvm on s390. monreader.txt - information on accessing the z/VM monitor stream from Linux. qeth.txt diff --git a/Documentation/s390/kvm.txt b/Documentation/s390/kvm.txt deleted file mode 100644 index 85f3280d7ef6..000000000000 --- a/Documentation/s390/kvm.txt +++ /dev/null @@ -1,125 +0,0 @@ -*** BIG FAT WARNING *** -The kvm module is currently in EXPERIMENTAL state for s390. This means that -the interface to the module is not yet considered to remain stable. Thus, be -prepared that we keep breaking your userspace application and guest -compatibility over and over again until we feel happy with the result. Make sure -your guest kernel, your host kernel, and your userspace launcher are in a -consistent state. - -This Documentation describes the unique ioctl calls to /dev/kvm, the resulting -kvm-vm file descriptors, and the kvm-vcpu file descriptors that differ from x86. - -1. ioctl calls to /dev/kvm -KVM does support the following ioctls on s390 that are common with other -architectures and do behave the same: -KVM_GET_API_VERSION -KVM_CREATE_VM (*) see note -KVM_CHECK_EXTENSION -KVM_GET_VCPU_MMAP_SIZE - -Notes: -* KVM_CREATE_VM may fail on s390, if the calling process has multiple -threads and has not called KVM_S390_ENABLE_SIE before. - -In addition, on s390 the following architecture specific ioctls are supported: -ioctl: KVM_S390_ENABLE_SIE -args: none -see also: include/linux/kvm.h -This call causes the kernel to switch on PGSTE in the user page table. This -operation is needed in order to run a virtual machine, and it requires the -calling process to be single-threaded. Note that the first call to KVM_CREATE_VM -will implicitly try to switch on PGSTE if the user process has not called -KVM_S390_ENABLE_SIE before. User processes that want to launch multiple threads -before creating a virtual machine have to call KVM_S390_ENABLE_SIE, or will -observe an error calling KVM_CREATE_VM. Switching on PGSTE is a one-time -operation, is not reversible, and will persist over the entire lifetime of -the calling process. It does not have any user-visible effect other than a small -performance penalty. - -2. ioctl calls to the kvm-vm file descriptor -KVM does support the following ioctls on s390 that are common with other -architectures and do behave the same: -KVM_CREATE_VCPU -KVM_SET_USER_MEMORY_REGION (*) see note -KVM_GET_DIRTY_LOG (**) see note - -Notes: -* kvm does only allow exactly one memory slot on s390, which has to start - at guest absolute address zero and at a user address that is aligned on any - page boundary. This hardware "limitation" allows us to have a few unique - optimizations. The memory slot doesn't have to be filled - with memory actually, it may contain sparse holes. That said, with different - user memory layout this does still allow a large flexibility when - doing the guest memory setup. -** KVM_GET_DIRTY_LOG doesn't work properly yet. The user will receive an empty -log. This ioctl call is only needed for guest migration, and we intend to -implement this one in the future. - -In addition, on s390 the following architecture specific ioctls for the kvm-vm -file descriptor are supported: -ioctl: KVM_S390_INTERRUPT -args: struct kvm_s390_interrupt * -see also: include/linux/kvm.h -This ioctl is used to submit a floating interrupt for a virtual machine. -Floating interrupts may be delivered to any virtual cpu in the configuration. -Only some interrupt types defined in include/linux/kvm.h make sense when -submitted as floating interrupts. The following interrupts are not considered -to be useful as floating interrupts, and a call to inject them will result in --EINVAL error code: program interrupts and interprocessor signals. Valid -floating interrupts are: -KVM_S390_INT_VIRTIO -KVM_S390_INT_SERVICE - -3. ioctl calls to the kvm-vcpu file descriptor -KVM does support the following ioctls on s390 that are common with other -architectures and do behave the same: -KVM_RUN -KVM_GET_REGS -KVM_SET_REGS -KVM_GET_SREGS -KVM_SET_SREGS -KVM_GET_FPU -KVM_SET_FPU - -In addition, on s390 the following architecture specific ioctls for the -kvm-vcpu file descriptor are supported: -ioctl: KVM_S390_INTERRUPT -args: struct kvm_s390_interrupt * -see also: include/linux/kvm.h -This ioctl is used to submit an interrupt for a specific virtual cpu. -Only some interrupt types defined in include/linux/kvm.h make sense when -submitted for a specific cpu. The following interrupts are not considered -to be useful, and a call to inject them will result in -EINVAL error code: -service processor calls and virtio interrupts. Valid interrupt types are: -KVM_S390_PROGRAM_INT -KVM_S390_SIGP_STOP -KVM_S390_RESTART -KVM_S390_SIGP_SET_PREFIX -KVM_S390_INT_EMERGENCY - -ioctl: KVM_S390_STORE_STATUS -args: unsigned long -see also: include/linux/kvm.h -This ioctl stores the state of the cpu at the guest real address given as -argument, unless one of the following values defined in include/linux/kvm.h -is given as argument: -KVM_S390_STORE_STATUS_NOADDR - the CPU stores its status to the save area in -absolute lowcore as defined by the principles of operation -KVM_S390_STORE_STATUS_PREFIXED - the CPU stores its status to the save area in -its prefix page just like the dump tool that comes with zipl. This is useful -to create a system dump for use with lkcdutils or crash. - -ioctl: KVM_S390_SET_INITIAL_PSW -args: struct kvm_s390_psw * -see also: include/linux/kvm.h -This ioctl can be used to set the processor status word (psw) of a stopped cpu -prior to running it with KVM_RUN. Note that this call is not required to modify -the psw during sie intercepts that fall back to userspace because struct kvm_run -does contain the psw, and this value is evaluated during reentry of KVM_RUN -after the intercept exit was recognized. - -ioctl: KVM_S390_INITIAL_RESET -args: none -see also: include/linux/kvm.h -This ioctl can be used to perform an initial cpu reset as defined by the -principles of operation. The target cpu has to be in stopped state. diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py index 949de191fcdc..cda56df9b8a7 100755 --- a/Documentation/target/tcm_mod_builder.py +++ b/Documentation/target/tcm_mod_builder.py @@ -199,7 +199,8 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += "#include <linux/string.h>\n" buf += "#include <linux/configfs.h>\n" buf += "#include <linux/ctype.h>\n" - buf += "#include <asm/unaligned.h>\n\n" + buf += "#include <asm/unaligned.h>\n" + buf += "#include <scsi/scsi_proto.h>\n\n" buf += "#include <target/target_core_base.h>\n" buf += "#include <target/target_core_fabric.h>\n" buf += "#include <target/target_core_fabric_configfs.h>\n" @@ -230,8 +231,14 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += " }\n" buf += " tpg->" + fabric_mod_port + " = " + fabric_mod_port + ";\n" buf += " tpg->" + fabric_mod_port + "_tpgt = tpgt;\n\n" - buf += " ret = core_tpg_register(&" + fabric_mod_name + "_ops, wwn,\n" - buf += " &tpg->se_tpg, SCSI_PROTOCOL_SAS);\n" + + if proto_ident == "FC": + buf += " ret = core_tpg_register(wwn, &tpg->se_tpg, SCSI_PROTOCOL_FCP);\n" + elif proto_ident == "SAS": + buf += " ret = core_tpg_register(wwn, &tpg->se_tpg, SCSI_PROTOCOL_SAS);\n" + elif proto_ident == "iSCSI": + buf += " ret = core_tpg_register(wwn, &tpg->se_tpg, SCSI_PROTOCOL_ISCSI);\n" + buf += " if (ret < 0) {\n" buf += " kfree(tpg);\n" buf += " return NULL;\n" @@ -292,7 +299,7 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += "static const struct target_core_fabric_ops " + fabric_mod_name + "_ops = {\n" buf += " .module = THIS_MODULE,\n" - buf += " .name = " + fabric_mod_name + ",\n" + buf += " .name = \"" + fabric_mod_name + "\",\n" buf += " .get_fabric_name = " + fabric_mod_name + "_get_fabric_name,\n" buf += " .tpg_get_wwn = " + fabric_mod_name + "_get_fabric_wwn,\n" buf += " .tpg_get_tag = " + fabric_mod_name + "_get_tag,\n" @@ -322,17 +329,17 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += " .fabric_make_tpg = " + fabric_mod_name + "_make_tpg,\n" buf += " .fabric_drop_tpg = " + fabric_mod_name + "_drop_tpg,\n" buf += "\n" - buf += " .tfc_wwn_attrs = " + fabric_mod_name + "_wwn_attrs;\n" + buf += " .tfc_wwn_attrs = " + fabric_mod_name + "_wwn_attrs,\n" buf += "};\n\n" buf += "static int __init " + fabric_mod_name + "_init(void)\n" buf += "{\n" - buf += " return target_register_template(" + fabric_mod_name + "_ops);\n" + buf += " return target_register_template(&" + fabric_mod_name + "_ops);\n" buf += "};\n\n" buf += "static void __exit " + fabric_mod_name + "_exit(void)\n" buf += "{\n" - buf += " target_unregister_template(" + fabric_mod_name + "_ops);\n" + buf += " target_unregister_template(&" + fabric_mod_name + "_ops);\n" buf += "};\n\n" buf += "MODULE_DESCRIPTION(\"" + fabric_mod_name.upper() + " series fabric driver\");\n" diff --git a/Documentation/trace/coresight.txt b/Documentation/trace/coresight.txt index 77d14d51a670..0a5c3290e732 100644 --- a/Documentation/trace/coresight.txt +++ b/Documentation/trace/coresight.txt @@ -15,7 +15,7 @@ HW assisted tracing is becoming increasingly useful when dealing with systems that have many SoCs and other components like GPU and DMA engines. ARM has developed a HW assisted tracing solution by means of different components, each being added to a design at synthesis time to cater to specific tracing needs. -Compoments are generally categorised as source, link and sinks and are +Components are generally categorised as source, link and sinks and are (usually) discovered using the AMBA bus. "Sources" generate a compressed stream representing the processor instruction @@ -138,7 +138,7 @@ void coresight_unregister(struct coresight_device *csdev); The registering function is taking a "struct coresight_device *csdev" and register the device with the core framework. The unregister function takes -a reference to a "strut coresight_device", obtained at registration time. +a reference to a "struct coresight_device", obtained at registration time. If everything goes well during the registration process the new devices will show up under /sys/bus/coresight/devices, as showns here for a TC2 platform: diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a7926a90156f..a4ebcb712375 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3277,6 +3277,7 @@ should put the acknowledged interrupt vector into the 'epr' field. struct { #define KVM_SYSTEM_EVENT_SHUTDOWN 1 #define KVM_SYSTEM_EVENT_RESET 2 +#define KVM_SYSTEM_EVENT_CRASH 3 __u32 type; __u64 flags; } system_event; @@ -3296,6 +3297,10 @@ Valid values for 'type' are: KVM_SYSTEM_EVENT_RESET -- the guest has requested a reset of the VM. As with SHUTDOWN, userspace can choose to ignore the request, or to schedule the reset to occur in the future and may call KVM_RUN again. + KVM_SYSTEM_EVENT_CRASH -- the guest crash occurred and the guest + has requested a crash condition maintenance. Userspace can choose + to ignore the request, or to gather VM memory core dump and/or + reset/shutdown of the VM. /* Fix the size of the union. */ char padding[256]; diff --git a/MAINTAINERS b/MAINTAINERS index 9289ecb57b68..7899b3f5bec9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3587,6 +3587,15 @@ S: Maintained F: drivers/gpu/drm/rockchip/ F: Documentation/devicetree/bindings/video/rockchip* +DRM DRIVERS FOR STI +M: Benjamin Gaignard <benjamin.gaignard@linaro.org> +M: Vincent Abriou <vincent.abriou@st.com> +L: dri-devel@lists.freedesktop.org +T: git http://git.linaro.org/people/benjamin.gaignard/kernel.git +S: Maintained +F: drivers/gpu/drm/sti +F: Documentation/devicetree/bindings/gpu/st,stih4xx.txt + DSBR100 USB FM RADIO DRIVER M: Alexey Klimov <klimov.linux@gmail.com> L: linux-media@vger.kernel.org @@ -4957,6 +4966,7 @@ F: drivers/scsi/storvsc_drv.c F: drivers/video/fbdev/hyperv_fb.c F: include/linux/hyperv.h F: tools/hv/ +F: Documentation/ABI/stable/sysfs-bus-vmbus I2C OVER PARALLEL PORT M: Jean Delvare <jdelvare@suse.com> @@ -5600,6 +5610,7 @@ F: kernel/irq/ IRQCHIP DRIVERS M: Thomas Gleixner <tglx@linutronix.de> M: Jason Cooper <jason@lakedaemon.net> +M: Marc Zyngier <marc.zyngier@arm.com> L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core @@ -5608,11 +5619,14 @@ F: Documentation/devicetree/bindings/interrupt-controller/ F: drivers/irqchip/ IRQ DOMAINS (IRQ NUMBER MAPPING LIBRARY) -M: Benjamin Herrenschmidt <benh@kernel.crashing.org> +M: Jiang Liu <jiang.liu@linux.intel.com> +M: Marc Zyngier <marc.zyngier@arm.com> S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core F: Documentation/IRQ-domain.txt F: include/linux/irqdomain.h F: kernel/irq/irqdomain.c +F: kernel/irq/msi.c ISAPNP M: Jaroslav Kysela <perex@perex.cz> @@ -5836,6 +5850,7 @@ S: Odd Fixes KERNEL NFSD, SUNRPC, AND LOCKD SERVERS M: "J. Bruce Fields" <bfields@fieldses.org> +M: Jeff Layton <jlayton@poochiereds.net> L: linux-nfs@vger.kernel.org W: http://nfs.sourceforge.net/ S: Supported @@ -7284,6 +7299,15 @@ S: Supported F: drivers/block/nvme* F: include/linux/nvme.h +NVMEM FRAMEWORK +M: Srinivas Kandagatla <srinivas.kandagatla@linaro.org> +M: Maxime Ripard <maxime.ripard@free-electrons.com> +S: Maintained +F: drivers/nvmem/ +F: Documentation/devicetree/bindings/nvmem/ +F: include/linux/nvmem-consumer.h +F: include/linux/nvmem-provider.h + NXP-NCI NFC DRIVER M: Clément Perrochaud <clement.perrochaud@effinnov.com> R: Charles Gorand <charles.gorand@effinnov.com> @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 2 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = NAME = Hurr durr I'ma sheep # *DOCUMENTATION* @@ -597,6 +597,11 @@ endif # $(dot-config) # Defaults to vmlinux, but the arch makefile usually adds further targets all: vmlinux +# The arch Makefile can set ARCH_{CPP,A,C}FLAGS to override the default +# values of the respective KBUILD_* variables +ARCH_CPPFLAGS := +ARCH_AFLAGS := +ARCH_CFLAGS := include arch/$(SRCARCH)/Makefile KBUILD_CFLAGS += $(call cc-option,-fno-delete-null-pointer-checks,) @@ -848,10 +853,10 @@ export mod_strip_cmd mod_compress_cmd = true ifdef CONFIG_MODULE_COMPRESS ifdef CONFIG_MODULE_COMPRESS_GZIP - mod_compress_cmd = gzip -n + mod_compress_cmd = gzip -n -f endif # CONFIG_MODULE_COMPRESS_GZIP ifdef CONFIG_MODULE_COMPRESS_XZ - mod_compress_cmd = xz + mod_compress_cmd = xz -f endif # CONFIG_MODULE_COMPRESS_XZ endif # CONFIG_MODULE_COMPRESS export mod_compress_cmd diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 91cf4055acab..bd4670d1b89b 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -313,11 +313,11 @@ config ARC_PAGE_SIZE_8K config ARC_PAGE_SIZE_16K bool "16KB" - depends on ARC_MMU_V3 + depends on ARC_MMU_V3 || ARC_MMU_V4 config ARC_PAGE_SIZE_4K bool "4KB" - depends on ARC_MMU_V3 + depends on ARC_MMU_V3 || ARC_MMU_V4 endchoice @@ -365,6 +365,11 @@ config ARC_HAS_LLSC default y depends on !ARC_CANT_LLSC +config ARC_STAR_9000923308 + bool "Workaround for llock/scond livelock" + default y + depends on ISA_ARCV2 && SMP && ARC_HAS_LLSC + config ARC_HAS_SWAPE bool "Insn: SWAPE (endian-swap)" default y @@ -379,6 +384,10 @@ config ARC_HAS_LL64 dest operands with 2 possible source operands. default y +config ARC_HAS_DIV_REM + bool "Insn: div, divu, rem, remu" + default y + config ARC_HAS_RTC bool "Local 64-bit r/o cycle counter" default n diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 46d87310220d..8a27a48304a4 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -36,8 +36,16 @@ cflags-$(atleast_gcc44) += -fsection-anchors cflags-$(CONFIG_ARC_HAS_LLSC) += -mlock cflags-$(CONFIG_ARC_HAS_SWAPE) += -mswape +ifdef CONFIG_ISA_ARCV2 + ifndef CONFIG_ARC_HAS_LL64 -cflags-$(CONFIG_ISA_ARCV2) += -mno-ll64 +cflags-y += -mno-ll64 +endif + +ifndef CONFIG_ARC_HAS_DIV_REM +cflags-y += -mno-div-rem +endif + endif cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 070f58827a5c..c8f57b8449dc 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -89,11 +89,10 @@ #define ECR_C_BIT_DTLB_LD_MISS 8 #define ECR_C_BIT_DTLB_ST_MISS 9 - /* Auxiliary registers */ #define AUX_IDENTITY 4 #define AUX_INTR_VEC_BASE 0x25 - +#define AUX_NON_VOL 0x5e /* * Floating Pt Registers @@ -240,9 +239,9 @@ struct bcr_extn_xymem { struct bcr_perip { #ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int start:8, pad2:8, sz:8, pad:8; + unsigned int start:8, pad2:8, sz:8, ver:8; #else - unsigned int pad:8, sz:8, pad2:8, start:8; + unsigned int ver:8, sz:8, pad2:8, start:8; #endif }; diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 03484cb4d16d..87d18ae53115 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -23,33 +23,60 @@ #define atomic_set(v, i) (((v)->counter) = (i)) -#ifdef CONFIG_ISA_ARCV2 -#define PREFETCHW " prefetchw [%1] \n" -#else -#define PREFETCHW +#ifdef CONFIG_ARC_STAR_9000923308 + +#define SCOND_FAIL_RETRY_VAR_DEF \ + unsigned int delay = 1, tmp; \ + +#define SCOND_FAIL_RETRY_ASM \ + " bz 4f \n" \ + " ; --- scond fail delay --- \n" \ + " mov %[tmp], %[delay] \n" /* tmp = delay */ \ + "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \ + " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \ + " rol %[delay], %[delay] \n" /* delay *= 2 */ \ + " b 1b \n" /* start over */ \ + "4: ; --- success --- \n" \ + +#define SCOND_FAIL_RETRY_VARS \ + ,[delay] "+&r" (delay),[tmp] "=&r" (tmp) \ + +#else /* !CONFIG_ARC_STAR_9000923308 */ + +#define SCOND_FAIL_RETRY_VAR_DEF + +#define SCOND_FAIL_RETRY_ASM \ + " bnz 1b \n" \ + +#define SCOND_FAIL_RETRY_VARS + #endif #define ATOMIC_OP(op, c_op, asm_op) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ - unsigned int temp; \ + unsigned int val; \ + SCOND_FAIL_RETRY_VAR_DEF \ \ __asm__ __volatile__( \ - "1: \n" \ - PREFETCHW \ - " llock %0, [%1] \n" \ - " " #asm_op " %0, %0, %2 \n" \ - " scond %0, [%1] \n" \ - " bnz 1b \n" \ - : "=&r"(temp) /* Early clobber, to prevent reg reuse */ \ - : "r"(&v->counter), "ir"(i) \ + "1: llock %[val], [%[ctr]] \n" \ + " " #asm_op " %[val], %[val], %[i] \n" \ + " scond %[val], [%[ctr]] \n" \ + " \n" \ + SCOND_FAIL_RETRY_ASM \ + \ + : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \ + SCOND_FAIL_RETRY_VARS \ + : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \ + [i] "ir" (i) \ : "cc"); \ } \ #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ - unsigned int temp; \ + unsigned int val; \ + SCOND_FAIL_RETRY_VAR_DEF \ \ /* \ * Explicit full memory barrier needed before/after as \ @@ -58,19 +85,21 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ smp_mb(); \ \ __asm__ __volatile__( \ - "1: \n" \ - PREFETCHW \ - " llock %0, [%1] \n" \ - " " #asm_op " %0, %0, %2 \n" \ - " scond %0, [%1] \n" \ - " bnz 1b \n" \ - : "=&r"(temp) \ - : "r"(&v->counter), "ir"(i) \ + "1: llock %[val], [%[ctr]] \n" \ + " " #asm_op " %[val], %[val], %[i] \n" \ + " scond %[val], [%[ctr]] \n" \ + " \n" \ + SCOND_FAIL_RETRY_ASM \ + \ + : [val] "=&r" (val) \ + SCOND_FAIL_RETRY_VARS \ + : [ctr] "r" (&v->counter), \ + [i] "ir" (i) \ : "cc"); \ \ smp_mb(); \ \ - return temp; \ + return val; \ } #else /* !CONFIG_ARC_HAS_LLSC */ @@ -150,6 +179,9 @@ ATOMIC_OP(and, &=, and) #undef ATOMIC_OPS #undef ATOMIC_OP_RETURN #undef ATOMIC_OP +#undef SCOND_FAIL_RETRY_VAR_DEF +#undef SCOND_FAIL_RETRY_ASM +#undef SCOND_FAIL_RETRY_VARS /** * __atomic_add_unless - add unless the number is a given value diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index 91694ec1ce95..69095da1fcfd 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -20,20 +20,20 @@ struct pt_regs { /* Real registers */ - long bta; /* bta_l1, bta_l2, erbta */ + unsigned long bta; /* bta_l1, bta_l2, erbta */ - long lp_start, lp_end, lp_count; + unsigned long lp_start, lp_end, lp_count; - long status32; /* status32_l1, status32_l2, erstatus */ - long ret; /* ilink1, ilink2 or eret */ - long blink; - long fp; - long r26; /* gp */ + unsigned long status32; /* status32_l1, status32_l2, erstatus */ + unsigned long ret; /* ilink1, ilink2 or eret */ + unsigned long blink; + unsigned long fp; + unsigned long r26; /* gp */ - long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0; + unsigned long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0; - long sp; /* user/kernel sp depending on where we came from */ - long orig_r0; + unsigned long sp; /* User/Kernel depending on where we came from */ + unsigned long orig_r0; /* * To distinguish bet excp, syscall, irq @@ -55,13 +55,13 @@ struct pt_regs { unsigned long event; }; - long user_r25; + unsigned long user_r25; }; #else struct pt_regs { - long orig_r0; + unsigned long orig_r0; union { struct { @@ -76,26 +76,26 @@ struct pt_regs { unsigned long event; }; - long bta; /* bta_l1, bta_l2, erbta */ + unsigned long bta; /* bta_l1, bta_l2, erbta */ - long user_r25; + unsigned long user_r25; - long r26; /* gp */ - long fp; - long sp; /* user/kernel sp depending on where we came from */ + unsigned long r26; /* gp */ + unsigned long fp; + unsigned long sp; /* user/kernel sp depending on where we came from */ - long r12; + unsigned long r12; /*------- Below list auto saved by h/w -----------*/ - long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11; + unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11; - long blink; - long lp_end, lp_start, lp_count; + unsigned long blink; + unsigned long lp_end, lp_start, lp_count; - long ei, ldi, jli; + unsigned long ei, ldi, jli; - long ret; - long status32; + unsigned long ret; + unsigned long status32; }; #endif @@ -103,10 +103,10 @@ struct pt_regs { /* Callee saved registers - need to be saved only when you are scheduled out */ struct callee_regs { - long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13; + unsigned long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13; }; -#define instruction_pointer(regs) (unsigned long)((regs)->ret) +#define instruction_pointer(regs) ((regs)->ret) #define profile_pc(regs) instruction_pointer(regs) /* return 1 if user mode or 0 if kernel mode */ @@ -142,7 +142,7 @@ struct callee_regs { static inline long regs_return_value(struct pt_regs *regs) { - return regs->r0; + return (long)regs->r0; } #endif /* !__ASSEMBLY__ */ diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index e1651df6a93d..db8c59d1eaeb 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -18,9 +18,518 @@ #define arch_spin_unlock_wait(x) \ do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0) +#ifdef CONFIG_ARC_HAS_LLSC + +/* + * A normal LLOCK/SCOND based system, w/o need for livelock workaround + */ +#ifndef CONFIG_ARC_STAR_9000923308 + static inline void arch_spin_lock(arch_spinlock_t *lock) { - unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__; + unsigned int val; + + smp_mb(); + + __asm__ __volatile__( + "1: llock %[val], [%[slock]] \n" + " breq %[val], %[LOCKED], 1b \n" /* spin while LOCKED */ + " scond %[LOCKED], [%[slock]] \n" /* acquire */ + " bnz 1b \n" + " \n" + : [val] "=&r" (val) + : [slock] "r" (&(lock->slock)), + [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__) + : "memory", "cc"); + + smp_mb(); +} + +/* 1 - lock taken successfully */ +static inline int arch_spin_trylock(arch_spinlock_t *lock) +{ + unsigned int val, got_it = 0; + + smp_mb(); + + __asm__ __volatile__( + "1: llock %[val], [%[slock]] \n" + " breq %[val], %[LOCKED], 4f \n" /* already LOCKED, just bail */ + " scond %[LOCKED], [%[slock]] \n" /* acquire */ + " bnz 1b \n" + " mov %[got_it], 1 \n" + "4: \n" + " \n" + : [val] "=&r" (val), + [got_it] "+&r" (got_it) + : [slock] "r" (&(lock->slock)), + [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__) + : "memory", "cc"); + + smp_mb(); + + return got_it; +} + +static inline void arch_spin_unlock(arch_spinlock_t *lock) +{ + smp_mb(); + + lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__; + + smp_mb(); +} + +/* + * Read-write spinlocks, allowing multiple readers but only one writer. + * Unfair locking as Writers could be starved indefinitely by Reader(s) + */ + +static inline void arch_read_lock(arch_rwlock_t *rw) +{ + unsigned int val; + + smp_mb(); + + /* + * zero means writer holds the lock exclusively, deny Reader. + * Otherwise grant lock to first/subseq reader + * + * if (rw->counter > 0) { + * rw->counter--; + * ret = 1; + * } + */ + + __asm__ __volatile__( + "1: llock %[val], [%[rwlock]] \n" + " brls %[val], %[WR_LOCKED], 1b\n" /* <= 0: spin while write locked */ + " sub %[val], %[val], 1 \n" /* reader lock */ + " scond %[val], [%[rwlock]] \n" + " bnz 1b \n" + " \n" + : [val] "=&r" (val) + : [rwlock] "r" (&(rw->counter)), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); +} + +/* 1 - lock taken successfully */ +static inline int arch_read_trylock(arch_rwlock_t *rw) +{ + unsigned int val, got_it = 0; + + smp_mb(); + + __asm__ __volatile__( + "1: llock %[val], [%[rwlock]] \n" + " brls %[val], %[WR_LOCKED], 4f\n" /* <= 0: already write locked, bail */ + " sub %[val], %[val], 1 \n" /* counter-- */ + " scond %[val], [%[rwlock]] \n" + " bnz 1b \n" /* retry if collided with someone */ + " mov %[got_it], 1 \n" + " \n" + "4: ; --- done --- \n" + + : [val] "=&r" (val), + [got_it] "+&r" (got_it) + : [rwlock] "r" (&(rw->counter)), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); + + return got_it; +} + +static inline void arch_write_lock(arch_rwlock_t *rw) +{ + unsigned int val; + + smp_mb(); + + /* + * If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__), + * deny writer. Otherwise if unlocked grant to writer + * Hence the claim that Linux rwlocks are unfair to writers. + * (can be starved for an indefinite time by readers). + * + * if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) { + * rw->counter = 0; + * ret = 1; + * } + */ + + __asm__ __volatile__( + "1: llock %[val], [%[rwlock]] \n" + " brne %[val], %[UNLOCKED], 1b \n" /* while !UNLOCKED spin */ + " mov %[val], %[WR_LOCKED] \n" + " scond %[val], [%[rwlock]] \n" + " bnz 1b \n" + " \n" + : [val] "=&r" (val) + : [rwlock] "r" (&(rw->counter)), + [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); +} + +/* 1 - lock taken successfully */ +static inline int arch_write_trylock(arch_rwlock_t *rw) +{ + unsigned int val, got_it = 0; + + smp_mb(); + + __asm__ __volatile__( + "1: llock %[val], [%[rwlock]] \n" + " brne %[val], %[UNLOCKED], 4f \n" /* !UNLOCKED, bail */ + " mov %[val], %[WR_LOCKED] \n" + " scond %[val], [%[rwlock]] \n" + " bnz 1b \n" /* retry if collided with someone */ + " mov %[got_it], 1 \n" + " \n" + "4: ; --- done --- \n" + + : [val] "=&r" (val), + [got_it] "+&r" (got_it) + : [rwlock] "r" (&(rw->counter)), + [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); + + return got_it; +} + +static inline void arch_read_unlock(arch_rwlock_t *rw) +{ + unsigned int val; + + smp_mb(); + + /* + * rw->counter++; + */ + __asm__ __volatile__( + "1: llock %[val], [%[rwlock]] \n" + " add %[val], %[val], 1 \n" + " scond %[val], [%[rwlock]] \n" + " bnz 1b \n" + " \n" + : [val] "=&r" (val) + : [rwlock] "r" (&(rw->counter)) + : "memory", "cc"); + + smp_mb(); +} + +static inline void arch_write_unlock(arch_rwlock_t *rw) +{ + smp_mb(); + + rw->counter = __ARCH_RW_LOCK_UNLOCKED__; + + smp_mb(); +} + +#else /* CONFIG_ARC_STAR_9000923308 */ + +/* + * HS38x4 could get into a LLOCK/SCOND livelock in case of multiple overlapping + * coherency transactions in the SCU. The exclusive line state keeps rotating + * among contenting cores leading to a never ending cycle. So break the cycle + * by deferring the retry of failed exclusive access (SCOND). The actual delay + * needed is function of number of contending cores as well as the unrelated + * coherency traffic from other cores. To keep the code simple, start off with + * small delay of 1 which would suffice most cases and in case of contention + * double the delay. Eventually the delay is sufficient such that the coherency + * pipeline is drained, thus a subsequent exclusive access would succeed. + */ + +#define SCOND_FAIL_RETRY_VAR_DEF \ + unsigned int delay, tmp; \ + +#define SCOND_FAIL_RETRY_ASM \ + " ; --- scond fail delay --- \n" \ + " mov %[tmp], %[delay] \n" /* tmp = delay */ \ + "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \ + " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \ + " rol %[delay], %[delay] \n" /* delay *= 2 */ \ + " b 1b \n" /* start over */ \ + " \n" \ + "4: ; --- done --- \n" \ + +#define SCOND_FAIL_RETRY_VARS \ + ,[delay] "=&r" (delay), [tmp] "=&r" (tmp) \ + +static inline void arch_spin_lock(arch_spinlock_t *lock) +{ + unsigned int val; + SCOND_FAIL_RETRY_VAR_DEF; + + smp_mb(); + + __asm__ __volatile__( + "0: mov %[delay], 1 \n" + "1: llock %[val], [%[slock]] \n" + " breq %[val], %[LOCKED], 0b \n" /* spin while LOCKED */ + " scond %[LOCKED], [%[slock]] \n" /* acquire */ + " bz 4f \n" /* done */ + " \n" + SCOND_FAIL_RETRY_ASM + + : [val] "=&r" (val) + SCOND_FAIL_RETRY_VARS + : [slock] "r" (&(lock->slock)), + [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__) + : "memory", "cc"); + + smp_mb(); +} + +/* 1 - lock taken successfully */ +static inline int arch_spin_trylock(arch_spinlock_t *lock) +{ + unsigned int val, got_it = 0; + SCOND_FAIL_RETRY_VAR_DEF; + + smp_mb(); + + __asm__ __volatile__( + "0: mov %[delay], 1 \n" + "1: llock %[val], [%[slock]] \n" + " breq %[val], %[LOCKED], 4f \n" /* already LOCKED, just bail */ + " scond %[LOCKED], [%[slock]] \n" /* acquire */ + " bz.d 4f \n" + " mov.z %[got_it], 1 \n" /* got it */ + " \n" + SCOND_FAIL_RETRY_ASM + + : [val] "=&r" (val), + [got_it] "+&r" (got_it) + SCOND_FAIL_RETRY_VARS + : [slock] "r" (&(lock->slock)), + [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__) + : "memory", "cc"); + + smp_mb(); + + return got_it; +} + +static inline void arch_spin_unlock(arch_spinlock_t *lock) +{ + smp_mb(); + + lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__; + + smp_mb(); +} + +/* + * Read-write spinlocks, allowing multiple readers but only one writer. + * Unfair locking as Writers could be starved indefinitely by Reader(s) + */ + +static inline void arch_read_lock(arch_rwlock_t *rw) +{ + unsigned int val; + SCOND_FAIL_RETRY_VAR_DEF; + + smp_mb(); + + /* + * zero means writer holds the lock exclusively, deny Reader. + * Otherwise grant lock to first/subseq reader + * + * if (rw->counter > 0) { + * rw->counter--; + * ret = 1; + * } + */ + + __asm__ __volatile__( + "0: mov %[delay], 1 \n" + "1: llock %[val], [%[rwlock]] \n" + " brls %[val], %[WR_LOCKED], 0b\n" /* <= 0: spin while write locked */ + " sub %[val], %[val], 1 \n" /* reader lock */ + " scond %[val], [%[rwlock]] \n" + " bz 4f \n" /* done */ + " \n" + SCOND_FAIL_RETRY_ASM + + : [val] "=&r" (val) + SCOND_FAIL_RETRY_VARS + : [rwlock] "r" (&(rw->counter)), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); +} + +/* 1 - lock taken successfully */ +static inline int arch_read_trylock(arch_rwlock_t *rw) +{ + unsigned int val, got_it = 0; + SCOND_FAIL_RETRY_VAR_DEF; + + smp_mb(); + + __asm__ __volatile__( + "0: mov %[delay], 1 \n" + "1: llock %[val], [%[rwlock]] \n" + " brls %[val], %[WR_LOCKED], 4f\n" /* <= 0: already write locked, bail */ + " sub %[val], %[val], 1 \n" /* counter-- */ + " scond %[val], [%[rwlock]] \n" + " bz.d 4f \n" + " mov.z %[got_it], 1 \n" /* got it */ + " \n" + SCOND_FAIL_RETRY_ASM + + : [val] "=&r" (val), + [got_it] "+&r" (got_it) + SCOND_FAIL_RETRY_VARS + : [rwlock] "r" (&(rw->counter)), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); + + return got_it; +} + +static inline void arch_write_lock(arch_rwlock_t *rw) +{ + unsigned int val; + SCOND_FAIL_RETRY_VAR_DEF; + + smp_mb(); + + /* + * If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__), + * deny writer. Otherwise if unlocked grant to writer + * Hence the claim that Linux rwlocks are unfair to writers. + * (can be starved for an indefinite time by readers). + * + * if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) { + * rw->counter = 0; + * ret = 1; + * } + */ + + __asm__ __volatile__( + "0: mov %[delay], 1 \n" + "1: llock %[val], [%[rwlock]] \n" + " brne %[val], %[UNLOCKED], 0b \n" /* while !UNLOCKED spin */ + " mov %[val], %[WR_LOCKED] \n" + " scond %[val], [%[rwlock]] \n" + " bz 4f \n" + " \n" + SCOND_FAIL_RETRY_ASM + + : [val] "=&r" (val) + SCOND_FAIL_RETRY_VARS + : [rwlock] "r" (&(rw->counter)), + [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); +} + +/* 1 - lock taken successfully */ +static inline int arch_write_trylock(arch_rwlock_t *rw) +{ + unsigned int val, got_it = 0; + SCOND_FAIL_RETRY_VAR_DEF; + + smp_mb(); + + __asm__ __volatile__( + "0: mov %[delay], 1 \n" + "1: llock %[val], [%[rwlock]] \n" + " brne %[val], %[UNLOCKED], 4f \n" /* !UNLOCKED, bail */ + " mov %[val], %[WR_LOCKED] \n" + " scond %[val], [%[rwlock]] \n" + " bz.d 4f \n" + " mov.z %[got_it], 1 \n" /* got it */ + " \n" + SCOND_FAIL_RETRY_ASM + + : [val] "=&r" (val), + [got_it] "+&r" (got_it) + SCOND_FAIL_RETRY_VARS + : [rwlock] "r" (&(rw->counter)), + [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); + + return got_it; +} + +static inline void arch_read_unlock(arch_rwlock_t *rw) +{ + unsigned int val; + + smp_mb(); + + /* + * rw->counter++; + */ + __asm__ __volatile__( + "1: llock %[val], [%[rwlock]] \n" + " add %[val], %[val], 1 \n" + " scond %[val], [%[rwlock]] \n" + " bnz 1b \n" + " \n" + : [val] "=&r" (val) + : [rwlock] "r" (&(rw->counter)) + : "memory", "cc"); + + smp_mb(); +} + +static inline void arch_write_unlock(arch_rwlock_t *rw) +{ + unsigned int val; + + smp_mb(); + + /* + * rw->counter = __ARCH_RW_LOCK_UNLOCKED__; + */ + __asm__ __volatile__( + "1: llock %[val], [%[rwlock]] \n" + " scond %[UNLOCKED], [%[rwlock]]\n" + " bnz 1b \n" + " \n" + : [val] "=&r" (val) + : [rwlock] "r" (&(rw->counter)), + [UNLOCKED] "r" (__ARCH_RW_LOCK_UNLOCKED__) + : "memory", "cc"); + + smp_mb(); +} + +#undef SCOND_FAIL_RETRY_VAR_DEF +#undef SCOND_FAIL_RETRY_ASM +#undef SCOND_FAIL_RETRY_VARS + +#endif /* CONFIG_ARC_STAR_9000923308 */ + +#else /* !CONFIG_ARC_HAS_LLSC */ + +static inline void arch_spin_lock(arch_spinlock_t *lock) +{ + unsigned int val = __ARCH_SPIN_LOCK_LOCKED__; /* * This smp_mb() is technically superfluous, we only need the one @@ -33,7 +542,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) __asm__ __volatile__( "1: ex %0, [%1] \n" " breq %0, %2, 1b \n" - : "+&r" (tmp) + : "+&r" (val) : "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__) : "memory"); @@ -48,26 +557,27 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) smp_mb(); } +/* 1 - lock taken successfully */ static inline int arch_spin_trylock(arch_spinlock_t *lock) { - unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__; + unsigned int val = __ARCH_SPIN_LOCK_LOCKED__; smp_mb(); __asm__ __volatile__( "1: ex %0, [%1] \n" - : "+r" (tmp) + : "+r" (val) : "r"(&(lock->slock)) : "memory"); smp_mb(); - return (tmp == __ARCH_SPIN_LOCK_UNLOCKED__); + return (val == __ARCH_SPIN_LOCK_UNLOCKED__); } static inline void arch_spin_unlock(arch_spinlock_t *lock) { - unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__; + unsigned int val = __ARCH_SPIN_LOCK_UNLOCKED__; /* * RELEASE barrier: given the instructions avail on ARCv2, full barrier @@ -77,7 +587,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) __asm__ __volatile__( " ex %0, [%1] \n" - : "+r" (tmp) + : "+r" (val) : "r"(&(lock->slock)) : "memory"); @@ -90,19 +600,12 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) /* * Read-write spinlocks, allowing multiple readers but only one writer. + * Unfair locking as Writers could be starved indefinitely by Reader(s) * * The spinlock itself is contained in @counter and access to it is * serialized with @lock_mutex. - * - * Unfair locking as Writers could be starved indefinitely by Reader(s) */ -/* Would read_trylock() succeed? */ -#define arch_read_can_lock(x) ((x)->counter > 0) - -/* Would write_trylock() succeed? */ -#define arch_write_can_lock(x) ((x)->counter == __ARCH_RW_LOCK_UNLOCKED__) - /* 1 - lock taken successfully */ static inline int arch_read_trylock(arch_rwlock_t *rw) { @@ -173,6 +676,11 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) arch_spin_unlock(&(rw->lock_mutex)); } +#endif + +#define arch_read_can_lock(x) ((x)->counter > 0) +#define arch_write_can_lock(x) ((x)->counter == __ARCH_RW_LOCK_UNLOCKED__) + #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) diff --git a/arch/arc/include/asm/spinlock_types.h b/arch/arc/include/asm/spinlock_types.h index 662627ced4f2..4e1ef5f650c6 100644 --- a/arch/arc/include/asm/spinlock_types.h +++ b/arch/arc/include/asm/spinlock_types.h @@ -26,7 +26,9 @@ typedef struct { */ typedef struct { volatile unsigned int counter; +#ifndef CONFIG_ARC_HAS_LLSC arch_spinlock_t lock_mutex; +#endif } arch_rwlock_t; #define __ARCH_RW_LOCK_UNLOCKED__ 0x01000000 diff --git a/arch/arc/include/uapi/asm/ptrace.h b/arch/arc/include/uapi/asm/ptrace.h index 76a7739aab1c..0b3ef63d4a03 100644 --- a/arch/arc/include/uapi/asm/ptrace.h +++ b/arch/arc/include/uapi/asm/ptrace.h @@ -32,20 +32,20 @@ */ struct user_regs_struct { - long pad; + unsigned long pad; struct { - long bta, lp_start, lp_end, lp_count; - long status32, ret, blink, fp, gp; - long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0; - long sp; + unsigned long bta, lp_start, lp_end, lp_count; + unsigned long status32, ret, blink, fp, gp; + unsigned long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0; + unsigned long sp; } scratch; - long pad2; + unsigned long pad2; struct { - long r25, r24, r23, r22, r21, r20; - long r19, r18, r17, r16, r15, r14, r13; + unsigned long r25, r24, r23, r22, r21, r20; + unsigned long r19, r18, r17, r16, r15, r14, r13; } callee; - long efa; /* break pt addr, for break points in delay slots */ - long stop_pc; /* give dbg stop_pc after ensuring brkpt trap */ + unsigned long efa; /* break pt addr, for break points in delay slots */ + unsigned long stop_pc; /* give dbg stop_pc after ensuring brkpt trap */ }; #endif /* !__ASSEMBLY__ */ diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 18cc01591c96..cabde9dc0696 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -47,6 +47,7 @@ static void read_arc_build_cfg_regs(void) struct bcr_perip uncached_space; struct bcr_generic bcr; struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; + unsigned long perip_space; FIX_PTR(cpu); READ_BCR(AUX_IDENTITY, cpu->core); @@ -56,7 +57,12 @@ static void read_arc_build_cfg_regs(void) cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE); READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space); - BUG_ON((uncached_space.start << 24) != ARC_UNCACHED_ADDR_SPACE); + if (uncached_space.ver < 3) + perip_space = uncached_space.start << 24; + else + perip_space = read_aux_reg(AUX_NON_VOL) & 0xF0000000; + + BUG_ON(perip_space != ARC_UNCACHED_ADDR_SPACE); READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy); @@ -330,6 +336,10 @@ static void arc_chk_core_config(void) pr_warn("CONFIG_ARC_FPU_SAVE_RESTORE needed for working apps\n"); else if (!cpu->extn.fpu_dp && fpu_enabled) panic("FPU non-existent, disable CONFIG_ARC_FPU_SAVE_RESTORE\n"); + + if (is_isa_arcv2() && IS_ENABLED(CONFIG_SMP) && cpu->isa.atomic && + !IS_ENABLED(CONFIG_ARC_STAR_9000923308)) + panic("llock/scond livelock workaround missing\n"); } /* diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c index 3364d2bbc515..4294761a2b3e 100644 --- a/arch/arc/kernel/time.c +++ b/arch/arc/kernel/time.c @@ -203,34 +203,24 @@ static int arc_clkevent_set_next_event(unsigned long delta, return 0; } -static void arc_clkevent_set_mode(enum clock_event_mode mode, - struct clock_event_device *dev) +static int arc_clkevent_set_periodic(struct clock_event_device *dev) { - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - /* - * At X Hz, 1 sec = 1000ms -> X cycles; - * 10ms -> X / 100 cycles - */ - arc_timer_event_setup(arc_get_core_freq() / HZ); - break; - case CLOCK_EVT_MODE_ONESHOT: - break; - default: - break; - } - - return; + /* + * At X Hz, 1 sec = 1000ms -> X cycles; + * 10ms -> X / 100 cycles + */ + arc_timer_event_setup(arc_get_core_freq() / HZ); + return 0; } static DEFINE_PER_CPU(struct clock_event_device, arc_clockevent_device) = { - .name = "ARC Timer0", - .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC, - .mode = CLOCK_EVT_MODE_UNUSED, - .rating = 300, - .irq = TIMER0_IRQ, /* hardwired, no need for resources */ - .set_next_event = arc_clkevent_set_next_event, - .set_mode = arc_clkevent_set_mode, + .name = "ARC Timer0", + .features = CLOCK_EVT_FEAT_ONESHOT | + CLOCK_EVT_FEAT_PERIODIC, + .rating = 300, + .irq = TIMER0_IRQ, /* hardwired, no need for resources */ + .set_next_event = arc_clkevent_set_next_event, + .set_state_periodic = arc_clkevent_set_periodic, }; static irqreturn_t timer_irq_handler(int irq, void *dev_id) @@ -240,7 +230,7 @@ static irqreturn_t timer_irq_handler(int irq, void *dev_id) * irq_set_chip_and_handler() asked for handle_percpu_devid_irq() */ struct clock_event_device *evt = this_cpu_ptr(&arc_clockevent_device); - int irq_reenable = evt->mode == CLOCK_EVT_MODE_PERIODIC; + int irq_reenable = clockevent_state_periodic(evt); /* * Any write to CTRL reg ACks the interrupt, we rewrite the diff --git a/arch/arc/lib/memcpy-archs.S b/arch/arc/lib/memcpy-archs.S index 1b2b3acfed52..0cab0b8a57c5 100644 --- a/arch/arc/lib/memcpy-archs.S +++ b/arch/arc/lib/memcpy-archs.S @@ -206,7 +206,7 @@ unalignedOffby3: ld.ab r6, [r1, 4] prefetch [r1, 28] ;Prefetch the next read location ld.ab r8, [r1,4] - prefetch [r3, 32] ;Prefetch the next write location + prefetchw [r3, 32] ;Prefetch the next write location SHIFT_1 (r7, r6, 8) or r7, r7, r5 diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S index 92d573c734b5..365b18364815 100644 --- a/arch/arc/lib/memset-archs.S +++ b/arch/arc/lib/memset-archs.S @@ -10,12 +10,6 @@ #undef PREALLOC_NOT_AVAIL -#ifdef PREALLOC_NOT_AVAIL -#define PREWRITE(A,B) prefetchw [(A),(B)] -#else -#define PREWRITE(A,B) prealloc [(A),(B)] -#endif - ENTRY(memset) prefetchw [r0] ; Prefetch the write location mov.f 0, r2 @@ -51,9 +45,15 @@ ENTRY(memset) ;;; Convert len to Dwords, unfold x8 lsr.f lp_count, lp_count, 6 + lpnz @.Lset64bytes ;; LOOP START - PREWRITE(r3, 64) ;Prefetch the next write location +#ifdef PREALLOC_NOT_AVAIL + prefetchw [r3, 64] ;Prefetch the next write location +#else + prealloc [r3, 64] +#endif +#ifdef CONFIG_ARC_HAS_LL64 std.ab r4, [r3, 8] std.ab r4, [r3, 8] std.ab r4, [r3, 8] @@ -62,16 +62,45 @@ ENTRY(memset) std.ab r4, [r3, 8] std.ab r4, [r3, 8] std.ab r4, [r3, 8] +#else + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] +#endif .Lset64bytes: lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes lpnz .Lset32bytes ;; LOOP START prefetchw [r3, 32] ;Prefetch the next write location +#ifdef CONFIG_ARC_HAS_LL64 std.ab r4, [r3, 8] std.ab r4, [r3, 8] std.ab r4, [r3, 8] std.ab r4, [r3, 8] +#else + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] +#endif .Lset32bytes: and.f lp_count, r2, 0x1F ;Last remaining 31 bytes diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c index 99f7da513a48..e7769c3ab5f2 100644 --- a/arch/arc/plat-axs10x/axs10x.c +++ b/arch/arc/plat-axs10x/axs10x.c @@ -389,6 +389,21 @@ axs103_set_freq(unsigned int id, unsigned int fd, unsigned int od) static void __init axs103_early_init(void) { + /* + * AXS103 configurations for SMP/QUAD configurations share device tree + * which defaults to 90 MHz. However recent failures of Quad config + * revealed P&R timing violations so clamp it down to safe 50 MHz + * Instead of duplicating defconfig/DT for SMP/QUAD, add a small hack + * + * This hack is really hacky as of now. Fix it properly by getting the + * number of cores as return value of platform's early SMP callback + */ +#ifdef CONFIG_ARC_MCIP + unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F; + if (num_cores > 2) + arc_set_core_freq(50 * 1000000); +#endif + switch (arc_get_core_freq()/1000000) { case 33: axs103_set_freq(1, 1, 1); diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 07ab3d203916..7451b447cc2d 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -312,6 +312,9 @@ INSTALL_TARGETS = zinstall uinstall install PHONY += bzImage $(BOOT_TARGETS) $(INSTALL_TARGETS) +bootpImage uImage: zImage +zImage: Image + $(BOOT_TARGETS): vmlinux $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@ diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 8f1e25bcecbd..1e29ccf77ea2 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -116,7 +116,7 @@ ranges = <0 0x2000 0x2000>; scm_conf: scm_conf@0 { - compatible = "syscon"; + compatible = "syscon", "simple-bus"; reg = <0x0 0x1400>; #address-cells = <1>; #size-cells = <1>; @@ -1140,6 +1140,7 @@ ctrl-module = <&omap_control_sata>; clocks = <&sys_clkin1>, <&sata_ref_clk>; clock-names = "sysclk", "refclk"; + syscon-pllreset = <&scm_conf 0x3fc>; #phy-cells = <0>; }; diff --git a/arch/arm/boot/dts/exynos3250.dtsi b/arch/arm/boot/dts/exynos3250.dtsi index d7201333e3bc..2db99433e17f 100644 --- a/arch/arm/boot/dts/exynos3250.dtsi +++ b/arch/arm/boot/dts/exynos3250.dtsi @@ -138,8 +138,8 @@ mipi_phy: video-phy@10020710 { compatible = "samsung,s5pv210-mipi-video-phy"; - reg = <0x10020710 8>; #phy-cells = <1>; + syscon = <&pmu_system_controller>; }; pd_cam: cam-power-domain@10023C00 { diff --git a/arch/arm/boot/dts/exynos4210-origen.dts b/arch/arm/boot/dts/exynos4210-origen.dts index e0abfc3324d1..e050d85cdacd 100644 --- a/arch/arm/boot/dts/exynos4210-origen.dts +++ b/arch/arm/boot/dts/exynos4210-origen.dts @@ -127,6 +127,10 @@ }; }; +&cpu0 { + cpu0-supply = <&buck1_reg>; +}; + &fimd { pinctrl-0 = <&lcd_en &lcd_clk &lcd_data24 &pwm0_out>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/exynos4210-trats.dts b/arch/arm/boot/dts/exynos4210-trats.dts index 98f3ce65cb9a..ba34886f8b65 100644 --- a/arch/arm/boot/dts/exynos4210-trats.dts +++ b/arch/arm/boot/dts/exynos4210-trats.dts @@ -188,6 +188,10 @@ }; }; +&cpu0 { + cpu0-supply = <&varm_breg>; +}; + &dsi_0 { vddcore-supply = <&vusb_reg>; vddio-supply = <&vmipi_reg>; diff --git a/arch/arm/boot/dts/exynos4210-universal_c210.dts b/arch/arm/boot/dts/exynos4210-universal_c210.dts index d4f2b11319dd..775892b2cc6a 100644 --- a/arch/arm/boot/dts/exynos4210-universal_c210.dts +++ b/arch/arm/boot/dts/exynos4210-universal_c210.dts @@ -548,6 +548,10 @@ }; }; +&cpu0 { + cpu0-supply = <&vdd_arm_reg>; +}; + &pinctrl_1 { hdmi_hpd: hdmi-hpd { samsung,pins = "gpx3-7"; diff --git a/arch/arm/boot/dts/exynos4210.dtsi b/arch/arm/boot/dts/exynos4210.dtsi index 10d3c173396e..3e5ba665d200 100644 --- a/arch/arm/boot/dts/exynos4210.dtsi +++ b/arch/arm/boot/dts/exynos4210.dtsi @@ -40,6 +40,18 @@ device_type = "cpu"; compatible = "arm,cortex-a9"; reg = <0x900>; + clocks = <&clock CLK_ARM_CLK>; + clock-names = "cpu"; + clock-latency = <160000>; + + operating-points = < + 1200000 1250000 + 1000000 1150000 + 800000 1075000 + 500000 975000 + 400000 975000 + 200000 950000 + >; cooling-min-level = <4>; cooling-max-level = <2>; #cooling-cells = <2>; /* min followed by max */ diff --git a/arch/arm/boot/dts/imx35.dtsi b/arch/arm/boot/dts/imx35.dtsi index b6478e97d6a7..e6540b5cfa4c 100644 --- a/arch/arm/boot/dts/imx35.dtsi +++ b/arch/arm/boot/dts/imx35.dtsi @@ -286,8 +286,8 @@ can1: can@53fe4000 { compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan"; reg = <0x53fe4000 0x1000>; - clocks = <&clks 33>; - clock-names = "ipg"; + clocks = <&clks 33>, <&clks 33>; + clock-names = "ipg", "per"; interrupts = <43>; status = "disabled"; }; @@ -295,8 +295,8 @@ can2: can@53fe8000 { compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan"; reg = <0x53fe8000 0x1000>; - clocks = <&clks 34>; - clock-names = "ipg"; + clocks = <&clks 34>, <&clks 34>; + clock-names = "ipg", "per"; interrupts = <44>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index e6d13592080d..b57033e8c633 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -181,10 +181,10 @@ interrupt-names = "msi"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; - interrupt-map = <0 0 0 1 &intc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 2 &intc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 3 &intc GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 4 &intc GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>; + interrupt-map = <0 0 0 1 &gpc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 2 &gpc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 3 &gpc GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 4 &gpc GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6QDL_CLK_PCIE_AXI>, <&clks IMX6QDL_CLK_LVDS1_GATE>, <&clks IMX6QDL_CLK_PCIE_REF_125M>; diff --git a/arch/arm/boot/dts/k2e-clocks.dtsi b/arch/arm/boot/dts/k2e-clocks.dtsi index 4773d6af66a0..d56d68fe7ffc 100644 --- a/arch/arm/boot/dts/k2e-clocks.dtsi +++ b/arch/arm/boot/dts/k2e-clocks.dtsi @@ -13,9 +13,8 @@ clocks { #clock-cells = <0>; compatible = "ti,keystone,main-pll-clock"; clocks = <&refclksys>; - reg = <0x02620350 4>, <0x02310110 4>; - reg-names = "control", "multiplier"; - fixed-postdiv = <2>; + reg = <0x02620350 4>, <0x02310110 4>, <0x02310108 4>; + reg-names = "control", "multiplier", "post-divider"; }; papllclk: papllclk@2620358 { diff --git a/arch/arm/boot/dts/k2e.dtsi b/arch/arm/boot/dts/k2e.dtsi index 1b6494fbdb91..675fb8e492c6 100644 --- a/arch/arm/boot/dts/k2e.dtsi +++ b/arch/arm/boot/dts/k2e.dtsi @@ -131,10 +131,17 @@ <GIC_SPI 376 IRQ_TYPE_EDGE_RISING>; }; }; + + mdio: mdio@24200f00 { + compatible = "ti,keystone_mdio", "ti,davinci_mdio"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x24200f00 0x100>; + status = "disabled"; + clocks = <&clkcpgmac>; + clock-names = "fck"; + bus_freq = <2500000>; + }; /include/ "k2e-netcp.dtsi" }; }; - -&mdio { - reg = <0x24200f00 0x100>; -}; diff --git a/arch/arm/boot/dts/k2hk-clocks.dtsi b/arch/arm/boot/dts/k2hk-clocks.dtsi index d5adee3c0067..af9b7190533a 100644 --- a/arch/arm/boot/dts/k2hk-clocks.dtsi +++ b/arch/arm/boot/dts/k2hk-clocks.dtsi @@ -22,9 +22,8 @@ clocks { #clock-cells = <0>; compatible = "ti,keystone,main-pll-clock"; clocks = <&refclksys>; - reg = <0x02620350 4>, <0x02310110 4>; - reg-names = "control", "multiplier"; - fixed-postdiv = <2>; + reg = <0x02620350 4>, <0x02310110 4>, <0x02310108 4>; + reg-names = "control", "multiplier", "post-divider"; }; papllclk: papllclk@2620358 { diff --git a/arch/arm/boot/dts/k2hk.dtsi b/arch/arm/boot/dts/k2hk.dtsi index ae6472407b22..d0810a5f2968 100644 --- a/arch/arm/boot/dts/k2hk.dtsi +++ b/arch/arm/boot/dts/k2hk.dtsi @@ -98,6 +98,17 @@ #gpio-cells = <2>; gpio,syscon-dev = <&devctrl 0x25c>; }; + + mdio: mdio@02090300 { + compatible = "ti,keystone_mdio", "ti,davinci_mdio"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x02090300 0x100>; + status = "disabled"; + clocks = <&clkcpgmac>; + clock-names = "fck"; + bus_freq = <2500000>; + }; /include/ "k2hk-netcp.dtsi" }; }; diff --git a/arch/arm/boot/dts/k2l-clocks.dtsi b/arch/arm/boot/dts/k2l-clocks.dtsi index eb1e3e29f073..ef8464bb11ff 100644 --- a/arch/arm/boot/dts/k2l-clocks.dtsi +++ b/arch/arm/boot/dts/k2l-clocks.dtsi @@ -22,9 +22,8 @@ clocks { #clock-cells = <0>; compatible = "ti,keystone,main-pll-clock"; clocks = <&refclksys>; - reg = <0x02620350 4>, <0x02310110 4>; - reg-names = "control", "multiplier"; - fixed-postdiv = <2>; + reg = <0x02620350 4>, <0x02310110 4>, <0x02310108 4>; + reg-names = "control", "multiplier", "post-divider"; }; papllclk: papllclk@2620358 { diff --git a/arch/arm/boot/dts/k2l.dtsi b/arch/arm/boot/dts/k2l.dtsi index 0e007483615e..49fd414f680c 100644 --- a/arch/arm/boot/dts/k2l.dtsi +++ b/arch/arm/boot/dts/k2l.dtsi @@ -29,7 +29,6 @@ }; soc { - /include/ "k2l-clocks.dtsi" uart2: serial@02348400 { @@ -79,6 +78,17 @@ #gpio-cells = <2>; gpio,syscon-dev = <&devctrl 0x24c>; }; + + mdio: mdio@26200f00 { + compatible = "ti,keystone_mdio", "ti,davinci_mdio"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x26200f00 0x100>; + status = "disabled"; + clocks = <&clkcpgmac>; + clock-names = "fck"; + bus_freq = <2500000>; + }; /include/ "k2l-netcp.dtsi" }; }; @@ -96,7 +106,3 @@ /* Pin muxed. Enabled and configured by Bootloader */ status = "disabled"; }; - -&mdio { - reg = <0x26200f00 0x100>; -}; diff --git a/arch/arm/boot/dts/keystone.dtsi b/arch/arm/boot/dts/keystone.dtsi index e7a6f6deabb6..72816d65f7ec 100644 --- a/arch/arm/boot/dts/keystone.dtsi +++ b/arch/arm/boot/dts/keystone.dtsi @@ -267,17 +267,6 @@ 1 0 0x21000A00 0x00000100>; }; - mdio: mdio@02090300 { - compatible = "ti,keystone_mdio", "ti,davinci_mdio"; - #address-cells = <1>; - #size-cells = <0>; - reg = <0x02090300 0x100>; - status = "disabled"; - clocks = <&clkpa>; - clock-names = "fck"; - bus_freq = <2500000>; - }; - kirq0: keystone_irq@26202a0 { compatible = "ti,keystone-irq"; interrupts = <GIC_SPI 4 IRQ_TYPE_EDGE_RISING>; diff --git a/arch/arm/boot/dts/omap2430.dtsi b/arch/arm/boot/dts/omap2430.dtsi index 11a7963be003..2390f387c271 100644 --- a/arch/arm/boot/dts/omap2430.dtsi +++ b/arch/arm/boot/dts/omap2430.dtsi @@ -51,7 +51,8 @@ }; scm_conf: scm_conf@270 { - compatible = "syscon"; + compatible = "syscon", + "simple-bus"; reg = <0x270 0x240>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi index 7d31c6ff246f..abc4473e6f8a 100644 --- a/arch/arm/boot/dts/omap4.dtsi +++ b/arch/arm/boot/dts/omap4.dtsi @@ -191,7 +191,8 @@ }; omap4_padconf_global: omap4_padconf_global@5a0 { - compatible = "syscon"; + compatible = "syscon", + "simple-bus"; reg = <0x5a0 0x170>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index c8fd648a7108..b1a1263e6001 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -180,7 +180,8 @@ }; omap5_padconf_global: omap5_padconf_global@5a0 { - compatible = "syscon"; + compatible = "syscon", + "simple-bus"; reg = <0x5a0 0xec>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/qcom-msm8974-sony-xperia-honami.dts b/arch/arm/boot/dts/qcom-msm8974-sony-xperia-honami.dts index bd35b0674ff6..9bc72a3356e4 100644 --- a/arch/arm/boot/dts/qcom-msm8974-sony-xperia-honami.dts +++ b/arch/arm/boot/dts/qcom-msm8974-sony-xperia-honami.dts @@ -17,3 +17,13 @@ status = "ok"; }; }; + +&spmi_bus { + pm8941@0 { + coincell@2800 { + status = "ok"; + qcom,rset-ohms = <2100>; + qcom,vset-millivolts = <3000>; + }; + }; +}; diff --git a/arch/arm/boot/dts/qcom-pm8941.dtsi b/arch/arm/boot/dts/qcom-pm8941.dtsi index aa774e685018..968f1043d4f5 100644 --- a/arch/arm/boot/dts/qcom-pm8941.dtsi +++ b/arch/arm/boot/dts/qcom-pm8941.dtsi @@ -125,6 +125,12 @@ interrupts = <0x0 0x36 0x0 IRQ_TYPE_EDGE_RISING>; qcom,external-resistor-micro-ohms = <10000>; }; + + coincell@2800 { + compatible = "qcom,pm8941-coincell"; + reg = <0x2800>; + status = "disabled"; + }; }; usid1: pm8941@1 { diff --git a/arch/arm/boot/dts/ste-dbx5x0.dtsi b/arch/arm/boot/dts/ste-dbx5x0.dtsi index a75f3289e653..b8f81fb418ce 100644 --- a/arch/arm/boot/dts/ste-dbx5x0.dtsi +++ b/arch/arm/boot/dts/ste-dbx5x0.dtsi @@ -15,6 +15,33 @@ #include "skeleton.dtsi" / { + cpus { + #address-cells = <1>; + #size-cells = <0>; + enable-method = "ste,dbx500-smp"; + + cpu-map { + cluster0 { + core0 { + cpu = <&CPU0>; + }; + core1 { + cpu = <&CPU1>; + }; + }; + }; + CPU0: cpu@300 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <0x300>; + }; + CPU1: cpu@301 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <0x301>; + }; + }; + soc { #address-cells = <1>; #size-cells = <1>; @@ -22,32 +49,6 @@ interrupt-parent = <&intc>; ranges; - cpus { - #address-cells = <1>; - #size-cells = <0>; - - cpu-map { - cluster0 { - core0 { - cpu = <&CPU0>; - }; - core1 { - cpu = <&CPU1>; - }; - }; - }; - CPU0: cpu@0 { - device_type = "cpu"; - compatible = "arm,cortex-a9"; - reg = <0>; - }; - CPU1: cpu@1 { - device_type = "cpu"; - compatible = "arm,cortex-a9"; - reg = <1>; - }; - }; - ptm@801ae000 { compatible = "arm,coresight-etm3x", "arm,primecell"; reg = <0x801ae000 0x1000>; diff --git a/arch/arm/boot/dts/ste-nomadik-nhk15.dts b/arch/arm/boot/dts/ste-nomadik-nhk15.dts index 3d0b8755caee..3d25dba143a5 100644 --- a/arch/arm/boot/dts/ste-nomadik-nhk15.dts +++ b/arch/arm/boot/dts/ste-nomadik-nhk15.dts @@ -17,6 +17,7 @@ }; aliases { + serial1 = &uart1; stmpe-i2c0 = &stmpe0; stmpe-i2c1 = &stmpe1; }; diff --git a/arch/arm/boot/dts/ste-nomadik-s8815.dts b/arch/arm/boot/dts/ste-nomadik-s8815.dts index 85d3b95dfdba..3c140d05f796 100644 --- a/arch/arm/boot/dts/ste-nomadik-s8815.dts +++ b/arch/arm/boot/dts/ste-nomadik-s8815.dts @@ -15,6 +15,10 @@ bootargs = "root=/dev/ram0 console=ttyAMA1,115200n8 earlyprintk"; }; + aliases { + serial1 = &uart1; + }; + src@101e0000 { /* These chrystal drivers are not used on this board */ disable-sxtalo; diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi index 9a5f2ba139b7..ef794a33b4dc 100644 --- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi +++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi @@ -757,6 +757,7 @@ clock-names = "uartclk", "apb_pclk"; pinctrl-names = "default"; pinctrl-0 = <&uart0_default_mux>; + status = "disabled"; }; uart1: uart@101fb000 { diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 92828a1dec80..b48dd4f37f80 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -61,6 +61,7 @@ work_pending: movlt scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE) ldmia sp, {r0 - r6} @ have to reload r0 - r6 b local_restart @ ... and off we go +ENDPROC(ret_fast_syscall) /* * "slow" syscall return path. "why" tells us if this was a real syscall. diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index bd755d97e459..29e2991465cb 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -399,6 +399,9 @@ ENTRY(secondary_startup) sub lr, r4, r5 @ mmu has been enabled add r3, r7, lr ldrd r4, [r3, #0] @ get secondary_data.pgdir +ARM_BE8(eor r4, r4, r5) @ Swap r5 and r4 in BE: +ARM_BE8(eor r5, r4, r5) @ it can be done in 3 steps +ARM_BE8(eor r4, r4, r5) @ without using a temp reg. ldr r8, [r3, #8] @ get secondary_data.swapper_pg_dir badr lr, __enable_mmu @ return address mov r13, r12 @ __secondary_switched address diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index efe17dd9b921..54a5aeab988d 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -296,7 +296,6 @@ static bool tk_is_cntvct(const struct timekeeper *tk) */ void update_vsyscall(struct timekeeper *tk) { - struct timespec xtime_coarse; struct timespec64 *wtm = &tk->wall_to_monotonic; if (!cntvct_ok) { @@ -308,10 +307,10 @@ void update_vsyscall(struct timekeeper *tk) vdso_write_begin(vdso_data); - xtime_coarse = __current_kernel_time(); vdso_data->tk_is_cntvct = tk_is_cntvct(tk); - vdso_data->xtime_coarse_sec = xtime_coarse.tv_sec; - vdso_data->xtime_coarse_nsec = xtime_coarse.tv_nsec; + vdso_data->xtime_coarse_sec = tk->xtime_sec; + vdso_data->xtime_coarse_nsec = (u32)(tk->tkr_mono.xtime_nsec >> + tk->tkr_mono.shift); vdso_data->wtm_clock_sec = wtm->tv_sec; vdso_data->wtm_clock_nsec = wtm->tv_nsec; diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c index 3e58d710013c..4b39af2dfda9 100644 --- a/arch/arm/lib/uaccess_with_memcpy.c +++ b/arch/arm/lib/uaccess_with_memcpy.c @@ -96,7 +96,7 @@ __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n) } /* the mmap semaphore is taken only if not in an atomic context */ - atomic = in_atomic(); + atomic = faulthandler_disabled(); if (!atomic) down_read(¤t->mm->mmap_sem); diff --git a/arch/arm/mach-exynos/pm_domains.c b/arch/arm/mach-exynos/pm_domains.c index 6001f1c9d136..4a87e86dec45 100644 --- a/arch/arm/mach-exynos/pm_domains.c +++ b/arch/arm/mach-exynos/pm_domains.c @@ -146,9 +146,8 @@ static __init int exynos4_pm_init_power_domain(void) pd->base = of_iomap(np, 0); if (!pd->base) { pr_warn("%s: failed to map memory\n", __func__); - kfree(pd->pd.name); + kfree_const(pd->pd.name); kfree(pd); - of_node_put(np); continue; } diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c index 8e52621b5a6b..e1d2e991d17a 100644 --- a/arch/arm/mach-omap2/omap-wakeupgen.c +++ b/arch/arm/mach-omap2/omap-wakeupgen.c @@ -392,6 +392,7 @@ static struct irq_chip wakeupgen_chip = { .irq_mask = wakeupgen_mask, .irq_unmask = wakeupgen_unmask, .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_set_type = irq_chip_set_type_parent, .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND, #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index d78c12e7cb5e..486cc4ded190 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -2373,6 +2373,9 @@ static int of_dev_hwmod_lookup(struct device_node *np, * registers. This address is needed early so the OCP registers that * are part of the device's address space can be ioremapped properly. * + * If SYSC access is not needed, the registers will not be remapped + * and non-availability of MPU access is not treated as an error. + * * Returns 0 on success, -EINVAL if an invalid hwmod is passed, and * -ENXIO on absent or invalid register target address space. */ @@ -2387,6 +2390,11 @@ static int __init _init_mpu_rt_base(struct omap_hwmod *oh, void *data, _save_mpu_port_index(oh); + /* if we don't need sysc access we don't need to ioremap */ + if (!oh->class->sysc) + return 0; + + /* we can't continue without MPU PORT if we need sysc access */ if (oh->_int_flags & _HWMOD_NO_MPU_PORT) return -ENXIO; @@ -2396,8 +2404,10 @@ static int __init _init_mpu_rt_base(struct omap_hwmod *oh, void *data, oh->name); /* Extract the IO space from device tree blob */ - if (!np) + if (!np) { + pr_err("omap_hwmod: %s: no dt node\n", oh->name); return -ENXIO; + } va_start = of_iomap(np, index + oh->mpu_rt_idx); } else { @@ -2456,13 +2466,11 @@ static int __init _init(struct omap_hwmod *oh, void *data) oh->name, np->name); } - if (oh->class->sysc) { - r = _init_mpu_rt_base(oh, NULL, index, np); - if (r < 0) { - WARN(1, "omap_hwmod: %s: doesn't have mpu register target base\n", - oh->name); - return 0; - } + r = _init_mpu_rt_base(oh, NULL, index, np); + if (r < 0) { + WARN(1, "omap_hwmod: %s: doesn't have mpu register target base\n", + oh->name); + return 0; } r = _init_clocks(oh, NULL); diff --git a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c index 2606c6608bd8..562247bced49 100644 --- a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c @@ -827,8 +827,7 @@ static struct omap_hwmod_class_sysconfig dra7xx_gpmc_sysc = { .syss_offs = 0x0014, .sysc_flags = (SYSC_HAS_AUTOIDLE | SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET | SYSS_HAS_RESET_STATUS), - .idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART | - SIDLE_SMART_WKUP), + .idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART), .sysc_fields = &omap_hwmod_sysc_type1, }; @@ -844,7 +843,7 @@ static struct omap_hwmod dra7xx_gpmc_hwmod = { .class = &dra7xx_gpmc_hwmod_class, .clkdm_name = "l3main1_clkdm", /* Skip reset for CONFIG_OMAP_GPMC_DEBUG for bootloader timings */ - .flags = HWMOD_SWSUP_SIDLE | DEBUG_OMAP_GPMC_HWMOD_FLAGS, + .flags = DEBUG_OMAP_GPMC_HWMOD_FLAGS, .main_clk = "l3_iclk_div", .prcm = { .omap4 = { diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 9d259d94e429..1160434eece0 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -14,7 +14,7 @@ VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 VDSO_LDFLAGS += -nostdlib -shared VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--build-id) -VDSO_LDFLAGS += $(call cc-option, -fuse-ld=bfd) +VDSO_LDFLAGS += $(call cc-ldoption, -fuse-ld=bfd) obj-$(CONFIG_VDSO) += vdso.o extra-$(CONFIG_VDSO) += vdso.lds diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi index 0689c3fb56e3..58093edeea2e 100644 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi @@ -823,7 +823,7 @@ device_type = "dma"; reg = <0x0 0x1f270000 0x0 0x10000>, <0x0 0x1f200000 0x0 0x10000>, - <0x0 0x1b008000 0x0 0x2000>, + <0x0 0x1b000000 0x0 0x400000>, <0x0 0x1054a000 0x0 0x100>; interrupts = <0x0 0x82 0x4>, <0x0 0xb8 0x4>, diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 9d4aa18f2a82..e8ca6eaedd02 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -122,12 +122,12 @@ static int __init uefi_init(void) /* Show what we know for posterity */ c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor), - sizeof(vendor)); + sizeof(vendor) * sizeof(efi_char16_t)); if (c16) { for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i) vendor[i] = c16[i]; vendor[i] = '\0'; - early_memunmap(c16, sizeof(vendor)); + early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t)); } pr_info("EFI v%u.%.02u by %s\n", diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 1670f15ef69e..948f0ad2de23 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -168,7 +168,8 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) * Other callers might not initialize the si_lsb field, * so check explicitely for the right codes here. */ - if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO) + if (from->si_signo == SIGBUS && + (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)) err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb); #endif break; @@ -201,8 +202,6 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) { - memset(to, 0, sizeof *to); - if (copy_from_user(to, from, __ARCH_SI_PREAMBLE_SIZE) || copy_from_user(to->_sifields._pad, from->_sifields._pad, SI_PAD_SIZE)) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index ec37ab3f524f..97bc68f4c689 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -199,16 +199,15 @@ up_fail: */ void update_vsyscall(struct timekeeper *tk) { - struct timespec xtime_coarse; u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter"); ++vdso_data->tb_seq_count; smp_wmb(); - xtime_coarse = __current_kernel_time(); vdso_data->use_syscall = use_syscall; - vdso_data->xtime_coarse_sec = xtime_coarse.tv_sec; - vdso_data->xtime_coarse_nsec = xtime_coarse.tv_nsec; + vdso_data->xtime_coarse_sec = tk->xtime_sec; + vdso_data->xtime_coarse_nsec = tk->tkr_mono.xtime_nsec >> + tk->tkr_mono.shift; vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec; vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index f02530e726f6..85c57158dcd9 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -168,8 +168,8 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) { if (!(vcpu->arch.hcr_el2 & HCR_RW)) inject_abt32(vcpu, false, addr); - - inject_abt64(vcpu, false, addr); + else + inject_abt64(vcpu, false, addr); } /** @@ -184,8 +184,8 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) { if (!(vcpu->arch.hcr_el2 & HCR_RW)) inject_abt32(vcpu, true, addr); - - inject_abt64(vcpu, true, addr); + else + inject_abt64(vcpu, true, addr); } /** @@ -198,6 +198,6 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu) { if (!(vcpu->arch.hcr_el2 & HCR_RW)) inject_undef32(vcpu); - - inject_undef64(vcpu); + else + inject_undef64(vcpu); } diff --git a/arch/avr32/mach-at32ap/clock.c b/arch/avr32/mach-at32ap/clock.c index 23b1a97fae7a..52c179bec0cc 100644 --- a/arch/avr32/mach-at32ap/clock.c +++ b/arch/avr32/mach-at32ap/clock.c @@ -80,6 +80,9 @@ int clk_enable(struct clk *clk) { unsigned long flags; + if (!clk) + return 0; + spin_lock_irqsave(&clk_lock, flags); __clk_enable(clk); spin_unlock_irqrestore(&clk_lock, flags); @@ -106,6 +109,9 @@ void clk_disable(struct clk *clk) { unsigned long flags; + if (IS_ERR_OR_NULL(clk)) + return; + spin_lock_irqsave(&clk_lock, flags); __clk_disable(clk); spin_unlock_irqrestore(&clk_lock, flags); @@ -117,6 +123,9 @@ unsigned long clk_get_rate(struct clk *clk) unsigned long flags; unsigned long rate; + if (!clk) + return 0; + spin_lock_irqsave(&clk_lock, flags); rate = clk->get_rate(clk); spin_unlock_irqrestore(&clk_lock, flags); @@ -129,6 +138,9 @@ long clk_round_rate(struct clk *clk, unsigned long rate) { unsigned long flags, actual_rate; + if (!clk) + return 0; + if (!clk->set_rate) return -ENOSYS; @@ -145,6 +157,9 @@ int clk_set_rate(struct clk *clk, unsigned long rate) unsigned long flags; long ret; + if (!clk) + return 0; + if (!clk->set_rate) return -ENOSYS; @@ -161,6 +176,9 @@ int clk_set_parent(struct clk *clk, struct clk *parent) unsigned long flags; int ret; + if (!clk) + return 0; + if (!clk->set_parent) return -ENOSYS; @@ -174,7 +192,7 @@ EXPORT_SYMBOL(clk_set_parent); struct clk *clk_get_parent(struct clk *clk) { - return clk->parent; + return !clk ? NULL : clk->parent; } EXPORT_SYMBOL(clk_get_parent); diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index cee5f93e5712..199a8357838c 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -151,7 +151,6 @@ config BMIPS_GENERIC select BCM7120_L2_IRQ select BRCMSTB_L2_IRQ select IRQ_MIPS_CPU - select RAW_IRQ_ACCESSORS select DMA_NONCOHERENT select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_LITTLE_ENDIAN diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index 01a644f174dd..1ba21204ebe0 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -190,6 +190,7 @@ int get_c0_perfcount_int(void) { return ATH79_MISC_IRQ(5); } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 56f5d080ef9d..b7fa9ae28c36 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -42,7 +42,7 @@ static irqreturn_t mailbox_interrupt(int irq, void *dev_id) cvmx_write_csr(CVMX_CIU_MBOX_CLRX(coreid), action); if (action & SMP_CALL_FUNCTION) - smp_call_function_interrupt(); + generic_smp_call_function_interrupt(); if (action & SMP_RESCHEDULE_YOURSELF) scheduler_ipi(); diff --git a/arch/mips/include/asm/mach-bcm63xx/dma-coherence.h b/arch/mips/include/asm/mach-bcm63xx/dma-coherence.h deleted file mode 100644 index 11d3b572b1b3..000000000000 --- a/arch/mips/include/asm/mach-bcm63xx/dma-coherence.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef __ASM_MACH_BCM63XX_DMA_COHERENCE_H -#define __ASM_MACH_BCM63XX_DMA_COHERENCE_H - -#include <asm/bmips.h> - -#define plat_post_dma_flush bmips_post_dma_flush - -#include <asm/mach-generic/dma-coherence.h> - -#endif /* __ASM_MACH_BCM63XX_DMA_COHERENCE_H */ diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 9d8106758142..ae8569475264 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -182,8 +182,39 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) * Make sure the buddy is global too (if it's !none, * it better already be global) */ +#ifdef CONFIG_SMP + /* + * For SMP, multiple CPUs can race, so we need to do + * this atomically. + */ +#ifdef CONFIG_64BIT +#define LL_INSN "lld" +#define SC_INSN "scd" +#else /* CONFIG_32BIT */ +#define LL_INSN "ll" +#define SC_INSN "sc" +#endif + unsigned long page_global = _PAGE_GLOBAL; + unsigned long tmp; + + __asm__ __volatile__ ( + " .set push\n" + " .set noreorder\n" + "1: " LL_INSN " %[tmp], %[buddy]\n" + " bnez %[tmp], 2f\n" + " or %[tmp], %[tmp], %[global]\n" + " " SC_INSN " %[tmp], %[buddy]\n" + " beqz %[tmp], 1b\n" + " nop\n" + "2:\n" + " .set pop" + : [buddy] "+m" (buddy->pte), + [tmp] "=&r" (tmp) + : [global] "r" (page_global)); +#else /* !CONFIG_SMP */ if (pte_none(*buddy)) pte_val(*buddy) = pte_val(*buddy) | _PAGE_GLOBAL; +#endif /* CONFIG_SMP */ } #endif } diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index 16f1ea9ab191..03722d4326a1 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -83,8 +83,6 @@ static inline void __cpu_die(unsigned int cpu) extern void play_dead(void); #endif -extern asmlinkage void smp_call_function_interrupt(void); - static inline void arch_send_call_function_single_ipi(int cpu) { extern struct plat_smp_ops *mp_ops; /* private */ diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h index 28d6d9364bd1..a71da576883c 100644 --- a/arch/mips/include/asm/stackframe.h +++ b/arch/mips/include/asm/stackframe.h @@ -152,6 +152,31 @@ .set noreorder bltz k0, 8f move k1, sp +#ifdef CONFIG_EVA + /* + * Flush interAptiv's Return Prediction Stack (RPS) by writing + * EntryHi. Toggling Config7.RPS is slower and less portable. + * + * The RPS isn't automatically flushed when exceptions are + * taken, which can result in kernel mode speculative accesses + * to user addresses if the RPS mispredicts. That's harmless + * when user and kernel share the same address space, but with + * EVA the same user segments may be unmapped to kernel mode, + * even containing sensitive MMIO regions or invalid memory. + * + * This can happen when the kernel sets the return address to + * ret_from_* and jr's to the exception handler, which looks + * more like a tail call than a function call. If nested calls + * don't evict the last user address in the RPS, it will + * mispredict the return and fetch from a user controlled + * address into the icache. + * + * More recent EVA-capable cores with MAAR to restrict + * speculative accesses aren't affected. + */ + MFC0 k0, CP0_ENTRYHI + MTC0 k0, CP0_ENTRYHI +#endif .set reorder /* Called from user mode, new stack. */ get_saved_sp diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index af42e7003f12..baa7b6fc0a60 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -407,7 +407,7 @@ NESTED(nmi_handler, PT_SIZE, sp) .set noat SAVE_ALL FEXPORT(handle_\exception\ext) - __BUILD_clear_\clear + __build_clear_\clear .set at __BUILD_\verbose \exception move a0, sp diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c index 3e4491aa6d6b..789d7bf4fef3 100644 --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c @@ -154,7 +154,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned long __user *user_mask_ptr) { unsigned int real_len; - cpumask_t mask; + cpumask_t allowed, mask; int retval; struct task_struct *p; @@ -173,7 +173,8 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len, if (retval) goto out_unlock; - cpumask_and(&mask, &p->thread.user_cpus_allowed, cpu_possible_mask); + cpumask_or(&allowed, &p->thread.user_cpus_allowed, &p->cpus_allowed); + cpumask_and(&mask, &allowed, cpu_active_mask); out_unlock: read_unlock(&tasklist_lock); diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c index b130033838ba..5fcec3032f38 100644 --- a/arch/mips/kernel/prom.c +++ b/arch/mips/kernel/prom.c @@ -38,7 +38,7 @@ char *mips_get_machine_name(void) return mips_machine_name; } -#ifdef CONFIG_OF +#ifdef CONFIG_USE_OF void __init early_init_dt_add_memory_arch(u64 base, u64 size) { return add_memory_region(base, size, BOOT_MEM_RAM); diff --git a/arch/mips/kernel/relocate_kernel.S b/arch/mips/kernel/relocate_kernel.S index 74bab9ddd0e1..c6bbf2165051 100644 --- a/arch/mips/kernel/relocate_kernel.S +++ b/arch/mips/kernel/relocate_kernel.S @@ -24,7 +24,7 @@ LEAF(relocate_new_kernel) process_entry: PTR_L s2, (s0) - PTR_ADD s0, s0, SZREG + PTR_ADDIU s0, s0, SZREG /* * In case of a kdump/crash kernel, the indirection page is not @@ -61,9 +61,9 @@ copy_word: /* copy page word by word */ REG_L s5, (s2) REG_S s5, (s4) - PTR_ADD s4, s4, SZREG - PTR_ADD s2, s2, SZREG - LONG_SUB s6, s6, 1 + PTR_ADDIU s4, s4, SZREG + PTR_ADDIU s2, s2, SZREG + LONG_ADDIU s6, s6, -1 beq s6, zero, process_entry b copy_word b process_entry diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index ad4d44635c76..a6f6b762c47a 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -80,7 +80,7 @@ syscall_trace_entry: SAVE_STATIC move s0, t2 move a0, sp - daddiu a1, v0, __NR_64_Linux + move a1, v0 jal syscall_trace_enter bltz v0, 2f # seccomp failed? Skip syscall diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 446cc654da56..4b2010654c46 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -72,7 +72,7 @@ n32_syscall_trace_entry: SAVE_STATIC move s0, t2 move a0, sp - daddiu a1, v0, __NR_N32_Linux + move a1, v0 jal syscall_trace_enter bltz v0, 2f # seccomp failed? Skip syscall diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c index 19a7705f2a01..5d7f2634996f 100644 --- a/arch/mips/kernel/signal32.c +++ b/arch/mips/kernel/signal32.c @@ -409,8 +409,6 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) { - memset(to, 0, sizeof *to); - if (copy_from_user(to, from, 3*sizeof(int)) || copy_from_user(to->_sifields._pad, from->_sifields._pad, SI_PAD_SIZE32)) diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 336708ae5c5b..78cf8c2f1de0 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -284,7 +284,7 @@ static irqreturn_t bmips5000_ipi_interrupt(int irq, void *dev_id) if (action == 0) scheduler_ipi(); else - smp_call_function_interrupt(); + generic_smp_call_function_interrupt(); return IRQ_HANDLED; } @@ -336,7 +336,7 @@ static irqreturn_t bmips43xx_ipi_interrupt(int irq, void *dev_id) if (action & SMP_RESCHEDULE_YOURSELF) scheduler_ipi(); if (action & SMP_CALL_FUNCTION) - smp_call_function_interrupt(); + generic_smp_call_function_interrupt(); return IRQ_HANDLED; } diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index d0744cc77ea7..a31896c33716 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -192,16 +192,6 @@ asmlinkage void start_secondary(void) cpu_startup_entry(CPUHP_ONLINE); } -/* - * Call into both interrupt handlers, as we share the IPI for them - */ -void __irq_entry smp_call_function_interrupt(void) -{ - irq_enter(); - generic_smp_call_function_interrupt(); - irq_exit(); -} - static void stop_this_cpu(void *dummy) { /* diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index e207a43b5f8f..8ea28e6ab37d 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -192,6 +192,7 @@ static void show_stacktrace(struct task_struct *task, void show_stack(struct task_struct *task, unsigned long *sp) { struct pt_regs regs; + mm_segment_t old_fs = get_fs(); if (sp) { regs.regs[29] = (unsigned long)sp; regs.regs[31] = 0; @@ -210,7 +211,13 @@ void show_stack(struct task_struct *task, unsigned long *sp) prepare_frametrace(®s); } } + /* + * show_stack() deals exclusively with kernel mode, so be sure to access + * the stack in the kernel (not user) address space. + */ + set_fs(KERNEL_DS); show_stacktrace(task, ®s); + set_fs(old_fs); } static void show_code(unsigned int __user *pc) @@ -1519,6 +1526,7 @@ asmlinkage void do_mcheck(struct pt_regs *regs) const int field = 2 * sizeof(unsigned long); int multi_match = regs->cp0_status & ST0_TS; enum ctx_state prev_state; + mm_segment_t old_fs = get_fs(); prev_state = exception_enter(); show_regs(regs); @@ -1540,8 +1548,13 @@ asmlinkage void do_mcheck(struct pt_regs *regs) dump_tlb_all(); } + if (!user_mode(regs)) + set_fs(KERNEL_DS); + show_code((unsigned int __user *) regs->cp0_epc); + set_fs(old_fs); + /* * Some chips may have other causes of machine check (e.g. SB1 * graduation timer) diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index af84bef0c90d..eb3efd137fd1 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -438,7 +438,7 @@ do { \ : "memory"); \ } while(0) -#define StoreDW(addr, value, res) \ +#define _StoreDW(addr, value, res) \ do { \ __asm__ __volatile__ ( \ ".set\tpush\n\t" \ diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c index 6ab10573490d..2c218c3bbca5 100644 --- a/arch/mips/lantiq/irq.c +++ b/arch/mips/lantiq/irq.c @@ -293,7 +293,7 @@ static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id) static irqreturn_t ipi_call_interrupt(int irq, void *dev_id) { - smp_call_function_interrupt(); + generic_smp_call_function_interrupt(); return IRQ_HANDLED; } @@ -466,6 +466,7 @@ int get_c0_perfcount_int(void) { return ltq_perfcount_irq; } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c index 509877c6e9d9..1a4738a8f2d3 100644 --- a/arch/mips/loongson64/loongson-3/smp.c +++ b/arch/mips/loongson64/loongson-3/smp.c @@ -266,8 +266,11 @@ void loongson3_ipi_interrupt(struct pt_regs *regs) if (action & SMP_RESCHEDULE_YOURSELF) scheduler_ipi(); - if (action & SMP_CALL_FUNCTION) - smp_call_function_interrupt(); + if (action & SMP_CALL_FUNCTION) { + irq_enter(); + generic_smp_call_function_interrupt(); + irq_exit(); + } if (action & SMP_ASK_C0COUNT) { BUG_ON(cpu != 0); diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index 77d96db8253c..aab218c36e0d 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -160,18 +160,18 @@ static inline void setup_protection_map(void) protection_map[1] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC); protection_map[2] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); protection_map[3] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC); - protection_map[4] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_READ); + protection_map[4] = __pgprot(_page_cachable_default | _PAGE_PRESENT); protection_map[5] = __pgprot(_page_cachable_default | _PAGE_PRESENT); - protection_map[6] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_READ); + protection_map[6] = __pgprot(_page_cachable_default | _PAGE_PRESENT); protection_map[7] = __pgprot(_page_cachable_default | _PAGE_PRESENT); protection_map[8] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); protection_map[9] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC); protection_map[10] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE | _PAGE_NO_READ); protection_map[11] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE); - protection_map[12] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_READ); + protection_map[12] = __pgprot(_page_cachable_default | _PAGE_PRESENT); protection_map[13] = __pgprot(_page_cachable_default | _PAGE_PRESENT); - protection_map[14] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_WRITE | _PAGE_NO_READ); + protection_map[14] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_WRITE); protection_map[15] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_WRITE); } else { diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 36c0f26fac6b..852a41c6da45 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -133,7 +133,8 @@ good_area: #endif goto bad_area; } - if (!(vma->vm_flags & VM_READ)) { + if (!(vma->vm_flags & VM_READ) && + exception_epc(regs) != address) { #if 0 pr_notice("Cpu%d[%s:%d:%0*lx:%ld:%0*lx] RI violation\n", raw_smp_processor_id(), diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c index d1392f8f5811..fa8f591f3713 100644 --- a/arch/mips/mti-malta/malta-int.c +++ b/arch/mips/mti-malta/malta-int.c @@ -222,7 +222,7 @@ static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id) static irqreturn_t ipi_call_interrupt(int irq, void *dev_id) { - smp_call_function_interrupt(); + generic_smp_call_function_interrupt(); return IRQ_HANDLED; } diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index 5625b190edc0..b7bf721eabf5 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -154,6 +154,7 @@ int get_c0_perfcount_int(void) return mips_cpu_perf_irq; } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { @@ -171,14 +172,17 @@ unsigned int get_c0_compare_int(void) static void __init init_rtc(void) { - /* stop the clock whilst setting it up */ - CMOS_WRITE(RTC_SET | RTC_24H, RTC_CONTROL); + unsigned char freq, ctrl; - /* 32KHz time base */ - CMOS_WRITE(RTC_REF_CLCK_32KHZ, RTC_FREQ_SELECT); + /* Set 32KHz time base if not already set */ + freq = CMOS_READ(RTC_FREQ_SELECT); + if ((freq & RTC_DIV_CTL) != RTC_REF_CLCK_32KHZ) + CMOS_WRITE(RTC_REF_CLCK_32KHZ, RTC_FREQ_SELECT); - /* start the clock */ - CMOS_WRITE(RTC_24H, RTC_CONTROL); + /* Ensure SET bit is clear so RTC can run */ + ctrl = CMOS_READ(RTC_CONTROL); + if (ctrl & RTC_SET) + CMOS_WRITE(ctrl & ~RTC_SET, RTC_CONTROL); } void __init plat_time_init(void) diff --git a/arch/mips/mti-sead3/sead3-time.c b/arch/mips/mti-sead3/sead3-time.c index e1d69895fb1d..a120b7a5a8fe 100644 --- a/arch/mips/mti-sead3/sead3-time.c +++ b/arch/mips/mti-sead3/sead3-time.c @@ -77,6 +77,7 @@ int get_c0_perfcount_int(void) return MIPS_CPU_IRQ_BASE + cp0_perfcount_irq; return -1; } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { diff --git a/arch/mips/netlogic/common/smp.c b/arch/mips/netlogic/common/smp.c index dc3e327fbbac..f5fff228b347 100644 --- a/arch/mips/netlogic/common/smp.c +++ b/arch/mips/netlogic/common/smp.c @@ -86,7 +86,7 @@ void nlm_smp_function_ipi_handler(unsigned int irq, struct irq_desc *desc) { clear_c0_eimr(irq); ack_c0_eirr(irq); - smp_call_function_interrupt(); + generic_smp_call_function_interrupt(); set_c0_eimr(irq); } diff --git a/arch/mips/paravirt/paravirt-smp.c b/arch/mips/paravirt/paravirt-smp.c index 42181c7105df..f8d3e081b2eb 100644 --- a/arch/mips/paravirt/paravirt-smp.c +++ b/arch/mips/paravirt/paravirt-smp.c @@ -114,7 +114,7 @@ static irqreturn_t paravirt_reched_interrupt(int irq, void *dev_id) static irqreturn_t paravirt_function_interrupt(int irq, void *dev_id) { - smp_call_function_interrupt(); + generic_smp_call_function_interrupt(); return IRQ_HANDLED; } diff --git a/arch/mips/pistachio/time.c b/arch/mips/pistachio/time.c index 7c73fcb92a10..8a377346f0ca 100644 --- a/arch/mips/pistachio/time.c +++ b/arch/mips/pistachio/time.c @@ -26,6 +26,7 @@ int get_c0_perfcount_int(void) { return gic_get_c0_perfcount_int(); } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); int get_c0_fdc_int(void) { diff --git a/arch/mips/pmcs-msp71xx/msp_smp.c b/arch/mips/pmcs-msp71xx/msp_smp.c index 10170580a2de..ffa0f7101a97 100644 --- a/arch/mips/pmcs-msp71xx/msp_smp.c +++ b/arch/mips/pmcs-msp71xx/msp_smp.c @@ -44,7 +44,7 @@ static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id) static irqreturn_t ipi_call_interrupt(int irq, void *dev_id) { - smp_call_function_interrupt(); + generic_smp_call_function_interrupt(); return IRQ_HANDLED; } diff --git a/arch/mips/ralink/irq.c b/arch/mips/ralink/irq.c index 53707aacc0f8..8c624a8b9ea2 100644 --- a/arch/mips/ralink/irq.c +++ b/arch/mips/ralink/irq.c @@ -89,6 +89,7 @@ int get_c0_perfcount_int(void) { return rt_perfcount_irq; } +EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c index 3fbaef97a1b8..16ec4e12daa3 100644 --- a/arch/mips/sgi-ip27/ip27-irq.c +++ b/arch/mips/sgi-ip27/ip27-irq.c @@ -107,10 +107,14 @@ static void ip27_do_irq_mask0(void) scheduler_ipi(); } else if (pend0 & (1UL << CPU_CALL_A_IRQ)) { LOCAL_HUB_CLR_INTR(CPU_CALL_A_IRQ); - smp_call_function_interrupt(); + irq_enter(); + generic_smp_call_function_interrupt(); + irq_exit(); } else if (pend0 & (1UL << CPU_CALL_B_IRQ)) { LOCAL_HUB_CLR_INTR(CPU_CALL_B_IRQ); - smp_call_function_interrupt(); + irq_enter(); + generic_smp_call_function_interrupt(); + irq_exit(); } else #endif { diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c index af7d44edd9a8..4c71aea25663 100644 --- a/arch/mips/sibyte/bcm1480/smp.c +++ b/arch/mips/sibyte/bcm1480/smp.c @@ -29,8 +29,6 @@ #include <asm/sibyte/bcm1480_regs.h> #include <asm/sibyte/bcm1480_int.h> -extern void smp_call_function_interrupt(void); - /* * These are routines for dealing with the bcm1480 smp capabilities * independent of board/firmware @@ -184,6 +182,9 @@ void bcm1480_mailbox_interrupt(void) if (action & SMP_RESCHEDULE_YOURSELF) scheduler_ipi(); - if (action & SMP_CALL_FUNCTION) - smp_call_function_interrupt(); + if (action & SMP_CALL_FUNCTION) { + irq_enter(); + generic_smp_call_function_interrupt(); + irq_exit(); + } } diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c index c0c4b3f88a08..1cf66f5ff23d 100644 --- a/arch/mips/sibyte/sb1250/smp.c +++ b/arch/mips/sibyte/sb1250/smp.c @@ -172,6 +172,9 @@ void sb1250_mailbox_interrupt(void) if (action & SMP_RESCHEDULE_YOURSELF) scheduler_ipi(); - if (action & SMP_CALL_FUNCTION) - smp_call_function_interrupt(); + if (action & SMP_CALL_FUNCTION) { + irq_enter(); + generic_smp_call_function_interrupt(); + irq_exit(); + } } diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 42e02a2d570b..efc3fa54c90b 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -191,6 +191,9 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, pci_device_add(dev, bus); + /* Setup MSI caps & disable MSI/MSI-X interrupts */ + pci_msi_setup_pci_dev(dev); + return dev; } EXPORT_SYMBOL(of_create_pci_dev); diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index d3a831ac0f92..da50e0c9c57e 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -966,8 +966,6 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *d, const siginfo_t *s) int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) { - memset(to, 0, sizeof *to); - if (copy_from_user(to, from, 3*sizeof(int)) || copy_from_user(to->_sifields._pad, from->_sifields._pad, SI_PAD_SIZE32)) diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 5cf5e6ea213b..7cf0df859d05 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1478,7 +1478,7 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) } /* Unmask the event */ - if (eeh_enabled()) + if (ret == EEH_NEXT_ERR_NONE && eeh_enabled()) enable_irq(eeh_event_irq); return ret; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 5738d315248b..85cbc96eff6c 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2220,7 +2220,7 @@ static void pnv_pci_ioda_setup_opal_tce_kill(struct pnv_phb *phb) static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift, unsigned levels, unsigned long limit, - unsigned long *current_offset) + unsigned long *current_offset, unsigned long *total_allocated) { struct page *tce_mem = NULL; __be64 *addr, *tmp; @@ -2236,6 +2236,7 @@ static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift, } addr = page_address(tce_mem); memset(addr, 0, allocated); + *total_allocated += allocated; --levels; if (!levels) { @@ -2245,7 +2246,7 @@ static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift, for (i = 0; i < entries; ++i) { tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift, - levels, limit, current_offset); + levels, limit, current_offset, total_allocated); if (!tmp) break; @@ -2267,7 +2268,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, struct iommu_table *tbl) { void *addr; - unsigned long offset = 0, level_shift; + unsigned long offset = 0, level_shift, total_allocated = 0; const unsigned window_shift = ilog2(window_size); unsigned entries_shift = window_shift - page_shift; unsigned table_shift = max_t(unsigned, entries_shift + 3, PAGE_SHIFT); @@ -2286,7 +2287,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, /* Allocate TCE table */ addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, - levels, tce_table_size, &offset); + levels, tce_table_size, &offset, &total_allocated); /* addr==NULL means that the first level allocation failed */ if (!addr) @@ -2308,7 +2309,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, page_shift); tbl->it_level_size = 1ULL << (level_shift - 3); tbl->it_indirect_levels = levels - 1; - tbl->it_allocated_size = offset; + tbl->it_allocated_size = total_allocated; pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n", window_size, tce_table_size, bus_offset); diff --git a/arch/s390/include/asm/etr.h b/arch/s390/include/asm/etr.h index 629b79a93165..f7e5c36688c3 100644 --- a/arch/s390/include/asm/etr.h +++ b/arch/s390/include/asm/etr.h @@ -214,6 +214,9 @@ static inline int etr_ptff(void *ptff_block, unsigned int func) void etr_switch_to_local(void); void etr_sync_check(void); +/* notifier for syncs */ +extern struct atomic_notifier_head s390_epoch_delta_notifier; + /* STP interruption parameter */ struct stp_irq_parm { unsigned int _pad0 : 14; diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 3024acbe1f9d..df4db81254d3 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -258,6 +258,9 @@ struct kvm_vcpu_stat { u32 diagnose_10; u32 diagnose_44; u32 diagnose_9c; + u32 diagnose_258; + u32 diagnose_308; + u32 diagnose_500; }; #define PGM_OPERATION 0x01 @@ -630,7 +633,6 @@ extern char sie_exit; static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_check_processor_compat(void *rtn) {} -static inline void kvm_arch_exit(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index bff5e3b6d822..8ba32436effe 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -138,6 +138,8 @@ int init_cache_level(unsigned int cpu) union cache_topology ct; enum cache_type ctype; + if (!test_facility(34)) + return -EOPNOTSUPP; if (!this_cpu_ci) return -EINVAL; ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 9e733d965e08..627887b075a7 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -58,6 +58,9 @@ EXPORT_SYMBOL_GPL(sched_clock_base_cc); static DEFINE_PER_CPU(struct clock_event_device, comparators); +ATOMIC_NOTIFIER_HEAD(s390_epoch_delta_notifier); +EXPORT_SYMBOL(s390_epoch_delta_notifier); + /* * Scheduler clock - returns current time in nanosec units. */ @@ -752,7 +755,7 @@ static void clock_sync_cpu(struct clock_sync_data *sync) static int etr_sync_clock(void *data) { static int first; - unsigned long long clock, old_clock, delay, delta; + unsigned long long clock, old_clock, clock_delta, delay, delta; struct clock_sync_data *etr_sync; struct etr_aib *sync_port, *aib; int port; @@ -789,6 +792,9 @@ static int etr_sync_clock(void *data) delay = (unsigned long long) (aib->edf2.etv - sync_port->edf2.etv) << 32; delta = adjust_time(old_clock, clock, delay); + clock_delta = clock - old_clock; + atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0, + &clock_delta); etr_sync->fixup_cc = delta; fixup_clock_comparator(delta); /* Verify that the clock is properly set. */ @@ -1526,7 +1532,7 @@ void stp_island_check(void) static int stp_sync_clock(void *data) { static int first; - unsigned long long old_clock, delta; + unsigned long long old_clock, delta, new_clock, clock_delta; struct clock_sync_data *stp_sync; int rc; @@ -1551,7 +1557,11 @@ static int stp_sync_clock(void *data) old_clock = get_tod_clock(); rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0); if (rc == 0) { - delta = adjust_time(old_clock, get_tod_clock(), 0); + new_clock = get_tod_clock(); + delta = adjust_time(old_clock, new_clock, 0); + clock_delta = new_clock - old_clock; + atomic_notifier_call_chain(&s390_epoch_delta_notifier, + 0, &clock_delta); fixup_clock_comparator(delta); rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index fc7ec95848c3..5fbfb88f8477 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -27,13 +27,13 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; + vcpu->stat.diagnose_10++; if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end || start < 2 * PAGE_SIZE) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end); - vcpu->stat.diagnose_10++; /* * We checked for start >= end above, so lets check for the @@ -75,6 +75,9 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu) u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4; u16 ry = (vcpu->arch.sie_block->ipa & 0x0f); + VCPU_EVENT(vcpu, 3, "diag page reference parameter block at 0x%llx", + vcpu->run->s.regs.gprs[rx]); + vcpu->stat.diagnose_258++; if (vcpu->run->s.regs.gprs[rx] & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm)); @@ -85,6 +88,9 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu) switch (parm.subcode) { case 0: /* TOKEN */ + VCPU_EVENT(vcpu, 3, "pageref token addr 0x%llx " + "select mask 0x%llx compare mask 0x%llx", + parm.token_addr, parm.select_mask, parm.compare_mask); if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) { /* * If the pagefault handshake is already activated, @@ -114,6 +120,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu) * the cancel, therefore to reduce code complexity, we assume * all outstanding tokens are already pending. */ + VCPU_EVENT(vcpu, 3, "pageref cancel addr 0x%llx", parm.token_addr); if (parm.token_addr || parm.select_mask || parm.compare_mask || parm.zarch) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -174,7 +181,8 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) unsigned int reg = vcpu->arch.sie_block->ipa & 0xf; unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff; - VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode); + VCPU_EVENT(vcpu, 3, "diag ipl functions, subcode %lx", subcode); + vcpu->stat.diagnose_308++; switch (subcode) { case 3: vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR; @@ -202,6 +210,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) { int ret; + vcpu->stat.diagnose_500++; /* No virtio-ccw notification? Get out quickly. */ if (!vcpu->kvm->arch.css_support || (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index e97b3455d7e6..47518a324d75 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -473,10 +473,45 @@ static void filter_guest_per_event(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->iprcc &= ~PGM_PER; } +#define pssec(vcpu) (vcpu->arch.sie_block->gcr[1] & _ASCE_SPACE_SWITCH) +#define hssec(vcpu) (vcpu->arch.sie_block->gcr[13] & _ASCE_SPACE_SWITCH) +#define old_ssec(vcpu) ((vcpu->arch.sie_block->tecmc >> 31) & 0x1) +#define old_as_is_home(vcpu) !(vcpu->arch.sie_block->tecmc & 0xffff) + void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu) { + int new_as; + if (debug_exit_required(vcpu)) vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING; filter_guest_per_event(vcpu); + + /* + * Only RP, SAC, SACF, PT, PTI, PR, PC instructions can trigger + * a space-switch event. PER events enforce space-switch events + * for these instructions. So if no PER event for the guest is left, + * we might have to filter the space-switch element out, too. + */ + if (vcpu->arch.sie_block->iprcc == PGM_SPACE_SWITCH) { + vcpu->arch.sie_block->iprcc = 0; + new_as = psw_bits(vcpu->arch.sie_block->gpsw).as; + + /* + * If the AS changed from / to home, we had RP, SAC or SACF + * instruction. Check primary and home space-switch-event + * controls. (theoretically home -> home produced no event) + */ + if (((new_as == PSW_AS_HOME) ^ old_as_is_home(vcpu)) && + (pssec(vcpu) || hssec(vcpu))) + vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH; + + /* + * PT, PTI, PR, PC instruction operate on primary AS only. Check + * if the primary-space-switch-event control was or got set. + */ + if (new_as == PSW_AS_PRIMARY && !old_as_is_home(vcpu) && + (pssec(vcpu) || old_ssec(vcpu))) + vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH; + } } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index c98d89708e99..b277d50dcf76 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -30,7 +30,6 @@ #define IOINT_SCHID_MASK 0x0000ffff #define IOINT_SSID_MASK 0x00030000 #define IOINT_CSSID_MASK 0x03fc0000 -#define IOINT_AI_MASK 0x04000000 #define PFAULT_INIT 0x0600 #define PFAULT_DONE 0x0680 #define VIRTIO_PARAM 0x0d00 @@ -72,9 +71,13 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu) static int ckc_irq_pending(struct kvm_vcpu *vcpu) { + preempt_disable(); if (!(vcpu->arch.sie_block->ckc < - get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) + get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) { + preempt_enable(); return 0; + } + preempt_enable(); return ckc_interrupts_enabled(vcpu); } @@ -311,8 +314,8 @@ static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu) li->irq.ext.ext_params2 = 0; spin_unlock(&li->lock); - VCPU_EVENT(vcpu, 4, "interrupt: pfault init parm:%x,parm64:%llx", - 0, ext.ext_params2); + VCPU_EVENT(vcpu, 4, "deliver: pfault init token 0x%llx", + ext.ext_params2); trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT, 0, ext.ext_params2); @@ -368,7 +371,7 @@ static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu) spin_unlock(&fi->lock); if (deliver) { - VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", + VCPU_EVENT(vcpu, 3, "deliver: machine check mcic 0x%llx", mchk.mcic); trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK, @@ -403,7 +406,7 @@ static int __must_check __deliver_restart(struct kvm_vcpu *vcpu) struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; int rc; - VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart"); + VCPU_EVENT(vcpu, 3, "%s", "deliver: cpu restart"); vcpu->stat.deliver_restart_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0); @@ -427,7 +430,6 @@ static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu) clear_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs); spin_unlock(&li->lock); - VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", prefix.address); vcpu->stat.deliver_prefix_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX, @@ -450,7 +452,7 @@ static int __must_check __deliver_emergency_signal(struct kvm_vcpu *vcpu) clear_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs); spin_unlock(&li->lock); - VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg"); + VCPU_EVENT(vcpu, 4, "%s", "deliver: sigp emerg"); vcpu->stat.deliver_emergency_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, cpu_addr, 0); @@ -477,7 +479,7 @@ static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu) clear_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs); spin_unlock(&li->lock); - VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); + VCPU_EVENT(vcpu, 4, "%s", "deliver: sigp ext call"); vcpu->stat.deliver_external_call++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL, @@ -506,7 +508,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) memset(&li->irq.pgm, 0, sizeof(pgm_info)); spin_unlock(&li->lock); - VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", + VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilc:%d", pgm_info.code, ilc); vcpu->stat.deliver_program_int++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, @@ -622,7 +624,7 @@ static int __must_check __deliver_service(struct kvm_vcpu *vcpu) clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs); spin_unlock(&fi->lock); - VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", + VCPU_EVENT(vcpu, 4, "deliver: sclp parameter 0x%x", ext.ext_params); vcpu->stat.deliver_service_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE, @@ -651,9 +653,6 @@ static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu) struct kvm_s390_interrupt_info, list); if (inti) { - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, - KVM_S390_INT_PFAULT_DONE, 0, - inti->ext.ext_params2); list_del(&inti->list); fi->counters[FIRQ_CNTR_PFAULT] -= 1; } @@ -662,6 +661,12 @@ static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu) spin_unlock(&fi->lock); if (inti) { + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + KVM_S390_INT_PFAULT_DONE, 0, + inti->ext.ext_params2); + VCPU_EVENT(vcpu, 4, "deliver: pfault done token 0x%llx", + inti->ext.ext_params2); + rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE); rc |= put_guest_lc(vcpu, PFAULT_DONE, @@ -691,7 +696,7 @@ static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu) list); if (inti) { VCPU_EVENT(vcpu, 4, - "interrupt: virtio parm:%x,parm64:%llx", + "deliver: virtio parm: 0x%x,parm64: 0x%llx", inti->ext.ext_params, inti->ext.ext_params2); vcpu->stat.deliver_virtio_interrupt++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, @@ -741,7 +746,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info, list); if (inti) { - VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type); + VCPU_EVENT(vcpu, 4, "deliver: I/O 0x%llx", inti->type); vcpu->stat.deliver_io_int++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, @@ -855,7 +860,9 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) goto no_timer; } + preempt_disable(); now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch; + preempt_enable(); sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); /* underflow */ @@ -864,7 +871,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) __set_cpu_idle(vcpu); hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); - VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime); + VCPU_EVENT(vcpu, 4, "enabled wait via clock comparator: %llu ns", sltime); no_timer: srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); kvm_vcpu_block(vcpu); @@ -894,7 +901,9 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) u64 now, sltime; vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); + preempt_disable(); now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch; + preempt_enable(); sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); /* @@ -968,6 +977,10 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + VCPU_EVENT(vcpu, 3, "inject: program irq code 0x%x", irq->u.pgm.code); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, + irq->u.pgm.code, 0); + li->irq.pgm = irq->u.pgm; set_bit(IRQ_PEND_PROG, &li->pending_irqs); return 0; @@ -978,9 +991,6 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_irq irq; - VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code); - trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, code, - 0, 1); spin_lock(&li->lock); irq.u.pgm.code = code; __inject_prog(vcpu, &irq); @@ -996,10 +1006,6 @@ int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu, struct kvm_s390_irq irq; int rc; - VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)", - pgm_info->code); - trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, - pgm_info->code, 0, 1); spin_lock(&li->lock); irq.u.pgm = *pgm_info; rc = __inject_prog(vcpu, &irq); @@ -1012,11 +1018,11 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - VCPU_EVENT(vcpu, 3, "inject: external irq params:%x, params2:%llx", - irq->u.ext.ext_params, irq->u.ext.ext_params2); + VCPU_EVENT(vcpu, 4, "inject: pfault init parameter block at 0x%llx", + irq->u.ext.ext_params2); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT, irq->u.ext.ext_params, - irq->u.ext.ext_params2, 2); + irq->u.ext.ext_params2); li->irq.ext = irq->u.ext; set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs); @@ -1045,10 +1051,10 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) struct kvm_s390_extcall_info *extcall = &li->irq.extcall; uint16_t src_id = irq->u.extcall.code; - VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u", + VCPU_EVENT(vcpu, 4, "inject: external call source-cpu:%u", src_id); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL, - src_id, 0, 2); + src_id, 0); /* sending vcpu invalid */ if (src_id >= KVM_MAX_VCPUS || @@ -1070,10 +1076,10 @@ static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_prefix_info *prefix = &li->irq.prefix; - VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)", + VCPU_EVENT(vcpu, 3, "inject: set prefix to %x", irq->u.prefix.address); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX, - irq->u.prefix.address, 0, 2); + irq->u.prefix.address, 0); if (!is_vcpu_stopped(vcpu)) return -EBUSY; @@ -1090,7 +1096,7 @@ static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) struct kvm_s390_stop_info *stop = &li->irq.stop; int rc = 0; - trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0); if (irq->u.stop.flags & ~KVM_S390_STOP_SUPP_FLAGS) return -EINVAL; @@ -1114,8 +1120,8 @@ static int __inject_sigp_restart(struct kvm_vcpu *vcpu, { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - VCPU_EVENT(vcpu, 3, "inject: restart type %llx", irq->type); - trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0, 2); + VCPU_EVENT(vcpu, 3, "%s", "inject: restart int"); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0); set_bit(IRQ_PEND_RESTART, &li->pending_irqs); return 0; @@ -1126,10 +1132,10 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu, { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", + VCPU_EVENT(vcpu, 4, "inject: emergency from cpu %u", irq->u.emerg.code); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, - irq->u.emerg.code, 0, 2); + irq->u.emerg.code, 0); set_bit(irq->u.emerg.code, li->sigp_emerg_pending); set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs); @@ -1142,10 +1148,10 @@ static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_mchk_info *mchk = &li->irq.mchk; - VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx", + VCPU_EVENT(vcpu, 3, "inject: machine check mcic 0x%llx", irq->u.mchk.mcic); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0, - irq->u.mchk.mcic, 2); + irq->u.mchk.mcic); /* * Because repressible machine checks can be indicated along with @@ -1172,9 +1178,9 @@ static int __inject_ckc(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CLOCK_COMP); + VCPU_EVENT(vcpu, 3, "%s", "inject: clock comparator external"); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP, - 0, 0, 2); + 0, 0); set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); @@ -1185,9 +1191,9 @@ static int __inject_cpu_timer(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CPU_TIMER); + VCPU_EVENT(vcpu, 3, "%s", "inject: cpu timer external"); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER, - 0, 0, 2); + 0, 0); set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); @@ -1435,20 +1441,20 @@ int kvm_s390_inject_vm(struct kvm *kvm, inti->ext.ext_params2 = s390int->parm64; break; case KVM_S390_INT_SERVICE: - VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm); + VM_EVENT(kvm, 4, "inject: sclp parm:%x", s390int->parm); inti->ext.ext_params = s390int->parm; break; case KVM_S390_INT_PFAULT_DONE: inti->ext.ext_params2 = s390int->parm64; break; case KVM_S390_MCHK: - VM_EVENT(kvm, 5, "inject: machine check parm64:%llx", + VM_EVENT(kvm, 3, "inject: machine check mcic 0x%llx", s390int->parm64); inti->mchk.cr14 = s390int->parm; /* upper bits are not used */ inti->mchk.mcic = s390int->parm64; break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - if (inti->type & IOINT_AI_MASK) + if (inti->type & KVM_S390_INT_IO_AI_MASK) VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)"); else VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x", @@ -1535,8 +1541,6 @@ static int do_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) switch (irq->type) { case KVM_S390_PROGRAM_INT: - VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)", - irq->u.pgm.code); rc = __inject_prog(vcpu, irq); break; case KVM_S390_SIGP_SET_PREFIX: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2078f92d15ac..6861b74649ae 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -28,6 +28,7 @@ #include <linux/vmalloc.h> #include <asm/asm-offsets.h> #include <asm/lowcore.h> +#include <asm/etr.h> #include <asm/pgtable.h> #include <asm/nmi.h> #include <asm/switch_to.h> @@ -108,6 +109,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "diagnose_10", VCPU_STAT(diagnose_10) }, { "diagnose_44", VCPU_STAT(diagnose_44) }, { "diagnose_9c", VCPU_STAT(diagnose_9c) }, + { "diagnose_258", VCPU_STAT(diagnose_258) }, + { "diagnose_308", VCPU_STAT(diagnose_308) }, + { "diagnose_500", VCPU_STAT(diagnose_500) }, { NULL } }; @@ -124,6 +128,7 @@ unsigned long kvm_s390_fac_list_mask_size(void) } static struct gmap_notifier gmap_notifier; +debug_info_t *kvm_s390_dbf; /* Section: not file related */ int kvm_arch_hardware_enable(void) @@ -134,24 +139,69 @@ int kvm_arch_hardware_enable(void) static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address); +/* + * This callback is executed during stop_machine(). All CPUs are therefore + * temporarily stopped. In order not to change guest behavior, we have to + * disable preemption whenever we touch the epoch of kvm and the VCPUs, + * so a CPU won't be stopped while calculating with the epoch. + */ +static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val, + void *v) +{ + struct kvm *kvm; + struct kvm_vcpu *vcpu; + int i; + unsigned long long *delta = v; + + list_for_each_entry(kvm, &vm_list, vm_list) { + kvm->arch.epoch -= *delta; + kvm_for_each_vcpu(i, vcpu, kvm) { + vcpu->arch.sie_block->epoch -= *delta; + } + } + return NOTIFY_OK; +} + +static struct notifier_block kvm_clock_notifier = { + .notifier_call = kvm_clock_sync, +}; + int kvm_arch_hardware_setup(void) { gmap_notifier.notifier_call = kvm_gmap_notifier; gmap_register_ipte_notifier(&gmap_notifier); + atomic_notifier_chain_register(&s390_epoch_delta_notifier, + &kvm_clock_notifier); return 0; } void kvm_arch_hardware_unsetup(void) { gmap_unregister_ipte_notifier(&gmap_notifier); + atomic_notifier_chain_unregister(&s390_epoch_delta_notifier, + &kvm_clock_notifier); } int kvm_arch_init(void *opaque) { + kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long)); + if (!kvm_s390_dbf) + return -ENOMEM; + + if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) { + debug_unregister(kvm_s390_dbf); + return -ENOMEM; + } + /* Register floating interrupt controller interface. */ return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); } +void kvm_arch_exit(void) +{ + debug_unregister(kvm_s390_dbf); +} + /* Section: device related */ long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) @@ -281,10 +331,12 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) switch (cap->cap) { case KVM_CAP_S390_IRQCHIP: + VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP"); kvm->arch.use_irqchip = 1; r = 0; break; case KVM_CAP_S390_USER_SIGP: + VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP"); kvm->arch.user_sigp = 1; r = 0; break; @@ -295,8 +347,11 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) r = 0; } else r = -EINVAL; + VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s", + r ? "(not available)" : "(success)"); break; case KVM_CAP_S390_USER_STSI: + VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); kvm->arch.user_stsi = 1; r = 0; break; @@ -314,6 +369,8 @@ static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *att switch (attr->attr) { case KVM_S390_VM_MEM_LIMIT_SIZE: ret = 0; + VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", + kvm->arch.gmap->asce_end); if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr)) ret = -EFAULT; break; @@ -330,7 +387,13 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att unsigned int idx; switch (attr->attr) { case KVM_S390_VM_MEM_ENABLE_CMMA: + /* enable CMMA only for z10 and later (EDAT_1) */ + ret = -EINVAL; + if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1) + break; + ret = -EBUSY; + VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); mutex_lock(&kvm->lock); if (atomic_read(&kvm->online_vcpus) == 0) { kvm->arch.use_cmma = 1; @@ -339,6 +402,11 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att mutex_unlock(&kvm->lock); break; case KVM_S390_VM_MEM_CLR_CMMA: + ret = -EINVAL; + if (!kvm->arch.use_cmma) + break; + + VM_EVENT(kvm, 3, "%s", "RESET: CMMA states"); mutex_lock(&kvm->lock); idx = srcu_read_lock(&kvm->srcu); s390_reset_cmma(kvm->arch.gmap->mm); @@ -374,6 +442,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att } } mutex_unlock(&kvm->lock); + VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit); break; } default: @@ -400,22 +469,26 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr) kvm->arch.crypto.crycb->aes_wrapping_key_mask, sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); kvm->arch.crypto.aes_kw = 1; + VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support"); break; case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: get_random_bytes( kvm->arch.crypto.crycb->dea_wrapping_key_mask, sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); kvm->arch.crypto.dea_kw = 1; + VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support"); break; case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: kvm->arch.crypto.aes_kw = 0; memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0, sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); + VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support"); break; case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: kvm->arch.crypto.dea_kw = 0; memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0, sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); + VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support"); break; default: mutex_unlock(&kvm->lock); @@ -440,6 +513,7 @@ static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) if (gtod_high != 0) return -EINVAL; + VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x\n", gtod_high); return 0; } @@ -459,12 +533,15 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) return r; mutex_lock(&kvm->lock); + preempt_disable(); kvm->arch.epoch = gtod - host_tod; kvm_s390_vcpu_block_all(kvm); kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch; kvm_s390_vcpu_unblock_all(kvm); + preempt_enable(); mutex_unlock(&kvm->lock); + VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx\n", gtod); return 0; } @@ -496,6 +573,7 @@ static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) if (copy_to_user((void __user *)attr->addr, >od_high, sizeof(gtod_high))) return -EFAULT; + VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x\n", gtod_high); return 0; } @@ -509,9 +587,12 @@ static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) if (r) return r; + preempt_disable(); gtod = host_tod + kvm->arch.epoch; + preempt_enable(); if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod))) return -EFAULT; + VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx\n", gtod); return 0; } @@ -821,7 +902,9 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) } /* Enable storage key handling for the guest */ - s390_enable_skey(); + r = s390_enable_skey(); + if (r) + goto out; for (i = 0; i < args->count; i++) { hva = gfn_to_hva(kvm, args->start_gfn + i); @@ -879,8 +962,7 @@ long kvm_arch_vm_ioctl(struct file *filp, if (kvm->arch.use_irqchip) { /* Set up dummy routing. */ memset(&routing, 0, sizeof(routing)); - kvm_set_irq_routing(kvm, &routing, 0, 0); - r = 0; + r = kvm_set_irq_routing(kvm, &routing, 0, 0); } break; } @@ -1043,7 +1125,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) sprintf(debug_name, "kvm-%u", current->pid); - kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long)); + kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long)); if (!kvm->arch.dbf) goto out_err; @@ -1086,7 +1168,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) mutex_init(&kvm->arch.ipte_mutex); debug_register_view(kvm->arch.dbf, &debug_sprintf_view); - VM_EVENT(kvm, 3, "%s", "vm created"); + VM_EVENT(kvm, 3, "vm created with type %lu", type); if (type & KVM_VM_S390_UCONTROL) { kvm->arch.gmap = NULL; @@ -1103,6 +1185,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.epoch = 0; spin_lock_init(&kvm->arch.start_stop_lock); + KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid); return 0; out_err: @@ -1110,6 +1193,7 @@ out_err: free_page((unsigned long)kvm->arch.model.fac); debug_unregister(kvm->arch.dbf); free_page((unsigned long)(kvm->arch.sca)); + KVM_EVENT(3, "creation of vm failed: %d", rc); return rc; } @@ -1131,7 +1215,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) if (kvm_is_ucontrol(vcpu->kvm)) gmap_free(vcpu->arch.gmap); - if (kvm_s390_cmma_enabled(vcpu->kvm)) + if (vcpu->kvm->arch.use_cmma) kvm_s390_vcpu_unsetup_cmma(vcpu); free_page((unsigned long)(vcpu->arch.sie_block)); @@ -1166,6 +1250,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) gmap_free(kvm->arch.gmap); kvm_s390_destroy_adapters(kvm); kvm_s390_clear_float_irqs(kvm); + KVM_EVENT(3, "vm 0x%p destroyed", kvm); } /* Section: vcpu related */ @@ -1264,7 +1349,9 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) { mutex_lock(&vcpu->kvm->lock); + preempt_disable(); vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; + preempt_enable(); mutex_unlock(&vcpu->kvm->lock); if (!kvm_is_ucontrol(vcpu->kvm)) vcpu->arch.gmap = vcpu->kvm->arch.gmap; @@ -1342,7 +1429,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) } vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; - if (kvm_s390_cmma_enabled(vcpu->kvm)) { + if (vcpu->kvm->arch.use_cmma) { rc = kvm_s390_vcpu_setup_cmma(vcpu); if (rc) return rc; @@ -1723,18 +1810,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, return rc; } -bool kvm_s390_cmma_enabled(struct kvm *kvm) -{ - if (!MACHINE_IS_LPAR) - return false; - /* only enable for z10 and later */ - if (!MACHINE_HAS_EDAT1) - return false; - if (!kvm->arch.use_cmma) - return false; - return true; -} - static bool ibs_enabled(struct kvm_vcpu *vcpu) { return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS; @@ -1742,10 +1817,10 @@ static bool ibs_enabled(struct kvm_vcpu *vcpu) static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) { - if (!vcpu->requests) - return 0; retry: kvm_s390_vcpu_request_handled(vcpu); + if (!vcpu->requests) + return 0; /* * We use MMU_RELOAD just to re-arm the ipte notifier for the * guest prefix page. gmap_ipte_notify will wait on the ptl lock. @@ -2340,6 +2415,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, case KVM_CAP_S390_CSS_SUPPORT: if (!vcpu->kvm->arch.css_support) { vcpu->kvm->arch.css_support = 1; + VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support"); trace_kvm_s390_enable_css(vcpu->kvm); } r = 0; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index c5704786e473..c446aabf60d3 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -27,6 +27,13 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); #define TDB_FORMAT1 1 #define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1)) +extern debug_info_t *kvm_s390_dbf; +#define KVM_EVENT(d_loglevel, d_string, d_args...)\ +do { \ + debug_sprintf_event(kvm_s390_dbf, d_loglevel, d_string "\n", \ + d_args); \ +} while (0) + #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ do { \ debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \ @@ -65,6 +72,8 @@ static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu) static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) { + VCPU_EVENT(vcpu, 3, "set prefix of cpu %03u to 0x%x", vcpu->vcpu_id, + prefix); vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT; kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); @@ -217,8 +226,6 @@ void exit_sie(struct kvm_vcpu *vcpu); void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu); int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); -/* is cmma enabled */ -bool kvm_s390_cmma_enabled(struct kvm *kvm); unsigned long kvm_s390_fac_list_mask_size(void); extern unsigned long kvm_s390_fac_list_mask[]; diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index ad4242245771..4d21dc4d1a84 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -53,11 +53,14 @@ static int handle_set_clock(struct kvm_vcpu *vcpu) kvm_s390_set_psw_cc(vcpu, 3); return 0; } + VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val); val = (val - hostclk) & ~0x3fUL; mutex_lock(&vcpu->kvm->lock); + preempt_disable(); kvm_for_each_vcpu(i, cpup, vcpu->kvm) cpup->arch.sie_block->epoch = val; + preempt_enable(); mutex_unlock(&vcpu->kvm->lock); kvm_s390_set_psw_cc(vcpu, 0); @@ -98,8 +101,6 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); kvm_s390_set_prefix(vcpu, address); - - VCPU_EVENT(vcpu, 5, "setting prefix to %x", address); trace_kvm_s390_handle_prefix(vcpu, 1, address); return 0; } @@ -129,7 +130,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); - VCPU_EVENT(vcpu, 5, "storing prefix to %x", address); + VCPU_EVENT(vcpu, 3, "STPX: storing prefix 0x%x into 0x%llx", address, operand2); trace_kvm_s390_handle_prefix(vcpu, 0, address); return 0; } @@ -155,7 +156,7 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); - VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", ga); + VCPU_EVENT(vcpu, 3, "STAP: storing cpu address (%u) to 0x%llx", vcpu_id, ga); trace_kvm_s390_handle_stap(vcpu, ga); return 0; } @@ -167,6 +168,7 @@ static int __skey_check_enable(struct kvm_vcpu *vcpu) return rc; rc = s390_enable_skey(); + VCPU_EVENT(vcpu, 3, "%s", "enabling storage keys for guest"); trace_kvm_s390_skey_related_inst(vcpu); vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); return rc; @@ -370,7 +372,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu) &fac, sizeof(fac)); if (rc) return rc; - VCPU_EVENT(vcpu, 5, "store facility list value %x", fac); + VCPU_EVENT(vcpu, 3, "STFL: store facility list 0x%x", fac); trace_kvm_s390_handle_stfl(vcpu, fac); return 0; } @@ -468,7 +470,7 @@ static int handle_stidp(struct kvm_vcpu *vcpu) if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); - VCPU_EVENT(vcpu, 5, "%s", "store cpu id"); + VCPU_EVENT(vcpu, 3, "STIDP: store cpu id 0x%llx", stidp_data); return 0; } @@ -521,7 +523,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) ar_t ar; vcpu->stat.instruction_stsi++; - VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); + VCPU_EVENT(vcpu, 3, "STSI: fc: %u sel1: %u sel2: %u", fc, sel1, sel2); if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); @@ -758,10 +760,10 @@ static int handle_essa(struct kvm_vcpu *vcpu) struct gmap *gmap; int i; - VCPU_EVENT(vcpu, 5, "cmma release %d pages", entries); + VCPU_EVENT(vcpu, 4, "ESSA: release %d pages", entries); gmap = vcpu->arch.gmap; vcpu->stat.instruction_essa++; - if (!kvm_s390_cmma_enabled(vcpu->kvm)) + if (!vcpu->kvm->arch.use_cmma) return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) @@ -829,7 +831,7 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) if (ga & 3) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); + VCPU_EVENT(vcpu, 4, "LCTL: r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga); trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga); nr_regs = ((reg3 - reg1) & 0xf) + 1; @@ -868,7 +870,7 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu) if (ga & 3) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - VCPU_EVENT(vcpu, 5, "stctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); + VCPU_EVENT(vcpu, 4, "STCTL r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga); trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga); reg = reg1; @@ -902,7 +904,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) if (ga & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); + VCPU_EVENT(vcpu, 4, "LCTLG: r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga); trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga); nr_regs = ((reg3 - reg1) & 0xf) + 1; @@ -940,7 +942,7 @@ static int handle_stctg(struct kvm_vcpu *vcpu) if (ga & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); + VCPU_EVENT(vcpu, 4, "STCTG r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga); trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga); reg = reg1; diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 72e58bd2bee7..da690b69f9fe 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -205,9 +205,6 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu, *reg &= 0xffffffff00000000UL; *reg |= SIGP_STATUS_INCORRECT_STATE; return SIGP_CC_STATUS_STORED; - } else if (rc == 0) { - VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", - dst_vcpu->vcpu_id, irq.u.prefix.address); } return rc; @@ -371,7 +368,8 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code, return rc; } -static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code) +static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code, + u16 cpu_addr) { if (!vcpu->kvm->arch.user_sigp) return 0; @@ -414,9 +412,8 @@ static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code) default: vcpu->stat.instruction_sigp_unknown++; } - - VCPU_EVENT(vcpu, 4, "sigp order %u: completely handled in user space", - order_code); + VCPU_EVENT(vcpu, 3, "SIGP: order %u for CPU %d handled in userspace", + order_code, cpu_addr); return 1; } @@ -435,7 +432,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); order_code = kvm_s390_get_base_disp_rs(vcpu, NULL); - if (handle_sigp_order_in_user_space(vcpu, order_code)) + if (handle_sigp_order_in_user_space(vcpu, order_code, cpu_addr)) return -EOPNOTSUPP; if (r1 % 2) diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index 3208d33a48cb..cc1d6c68356f 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h @@ -105,11 +105,22 @@ TRACE_EVENT(kvm_s390_vcpu_start_stop, {KVM_S390_PROGRAM_INT, "program interrupt"}, \ {KVM_S390_SIGP_SET_PREFIX, "sigp set prefix"}, \ {KVM_S390_RESTART, "sigp restart"}, \ + {KVM_S390_INT_PFAULT_INIT, "pfault init"}, \ + {KVM_S390_INT_PFAULT_DONE, "pfault done"}, \ + {KVM_S390_MCHK, "machine check"}, \ + {KVM_S390_INT_CLOCK_COMP, "clock comparator"}, \ + {KVM_S390_INT_CPU_TIMER, "cpu timer"}, \ {KVM_S390_INT_VIRTIO, "virtio interrupt"}, \ {KVM_S390_INT_SERVICE, "sclp interrupt"}, \ {KVM_S390_INT_EMERGENCY, "sigp emergency"}, \ {KVM_S390_INT_EXTERNAL_CALL, "sigp ext call"} +#define get_irq_name(__type) \ + (__type > KVM_S390_INT_IO_MAX ? \ + __print_symbolic(__type, kvm_s390_int_type) : \ + (__type & KVM_S390_INT_IO_AI_MASK ? \ + "adapter I/O interrupt" : "subchannel I/O interrupt")) + TRACE_EVENT(kvm_s390_inject_vm, TP_PROTO(__u64 type, __u32 parm, __u64 parm64, int who), TP_ARGS(type, parm, parm64, who), @@ -131,22 +142,19 @@ TRACE_EVENT(kvm_s390_inject_vm, TP_printk("inject%s: type:%x (%s) parm:%x parm64:%llx", (__entry->who == 1) ? " (from kernel)" : (__entry->who == 2) ? " (from user)" : "", - __entry->inttype, - __print_symbolic(__entry->inttype, kvm_s390_int_type), + __entry->inttype, get_irq_name(__entry->inttype), __entry->parm, __entry->parm64) ); TRACE_EVENT(kvm_s390_inject_vcpu, - TP_PROTO(unsigned int id, __u64 type, __u32 parm, __u64 parm64, \ - int who), - TP_ARGS(id, type, parm, parm64, who), + TP_PROTO(unsigned int id, __u64 type, __u32 parm, __u64 parm64), + TP_ARGS(id, type, parm, parm64), TP_STRUCT__entry( __field(int, id) __field(__u32, inttype) __field(__u32, parm) __field(__u64, parm64) - __field(int, who) ), TP_fast_assign( @@ -154,15 +162,12 @@ TRACE_EVENT(kvm_s390_inject_vcpu, __entry->inttype = type & 0x00000000ffffffff; __entry->parm = parm; __entry->parm64 = parm64; - __entry->who = who; ), - TP_printk("inject%s (vcpu %d): type:%x (%s) parm:%x parm64:%llx", - (__entry->who == 1) ? " (from kernel)" : - (__entry->who == 2) ? " (from user)" : "", + TP_printk("inject (vcpu %d): type:%x (%s) parm:%x parm64:%llx", __entry->id, __entry->inttype, - __print_symbolic(__entry->inttype, kvm_s390_int_type), - __entry->parm, __entry->parm64) + get_irq_name(__entry->inttype), __entry->parm, + __entry->parm64) ); /* @@ -189,8 +194,8 @@ TRACE_EVENT(kvm_s390_deliver_interrupt, TP_printk("deliver interrupt (vcpu %d): type:%x (%s) " \ "data:%08llx %016llx", __entry->id, __entry->inttype, - __print_symbolic(__entry->inttype, kvm_s390_int_type), - __entry->data0, __entry->data1) + get_irq_name(__entry->inttype), __entry->data0, + __entry->data1) ); /* diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index fee782acc2ee..8d2e5165865f 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -448,13 +448,13 @@ static void bpf_jit_prologue(struct bpf_jit *jit) EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0, BPF_REG_1, offsetof(struct sk_buff, data)); } - /* BPF compatibility: clear A (%b7) and X (%b8) registers */ - if (REG_SEEN(BPF_REG_7)) - /* lghi %b7,0 */ - EMIT4_IMM(0xa7090000, BPF_REG_7, 0); - if (REG_SEEN(BPF_REG_8)) - /* lghi %b8,0 */ - EMIT4_IMM(0xa7090000, BPF_REG_8, 0); + /* BPF compatibility: clear A (%b0) and X (%b7) registers */ + if (REG_SEEN(BPF_REG_A)) + /* lghi %ba,0 */ + EMIT4_IMM(0xa7090000, BPF_REG_A, 0); + if (REG_SEEN(BPF_REG_X)) + /* lghi %bx,0 */ + EMIT4_IMM(0xa7090000, BPF_REG_X, 0); } /* diff --git a/arch/sparc/include/asm/visasm.h b/arch/sparc/include/asm/visasm.h index 1f0aa2024e94..6424249d5f78 100644 --- a/arch/sparc/include/asm/visasm.h +++ b/arch/sparc/include/asm/visasm.h @@ -28,16 +28,10 @@ * Must preserve %o5 between VISEntryHalf and VISExitHalf */ #define VISEntryHalf \ - rd %fprs, %o5; \ - andcc %o5, FPRS_FEF, %g0; \ - be,pt %icc, 297f; \ - sethi %hi(298f), %g7; \ - sethi %hi(VISenterhalf), %g1; \ - jmpl %g1 + %lo(VISenterhalf), %g0; \ - or %g7, %lo(298f), %g7; \ - clr %o5; \ -297: wr %o5, FPRS_FEF, %fprs; \ -298: + VISEntry + +#define VISExitHalf \ + VISExit #define VISEntryHalfFast(fail_label) \ rd %fprs, %o5; \ @@ -47,7 +41,7 @@ ba,a,pt %xcc, fail_label; \ 297: wr %o5, FPRS_FEF, %fprs; -#define VISExitHalf \ +#define VISExitHalfFast \ wr %o5, 0, %fprs; #ifndef __ASSEMBLY__ diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S index 140527a20e7d..83aeeb1dffdb 100644 --- a/arch/sparc/lib/NG4memcpy.S +++ b/arch/sparc/lib/NG4memcpy.S @@ -240,8 +240,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ add %o0, 0x40, %o0 bne,pt %icc, 1b LOAD(prefetch, %g1 + 0x200, #n_reads_strong) +#ifdef NON_USER_COPY + VISExitHalfFast +#else VISExitHalf - +#endif brz,pn %o2, .Lexit cmp %o2, 19 ble,pn %icc, .Lsmall_unaligned diff --git a/arch/sparc/lib/VISsave.S b/arch/sparc/lib/VISsave.S index b320ae9e2e2e..a063d84336d6 100644 --- a/arch/sparc/lib/VISsave.S +++ b/arch/sparc/lib/VISsave.S @@ -44,9 +44,8 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 stx %g3, [%g6 + TI_GSR] 2: add %g6, %g1, %g3 - cmp %o5, FPRS_DU - be,pn %icc, 6f - sll %g1, 3, %g1 + mov FPRS_DU | FPRS_DL | FPRS_FEF, %o5 + sll %g1, 3, %g1 stb %o5, [%g3 + TI_FPSAVED] rd %gsr, %g2 add %g6, %g1, %g3 @@ -80,65 +79,3 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 .align 32 80: jmpl %g7 + %g0, %g0 nop - -6: ldub [%g3 + TI_FPSAVED], %o5 - or %o5, FPRS_DU, %o5 - add %g6, TI_FPREGS+0x80, %g2 - stb %o5, [%g3 + TI_FPSAVED] - - sll %g1, 5, %g1 - add %g6, TI_FPREGS+0xc0, %g3 - wr %g0, FPRS_FEF, %fprs - membar #Sync - stda %f32, [%g2 + %g1] ASI_BLK_P - stda %f48, [%g3 + %g1] ASI_BLK_P - membar #Sync - ba,pt %xcc, 80f - nop - - .align 32 -80: jmpl %g7 + %g0, %g0 - nop - - .align 32 -VISenterhalf: - ldub [%g6 + TI_FPDEPTH], %g1 - brnz,a,pn %g1, 1f - cmp %g1, 1 - stb %g0, [%g6 + TI_FPSAVED] - stx %fsr, [%g6 + TI_XFSR] - clr %o5 - jmpl %g7 + %g0, %g0 - wr %g0, FPRS_FEF, %fprs - -1: bne,pn %icc, 2f - srl %g1, 1, %g1 - ba,pt %xcc, vis1 - sub %g7, 8, %g7 -2: addcc %g6, %g1, %g3 - sll %g1, 3, %g1 - andn %o5, FPRS_DU, %g2 - stb %g2, [%g3 + TI_FPSAVED] - - rd %gsr, %g2 - add %g6, %g1, %g3 - stx %g2, [%g3 + TI_GSR] - add %g6, %g1, %g2 - stx %fsr, [%g2 + TI_XFSR] - sll %g1, 5, %g1 -3: andcc %o5, FPRS_DL, %g0 - be,pn %icc, 4f - add %g6, TI_FPREGS, %g2 - - add %g6, TI_FPREGS+0x40, %g3 - membar #Sync - stda %f0, [%g2 + %g1] ASI_BLK_P - stda %f16, [%g3 + %g1] ASI_BLK_P - membar #Sync - ba,pt %xcc, 4f - nop - - .align 32 -4: and %o5, FPRS_DU, %o5 - jmpl %g7 + %g0, %g0 - wr %o5, FPRS_FEF, %fprs diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c index 1d649a95660c..8069ce12f20b 100644 --- a/arch/sparc/lib/ksyms.c +++ b/arch/sparc/lib/ksyms.c @@ -135,10 +135,6 @@ EXPORT_SYMBOL(copy_user_page); void VISenter(void); EXPORT_SYMBOL(VISenter); -/* CRYPTO code needs this */ -void VISenterhalf(void); -EXPORT_SYMBOL(VISenterhalf); - extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *); extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *, unsigned long *); diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index e8c2c04143cd..c667e104a0c2 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -113,8 +113,6 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) if (!access_ok(VERIFY_READ, from, sizeof(struct compat_siginfo))) return -EFAULT; - memset(to, 0, sizeof(*to)); - err = __get_user(to->si_signo, &from->si_signo); err |= __get_user(to->si_errno, &from->si_errno); err |= __get_user(to->si_code, &from->si_code); diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 2c82bd150d43..7d69afd8b6fa 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1193,6 +1193,10 @@ static efi_status_t setup_e820(struct boot_params *params, unsigned int e820_type = 0; unsigned long m = efi->efi_memmap; +#ifdef CONFIG_X86_64 + m |= (u64)efi->efi_memmap_hi << 32; +#endif + d = (efi_memory_desc_t *)(m + (i * efi->efi_memdesc_size)); switch (d->type) { case EFI_RESERVED_TYPE: diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 5a1844765a7a..a7e257d9cb90 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -140,6 +140,7 @@ sysexit_from_sys_call: */ andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) movl RIP(%rsp), %ecx /* User %eip */ + movq RAX(%rsp), %rax RESTORE_RSI_RDI xorl %edx, %edx /* Do not leak kernel information */ xorq %r8, %r8 @@ -219,7 +220,6 @@ sysexit_from_sys_call: 1: setbe %al /* 1 if error, 0 if not */ movzbl %al, %edi /* zero-extend that into %edi */ call __audit_syscall_exit - movq RAX(%rsp), %rax /* reload syscall return value */ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %edi DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF @@ -368,6 +368,7 @@ sysretl_from_sys_call: RESTORE_RSI_RDI_RDX movl RIP(%rsp), %ecx movl EFLAGS(%rsp), %r11d + movq RAX(%rsp), %rax xorq %r10, %r10 xorq %r9, %r9 xorq %r8, %r8 diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index a0bf89fd2647..4e10d73cf018 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -280,21 +280,6 @@ static inline void clear_LDT(void) set_ldt(NULL, 0); } -/* - * load one particular LDT into the current CPU - */ -static inline void load_LDT_nolock(mm_context_t *pc) -{ - set_ldt(pc->ldt, pc->size); -} - -static inline void load_LDT(mm_context_t *pc) -{ - preempt_disable(); - load_LDT_nolock(pc); - preempt_enable(); -} - static inline unsigned long get_desc_base(const struct desc_struct *desc) { return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 49ec9038ec14..c12e845f59e6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -252,6 +252,11 @@ struct kvm_pio_request { int size; }; +struct rsvd_bits_validate { + u64 rsvd_bits_mask[2][4]; + u64 bad_mt_xwr; +}; + /* * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level * 32-bit). The kvm_mmu structure abstracts the details of the current mmu @@ -289,8 +294,15 @@ struct kvm_mmu { u64 *pae_root; u64 *lm_root; - u64 rsvd_bits_mask[2][4]; - u64 bad_mt_xwr; + + /* + * check zero bits on shadow page table entries, these + * bits include not only hardware reserved bits but also + * the bits spte never used. + */ + struct rsvd_bits_validate shadow_zero_check; + + struct rsvd_bits_validate guest_rsvd_check; /* * Bitmap: bit set = last pte in walk @@ -358,6 +370,11 @@ struct kvm_mtrr { struct list_head head; }; +/* Hyper-V per vcpu emulation context */ +struct kvm_vcpu_hv { + u64 hv_vapic; +}; + struct kvm_vcpu_arch { /* * rip and regs accesses must go through @@ -514,8 +531,7 @@ struct kvm_vcpu_arch { /* used for guest single stepping over the given code position */ unsigned long singlestep_rip; - /* fields used by HYPER-V emulation */ - u64 hv_vapic; + struct kvm_vcpu_hv hyperv; cpumask_var_t wbinvd_dirty_mask; @@ -586,6 +602,17 @@ struct kvm_apic_map { struct kvm_lapic *logical_map[16][16]; }; +/* Hyper-V emulation context */ +struct kvm_hv { + u64 hv_guest_os_id; + u64 hv_hypercall; + u64 hv_tsc_page; + + /* Hyper-v based guest crash (NT kernel bugcheck) parameters */ + u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS]; + u64 hv_crash_ctl; +}; + struct kvm_arch { unsigned int n_used_mmu_pages; unsigned int n_requested_mmu_pages; @@ -645,16 +672,14 @@ struct kvm_arch { /* reads protected by irq_srcu, writes by irq_lock */ struct hlist_head mask_notifier_list; - /* fields used by HYPER-V emulation */ - u64 hv_guest_os_id; - u64 hv_hypercall; - u64 hv_tsc_page; + struct kvm_hv hyperv; #ifdef CONFIG_KVM_MMU_AUDIT int audit_point; #endif bool boot_vcpu_runs_old_kvmclock; + u32 bsp_vcpu_id; u64 disabled_quirks; }; @@ -1203,5 +1228,7 @@ int __x86_set_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem); int x86_set_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem); +bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); +bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 09b9620a73b4..364d27481a52 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -9,8 +9,7 @@ * we put the segment information here. */ typedef struct { - void *ldt; - int size; + struct ldt_struct *ldt; #ifdef CONFIG_X86_64 /* True if mm supports a task running in 32 bit compatibility mode. */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 804a3a6030ca..984abfe47edc 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -34,6 +34,50 @@ static inline void load_mm_cr4(struct mm_struct *mm) {} #endif /* + * ldt_structs can be allocated, used, and freed, but they are never + * modified while live. + */ +struct ldt_struct { + /* + * Xen requires page-aligned LDTs with special permissions. This is + * needed to prevent us from installing evil descriptors such as + * call gates. On native, we could merge the ldt_struct and LDT + * allocations, but it's not worth trying to optimize. + */ + struct desc_struct *entries; + int size; +}; + +static inline void load_mm_ldt(struct mm_struct *mm) +{ + struct ldt_struct *ldt; + + /* lockless_dereference synchronizes with smp_store_release */ + ldt = lockless_dereference(mm->context.ldt); + + /* + * Any change to mm->context.ldt is followed by an IPI to all + * CPUs with the mm active. The LDT will not be freed until + * after the IPI is handled by all such CPUs. This means that, + * if the ldt_struct changes before we return, the values we see + * will be safe, and the new values will be loaded before we run + * any user code. + * + * NB: don't try to convert this to use RCU without extreme care. + * We would still need IRQs off, because we don't want to change + * the local LDT after an IPI loaded a newer value than the one + * that we can see. + */ + + if (unlikely(ldt)) + set_ldt(ldt->entries, ldt->size); + else + clear_LDT(); + + DEBUG_LOCKS_WARN_ON(preemptible()); +} + +/* * Used for LDT copy/destruction. */ int init_new_context(struct task_struct *tsk, struct mm_struct *mm); @@ -78,12 +122,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * was called and then modify_ldt changed * prev->context.ldt but suppressed an IPI to this CPU. * In this case, prev->context.ldt != NULL, because we - * never free an LDT while the mm still exists. That - * means that next->context.ldt != prev->context.ldt, - * because mms never share an LDT. + * never set context.ldt to NULL while the mm still + * exists. That means that next->context.ldt != + * prev->context.ldt, because mms never share an LDT. */ if (unlikely(prev->context.ldt != next->context.ldt)) - load_LDT_nolock(&next->context); + load_mm_ldt(next); } #ifdef CONFIG_SMP else { @@ -106,7 +150,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, load_cr3(next->pgd); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); load_mm_cr4(next); - load_LDT_nolock(&next->context); + load_mm_ldt(next); } } #endif diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index c163215abb9a..aaf59b7da98a 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -7,6 +7,7 @@ struct ms_hyperv_info { u32 features; + u32 misc_features; u32 hints; }; @@ -20,4 +21,8 @@ void hyperv_vector_handler(struct pt_regs *regs); void hv_setup_vmbus_irq(void (*handler)(void)); void hv_remove_vmbus_irq(void); +void hv_setup_kexec_handler(void (*handler)(void)); +void hv_remove_kexec_handler(void); +void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)); +void hv_remove_crash_handler(void); #endif diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index 6fe6b182c998..9dfce4e0417d 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h @@ -57,9 +57,9 @@ struct sigcontext { unsigned long ip; unsigned long flags; unsigned short cs; - unsigned short __pad2; /* Was called gs, but was always zero. */ - unsigned short __pad1; /* Was called fs, but was always zero. */ - unsigned short ss; + unsigned short gs; + unsigned short fs; + unsigned short __pad0; unsigned long err; unsigned long trapno; unsigned long oldmask; diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 751bf4b7bf11..d7f3b3b78ac3 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -79,12 +79,12 @@ do { \ #else /* CONFIG_X86_32 */ /* frame pointer must be last for get_wchan */ -#define SAVE_CONTEXT "pushq %%rbp ; movq %%rsi,%%rbp\n\t" -#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\t" +#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" +#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t" #define __EXTRA_CLOBBER \ , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ - "r12", "r13", "r14", "r15", "flags" + "r12", "r13", "r14", "r15" #ifdef CONFIG_CC_STACKPROTECTOR #define __switch_canary \ @@ -100,11 +100,7 @@ do { \ #define __switch_canary_iparam #endif /* CC_STACKPROTECTOR */ -/* - * There is no need to save or restore flags, because flags are always - * clean in kernel mode, with the possible exception of IOPL. Kernel IOPL - * has no effect. - */ +/* Save restore flags to clear handle leaking NT */ #define switch_to(prev, next, last) \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index da772edd19ab..448b7ca61aee 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -47,6 +47,7 @@ #define CPU_BASED_MOV_DR_EXITING 0x00800000 #define CPU_BASED_UNCOND_IO_EXITING 0x01000000 #define CPU_BASED_USE_IO_BITMAPS 0x02000000 +#define CPU_BASED_MONITOR_TRAP_FLAG 0x08000000 #define CPU_BASED_USE_MSR_BITMAPS 0x10000000 #define CPU_BASED_MONITOR_EXITING 0x20000000 #define CPU_BASED_PAUSE_EXITING 0x40000000 @@ -367,29 +368,29 @@ enum vmcs_field { #define TYPE_PHYSICAL_APIC_EVENT (10 << 12) #define TYPE_PHYSICAL_APIC_INST (15 << 12) -/* segment AR */ -#define SEGMENT_AR_L_MASK (1 << 13) - -#define AR_TYPE_ACCESSES_MASK 1 -#define AR_TYPE_READABLE_MASK (1 << 1) -#define AR_TYPE_WRITEABLE_MASK (1 << 2) -#define AR_TYPE_CODE_MASK (1 << 3) -#define AR_TYPE_MASK 0x0f -#define AR_TYPE_BUSY_64_TSS 11 -#define AR_TYPE_BUSY_32_TSS 11 -#define AR_TYPE_BUSY_16_TSS 3 -#define AR_TYPE_LDT 2 - -#define AR_UNUSABLE_MASK (1 << 16) -#define AR_S_MASK (1 << 4) -#define AR_P_MASK (1 << 7) -#define AR_L_MASK (1 << 13) -#define AR_DB_MASK (1 << 14) -#define AR_G_MASK (1 << 15) -#define AR_DPL_SHIFT 5 -#define AR_DPL(ar) (((ar) >> AR_DPL_SHIFT) & 3) - -#define AR_RESERVD_MASK 0xfffe0f00 +/* segment AR in VMCS -- these are different from what LAR reports */ +#define VMX_SEGMENT_AR_L_MASK (1 << 13) + +#define VMX_AR_TYPE_ACCESSES_MASK 1 +#define VMX_AR_TYPE_READABLE_MASK (1 << 1) +#define VMX_AR_TYPE_WRITEABLE_MASK (1 << 2) +#define VMX_AR_TYPE_CODE_MASK (1 << 3) +#define VMX_AR_TYPE_MASK 0x0f +#define VMX_AR_TYPE_BUSY_64_TSS 11 +#define VMX_AR_TYPE_BUSY_32_TSS 11 +#define VMX_AR_TYPE_BUSY_16_TSS 3 +#define VMX_AR_TYPE_LDT 2 + +#define VMX_AR_UNUSABLE_MASK (1 << 16) +#define VMX_AR_S_MASK (1 << 4) +#define VMX_AR_P_MASK (1 << 7) +#define VMX_AR_L_MASK (1 << 13) +#define VMX_AR_DB_MASK (1 << 14) +#define VMX_AR_G_MASK (1 << 15) +#define VMX_AR_DPL_SHIFT 5 +#define VMX_AR_DPL(ar) (((ar) >> VMX_AR_DPL_SHIFT) & 3) + +#define VMX_AR_RESERVD_MASK 0xfffe0f00 #define TSS_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 0) #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 1) diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index f36d56bd7632..f0412c50c47b 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -27,6 +27,8 @@ #define HV_X64_MSR_VP_RUNTIME_AVAILABLE (1 << 0) /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) +/* Partition reference TSC MSR is available */ +#define HV_X64_MSR_REFERENCE_TSC_AVAILABLE (1 << 9) /* A partition's reference time stamp counter (TSC) page */ #define HV_X64_MSR_REFERENCE_TSC 0x40000021 diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h index 0e8a973de9ee..40836a9a7250 100644 --- a/arch/x86/include/uapi/asm/sigcontext.h +++ b/arch/x86/include/uapi/asm/sigcontext.h @@ -177,24 +177,9 @@ struct sigcontext { __u64 rip; __u64 eflags; /* RFLAGS */ __u16 cs; - - /* - * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"), - * Linux saved and restored fs and gs in these slots. This - * was counterproductive, as fsbase and gsbase were never - * saved, so arch_prctl was presumably unreliable. - * - * If these slots are ever needed for any other purpose, there - * is some risk that very old 64-bit binaries could get - * confused. I doubt that many such binaries still work, - * though, since the same patch in 2.5.64 also removed the - * 64-bit set_thread_area syscall, so it appears that there is - * no TLS API that works in both pre- and post-2.5.64 kernels. - */ - __u16 __pad2; /* Was gs. */ - __u16 __pad1; /* Was fs. */ - - __u16 ss; + __u16 gs; + __u16 fs; + __u16 __pad0; __u64 err; __u64 trapno; __u64 oldmask; diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 1fe92181ee9e..37fee272618f 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -58,6 +58,7 @@ #define EXIT_REASON_INVALID_STATE 33 #define EXIT_REASON_MSR_LOAD_FAIL 34 #define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_MONITOR_TRAP_FLAG 37 #define EXIT_REASON_MONITOR_INSTRUCTION 39 #define EXIT_REASON_PAUSE_INSTRUCTION 40 #define EXIT_REASON_MCE_DURING_VMENTRY 41 @@ -106,6 +107,7 @@ { EXIT_REASON_MSR_READ, "MSR_READ" }, \ { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ + { EXIT_REASON_MONITOR_TRAP_FLAG, "MONITOR_TRAP_FLAG" }, \ { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \ { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index dcb52850a28f..cde732c1b495 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1424,7 +1424,7 @@ static inline void __x2apic_disable(void) { u64 msr; - if (cpu_has_apic) + if (!cpu_has_apic) return; rdmsrl(MSR_IA32_APICBASE, msr); @@ -1483,10 +1483,13 @@ void x2apic_setup(void) static __init void x2apic_disable(void) { - u32 x2apic_id; + u32 x2apic_id, state = x2apic_state; - if (x2apic_state != X2APIC_ON) - goto out; + x2apic_mode = 0; + x2apic_state = X2APIC_DISABLED; + + if (state != X2APIC_ON) + return; x2apic_id = read_apic_id(); if (x2apic_id >= 255) @@ -1494,9 +1497,6 @@ static __init void x2apic_disable(void) __x2apic_disable(); register_lapic_address(mp_lapic_addr); -out: - x2apic_state = X2APIC_DISABLED; - x2apic_mode = 0; } static __init void x2apic_enable(void) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 845dc0df2002..206052e55517 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -943,7 +943,7 @@ static bool mp_check_pin_attr(int irq, struct irq_alloc_info *info) */ if (irq < nr_legacy_irqs() && data->count == 1) { if (info->ioapic_trigger != data->trigger) - mp_register_handler(irq, data->trigger); + mp_register_handler(irq, info->ioapic_trigger); data->entry.trigger = data->trigger = info->ioapic_trigger; data->entry.polarity = data->polarity = info->ioapic_polarity; } diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index f813261d9740..2683f36e4e0a 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -322,7 +322,7 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, irq_data->chip = &lapic_controller; irq_data->chip_data = data; irq_data->hwirq = virq + i; - err = assign_irq_vector_policy(virq, irq_data->node, data, + err = assign_irq_vector_policy(virq + i, irq_data->node, data, info); if (err) goto error; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 922c5e0cea4c..cb9e5df42dd2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1410,7 +1410,7 @@ void cpu_init(void) load_sp0(t, ¤t->thread); set_tss_desc(cpu, t); load_TR_desc(); - load_LDT(&init_mm.context); + load_mm_ldt(&init_mm); clear_all_debug_regs(); dbg_restore_debug_regs(); @@ -1459,7 +1459,7 @@ void cpu_init(void) load_sp0(t, thread); set_tss_desc(cpu, t); load_TR_desc(); - load_LDT(&init_mm.context); + load_mm_ldt(&init_mm); t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index aad4bd84b475..f794bfa3c138 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -18,6 +18,7 @@ #include <linux/efi.h> #include <linux/interrupt.h> #include <linux/irq.h> +#include <linux/kexec.h> #include <asm/processor.h> #include <asm/hypervisor.h> #include <asm/hyperv.h> @@ -28,10 +29,14 @@ #include <asm/i8259.h> #include <asm/apic.h> #include <asm/timer.h> +#include <asm/reboot.h> struct ms_hyperv_info ms_hyperv; EXPORT_SYMBOL_GPL(ms_hyperv); +static void (*hv_kexec_handler)(void); +static void (*hv_crash_handler)(struct pt_regs *regs); + #if IS_ENABLED(CONFIG_HYPERV) static void (*vmbus_handler)(void); @@ -67,8 +72,47 @@ void hv_remove_vmbus_irq(void) } EXPORT_SYMBOL_GPL(hv_setup_vmbus_irq); EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq); + +void hv_setup_kexec_handler(void (*handler)(void)) +{ + hv_kexec_handler = handler; +} +EXPORT_SYMBOL_GPL(hv_setup_kexec_handler); + +void hv_remove_kexec_handler(void) +{ + hv_kexec_handler = NULL; +} +EXPORT_SYMBOL_GPL(hv_remove_kexec_handler); + +void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)) +{ + hv_crash_handler = handler; +} +EXPORT_SYMBOL_GPL(hv_setup_crash_handler); + +void hv_remove_crash_handler(void) +{ + hv_crash_handler = NULL; +} +EXPORT_SYMBOL_GPL(hv_remove_crash_handler); #endif +static void hv_machine_shutdown(void) +{ + if (kexec_in_progress && hv_kexec_handler) + hv_kexec_handler(); + native_machine_shutdown(); +} + +static void hv_machine_crash_shutdown(struct pt_regs *regs) +{ + if (hv_crash_handler) + hv_crash_handler(regs); + native_machine_crash_shutdown(regs); +} + + static uint32_t __init ms_hyperv_platform(void) { u32 eax; @@ -114,6 +158,7 @@ static void __init ms_hyperv_init_platform(void) * Extract the features and hints */ ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES); + ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES); ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", @@ -141,6 +186,8 @@ static void __init ms_hyperv_init_platform(void) no_timer_check = 1; #endif + machine_ops.shutdown = hv_machine_shutdown; + machine_ops.crash_shutdown = hv_machine_crash_shutdown; } const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 3658de47900f..9469dfa55607 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2179,21 +2179,25 @@ static unsigned long get_segment_base(unsigned int segment) int idx = segment >> 3; if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) { + struct ldt_struct *ldt; + if (idx > LDT_ENTRIES) return 0; - if (idx > current->active_mm->context.size) + /* IRQs are off, so this synchronizes with smp_store_release */ + ldt = lockless_dereference(current->active_mm->context.ldt); + if (!ldt || idx > ldt->size) return 0; - desc = current->active_mm->context.ldt; + desc = &ldt->entries[idx]; } else { if (idx > GDT_ENTRIES) return 0; - desc = raw_cpu_ptr(gdt_page.gdt); + desc = raw_cpu_ptr(gdt_page.gdt) + idx; } - return get_desc_base(desc + idx); + return get_desc_base(desc); } #ifdef CONFIG_COMPAT diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index b9826a981fb2..6326ae24e4d5 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2534,7 +2534,7 @@ static int intel_pmu_cpu_prepare(int cpu) if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) { cpuc->shared_regs = allocate_shared_regs(cpu); if (!cpuc->shared_regs) - return NOTIFY_BAD; + goto err; } if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { @@ -2542,18 +2542,27 @@ static int intel_pmu_cpu_prepare(int cpu) cpuc->constraint_list = kzalloc(sz, GFP_KERNEL); if (!cpuc->constraint_list) - return NOTIFY_BAD; + goto err_shared_regs; cpuc->excl_cntrs = allocate_excl_cntrs(cpu); - if (!cpuc->excl_cntrs) { - kfree(cpuc->constraint_list); - kfree(cpuc->shared_regs); - return NOTIFY_BAD; - } + if (!cpuc->excl_cntrs) + goto err_constraint_list; + cpuc->excl_thread_id = 0; } return NOTIFY_OK; + +err_constraint_list: + kfree(cpuc->constraint_list); + cpuc->constraint_list = NULL; + +err_shared_regs: + kfree(cpuc->shared_regs); + cpuc->shared_regs = NULL; + +err: + return NOTIFY_BAD; } static void intel_pmu_cpu_starting(int cpu) diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index 63eb68b73589..377e8f8ed391 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -1255,7 +1255,7 @@ static inline void cqm_pick_event_reader(int cpu) cpumask_set_cpu(cpu, &cqm_cpumask); } -static void intel_cqm_cpu_prepare(unsigned int cpu) +static void intel_cqm_cpu_starting(unsigned int cpu) { struct intel_pqr_state *state = &per_cpu(pqr_state, cpu); struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -1296,13 +1296,11 @@ static int intel_cqm_cpu_notifier(struct notifier_block *nb, unsigned int cpu = (unsigned long)hcpu; switch (action & ~CPU_TASKS_FROZEN) { - case CPU_UP_PREPARE: - intel_cqm_cpu_prepare(cpu); - break; case CPU_DOWN_PREPARE: intel_cqm_cpu_exit(cpu); break; case CPU_STARTING: + intel_cqm_cpu_starting(cpu); cqm_pick_event_reader(cpu); break; } @@ -1373,7 +1371,7 @@ static int __init intel_cqm_init(void) goto out; for_each_online_cpu(i) { - intel_cqm_cpu_prepare(i); + intel_cqm_cpu_starting(i); cqm_pick_event_reader(i); } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 79de954626fd..d25097c3fc1d 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -270,7 +270,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) dst_fpu->fpregs_active = 0; dst_fpu->last_cpu = -1; - if (src_fpu->fpstate_active) + if (src_fpu->fpstate_active && cpu_has_fpu) fpu_copy(dst_fpu, src_fpu); return 0; diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 1e173f6285c7..d14e9ac3235a 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -40,7 +40,12 @@ static void fpu__init_cpu_generic(void) write_cr0(cr0); /* Flush out any pending x87 state: */ - asm volatile ("fninit"); +#ifdef CONFIG_MATH_EMULATION + if (!cpu_has_fpu) + fpstate_init_soft(¤t->thread.fpu.state.soft); + else +#endif + asm volatile ("fninit"); } /* diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index c37886d759cc..2bcc0525f1c1 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -12,6 +12,7 @@ #include <linux/string.h> #include <linux/mm.h> #include <linux/smp.h> +#include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/uaccess.h> @@ -20,82 +21,82 @@ #include <asm/mmu_context.h> #include <asm/syscalls.h> -#ifdef CONFIG_SMP +/* context.lock is held for us, so we don't need any locking. */ static void flush_ldt(void *current_mm) { - if (current->active_mm == current_mm) - load_LDT(¤t->active_mm->context); + mm_context_t *pc; + + if (current->active_mm != current_mm) + return; + + pc = ¤t->active_mm->context; + set_ldt(pc->ldt->entries, pc->ldt->size); } -#endif -static int alloc_ldt(mm_context_t *pc, int mincount, int reload) +/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */ +static struct ldt_struct *alloc_ldt_struct(int size) { - void *oldldt, *newldt; - int oldsize; - - if (mincount <= pc->size) - return 0; - oldsize = pc->size; - mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) & - (~(PAGE_SIZE / LDT_ENTRY_SIZE - 1)); - if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount * LDT_ENTRY_SIZE); + struct ldt_struct *new_ldt; + int alloc_size; + + if (size > LDT_ENTRIES) + return NULL; + + new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL); + if (!new_ldt) + return NULL; + + BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct)); + alloc_size = size * LDT_ENTRY_SIZE; + + /* + * Xen is very picky: it requires a page-aligned LDT that has no + * trailing nonzero bytes in any page that contains LDT descriptors. + * Keep it simple: zero the whole allocation and never allocate less + * than PAGE_SIZE. + */ + if (alloc_size > PAGE_SIZE) + new_ldt->entries = vzalloc(alloc_size); else - newldt = (void *)__get_free_page(GFP_KERNEL); - - if (!newldt) - return -ENOMEM; + new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL); - if (oldsize) - memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE); - oldldt = pc->ldt; - memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, - (mincount - oldsize) * LDT_ENTRY_SIZE); + if (!new_ldt->entries) { + kfree(new_ldt); + return NULL; + } - paravirt_alloc_ldt(newldt, mincount); + new_ldt->size = size; + return new_ldt; +} -#ifdef CONFIG_X86_64 - /* CHECKME: Do we really need this ? */ - wmb(); -#endif - pc->ldt = newldt; - wmb(); - pc->size = mincount; - wmb(); - - if (reload) { -#ifdef CONFIG_SMP - preempt_disable(); - load_LDT(pc); - if (!cpumask_equal(mm_cpumask(current->mm), - cpumask_of(smp_processor_id()))) - smp_call_function(flush_ldt, current->mm, 1); - preempt_enable(); -#else - load_LDT(pc); -#endif - } - if (oldsize) { - paravirt_free_ldt(oldldt, oldsize); - if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - put_page(virt_to_page(oldldt)); - } - return 0; +/* After calling this, the LDT is immutable. */ +static void finalize_ldt_struct(struct ldt_struct *ldt) +{ + paravirt_alloc_ldt(ldt->entries, ldt->size); } -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) +/* context.lock is held */ +static void install_ldt(struct mm_struct *current_mm, + struct ldt_struct *ldt) { - int err = alloc_ldt(new, old->size, 0); - int i; + /* Synchronizes with lockless_dereference in load_mm_ldt. */ + smp_store_release(¤t_mm->context.ldt, ldt); + + /* Activate the LDT for all CPUs using current_mm. */ + on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true); +} - if (err < 0) - return err; +static void free_ldt_struct(struct ldt_struct *ldt) +{ + if (likely(!ldt)) + return; - for (i = 0; i < old->size; i++) - write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); - return 0; + paravirt_free_ldt(ldt->entries, ldt->size); + if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE) + vfree(ldt->entries); + else + kfree(ldt->entries); + kfree(ldt); } /* @@ -104,17 +105,37 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old) */ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + struct ldt_struct *new_ldt; struct mm_struct *old_mm; int retval = 0; mutex_init(&mm->context.lock); - mm->context.size = 0; old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - mutex_lock(&old_mm->context.lock); - retval = copy_ldt(&mm->context, &old_mm->context); - mutex_unlock(&old_mm->context.lock); + if (!old_mm) { + mm->context.ldt = NULL; + return 0; } + + mutex_lock(&old_mm->context.lock); + if (!old_mm->context.ldt) { + mm->context.ldt = NULL; + goto out_unlock; + } + + new_ldt = alloc_ldt_struct(old_mm->context.ldt->size); + if (!new_ldt) { + retval = -ENOMEM; + goto out_unlock; + } + + memcpy(new_ldt->entries, old_mm->context.ldt->entries, + new_ldt->size * LDT_ENTRY_SIZE); + finalize_ldt_struct(new_ldt); + + mm->context.ldt = new_ldt; + +out_unlock: + mutex_unlock(&old_mm->context.lock); return retval; } @@ -125,53 +146,47 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) */ void destroy_context(struct mm_struct *mm) { - if (mm->context.size) { -#ifdef CONFIG_X86_32 - /* CHECKME: Can this ever happen ? */ - if (mm == current->active_mm) - clear_LDT(); -#endif - paravirt_free_ldt(mm->context.ldt, mm->context.size); - if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - put_page(virt_to_page(mm->context.ldt)); - mm->context.size = 0; - } + free_ldt_struct(mm->context.ldt); + mm->context.ldt = NULL; } static int read_ldt(void __user *ptr, unsigned long bytecount) { - int err; + int retval; unsigned long size; struct mm_struct *mm = current->mm; - if (!mm->context.size) - return 0; + mutex_lock(&mm->context.lock); + + if (!mm->context.ldt) { + retval = 0; + goto out_unlock; + } + if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; - mutex_lock(&mm->context.lock); - size = mm->context.size * LDT_ENTRY_SIZE; + size = mm->context.ldt->size * LDT_ENTRY_SIZE; if (size > bytecount) size = bytecount; - err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; - mutex_unlock(&mm->context.lock); - if (err < 0) - goto error_return; + if (copy_to_user(ptr, mm->context.ldt->entries, size)) { + retval = -EFAULT; + goto out_unlock; + } + if (size != bytecount) { - /* zero-fill the rest */ - if (clear_user(ptr + size, bytecount - size) != 0) { - err = -EFAULT; - goto error_return; + /* Zero-fill the rest and pretend we read bytecount bytes. */ + if (clear_user(ptr + size, bytecount - size)) { + retval = -EFAULT; + goto out_unlock; } } - return bytecount; -error_return: - return err; + retval = bytecount; + +out_unlock: + mutex_unlock(&mm->context.lock); + return retval; } static int read_default_ldt(void __user *ptr, unsigned long bytecount) @@ -195,6 +210,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) struct desc_struct ldt; int error; struct user_desc ldt_info; + int oldsize, newsize; + struct ldt_struct *new_ldt, *old_ldt; error = -EINVAL; if (bytecount != sizeof(ldt_info)) @@ -213,34 +230,39 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) goto out; } - mutex_lock(&mm->context.lock); - if (ldt_info.entry_number >= mm->context.size) { - error = alloc_ldt(¤t->mm->context, - ldt_info.entry_number + 1, 1); - if (error < 0) - goto out_unlock; - } - - /* Allow LDTs to be cleared by the user. */ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || LDT_empty(&ldt_info)) { - memset(&ldt, 0, sizeof(ldt)); - goto install; + if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) || + LDT_empty(&ldt_info)) { + /* The user wants to clear the entry. */ + memset(&ldt, 0, sizeof(ldt)); + } else { + if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) { + error = -EINVAL; + goto out; } + + fill_ldt(&ldt, &ldt_info); + if (oldmode) + ldt.avl = 0; } - if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) { - error = -EINVAL; + mutex_lock(&mm->context.lock); + + old_ldt = mm->context.ldt; + oldsize = old_ldt ? old_ldt->size : 0; + newsize = max((int)(ldt_info.entry_number + 1), oldsize); + + error = -ENOMEM; + new_ldt = alloc_ldt_struct(newsize); + if (!new_ldt) goto out_unlock; - } - fill_ldt(&ldt, &ldt_info); - if (oldmode) - ldt.avl = 0; + if (old_ldt) + memcpy(new_ldt->entries, old_ldt->entries, oldsize * LDT_ENTRY_SIZE); + new_ldt->entries[ldt_info.entry_number] = ldt; + finalize_ldt_struct(new_ldt); - /* Install the new entry ... */ -install: - write_ldt_entry(mm->context.ldt, ldt_info.entry_number, &ldt); + install_ldt(mm, new_ldt); + free_ldt_struct(old_ldt); error = 0; out_unlock: diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 397688beed4b..c27cad726765 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -408,6 +408,7 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) static void mwait_idle(void) { if (!current_set_polling_and_test()) { + trace_cpu_idle_rcuidle(1, smp_processor_id()); if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) { smp_mb(); /* quirk */ clflush((void *)¤t_thread_info()->flags); @@ -419,6 +420,7 @@ static void mwait_idle(void) __sti_mwait(0, 0); else local_irq_enable(); + trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } else { local_irq_enable(); } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 71d7849a07f7..f6b916387590 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -121,11 +121,11 @@ void __show_regs(struct pt_regs *regs, int all) void release_thread(struct task_struct *dead_task) { if (dead_task->mm) { - if (dead_task->mm->context.size) { + if (dead_task->mm->context.ldt) { pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n", dead_task->comm, dead_task->mm->context.ldt, - dead_task->mm->context.size); + dead_task->mm->context.ldt->size); BUG(); } } diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 206996c1669d..71820c42b6ce 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -93,8 +93,15 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) COPY(r15); #endif /* CONFIG_X86_64 */ +#ifdef CONFIG_X86_32 COPY_SEG_CPL3(cs); COPY_SEG_CPL3(ss); +#else /* !CONFIG_X86_32 */ + /* Kernel saves and restores only the CS segment register on signals, + * which is the bare minimum needed to allow mixed 32/64-bit code. + * App's signal handler can save/restore other segments if needed. */ + COPY_SEG_CPL3(cs); +#endif /* CONFIG_X86_32 */ get_user_ex(tmpflags, &sc->flags); regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); @@ -154,9 +161,8 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, #else /* !CONFIG_X86_32 */ put_user_ex(regs->flags, &sc->flags); put_user_ex(regs->cs, &sc->cs); - put_user_ex(0, &sc->__pad2); - put_user_ex(0, &sc->__pad1); - put_user_ex(regs->ss, &sc->ss); + put_user_ex(0, &sc->gs); + put_user_ex(0, &sc->fs); #endif /* CONFIG_X86_32 */ put_user_ex(fpstate, &sc->fpstate); @@ -451,19 +457,9 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, regs->sp = (unsigned long)frame; - /* - * Set up the CS and SS registers to run signal handlers in - * 64-bit mode, even if the handler happens to be interrupting - * 32-bit or 16-bit code. - * - * SS is subtle. In 64-bit mode, we don't need any particular - * SS descriptor, but we do need SS to be valid. It's possible - * that the old SS is entirely bogus -- this can happen if the - * signal we're trying to deliver is #GP or #SS caused by a bad - * SS value. - */ + /* Set up the CS register to run signal handlers in 64-bit mode, + even if the handler happens to be interrupting 32-bit code. */ regs->cs = __USER_CS; - regs->ss = __USER_DS; return 0; } diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 9b4d51d0c0d0..0ccb53a9fcd9 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -5,6 +5,7 @@ #include <linux/mm.h> #include <linux/ptrace.h> #include <asm/desc.h> +#include <asm/mmu_context.h> unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs) { @@ -27,13 +28,14 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re struct desc_struct *desc; unsigned long base; - seg &= ~7UL; + seg >>= 3; mutex_lock(&child->mm->context.lock); - if (unlikely((seg >> 3) >= child->mm->context.size)) + if (unlikely(!child->mm->context.ldt || + seg >= child->mm->context.ldt->size)) addr = -1L; /* bogus selector, access would fault */ else { - desc = child->mm->context.ldt + seg; + desc = &child->mm->context.ldt->entries[seg]; base = get_desc_base(desc); /* 16-bit code segment? */ diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 67d215cb8953..a1ff508bb423 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -12,7 +12,9 @@ kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ - i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o + i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \ + hyperv.o + kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o kvm-intel-y += vmx.o pmu_intel.o kvm-amd-y += svm.o pmu_amd.o diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c new file mode 100644 index 000000000000..a8160d2ae362 --- /dev/null +++ b/arch/x86/kvm/hyperv.c @@ -0,0 +1,377 @@ +/* + * KVM Microsoft Hyper-V emulation + * + * derived from arch/x86/kvm/x86.c + * + * Copyright (C) 2006 Qumranet, Inc. + * Copyright (C) 2008 Qumranet, Inc. + * Copyright IBM Corporation, 2008 + * Copyright 2010 Red Hat, Inc. and/or its affiliates. + * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com> + * + * Authors: + * Avi Kivity <avi@qumranet.com> + * Yaniv Kamay <yaniv@qumranet.com> + * Amit Shah <amit.shah@qumranet.com> + * Ben-Ami Yassour <benami@il.ibm.com> + * Andrey Smetanin <asmetanin@virtuozzo.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "x86.h" +#include "lapic.h" +#include "hyperv.h" + +#include <linux/kvm_host.h> +#include <trace/events/kvm.h> + +#include "trace.h" + +static bool kvm_hv_msr_partition_wide(u32 msr) +{ + bool r = false; + + switch (msr) { + case HV_X64_MSR_GUEST_OS_ID: + case HV_X64_MSR_HYPERCALL: + case HV_X64_MSR_REFERENCE_TSC: + case HV_X64_MSR_TIME_REF_COUNT: + case HV_X64_MSR_CRASH_CTL: + case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: + r = true; + break; + } + + return r; +} + +static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu, + u32 index, u64 *pdata) +{ + struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; + + if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) + return -EINVAL; + + *pdata = hv->hv_crash_param[index]; + return 0; +} + +static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata) +{ + struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; + + *pdata = hv->hv_crash_ctl; + return 0; +} + +static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host) +{ + struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; + + if (host) + hv->hv_crash_ctl = data & HV_X64_MSR_CRASH_CTL_NOTIFY; + + if (!host && (data & HV_X64_MSR_CRASH_CTL_NOTIFY)) { + + vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n", + hv->hv_crash_param[0], + hv->hv_crash_param[1], + hv->hv_crash_param[2], + hv->hv_crash_param[3], + hv->hv_crash_param[4]); + + /* Send notification about crash to user space */ + kvm_make_request(KVM_REQ_HV_CRASH, vcpu); + } + + return 0; +} + +static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, + u32 index, u64 data) +{ + struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; + + if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) + return -EINVAL; + + hv->hv_crash_param[index] = data; + return 0; +} + +static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, + bool host) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_hv *hv = &kvm->arch.hyperv; + + switch (msr) { + case HV_X64_MSR_GUEST_OS_ID: + hv->hv_guest_os_id = data; + /* setting guest os id to zero disables hypercall page */ + if (!hv->hv_guest_os_id) + hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE; + break; + case HV_X64_MSR_HYPERCALL: { + u64 gfn; + unsigned long addr; + u8 instructions[4]; + + /* if guest os id is not set hypercall should remain disabled */ + if (!hv->hv_guest_os_id) + break; + if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) { + hv->hv_hypercall = data; + break; + } + gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT; + addr = gfn_to_hva(kvm, gfn); + if (kvm_is_error_hva(addr)) + return 1; + kvm_x86_ops->patch_hypercall(vcpu, instructions); + ((unsigned char *)instructions)[3] = 0xc3; /* ret */ + if (__copy_to_user((void __user *)addr, instructions, 4)) + return 1; + hv->hv_hypercall = data; + mark_page_dirty(kvm, gfn); + break; + } + case HV_X64_MSR_REFERENCE_TSC: { + u64 gfn; + HV_REFERENCE_TSC_PAGE tsc_ref; + + memset(&tsc_ref, 0, sizeof(tsc_ref)); + hv->hv_tsc_page = data; + if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) + break; + gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; + if (kvm_write_guest( + kvm, + gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT, + &tsc_ref, sizeof(tsc_ref))) + return 1; + mark_page_dirty(kvm, gfn); + break; + } + case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: + return kvm_hv_msr_set_crash_data(vcpu, + msr - HV_X64_MSR_CRASH_P0, + data); + case HV_X64_MSR_CRASH_CTL: + return kvm_hv_msr_set_crash_ctl(vcpu, data, host); + default: + vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", + msr, data); + return 1; + } + return 0; +} + +static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data) +{ + struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; + + switch (msr) { + case HV_X64_MSR_APIC_ASSIST_PAGE: { + u64 gfn; + unsigned long addr; + + if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) { + hv->hv_vapic = data; + if (kvm_lapic_enable_pv_eoi(vcpu, 0)) + return 1; + break; + } + gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT; + addr = kvm_vcpu_gfn_to_hva(vcpu, gfn); + if (kvm_is_error_hva(addr)) + return 1; + if (__clear_user((void __user *)addr, PAGE_SIZE)) + return 1; + hv->hv_vapic = data; + kvm_vcpu_mark_page_dirty(vcpu, gfn); + if (kvm_lapic_enable_pv_eoi(vcpu, + gfn_to_gpa(gfn) | KVM_MSR_ENABLED)) + return 1; + break; + } + case HV_X64_MSR_EOI: + return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data); + case HV_X64_MSR_ICR: + return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data); + case HV_X64_MSR_TPR: + return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); + default: + vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", + msr, data); + return 1; + } + + return 0; +} + +static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) +{ + u64 data = 0; + struct kvm *kvm = vcpu->kvm; + struct kvm_hv *hv = &kvm->arch.hyperv; + + switch (msr) { + case HV_X64_MSR_GUEST_OS_ID: + data = hv->hv_guest_os_id; + break; + case HV_X64_MSR_HYPERCALL: + data = hv->hv_hypercall; + break; + case HV_X64_MSR_TIME_REF_COUNT: { + data = + div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); + break; + } + case HV_X64_MSR_REFERENCE_TSC: + data = hv->hv_tsc_page; + break; + case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: + return kvm_hv_msr_get_crash_data(vcpu, + msr - HV_X64_MSR_CRASH_P0, + pdata); + case HV_X64_MSR_CRASH_CTL: + return kvm_hv_msr_get_crash_ctl(vcpu, pdata); + default: + vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); + return 1; + } + + *pdata = data; + return 0; +} + +static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) +{ + u64 data = 0; + struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; + + switch (msr) { + case HV_X64_MSR_VP_INDEX: { + int r; + struct kvm_vcpu *v; + + kvm_for_each_vcpu(r, v, vcpu->kvm) { + if (v == vcpu) { + data = r; + break; + } + } + break; + } + case HV_X64_MSR_EOI: + return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata); + case HV_X64_MSR_ICR: + return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); + case HV_X64_MSR_TPR: + return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); + case HV_X64_MSR_APIC_ASSIST_PAGE: + data = hv->hv_vapic; + break; + default: + vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); + return 1; + } + *pdata = data; + return 0; +} + +int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) +{ + if (kvm_hv_msr_partition_wide(msr)) { + int r; + + mutex_lock(&vcpu->kvm->lock); + r = kvm_hv_set_msr_pw(vcpu, msr, data, host); + mutex_unlock(&vcpu->kvm->lock); + return r; + } else + return kvm_hv_set_msr(vcpu, msr, data); +} + +int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) +{ + if (kvm_hv_msr_partition_wide(msr)) { + int r; + + mutex_lock(&vcpu->kvm->lock); + r = kvm_hv_get_msr_pw(vcpu, msr, pdata); + mutex_unlock(&vcpu->kvm->lock); + return r; + } else + return kvm_hv_get_msr(vcpu, msr, pdata); +} + +bool kvm_hv_hypercall_enabled(struct kvm *kvm) +{ + return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE; +} + +int kvm_hv_hypercall(struct kvm_vcpu *vcpu) +{ + u64 param, ingpa, outgpa, ret; + uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; + bool fast, longmode; + + /* + * hypercall generates UD from non zero cpl and real mode + * per HYPER-V spec + */ + if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 0; + } + + longmode = is_64_bit_mode(vcpu); + + if (!longmode) { + param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | + (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff); + ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) | + (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff); + outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) | + (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff); + } +#ifdef CONFIG_X86_64 + else { + param = kvm_register_read(vcpu, VCPU_REGS_RCX); + ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX); + outgpa = kvm_register_read(vcpu, VCPU_REGS_R8); + } +#endif + + code = param & 0xffff; + fast = (param >> 16) & 0x1; + rep_cnt = (param >> 32) & 0xfff; + rep_idx = (param >> 48) & 0xfff; + + trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa); + + switch (code) { + case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: + kvm_vcpu_on_spin(vcpu); + break; + default: + res = HV_STATUS_INVALID_HYPERCALL_CODE; + break; + } + + ret = res | (((u64)rep_done & 0xfff) << 32); + if (longmode) { + kvm_register_write(vcpu, VCPU_REGS_RAX, ret); + } else { + kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32); + kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff); + } + + return 1; +} diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h new file mode 100644 index 000000000000..c7bce559f67b --- /dev/null +++ b/arch/x86/kvm/hyperv.h @@ -0,0 +1,32 @@ +/* + * KVM Microsoft Hyper-V emulation + * + * derived from arch/x86/kvm/x86.c + * + * Copyright (C) 2006 Qumranet, Inc. + * Copyright (C) 2008 Qumranet, Inc. + * Copyright IBM Corporation, 2008 + * Copyright 2010 Red Hat, Inc. and/or its affiliates. + * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com> + * + * Authors: + * Avi Kivity <avi@qumranet.com> + * Yaniv Kamay <yaniv@qumranet.com> + * Amit Shah <amit.shah@qumranet.com> + * Ben-Ami Yassour <benami@il.ibm.com> + * Andrey Smetanin <asmetanin@virtuozzo.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef __ARCH_X86_KVM_HYPERV_H__ +#define __ARCH_X86_KVM_HYPERV_H__ + +int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host); +int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); +bool kvm_hv_hypercall_enabled(struct kvm *kvm); +int kvm_hv_hypercall(struct kvm_vcpu *vcpu); + +#endif diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index fef922ff2635..7cc2360f1848 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -651,15 +651,10 @@ fail_unlock: return NULL; } -void kvm_destroy_pic(struct kvm *kvm) +void kvm_destroy_pic(struct kvm_pic *vpic) { - struct kvm_pic *vpic = kvm->arch.vpic; - - if (vpic) { - kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_master); - kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_slave); - kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_eclr); - kvm->arch.vpic = NULL; - kfree(vpic); - } + kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master); + kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave); + kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr); + kfree(vpic); } diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index ad68c73008c5..3d782a2c336a 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -74,7 +74,7 @@ struct kvm_pic { }; struct kvm_pic *kvm_create_pic(struct kvm *kvm); -void kvm_destroy_pic(struct kvm *kvm); +void kvm_destroy_pic(struct kvm_pic *vpic); int kvm_pic_read_irq(struct kvm *kvm); void kvm_pic_update_irq(struct kvm_pic *s); @@ -85,11 +85,11 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) static inline int irqchip_in_kernel(struct kvm *kvm) { - int ret; + struct kvm_pic *vpic = pic_irqchip(kvm); - ret = (pic_irqchip(kvm) != NULL); + /* Read vpic before kvm->irq_routing. */ smp_rmb(); - return ret; + return vpic != NULL; } void kvm_pic_reset(struct kvm_kpic_state *s); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 2a5ca97c263b..9a3e342e3cda 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1900,8 +1900,9 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) return; - kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, - sizeof(u32)); + if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, + sizeof(u32))) + return; apic_set_tpr(vcpu->arch.apic, data & 0xff); } diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 71952748222a..764037991d26 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -91,7 +91,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) { - return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE; + return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE; } int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 44171462bd2a..fb16a8ea3dee 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -357,12 +357,6 @@ static u64 __get_spte_lockless(u64 *sptep) { return ACCESS_ONCE(*sptep); } - -static bool __check_direct_spte_mmio_pf(u64 spte) -{ - /* It is valid if the spte is zapped. */ - return spte == 0ull; -} #else union split_spte { struct { @@ -478,23 +472,6 @@ retry: return spte.spte; } - -static bool __check_direct_spte_mmio_pf(u64 spte) -{ - union split_spte sspte = (union split_spte)spte; - u32 high_mmio_mask = shadow_mmio_mask >> 32; - - /* It is valid if the spte is zapped. */ - if (spte == 0ull) - return true; - - /* It is valid if the spte is being zapped. */ - if (sspte.spte_low == 0ull && - (sspte.spte_high & high_mmio_mask) == high_mmio_mask) - return true; - - return false; -} #endif static bool spte_is_locklessly_modifiable(u64 spte) @@ -3291,54 +3268,89 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception); } -static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct) +static bool +__is_rsvd_bits_set(struct rsvd_bits_validate *rsvd_check, u64 pte, int level) { - if (direct) - return vcpu_match_mmio_gpa(vcpu, addr); + int bit7 = (pte >> 7) & 1, low6 = pte & 0x3f; - return vcpu_match_mmio_gva(vcpu, addr); + return (pte & rsvd_check->rsvd_bits_mask[bit7][level-1]) | + ((rsvd_check->bad_mt_xwr & (1ull << low6)) != 0); } +static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level) +{ + return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level); +} -/* - * On direct hosts, the last spte is only allows two states - * for mmio page fault: - * - It is the mmio spte - * - It is zapped or it is being zapped. - * - * This function completely checks the spte when the last spte - * is not the mmio spte. - */ -static bool check_direct_spte_mmio_pf(u64 spte) +static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level) { - return __check_direct_spte_mmio_pf(spte); + return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level); } -static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr) +static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct) +{ + if (direct) + return vcpu_match_mmio_gpa(vcpu, addr); + + return vcpu_match_mmio_gva(vcpu, addr); +} + +/* return true if reserved bit is detected on spte. */ +static bool +walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { struct kvm_shadow_walk_iterator iterator; - u64 spte = 0ull; + u64 sptes[PT64_ROOT_LEVEL], spte = 0ull; + int root, leaf; + bool reserved = false; if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) - return spte; + goto exit; walk_shadow_page_lockless_begin(vcpu); - for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) + + for (shadow_walk_init(&iterator, vcpu, addr), root = iterator.level; + shadow_walk_okay(&iterator); + __shadow_walk_next(&iterator, spte)) { + leaf = iterator.level; + spte = mmu_spte_get_lockless(iterator.sptep); + + sptes[leaf - 1] = spte; + if (!is_shadow_present_pte(spte)) break; + + reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte, + leaf); + } + walk_shadow_page_lockless_end(vcpu); - return spte; + if (reserved) { + pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n", + __func__, addr); + while (root >= leaf) { + pr_err("------ spte 0x%llx level %d.\n", + sptes[root - 1], root); + root--; + } + } +exit: + *sptep = spte; + return reserved; } int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) { u64 spte; + bool reserved; if (quickly_check_mmio_pf(vcpu, addr, direct)) return RET_MMIO_PF_EMULATE; - spte = walk_shadow_page_get_mmio_spte(vcpu, addr); + reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte); + if (unlikely(reserved)) + return RET_MMIO_PF_BUG; if (is_mmio_spte(spte)) { gfn_t gfn = get_mmio_spte_gfn(spte); @@ -3356,13 +3368,6 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) } /* - * It's ok if the gva is remapped by other cpus on shadow guest, - * it's a BUG if the gfn is not a mmio page. - */ - if (direct && !check_direct_spte_mmio_pf(spte)) - return RET_MMIO_PF_BUG; - - /* * If the page table is zapped by other cpus, let CPU fault again on * the address. */ @@ -3604,19 +3609,21 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gp #include "paging_tmpl.h" #undef PTTYPE -static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, - struct kvm_mmu *context) +static void +__reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, + struct rsvd_bits_validate *rsvd_check, + int maxphyaddr, int level, bool nx, bool gbpages, + bool pse) { - int maxphyaddr = cpuid_maxphyaddr(vcpu); u64 exb_bit_rsvd = 0; u64 gbpages_bit_rsvd = 0; u64 nonleaf_bit8_rsvd = 0; - context->bad_mt_xwr = 0; + rsvd_check->bad_mt_xwr = 0; - if (!context->nx) + if (!nx) exb_bit_rsvd = rsvd_bits(63, 63); - if (!guest_cpuid_has_gbpages(vcpu)) + if (!gbpages) gbpages_bit_rsvd = rsvd_bits(7, 7); /* @@ -3626,80 +3633,95 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, if (guest_cpuid_is_amd(vcpu)) nonleaf_bit8_rsvd = rsvd_bits(8, 8); - switch (context->root_level) { + switch (level) { case PT32_ROOT_LEVEL: /* no rsvd bits for 2 level 4K page table entries */ - context->rsvd_bits_mask[0][1] = 0; - context->rsvd_bits_mask[0][0] = 0; - context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; + rsvd_check->rsvd_bits_mask[0][1] = 0; + rsvd_check->rsvd_bits_mask[0][0] = 0; + rsvd_check->rsvd_bits_mask[1][0] = + rsvd_check->rsvd_bits_mask[0][0]; - if (!is_pse(vcpu)) { - context->rsvd_bits_mask[1][1] = 0; + if (!pse) { + rsvd_check->rsvd_bits_mask[1][1] = 0; break; } if (is_cpuid_PSE36()) /* 36bits PSE 4MB page */ - context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); + rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); else /* 32 bits PSE 4MB page */ - context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); + rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); break; case PT32E_ROOT_LEVEL: - context->rsvd_bits_mask[0][2] = + rsvd_check->rsvd_bits_mask[0][2] = rsvd_bits(maxphyaddr, 63) | rsvd_bits(5, 8) | rsvd_bits(1, 2); /* PDPTE */ - context->rsvd_bits_mask[0][1] = exb_bit_rsvd | + rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd | rsvd_bits(maxphyaddr, 62); /* PDE */ - context->rsvd_bits_mask[0][0] = exb_bit_rsvd | + rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd | rsvd_bits(maxphyaddr, 62); /* PTE */ - context->rsvd_bits_mask[1][1] = exb_bit_rsvd | + rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd | rsvd_bits(maxphyaddr, 62) | rsvd_bits(13, 20); /* large page */ - context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; + rsvd_check->rsvd_bits_mask[1][0] = + rsvd_check->rsvd_bits_mask[0][0]; break; case PT64_ROOT_LEVEL: - context->rsvd_bits_mask[0][3] = exb_bit_rsvd | - nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51); - context->rsvd_bits_mask[0][2] = exb_bit_rsvd | - nonleaf_bit8_rsvd | gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51); - context->rsvd_bits_mask[0][1] = exb_bit_rsvd | + rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd | + nonleaf_bit8_rsvd | rsvd_bits(7, 7) | + rsvd_bits(maxphyaddr, 51); + rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd | + nonleaf_bit8_rsvd | gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51); - context->rsvd_bits_mask[0][0] = exb_bit_rsvd | + rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd | rsvd_bits(maxphyaddr, 51); - context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3]; - context->rsvd_bits_mask[1][2] = exb_bit_rsvd | + rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd | + rsvd_bits(maxphyaddr, 51); + rsvd_check->rsvd_bits_mask[1][3] = + rsvd_check->rsvd_bits_mask[0][3]; + rsvd_check->rsvd_bits_mask[1][2] = exb_bit_rsvd | gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 29); - context->rsvd_bits_mask[1][1] = exb_bit_rsvd | + rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd | rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20); /* large page */ - context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; + rsvd_check->rsvd_bits_mask[1][0] = + rsvd_check->rsvd_bits_mask[0][0]; break; } } -static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, - struct kvm_mmu *context, bool execonly) +static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, + struct kvm_mmu *context) +{ + __reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check, + cpuid_maxphyaddr(vcpu), context->root_level, + context->nx, guest_cpuid_has_gbpages(vcpu), + is_pse(vcpu)); +} + +static void +__reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, + int maxphyaddr, bool execonly) { - int maxphyaddr = cpuid_maxphyaddr(vcpu); int pte; - context->rsvd_bits_mask[0][3] = + rsvd_check->rsvd_bits_mask[0][3] = rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); - context->rsvd_bits_mask[0][2] = + rsvd_check->rsvd_bits_mask[0][2] = rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6); - context->rsvd_bits_mask[0][1] = + rsvd_check->rsvd_bits_mask[0][1] = rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6); - context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51); + rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51); /* large page */ - context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3]; - context->rsvd_bits_mask[1][2] = + rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3]; + rsvd_check->rsvd_bits_mask[1][2] = rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29); - context->rsvd_bits_mask[1][1] = + rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20); - context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; + rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0]; for (pte = 0; pte < 64; pte++) { int rwx_bits = pte & 7; @@ -3707,10 +3729,64 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, if (mt == 0x2 || mt == 0x3 || mt == 0x7 || rwx_bits == 0x2 || rwx_bits == 0x6 || (rwx_bits == 0x4 && !execonly)) - context->bad_mt_xwr |= (1ull << pte); + rsvd_check->bad_mt_xwr |= (1ull << pte); } } +static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, + struct kvm_mmu *context, bool execonly) +{ + __reset_rsvds_bits_mask_ept(&context->guest_rsvd_check, + cpuid_maxphyaddr(vcpu), execonly); +} + +/* + * the page table on host is the shadow page table for the page + * table in guest or amd nested guest, its mmu features completely + * follow the features in guest. + */ +void +reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) +{ + __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check, + boot_cpu_data.x86_phys_bits, + context->shadow_root_level, context->nx, + guest_cpuid_has_gbpages(vcpu), is_pse(vcpu)); +} +EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask); + +/* + * the direct page table on host, use as much mmu features as + * possible, however, kvm currently does not do execution-protection. + */ +static void +reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, + struct kvm_mmu *context) +{ + if (guest_cpuid_is_amd(vcpu)) + __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check, + boot_cpu_data.x86_phys_bits, + context->shadow_root_level, false, + cpu_has_gbpages, true); + else + __reset_rsvds_bits_mask_ept(&context->shadow_zero_check, + boot_cpu_data.x86_phys_bits, + false); + +} + +/* + * as the comments in reset_shadow_zero_bits_mask() except it + * is the shadow page table for intel nested guest. + */ +static void +reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, + struct kvm_mmu *context, bool execonly) +{ + __reset_rsvds_bits_mask_ept(&context->shadow_zero_check, + boot_cpu_data.x86_phys_bits, execonly); +} + static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, bool ept) { @@ -3889,6 +3965,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) update_permission_bitmask(vcpu, context, false); update_last_pte_bitmap(vcpu, context); + reset_tdp_shadow_zero_bits_mask(vcpu, context); } void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) @@ -3916,6 +3993,7 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) context->base_role.smap_andnot_wp = smap && !is_write_protection(vcpu); context->base_role.smm = is_smm(vcpu); + reset_shadow_zero_bits_mask(vcpu, context); } EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); @@ -3939,6 +4017,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly) update_permission_bitmask(vcpu, context, true); reset_rsvds_bits_mask_ept(vcpu, context, execonly); + reset_ept_shadow_zero_bits_mask(vcpu, context, execonly); } EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu); @@ -4860,28 +4939,6 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) return nr_mmu_pages; } -int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]) -{ - struct kvm_shadow_walk_iterator iterator; - u64 spte; - int nr_sptes = 0; - - if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) - return nr_sptes; - - walk_shadow_page_lockless_begin(vcpu); - for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) { - sptes[iterator.level-1] = spte; - nr_sptes++; - if (!is_shadow_present_pte(spte)) - break; - } - walk_shadow_page_lockless_end(vcpu); - - return nr_sptes; -} -EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy); - void kvm_mmu_destroy(struct kvm_vcpu *vcpu) { kvm_mmu_unload(vcpu); diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 398d21c0f6dd..e4202e41d535 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -50,9 +50,11 @@ static inline u64 rsvd_bits(int s, int e) return ((1ULL << (e - s + 1)) - 1) << s; } -int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask); +void +reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context); + /* * Return values of handle_mmio_page_fault_common: * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index dc0a84a6f309..9e8bf13572e6 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -672,16 +672,16 @@ u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) if (iter.mtrr_disabled) return mtrr_disabled_type(); + /* not contained in any MTRRs. */ + if (type == -1) + return mtrr_default_type(mtrr_state); + /* * We just check one page, partially covered by MTRRs is * impossible. */ WARN_ON(iter.partial_map); - /* not contained in any MTRRs. */ - if (type == -1) - return mtrr_default_type(mtrr_state); - return type; } EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 0f67d7e24800..736e6ab8784d 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -128,14 +128,6 @@ static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte) *access &= mask; } -static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level) -{ - int bit7 = (gpte >> 7) & 1, low6 = gpte & 0x3f; - - return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) | - ((mmu->bad_mt_xwr & (1ull << low6)) != 0); -} - static inline int FNAME(is_present_gpte)(unsigned long pte) { #if PTTYPE != PTTYPE_EPT @@ -172,7 +164,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, u64 gpte) { - if (FNAME(is_rsvd_bits_set)(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) + if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) goto no_present; if (!FNAME(is_present_gpte)(gpte)) @@ -353,8 +345,7 @@ retry_walk: if (unlikely(!FNAME(is_present_gpte)(pte))) goto error; - if (unlikely(FNAME(is_rsvd_bits_set)(mmu, pte, - walker->level))) { + if (unlikely(is_rsvd_bits_set(mmu, pte, walker->level))) { errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK; goto error; } diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c index 886aa25a7131..39b91127ef07 100644 --- a/arch/x86/kvm/pmu_amd.c +++ b/arch/x86/kvm/pmu_amd.c @@ -133,8 +133,6 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* MSR_K7_PERFCTRn */ pmc = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0); if (pmc) { - if (!msr_info->host_initiated) - data = (s64)data; pmc->counter += data - pmc_read_counter(pmc); return 0; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8e0c0844c6b9..74d825716f4f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1173,6 +1173,10 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm)) return 0; + if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED) && + kvm_read_cr0(vcpu) & X86_CR0_CD) + return _PAGE_NOCACHE; + mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn); return mtrr2protval[mtrr]; } @@ -1667,13 +1671,10 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) if (!vcpu->fpu_active) cr0 |= X86_CR0_TS; - /* - * re-enable caching here because the QEMU bios - * does not do it - this results in some delay at - * reboot - */ - if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) - cr0 &= ~(X86_CR0_CD | X86_CR0_NW); + + /* These are emulated via page tables. */ + cr0 &= ~(X86_CR0_CD | X86_CR0_NW); + svm->vmcb->save.cr0 = cr0; mark_dirty(svm->vmcb, VMCB_CR); update_cr0_intercept(svm); @@ -2106,6 +2107,7 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr; vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; vcpu->arch.mmu.shadow_root_level = get_npt_level(); + reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu); vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 83b7b5cd75d5..da1590ea43fc 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2443,10 +2443,10 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING | #endif CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | - CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | - CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING | - CPU_BASED_PAUSE_EXITING | CPU_BASED_TPR_SHADOW | - CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; + CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG | + CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING | + CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING | + CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; /* * We can allow some features even when not supported by the * hardware. For example, L1 can specify an MSR bitmap - and we @@ -3423,12 +3423,12 @@ static void enter_lmode(struct kvm_vcpu *vcpu) vmx_segment_cache_clear(to_vmx(vcpu)); guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); - if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { + if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) { pr_debug_ratelimited("%s: tss fixup for long mode. \n", __func__); vmcs_write32(GUEST_TR_AR_BYTES, - (guest_tr_ar & ~AR_TYPE_MASK) - | AR_TYPE_BUSY_64_TSS); + (guest_tr_ar & ~VMX_AR_TYPE_MASK) + | VMX_AR_TYPE_BUSY_64_TSS); } vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA); } @@ -3719,7 +3719,7 @@ static int vmx_get_cpl(struct kvm_vcpu *vcpu) return 0; else { int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS); - return AR_DPL(ar); + return VMX_AR_DPL(ar); } } @@ -3847,11 +3847,11 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu) if (cs.unusable) return false; - if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK)) + if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK)) return false; if (!cs.s) return false; - if (cs.type & AR_TYPE_WRITEABLE_MASK) { + if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) { if (cs.dpl > cs_rpl) return false; } else { @@ -3901,7 +3901,7 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg) return false; if (!var.present) return false; - if (~var.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK)) { + if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) { if (var.dpl < rpl) /* DPL < RPL */ return false; } @@ -5759,73 +5759,9 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); } -static u64 ept_rsvd_mask(u64 spte, int level) -{ - int i; - u64 mask = 0; - - for (i = 51; i > boot_cpu_data.x86_phys_bits; i--) - mask |= (1ULL << i); - - if (level == 4) - /* bits 7:3 reserved */ - mask |= 0xf8; - else if (spte & (1ULL << 7)) - /* - * 1GB/2MB page, bits 29:12 or 20:12 reserved respectively, - * level == 1 if the hypervisor is using the ignored bit 7. - */ - mask |= (PAGE_SIZE << ((level - 1) * 9)) - PAGE_SIZE; - else if (level > 1) - /* bits 6:3 reserved */ - mask |= 0x78; - - return mask; -} - -static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte, - int level) -{ - printk(KERN_ERR "%s: spte 0x%llx level %d\n", __func__, spte, level); - - /* 010b (write-only) */ - WARN_ON((spte & 0x7) == 0x2); - - /* 110b (write/execute) */ - WARN_ON((spte & 0x7) == 0x6); - - /* 100b (execute-only) and value not supported by logical processor */ - if (!cpu_has_vmx_ept_execute_only()) - WARN_ON((spte & 0x7) == 0x4); - - /* not 000b */ - if ((spte & 0x7)) { - u64 rsvd_bits = spte & ept_rsvd_mask(spte, level); - - if (rsvd_bits != 0) { - printk(KERN_ERR "%s: rsvd_bits = 0x%llx\n", - __func__, rsvd_bits); - WARN_ON(1); - } - - /* bits 5:3 are _not_ reserved for large page or leaf page */ - if ((rsvd_bits & 0x38) == 0) { - u64 ept_mem_type = (spte & 0x38) >> 3; - - if (ept_mem_type == 2 || ept_mem_type == 3 || - ept_mem_type == 7) { - printk(KERN_ERR "%s: ept_mem_type=0x%llx\n", - __func__, ept_mem_type); - WARN_ON(1); - } - } - } -} - static int handle_ept_misconfig(struct kvm_vcpu *vcpu) { - u64 sptes[4]; - int nr_sptes, i, ret; + int ret; gpa_t gpa; gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); @@ -5846,13 +5782,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) return 1; /* It is the real ept misconfig */ - printk(KERN_ERR "EPT: Misconfiguration.\n"); - printk(KERN_ERR "EPT: GPA: 0x%llx\n", gpa); - - nr_sptes = kvm_mmu_get_spte_hierarchy(vcpu, gpa, sptes); - - for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i) - ept_misconfig_inspect_spte(vcpu, sptes[i-1], i); + WARN_ON(1); vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG; @@ -6246,6 +6176,11 @@ static int handle_mwait(struct kvm_vcpu *vcpu) return handle_nop(vcpu); } +static int handle_monitor_trap(struct kvm_vcpu *vcpu) +{ + return 1; +} + static int handle_monitor(struct kvm_vcpu *vcpu) { printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n"); @@ -6408,8 +6343,12 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer) */ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, - u32 vmx_instruction_info, gva_t *ret) + u32 vmx_instruction_info, bool wr, gva_t *ret) { + gva_t off; + bool exn; + struct kvm_segment s; + /* * According to Vol. 3B, "Information for VM Exits Due to Instruction * Execution", on an exit, vmx_instruction_info holds most of the @@ -6434,22 +6373,63 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, /* Addr = segment_base + offset */ /* offset = base + [index * scale] + displacement */ - *ret = vmx_get_segment_base(vcpu, seg_reg); + off = exit_qualification; /* holds the displacement */ if (base_is_valid) - *ret += kvm_register_read(vcpu, base_reg); + off += kvm_register_read(vcpu, base_reg); if (index_is_valid) - *ret += kvm_register_read(vcpu, index_reg)<<scaling; - *ret += exit_qualification; /* holds the displacement */ + off += kvm_register_read(vcpu, index_reg)<<scaling; + vmx_get_segment(vcpu, &s, seg_reg); + *ret = s.base + off; if (addr_size == 1) /* 32 bit */ *ret &= 0xffffffff; - /* - * TODO: throw #GP (and return 1) in various cases that the VM* - * instructions require it - e.g., offset beyond segment limit, - * unusable or unreadable/unwritable segment, non-canonical 64-bit - * address, and so on. Currently these are not checked. - */ + /* Checks for #GP/#SS exceptions. */ + exn = false; + if (is_protmode(vcpu)) { + /* Protected mode: apply checks for segment validity in the + * following order: + * - segment type check (#GP(0) may be thrown) + * - usability check (#GP(0)/#SS(0)) + * - limit check (#GP(0)/#SS(0)) + */ + if (wr) + /* #GP(0) if the destination operand is located in a + * read-only data segment or any code segment. + */ + exn = ((s.type & 0xa) == 0 || (s.type & 8)); + else + /* #GP(0) if the source operand is located in an + * execute-only code segment + */ + exn = ((s.type & 0xa) == 8); + } + if (exn) { + kvm_queue_exception_e(vcpu, GP_VECTOR, 0); + return 1; + } + if (is_long_mode(vcpu)) { + /* Long mode: #GP(0)/#SS(0) if the memory address is in a + * non-canonical form. This is an only check for long mode. + */ + exn = is_noncanonical_address(*ret); + } else if (is_protmode(vcpu)) { + /* Protected mode: #GP(0)/#SS(0) if the segment is unusable. + */ + exn = (s.unusable != 0); + /* Protected mode: #GP(0)/#SS(0) if the memory + * operand is outside the segment limit. + */ + exn = exn || (off + sizeof(u64) > s.limit); + } + if (exn) { + kvm_queue_exception_e(vcpu, + seg_reg == VCPU_SREG_SS ? + SS_VECTOR : GP_VECTOR, + 0); + return 1; + } + return 0; } @@ -6471,7 +6451,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, int maxphyaddr = cpuid_maxphyaddr(vcpu); if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), - vmcs_read32(VMX_INSTRUCTION_INFO), &gva)) + vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva)) return 1; if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr, @@ -6999,7 +6979,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu) field_value); } else { if (get_vmx_mem_address(vcpu, exit_qualification, - vmx_instruction_info, &gva)) + vmx_instruction_info, true, &gva)) return 1; /* _system ok, as nested_vmx_check_permission verified cpl=0 */ kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, gva, @@ -7036,7 +7016,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) (((vmx_instruction_info) >> 3) & 0xf)); else { if (get_vmx_mem_address(vcpu, exit_qualification, - vmx_instruction_info, &gva)) + vmx_instruction_info, false, &gva)) return 1; if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &field_value, (is_64_bit_mode(vcpu) ? 8 : 4), &e)) { @@ -7128,7 +7108,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) return 1; if (get_vmx_mem_address(vcpu, exit_qualification, - vmx_instruction_info, &vmcs_gva)) + vmx_instruction_info, true, &vmcs_gva)) return 1; /* ok to use *_system, as nested_vmx_check_permission verified cpl=0 */ if (kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, vmcs_gva, @@ -7184,7 +7164,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) * operand is read even if it isn't needed (e.g., for type==global) */ if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), - vmx_instruction_info, &gva)) + vmx_instruction_info, false, &gva)) return 1; if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand, sizeof(operand), &e)) { @@ -7282,6 +7262,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait, + [EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap, [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor, [EXIT_REASON_INVEPT] = handle_invept, [EXIT_REASON_INVVPID] = handle_invvpid, @@ -7542,6 +7523,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) return true; case EXIT_REASON_MWAIT_INSTRUCTION: return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING); + case EXIT_REASON_MONITOR_TRAP_FLAG: + return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_TRAP_FLAG); case EXIT_REASON_MONITOR_INSTRUCTION: return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING); case EXIT_REASON_PAUSE_INSTRUCTION: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5ef2560075bf..4bbc2a1676c9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -29,6 +29,7 @@ #include "cpuid.h" #include "assigned-dev.h" #include "pmu.h" +#include "hyperv.h" #include <linux/clocksource.h> #include <linux/interrupt.h> @@ -221,11 +222,9 @@ static void shared_msr_update(unsigned slot, u32 msr) void kvm_define_shared_msr(unsigned slot, u32 msr) { BUG_ON(slot >= KVM_NR_SHARED_MSRS); + shared_msrs_global.msrs[slot] = msr; if (slot >= shared_msrs_global.nr) shared_msrs_global.nr = slot + 1; - shared_msrs_global.msrs[slot] = msr; - /* we need ensured the shared_msr_global have been updated */ - smp_wmb(); } EXPORT_SYMBOL_GPL(kvm_define_shared_msr); @@ -526,7 +525,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3) } for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { if (is_present_gpte(pdpte[i]) && - (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) { + (pdpte[i] & + vcpu->arch.mmu.guest_rsvd_check.rsvd_bits_mask[0][2])) { ret = 0; goto out; } @@ -949,6 +949,8 @@ static u32 emulated_msrs[] = { MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, + HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2, + HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL, HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, MSR_KVM_PV_EOI_EN, @@ -1217,11 +1219,6 @@ static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz, __func__, base_khz, scaled_khz, shift, *pmultiplier); } -static inline u64 get_kernel_ns(void) -{ - return ktime_get_boot_ns(); -} - #ifdef CONFIG_X86_64 static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0); #endif @@ -1869,123 +1866,6 @@ out: return r; } -static bool kvm_hv_hypercall_enabled(struct kvm *kvm) -{ - return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE; -} - -static bool kvm_hv_msr_partition_wide(u32 msr) -{ - bool r = false; - switch (msr) { - case HV_X64_MSR_GUEST_OS_ID: - case HV_X64_MSR_HYPERCALL: - case HV_X64_MSR_REFERENCE_TSC: - case HV_X64_MSR_TIME_REF_COUNT: - r = true; - break; - } - - return r; -} - -static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) -{ - struct kvm *kvm = vcpu->kvm; - - switch (msr) { - case HV_X64_MSR_GUEST_OS_ID: - kvm->arch.hv_guest_os_id = data; - /* setting guest os id to zero disables hypercall page */ - if (!kvm->arch.hv_guest_os_id) - kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE; - break; - case HV_X64_MSR_HYPERCALL: { - u64 gfn; - unsigned long addr; - u8 instructions[4]; - - /* if guest os id is not set hypercall should remain disabled */ - if (!kvm->arch.hv_guest_os_id) - break; - if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) { - kvm->arch.hv_hypercall = data; - break; - } - gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT; - addr = gfn_to_hva(kvm, gfn); - if (kvm_is_error_hva(addr)) - return 1; - kvm_x86_ops->patch_hypercall(vcpu, instructions); - ((unsigned char *)instructions)[3] = 0xc3; /* ret */ - if (__copy_to_user((void __user *)addr, instructions, 4)) - return 1; - kvm->arch.hv_hypercall = data; - mark_page_dirty(kvm, gfn); - break; - } - case HV_X64_MSR_REFERENCE_TSC: { - u64 gfn; - HV_REFERENCE_TSC_PAGE tsc_ref; - memset(&tsc_ref, 0, sizeof(tsc_ref)); - kvm->arch.hv_tsc_page = data; - if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) - break; - gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; - if (kvm_write_guest(kvm, gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT, - &tsc_ref, sizeof(tsc_ref))) - return 1; - mark_page_dirty(kvm, gfn); - break; - } - default: - vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " - "data 0x%llx\n", msr, data); - return 1; - } - return 0; -} - -static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data) -{ - switch (msr) { - case HV_X64_MSR_APIC_ASSIST_PAGE: { - u64 gfn; - unsigned long addr; - - if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) { - vcpu->arch.hv_vapic = data; - if (kvm_lapic_enable_pv_eoi(vcpu, 0)) - return 1; - break; - } - gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT; - addr = kvm_vcpu_gfn_to_hva(vcpu, gfn); - if (kvm_is_error_hva(addr)) - return 1; - if (__clear_user((void __user *)addr, PAGE_SIZE)) - return 1; - vcpu->arch.hv_vapic = data; - kvm_vcpu_mark_page_dirty(vcpu, gfn); - if (kvm_lapic_enable_pv_eoi(vcpu, gfn_to_gpa(gfn) | KVM_MSR_ENABLED)) - return 1; - break; - } - case HV_X64_MSR_EOI: - return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data); - case HV_X64_MSR_ICR: - return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data); - case HV_X64_MSR_TPR: - return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); - default: - vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " - "data 0x%llx\n", msr, data); - return 1; - } - - return 0; -} - static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) { gpa_t gpa = data & ~0x3f; @@ -2105,7 +1985,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (guest_cpuid_has_tsc_adjust(vcpu)) { if (!msr_info->host_initiated) { s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; - kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true); + adjust_tsc_offset_guest(vcpu, adj); } vcpu->arch.ia32_tsc_adjust_msr = data; } @@ -2224,15 +2104,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) */ break; case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: - if (kvm_hv_msr_partition_wide(msr)) { - int r; - mutex_lock(&vcpu->kvm->lock); - r = set_msr_hyperv_pw(vcpu, msr, data); - mutex_unlock(&vcpu->kvm->lock); - return r; - } else - return set_msr_hyperv(vcpu, msr, data); - break; + case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: + case HV_X64_MSR_CRASH_CTL: + return kvm_hv_set_msr_common(vcpu, msr, data, + msr_info->host_initiated); case MSR_IA32_BBL_CR_CTL3: /* Drop writes to this legacy MSR -- see rdmsr * counterpart for further detail. @@ -2315,68 +2190,6 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) return 0; } -static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) -{ - u64 data = 0; - struct kvm *kvm = vcpu->kvm; - - switch (msr) { - case HV_X64_MSR_GUEST_OS_ID: - data = kvm->arch.hv_guest_os_id; - break; - case HV_X64_MSR_HYPERCALL: - data = kvm->arch.hv_hypercall; - break; - case HV_X64_MSR_TIME_REF_COUNT: { - data = - div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); - break; - } - case HV_X64_MSR_REFERENCE_TSC: - data = kvm->arch.hv_tsc_page; - break; - default: - vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); - return 1; - } - - *pdata = data; - return 0; -} - -static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) -{ - u64 data = 0; - - switch (msr) { - case HV_X64_MSR_VP_INDEX: { - int r; - struct kvm_vcpu *v; - kvm_for_each_vcpu(r, v, vcpu->kvm) { - if (v == vcpu) { - data = r; - break; - } - } - break; - } - case HV_X64_MSR_EOI: - return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata); - case HV_X64_MSR_ICR: - return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); - case HV_X64_MSR_TPR: - return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); - case HV_X64_MSR_APIC_ASSIST_PAGE: - data = vcpu->arch.hv_vapic; - break; - default: - vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); - return 1; - } - *pdata = data; - return 0; -} - int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { switch (msr_info->index) { @@ -2493,14 +2306,10 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0x20000000; break; case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: - if (kvm_hv_msr_partition_wide(msr_info->index)) { - int r; - mutex_lock(&vcpu->kvm->lock); - r = get_msr_hyperv_pw(vcpu, msr_info->index, &msr_info->data); - mutex_unlock(&vcpu->kvm->lock); - return r; - } else - return get_msr_hyperv(vcpu, msr_info->index, &msr_info->data); + case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: + case HV_X64_MSR_CRASH_CTL: + return kvm_hv_get_msr_common(vcpu, + msr_info->index, &msr_info->data); break; case MSR_IA32_BBL_CR_CTL3: /* This legacy MSR exists but isn't fully documented in current @@ -2651,6 +2460,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_TSC_DEADLINE_TIMER: case KVM_CAP_ENABLE_CAP_VM: case KVM_CAP_DISABLE_QUIRKS: + case KVM_CAP_SET_BOOT_CPU_ID: #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT case KVM_CAP_ASSIGN_DEV_IRQ: case KVM_CAP_PCI_2_3: @@ -3817,30 +3627,25 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_ioapic_init(kvm); if (r) { mutex_lock(&kvm->slots_lock); - kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, - &vpic->dev_master); - kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, - &vpic->dev_slave); - kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, - &vpic->dev_eclr); + kvm_destroy_pic(vpic); mutex_unlock(&kvm->slots_lock); - kfree(vpic); goto create_irqchip_unlock; } } else goto create_irqchip_unlock; - smp_wmb(); - kvm->arch.vpic = vpic; - smp_wmb(); r = kvm_setup_default_irq_routing(kvm); if (r) { mutex_lock(&kvm->slots_lock); mutex_lock(&kvm->irq_lock); kvm_ioapic_destroy(kvm); - kvm_destroy_pic(kvm); + kvm_destroy_pic(vpic); mutex_unlock(&kvm->irq_lock); mutex_unlock(&kvm->slots_lock); + goto create_irqchip_unlock; } + /* Write kvm->irq_routing before kvm->arch.vpic. */ + smp_wmb(); + kvm->arch.vpic = vpic; create_irqchip_unlock: mutex_unlock(&kvm->lock); break; @@ -3967,6 +3772,15 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_reinject(kvm, &control); break; } + case KVM_SET_BOOT_CPU_ID: + r = 0; + mutex_lock(&kvm->lock); + if (atomic_read(&kvm->online_vcpus) != 0) + r = -EBUSY; + else + kvm->arch.bsp_vcpu_id = arg; + mutex_unlock(&kvm->lock); + break; case KVM_XEN_HVM_CONFIG: { r = -EFAULT; if (copy_from_user(&kvm->arch.xen_hvm_config, argp, @@ -5882,66 +5696,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_halt); -int kvm_hv_hypercall(struct kvm_vcpu *vcpu) -{ - u64 param, ingpa, outgpa, ret; - uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; - bool fast, longmode; - - /* - * hypercall generates UD from non zero cpl and real mode - * per HYPER-V spec - */ - if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) { - kvm_queue_exception(vcpu, UD_VECTOR); - return 0; - } - - longmode = is_64_bit_mode(vcpu); - - if (!longmode) { - param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | - (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff); - ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) | - (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff); - outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) | - (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff); - } -#ifdef CONFIG_X86_64 - else { - param = kvm_register_read(vcpu, VCPU_REGS_RCX); - ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX); - outgpa = kvm_register_read(vcpu, VCPU_REGS_R8); - } -#endif - - code = param & 0xffff; - fast = (param >> 16) & 0x1; - rep_cnt = (param >> 32) & 0xfff; - rep_idx = (param >> 48) & 0xfff; - - trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa); - - switch (code) { - case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: - kvm_vcpu_on_spin(vcpu); - break; - default: - res = HV_STATUS_INVALID_HYPERCALL_CODE; - break; - } - - ret = res | (((u64)rep_done & 0xfff) << 32); - if (longmode) { - kvm_register_write(vcpu, VCPU_REGS_RAX, ret); - } else { - kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32); - kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff); - } - - return 1; -} - /* * kvm_pv_kick_cpu_op: Kick a vcpu. * @@ -6327,6 +6081,7 @@ static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf) static void process_smi(struct kvm_vcpu *vcpu) { struct kvm_segment cs, ds; + struct desc_ptr dt; char buf[512]; u32 cr0; @@ -6359,6 +6114,10 @@ static void process_smi(struct kvm_vcpu *vcpu) kvm_x86_ops->set_cr4(vcpu, 0); + /* Undocumented: IDT limit is set to zero on entry to SMM. */ + dt.address = dt.size = 0; + kvm_x86_ops->set_idt(vcpu, &dt); + __kvm_set_dr(vcpu, 7, DR7_FIXED_1); cs.selector = (vcpu->arch.smbase >> 4) & 0xffff; @@ -6513,6 +6272,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu_scan_ioapic(vcpu); if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) kvm_vcpu_reload_apic_access_page(vcpu); + if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) { + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; + vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH; + r = 0; + goto out; + } } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { @@ -7535,6 +7300,17 @@ void kvm_arch_check_processor_compat(void *rtn) kvm_x86_ops->check_processor_compatibility(rtn); } +bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu) +{ + return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id; +} +EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp); + +bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0; +} + bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 0ca2f3e4803c..2f822cd886c2 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -147,6 +147,11 @@ static inline void kvm_register_writel(struct kvm_vcpu *vcpu, return kvm_register_write(vcpu, reg, val); } +static inline u64 get_kernel_ns(void) +{ + return ktime_get_boot_ns(); +} + static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk) { return !(kvm->arch.disabled_quirks & quirk); diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index f37e84ab49f3..3d8f2e421466 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -29,7 +29,6 @@ #include <asm/uaccess.h> #include <asm/traps.h> -#include <asm/desc.h> #include <asm/user.h> #include <asm/fpu/internal.h> @@ -181,7 +180,7 @@ void math_emulate(struct math_emu_info *info) math_abort(FPU_info, SIGILL); } - code_descriptor = LDT_DESCRIPTOR(FPU_CS); + code_descriptor = FPU_get_ldt_descriptor(FPU_CS); if (SEG_D_SIZE(code_descriptor)) { /* The above test may be wrong, the book is not clear */ /* Segmented 32 bit protected mode */ diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index 9ccecb61a4fa..5e044d506b7a 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h @@ -16,9 +16,24 @@ #include <linux/kernel.h> #include <linux/mm.h> -/* s is always from a cpu register, and the cpu does bounds checking - * during register load --> no further bounds checks needed */ -#define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3]) +#include <asm/desc.h> +#include <asm/mmu_context.h> + +static inline struct desc_struct FPU_get_ldt_descriptor(unsigned seg) +{ + static struct desc_struct zero_desc; + struct desc_struct ret = zero_desc; + +#ifdef CONFIG_MODIFY_LDT_SYSCALL + seg >>= 3; + mutex_lock(¤t->mm->context.lock); + if (current->mm->context.ldt && seg < current->mm->context.ldt->size) + ret = current->mm->context.ldt->entries[seg]; + mutex_unlock(¤t->mm->context.lock); +#endif + return ret; +} + #define SEG_D_SIZE(x) ((x).b & (3 << 21)) #define SEG_G_BIT(x) ((x).b & (1 << 23)) #define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1) diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c index 6ef5e99380f9..8300db71c2a6 100644 --- a/arch/x86/math-emu/get_address.c +++ b/arch/x86/math-emu/get_address.c @@ -20,7 +20,6 @@ #include <linux/stddef.h> #include <asm/uaccess.h> -#include <asm/desc.h> #include "fpu_system.h" #include "exception.h" @@ -158,7 +157,7 @@ static long pm_address(u_char FPU_modrm, u_char segment, addr->selector = PM_REG_(segment); } - descriptor = LDT_DESCRIPTOR(PM_REG_(segment)); + descriptor = FPU_get_ldt_descriptor(addr->selector); base_address = SEG_BASE_ADDR(descriptor); address = base_address + offset; limit = base_address diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 579a8fd74be0..be2e7a2b10d7 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -269,7 +269,7 @@ static void emit_bpf_tail_call(u8 **pprog) EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */ offsetof(struct bpf_array, map.max_entries)); EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */ -#define OFFSET1 44 /* number of bytes to jump */ +#define OFFSET1 47 /* number of bytes to jump */ EMIT2(X86_JBE, OFFSET1); /* jbe out */ label1 = cnt; @@ -278,15 +278,15 @@ static void emit_bpf_tail_call(u8 **pprog) */ EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ -#define OFFSET2 33 +#define OFFSET2 36 EMIT2(X86_JA, OFFSET2); /* ja out */ label2 = cnt; EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */ /* prog = array->prog[index]; */ - EMIT4(0x48, 0x8D, 0x44, 0xD6); /* lea rax, [rsi + rdx * 8 + 0x50] */ - EMIT1(offsetof(struct bpf_array, prog)); + EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */ + offsetof(struct bpf_array, prog)); EMIT3(0x48, 0x8B, 0x00); /* mov rax, qword ptr [rax] */ /* if (prog == NULL) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index cfba30f27392..e4308fe6afe8 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -972,6 +972,11 @@ u64 efi_mem_attributes(unsigned long phys_addr) static int __init arch_parse_efi_cmdline(char *str) { + if (!str) { + pr_warn("need at least one option\n"); + return -EINVAL; + } + if (parse_option_str(str, "old_map")) set_bit(EFI_OLD_MEMMAP, &efi.flags); if (parse_option_str(str, "debug")) diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 0d7dd1f5ac36..9ab52791fed5 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -22,6 +22,7 @@ #include <asm/fpu/internal.h> #include <asm/debugreg.h> #include <asm/cpu.h> +#include <asm/mmu_context.h> #ifdef CONFIG_X86_32 __visible unsigned long saved_context_ebx; @@ -153,7 +154,7 @@ static void fix_processor_context(void) syscall_init(); /* This sets MSR_*STAR and related */ #endif load_TR_desc(); /* This does ltr */ - load_LDT(¤t->active_mm->context); /* This does lldt */ + load_mm_ldt(current->active_mm); /* This does lldt */ fpu__resume_cpu(); } diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index e88fda867a33..484145368a24 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -8,7 +8,7 @@ config XEN select PARAVIRT_CLOCK select XEN_HAVE_PVMMU depends on X86_64 || (X86_32 && X86_PAE) - depends on X86_TSC + depends on X86_LOCAL_APIC && X86_TSC help This is the Linux Xen port. Enabling this will allow the kernel to boot in a paravirtualized environment under the @@ -17,7 +17,7 @@ config XEN config XEN_DOM0 def_bool y depends on XEN && PCI_XEN && SWIOTLB_XEN - depends on X86_LOCAL_APIC && X86_IO_APIC && ACPI && PCI + depends on X86_IO_APIC && ACPI && PCI config XEN_PVHVM def_bool y diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 7322755f337a..4b6e29ac0968 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -13,13 +13,13 @@ CFLAGS_mmu.o := $(nostackp) obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ time.o xen-asm.o xen-asm_$(BITS).o \ grant-table.o suspend.o platform-pci-unplug.o \ - p2m.o + p2m.o apic.o obj-$(CONFIG_EVENT_TRACING) += trace.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o -obj-$(CONFIG_XEN_DOM0) += apic.o vga.o +obj-$(CONFIG_XEN_DOM0) += vga.o obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o obj-$(CONFIG_XEN_EFI) += efi.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0b95c9b8283f..11d6fb4e8483 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -483,6 +483,7 @@ static void set_aliased_prot(void *v, pgprot_t prot) pte_t pte; unsigned long pfn; struct page *page; + unsigned char dummy; ptep = lookup_address((unsigned long)v, &level); BUG_ON(ptep == NULL); @@ -492,6 +493,32 @@ static void set_aliased_prot(void *v, pgprot_t prot) pte = pfn_pte(pfn, prot); + /* + * Careful: update_va_mapping() will fail if the virtual address + * we're poking isn't populated in the page tables. We don't + * need to worry about the direct map (that's always in the page + * tables), but we need to be careful about vmap space. In + * particular, the top level page table can lazily propagate + * entries between processes, so if we've switched mms since we + * vmapped the target in the first place, we might not have the + * top-level page table entry populated. + * + * We disable preemption because we want the same mm active when + * we probe the target and when we issue the hypercall. We'll + * have the same nominal mm, but if we're a kernel thread, lazy + * mm dropping could change our pgd. + * + * Out of an abundance of caution, this uses __get_user() to fault + * in the target address just in case there's some obscure case + * in which the target address isn't readable. + */ + + preempt_disable(); + + pagefault_disable(); /* Avoid warnings due to being atomic. */ + __get_user(dummy, (unsigned char __user __force *)v); + pagefault_enable(); + if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) BUG(); @@ -503,6 +530,8 @@ static void set_aliased_prot(void *v, pgprot_t prot) BUG(); } else kmap_flush_unused(); + + preempt_enable(); } static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries) @@ -510,6 +539,17 @@ static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries) const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; int i; + /* + * We need to mark the all aliases of the LDT pages RO. We + * don't need to call vm_flush_aliases(), though, since that's + * only responsible for flushing aliases out the TLBs, not the + * page tables, and Xen will flush the TLB for us if needed. + * + * To avoid confusing future readers: none of this is necessary + * to load the LDT. The hypervisor only checks this when the + * LDT is faulted in due to subsequent descriptor access. + */ + for(i = 0; i < entries; i += entries_per_page) set_aliased_prot(ldt + i, PAGE_KERNEL_RO); } diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index c20fe29e65f4..2292721b1d10 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -101,17 +101,15 @@ struct dom0_vga_console_info; #ifdef CONFIG_XEN_DOM0 void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size); -void __init xen_init_apic(void); #else static inline void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) { } -static inline void __init xen_init_apic(void) -{ -} #endif +void __init xen_init_apic(void); + #ifdef CONFIG_XEN_EFI extern void xen_efi_init(void); #else diff --git a/block/blk-settings.c b/block/blk-settings.c index 12600bfffca9..e0057d035200 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -241,8 +241,8 @@ EXPORT_SYMBOL(blk_queue_bounce_limit); * Description: * Enables a low level driver to set a hard upper limit, * max_hw_sectors, on the size of requests. max_hw_sectors is set by - * the device driver based upon the combined capabilities of I/O - * controller and storage device. + * the device driver based upon the capabilities of the I/O + * controller. * * max_sectors is a soft limit imposed by the block layer for * filesystem type requests. This value can be overridden on a diff --git a/crypto/authencesn.c b/crypto/authencesn.c index a3da6770bc9e..b8efe36ce114 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -393,8 +393,6 @@ static int crypto_authenc_esn_genicv(struct aead_request *req, u8 *iv, struct scatterlist *cipher = areq_ctx->cipher; struct scatterlist *hsg = areq_ctx->hsg; struct scatterlist *tsg = areq_ctx->tsg; - struct scatterlist *assoc1; - struct scatterlist *assoc2; unsigned int ivsize = crypto_aead_ivsize(authenc_esn); unsigned int cryptlen = req->cryptlen; struct page *dstp; @@ -412,27 +410,19 @@ static int crypto_authenc_esn_genicv(struct aead_request *req, u8 *iv, cryptlen += ivsize; } - if (sg_is_last(assoc)) - return -EINVAL; - - assoc1 = assoc + 1; - if (sg_is_last(assoc1)) - return -EINVAL; - - assoc2 = assoc + 2; - if (!sg_is_last(assoc2)) + if (assoc->length < 12) return -EINVAL; sg_init_table(hsg, 2); - sg_set_page(hsg, sg_page(assoc), assoc->length, assoc->offset); - sg_set_page(hsg + 1, sg_page(assoc2), assoc2->length, assoc2->offset); + sg_set_page(hsg, sg_page(assoc), 4, assoc->offset); + sg_set_page(hsg + 1, sg_page(assoc), 4, assoc->offset + 8); sg_init_table(tsg, 1); - sg_set_page(tsg, sg_page(assoc1), assoc1->length, assoc1->offset); + sg_set_page(tsg, sg_page(assoc), 4, assoc->offset + 4); areq_ctx->cryptlen = cryptlen; - areq_ctx->headlen = assoc->length + assoc2->length; - areq_ctx->trailen = assoc1->length; + areq_ctx->headlen = 8; + areq_ctx->trailen = 4; areq_ctx->sg = dst; areq_ctx->complete = authenc_esn_geniv_ahash_done; @@ -563,8 +553,6 @@ static int crypto_authenc_esn_iverify(struct aead_request *req, u8 *iv, struct scatterlist *cipher = areq_ctx->cipher; struct scatterlist *hsg = areq_ctx->hsg; struct scatterlist *tsg = areq_ctx->tsg; - struct scatterlist *assoc1; - struct scatterlist *assoc2; unsigned int ivsize = crypto_aead_ivsize(authenc_esn); struct page *srcp; u8 *vsrc; @@ -580,27 +568,19 @@ static int crypto_authenc_esn_iverify(struct aead_request *req, u8 *iv, cryptlen += ivsize; } - if (sg_is_last(assoc)) - return -EINVAL; - - assoc1 = assoc + 1; - if (sg_is_last(assoc1)) - return -EINVAL; - - assoc2 = assoc + 2; - if (!sg_is_last(assoc2)) + if (assoc->length < 12) return -EINVAL; sg_init_table(hsg, 2); - sg_set_page(hsg, sg_page(assoc), assoc->length, assoc->offset); - sg_set_page(hsg + 1, sg_page(assoc2), assoc2->length, assoc2->offset); + sg_set_page(hsg, sg_page(assoc), 4, assoc->offset); + sg_set_page(hsg + 1, sg_page(assoc), 4, assoc->offset + 8); sg_init_table(tsg, 1); - sg_set_page(tsg, sg_page(assoc1), assoc1->length, assoc1->offset); + sg_set_page(tsg, sg_page(assoc), 4, assoc->offset + 4); areq_ctx->cryptlen = cryptlen; - areq_ctx->headlen = assoc->length + assoc2->length; - areq_ctx->trailen = assoc1->length; + areq_ctx->headlen = 8; + areq_ctx->trailen = 4; areq_ctx->sg = src; areq_ctx->complete = authenc_esn_verify_ahash_done; diff --git a/drivers/Kconfig b/drivers/Kconfig index 6e973b8e3a3b..4e2e6aaf0b88 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -184,4 +184,6 @@ source "drivers/android/Kconfig" source "drivers/nvdimm/Kconfig" +source "drivers/nvmem/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index b64b49f6e01b..4c270f5414f0 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -165,3 +165,4 @@ obj-$(CONFIG_RAS) += ras/ obj-$(CONFIG_THUNDERBOLT) += thunderbolt/ obj-$(CONFIG_CORESIGHT) += hwtracing/coresight/ obj-$(CONFIG_ANDROID) += android/ +obj-$(CONFIG_NVMEM) += nvmem/ diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 717afcdb5f4a..88dbbb115285 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -231,7 +231,7 @@ int acpi_device_set_power(struct acpi_device *device, int state) dev_warn(&device->dev, "Failed to change power state to %s\n", acpi_power_state_string(state)); } else { - device->power.state = state; + device->power.state = target_state; ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device [%s] transitioned to %s\n", device->pnp.bus_id, diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 628a42c41ab1..cf0fd96a7602 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -702,11 +702,11 @@ static ssize_t flags_show(struct device *dev, u16 flags = to_nfit_memdev(dev)->flags; return sprintf(buf, "%s%s%s%s%s\n", - flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save " : "", - flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore " : "", - flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush " : "", - flags & ACPI_NFIT_MEM_ARMED ? "arm " : "", - flags & ACPI_NFIT_MEM_HEALTH_OBSERVED ? "smart " : ""); + flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save_fail " : "", + flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore_fail " : "", + flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush_fail " : "", + flags & ACPI_NFIT_MEM_ARMED ? "not_armed " : "", + flags & ACPI_NFIT_MEM_HEALTH_OBSERVED ? "smart_event " : ""); } static DEVICE_ATTR_RO(flags); @@ -849,12 +849,12 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) if ((mem_flags & ACPI_NFIT_MEM_FAILED_MASK) == 0) continue; - dev_info(acpi_desc->dev, "%s: failed: %s%s%s%s\n", + dev_info(acpi_desc->dev, "%s flags:%s%s%s%s\n", nvdimm_name(nvdimm), - mem_flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save " : "", - mem_flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore " : "", - mem_flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush " : "", - mem_flags & ACPI_NFIT_MEM_ARMED ? "arm " : ""); + mem_flags & ACPI_NFIT_MEM_SAVE_FAILED ? " save_fail" : "", + mem_flags & ACPI_NFIT_MEM_RESTORE_FAILED ? " restore_fail":"", + mem_flags & ACPI_NFIT_MEM_FLUSH_FAILED ? " flush_fail" : "", + mem_flags & ACPI_NFIT_MEM_ARMED ? " not_armed" : ""); } @@ -1024,7 +1024,7 @@ static void wmb_blk(struct nfit_blk *nfit_blk) wmb_pmem(); } -static u64 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw) +static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw) { struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR]; u64 offset = nfit_blk->stat_offset + mmio->size * bw; @@ -1032,7 +1032,7 @@ static u64 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw) if (mmio->num_lines) offset = to_interleave_offset(offset, mmio); - return readq(mmio->base + offset); + return readl(mmio->base + offset); } static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 815f75ef2411..2922f1f252d5 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -32,6 +32,7 @@ #include <linux/module.h> #include <linux/pci.h> #include <linux/types.h> +#include <linux/workqueue.h> #include <acpi/video.h> ACPI_MODULE_NAME("video"); @@ -41,6 +42,7 @@ void acpi_video_unregister_backlight(void); static bool backlight_notifier_registered; static struct notifier_block backlight_nb; +static struct work_struct backlight_notify_work; static enum acpi_backlight_type acpi_backlight_cmdline = acpi_backlight_undef; static enum acpi_backlight_type acpi_backlight_dmi = acpi_backlight_undef; @@ -262,6 +264,13 @@ static const struct dmi_system_id video_detect_dmi_table[] = { { }, }; +/* This uses a workqueue to avoid various locking ordering issues */ +static void acpi_video_backlight_notify_work(struct work_struct *work) +{ + if (acpi_video_get_backlight_type() != acpi_backlight_video) + acpi_video_unregister_backlight(); +} + static int acpi_video_backlight_notify(struct notifier_block *nb, unsigned long val, void *bd) { @@ -269,9 +278,8 @@ static int acpi_video_backlight_notify(struct notifier_block *nb, /* A raw bl registering may change video -> native */ if (backlight->props.type == BACKLIGHT_RAW && - val == BACKLIGHT_REGISTERED && - acpi_video_get_backlight_type() != acpi_backlight_video) - acpi_video_unregister_backlight(); + val == BACKLIGHT_REGISTERED) + schedule_work(&backlight_notify_work); return NOTIFY_OK; } @@ -304,6 +312,8 @@ enum acpi_backlight_type acpi_video_get_backlight_type(void) acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, find_video, NULL, &video_caps, NULL); + INIT_WORK(&backlight_notify_work, + acpi_video_backlight_notify_work); backlight_nb.notifier_call = acpi_video_backlight_notify; backlight_nb.priority = 0; if (backlight_register_notifier(&backlight_nb) == 0) diff --git a/drivers/ata/ahci_brcmstb.c b/drivers/ata/ahci_brcmstb.c index ce1e3a885981..14b7305d2ba0 100644 --- a/drivers/ata/ahci_brcmstb.c +++ b/drivers/ata/ahci_brcmstb.c @@ -92,7 +92,7 @@ static inline u32 brcm_sata_readreg(void __iomem *addr) * Other architectures (e.g., ARM) either do not support big endian, or * else leave I/O in little endian mode. */ - if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(__BIG_ENDIAN)) + if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) return __raw_readl(addr); else return readl_relaxed(addr); @@ -101,7 +101,7 @@ static inline u32 brcm_sata_readreg(void __iomem *addr) static inline void brcm_sata_writereg(u32 val, void __iomem *addr) { /* See brcm_sata_readreg() comments */ - if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(__BIG_ENDIAN)) + if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) __raw_writel(val, addr); else writel_relaxed(val, addr); @@ -209,6 +209,7 @@ static void brcm_sata_init(struct brcm_ahci_priv *priv) priv->top_ctrl + SATA_TOP_CTRL_BUS_CTRL); } +#ifdef CONFIG_PM_SLEEP static int brcm_ahci_suspend(struct device *dev) { struct ata_host *host = dev_get_drvdata(dev); @@ -231,6 +232,7 @@ static int brcm_ahci_resume(struct device *dev) brcm_sata_phys_enable(priv); return ahci_platform_resume(dev); } +#endif static struct scsi_host_template ahci_platform_sht = { AHCI_SHT(DRV_NAME), diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index db5d9f79a247..790e0deb278e 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -694,11 +694,11 @@ static int ata_rwcmd_protocol(struct ata_taskfile *tf, struct ata_device *dev) * RETURNS: * Block address read from @tf. */ -u64 ata_tf_read_block(const struct ata_taskfile *tf, struct ata_device *dev) +u64 ata_tf_read_block(struct ata_taskfile *tf, struct ata_device *dev) { u64 block = 0; - if (!dev || tf->flags & ATA_TFLAG_LBA) { + if (tf->flags & ATA_TFLAG_LBA) { if (tf->flags & ATA_TFLAG_LBA48) { block |= (u64)tf->hob_lbah << 40; block |= (u64)tf->hob_lbam << 32; @@ -2147,24 +2147,6 @@ static int ata_dev_config_ncq(struct ata_device *dev, return 0; } -static void ata_dev_config_sense_reporting(struct ata_device *dev) -{ - unsigned int err_mask; - - if (!ata_id_has_sense_reporting(dev->id)) - return; - - if (ata_id_sense_reporting_enabled(dev->id)) - return; - - err_mask = ata_dev_set_feature(dev, SETFEATURE_SENSE_DATA, 0x1); - if (err_mask) { - ata_dev_dbg(dev, - "failed to enable Sense Data Reporting, Emask 0x%x\n", - err_mask); - } -} - /** * ata_dev_configure - Configure the specified ATA/ATAPI device * @dev: Target device to configure @@ -2387,7 +2369,7 @@ int ata_dev_configure(struct ata_device *dev) dev->devslp_timing[i] = sata_setting[j]; } } - ata_dev_config_sense_reporting(dev); + dev->cdb_len = 16; } @@ -4248,6 +4230,8 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Samsung SSD 8*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "FCCT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM, }, /* devices that don't properly handle TRIM commands */ { "SuperSSpeed S238*", NULL, ATA_HORKAGE_NOTRIM, }, diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 7465031a893c..cb0508af1459 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -1592,8 +1592,6 @@ static int ata_eh_read_log_10h(struct ata_device *dev, tf->hob_lbah = buf[10]; tf->nsect = buf[12]; tf->hob_nsect = buf[13]; - if (ata_id_has_ncq_autosense(dev->id)) - tf->auxiliary = buf[14] << 16 | buf[15] << 8 | buf[16]; return 0; } @@ -1630,70 +1628,6 @@ unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key) } /** - * ata_eh_request_sense - perform REQUEST_SENSE_DATA_EXT - * @dev: device to perform REQUEST_SENSE_SENSE_DATA_EXT to - * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) - * @dfl_sense_key: default sense key to use - * - * Perform REQUEST_SENSE_DATA_EXT after the device reported CHECK - * SENSE. This function is EH helper. - * - * LOCKING: - * Kernel thread context (may sleep). - * - * RETURNS: - * encoded sense data on success, 0 on failure or if sense data - * is not available. - */ -static u32 ata_eh_request_sense(struct ata_queued_cmd *qc, - struct scsi_cmnd *cmd) -{ - struct ata_device *dev = qc->dev; - struct ata_taskfile tf; - unsigned int err_mask; - - if (!cmd) - return 0; - - DPRINTK("ATA request sense\n"); - ata_dev_warn(dev, "request sense\n"); - if (!ata_id_sense_reporting_enabled(dev->id)) { - ata_dev_warn(qc->dev, "sense data reporting disabled\n"); - return 0; - } - ata_tf_init(dev, &tf); - - tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; - tf.flags |= ATA_TFLAG_LBA | ATA_TFLAG_LBA48; - tf.command = ATA_CMD_REQ_SENSE_DATA; - tf.protocol = ATA_PROT_NODATA; - - err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); - /* - * ACS-4 states: - * The device may set the SENSE DATA AVAILABLE bit to one in the - * STATUS field and clear the ERROR bit to zero in the STATUS field - * to indicate that the command returned completion without an error - * and the sense data described in table 306 is available. - * - * IOW the 'ATA_SENSE' bit might not be set even though valid - * sense data is available. - * So check for both. - */ - if ((tf.command & ATA_SENSE) || - tf.lbah != 0 || tf.lbam != 0 || tf.lbal != 0) { - ata_scsi_set_sense(cmd, tf.lbah, tf.lbam, tf.lbal); - qc->flags |= ATA_QCFLAG_SENSE_VALID; - ata_dev_warn(dev, "sense data %02x/%02x/%02x\n", - tf.lbah, tf.lbam, tf.lbal); - } else { - ata_dev_warn(dev, "request sense failed stat %02x emask %x\n", - tf.command, err_mask); - } - return err_mask; -} - -/** * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE * @dev: device to perform REQUEST_SENSE to * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) @@ -1855,19 +1789,6 @@ void ata_eh_analyze_ncq_error(struct ata_link *link) memcpy(&qc->result_tf, &tf, sizeof(tf)); qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48; qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ; - if (qc->result_tf.auxiliary) { - char sense_key, asc, ascq; - - sense_key = (qc->result_tf.auxiliary >> 16) & 0xff; - asc = (qc->result_tf.auxiliary >> 8) & 0xff; - ascq = qc->result_tf.auxiliary & 0xff; - ata_dev_dbg(dev, "NCQ Autosense %02x/%02x/%02x\n", - sense_key, asc, ascq); - ata_scsi_set_sense(qc->scsicmd, sense_key, asc, ascq); - ata_scsi_set_sense_information(qc->scsicmd, &qc->result_tf); - qc->flags |= ATA_QCFLAG_SENSE_VALID; - } - ehc->i.err_mask &= ~AC_ERR_DEV; } @@ -1897,27 +1818,6 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, return ATA_EH_RESET; } - /* - * Sense data reporting does not work if the - * device fault bit is set. - */ - if ((stat & ATA_SENSE) && !(stat & ATA_DF) && - !(qc->flags & ATA_QCFLAG_SENSE_VALID)) { - if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) { - tmp = ata_eh_request_sense(qc, qc->scsicmd); - if (tmp) - qc->err_mask |= tmp; - else - ata_scsi_set_sense_information(qc->scsicmd, tf); - } else { - ata_dev_warn(qc->dev, "sense data available but port frozen\n"); - } - } - - /* Set by NCQ autosense or request sense above */ - if (qc->flags & ATA_QCFLAG_SENSE_VALID) - return 0; - if (stat & (ATA_ERR | ATA_DF)) qc->err_mask |= AC_ERR_DEV; else @@ -2661,15 +2561,14 @@ static void ata_eh_link_report(struct ata_link *link) #ifdef CONFIG_ATA_VERBOSE_ERROR if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | - ATA_SENSE | ATA_ERR)) { + ATA_ERR)) { if (res->command & ATA_BUSY) ata_dev_err(qc->dev, "status: { Busy }\n"); else - ata_dev_err(qc->dev, "status: { %s%s%s%s%s}\n", + ata_dev_err(qc->dev, "status: { %s%s%s%s}\n", res->command & ATA_DRDY ? "DRDY " : "", res->command & ATA_DF ? "DF " : "", res->command & ATA_DRQ ? "DRQ " : "", - res->command & ATA_SENSE ? "SENSE " : "", res->command & ATA_ERR ? "ERR " : ""); } diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 641a61a59e89..0d7f0da3a269 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -270,28 +270,13 @@ DEVICE_ATTR(unload_heads, S_IRUGO | S_IWUSR, ata_scsi_park_show, ata_scsi_park_store); EXPORT_SYMBOL_GPL(dev_attr_unload_heads); -void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq) +static void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq) { - if (!cmd) - return; - cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION; scsi_build_sense_buffer(0, cmd->sense_buffer, sk, asc, ascq); } -void ata_scsi_set_sense_information(struct scsi_cmnd *cmd, - const struct ata_taskfile *tf) -{ - u64 information; - - if (!cmd) - return; - - information = ata_tf_read_block(tf, NULL); - scsi_set_sense_information(cmd->sense_buffer, information); -} - static ssize_t ata_scsi_em_message_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -1792,9 +1777,7 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc) ((cdb[2] & 0x20) || need_sense)) { ata_gen_passthru_sense(qc); } else { - if (qc->flags & ATA_QCFLAG_SENSE_VALID) { - cmd->result = SAM_STAT_CHECK_CONDITION; - } else if (!need_sense) { + if (!need_sense) { cmd->result = SAM_STAT_GOOD; } else { /* TODO: decide which descriptor format to use diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index a998a175f9f1..f840ca18a7c0 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -67,8 +67,7 @@ extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag); extern int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev, u64 block, u32 n_block, unsigned int tf_flags, unsigned int tag); -extern u64 ata_tf_read_block(const struct ata_taskfile *tf, - struct ata_device *dev); +extern u64 ata_tf_read_block(struct ata_taskfile *tf, struct ata_device *dev); extern unsigned ata_exec_internal(struct ata_device *dev, struct ata_taskfile *tf, const u8 *cdb, int dma_dir, void *buf, unsigned int buflen, @@ -138,9 +137,6 @@ extern int ata_scsi_add_hosts(struct ata_host *host, struct scsi_host_template *sht); extern void ata_scsi_scan_host(struct ata_port *ap, int sync); extern int ata_scsi_offline_dev(struct ata_device *dev); -extern void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq); -extern void ata_scsi_set_sense_information(struct scsi_cmnd *cmd, - const struct ata_taskfile *tf); extern void ata_scsi_media_change_notify(struct ata_device *dev); extern void ata_scsi_hotplug(struct work_struct *work); extern void ata_schedule_scsi_eh(struct Scsi_Host *shost); diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c index 3a18a8a719b4..fab504fd9cfd 100644 --- a/drivers/ata/sata_sx4.c +++ b/drivers/ata/sata_sx4.c @@ -1238,8 +1238,12 @@ static unsigned int pdc20621_prog_dimm_global(struct ata_host *host) readl(mmio + PDC_SDRAM_CONTROL); /* Turn on for ECC */ - pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS, - PDC_DIMM_SPD_TYPE, &spd0); + if (!pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS, + PDC_DIMM_SPD_TYPE, &spd0)) { + pr_err("Failed in i2c read: device=%#x, subaddr=%#x\n", + PDC_DIMM0_SPD_DEV_ADDRESS, PDC_DIMM_SPD_TYPE); + return 1; + } if (spd0 == 0x02) { data |= (0x01 << 16); writel(data, mmio + PDC_SDRAM_CONTROL); @@ -1380,8 +1384,12 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host) /* ECC initiliazation. */ - pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS, - PDC_DIMM_SPD_TYPE, &spd0); + if (!pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS, + PDC_DIMM_SPD_TYPE, &spd0)) { + pr_err("Failed in i2c read: device=%#x, subaddr=%#x\n", + PDC_DIMM0_SPD_DEV_ADDRESS, PDC_DIMM_SPD_TYPE); + return 1; + } if (spd0 == 0x02) { void *buf; VPRINTK("Start ECC initialization\n"); diff --git a/drivers/auxdisplay/ks0108.c b/drivers/auxdisplay/ks0108.c index 5b93852392b8..816de9eaac26 100644 --- a/drivers/auxdisplay/ks0108.c +++ b/drivers/auxdisplay/ks0108.c @@ -23,6 +23,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/init.h> #include <linux/module.h> #include <linux/kernel.h> @@ -90,17 +92,19 @@ void ks0108_displaystate(unsigned char state) void ks0108_startline(unsigned char startline) { - ks0108_writedata(min(startline,(unsigned char)63) | bit(6) | bit(7)); + ks0108_writedata(min_t(unsigned char, startline, 63) | bit(6) | + bit(7)); } void ks0108_address(unsigned char address) { - ks0108_writedata(min(address,(unsigned char)63) | bit(6)); + ks0108_writedata(min_t(unsigned char, address, 63) | bit(6)); } void ks0108_page(unsigned char page) { - ks0108_writedata(min(page,(unsigned char)7) | bit(3) | bit(4) | bit(5) | bit(7)); + ks0108_writedata(min_t(unsigned char, page, 7) | bit(3) | bit(4) | + bit(5) | bit(7)); } EXPORT_SYMBOL_GPL(ks0108_writedata); @@ -121,52 +125,71 @@ unsigned char ks0108_isinited(void) } EXPORT_SYMBOL_GPL(ks0108_isinited); -/* - * Module Init & Exit - */ - -static int __init ks0108_init(void) +static void ks0108_parport_attach(struct parport *port) { - int result; - int ret = -EINVAL; - - ks0108_parport = parport_find_base(ks0108_port); - if (ks0108_parport == NULL) { - printk(KERN_ERR KS0108_NAME ": ERROR: " - "parport didn't find %i port\n", ks0108_port); - goto none; - } - - ks0108_pardevice = parport_register_device(ks0108_parport, KS0108_NAME, - NULL, NULL, NULL, PARPORT_DEV_EXCL, NULL); - if (ks0108_pardevice == NULL) { - printk(KERN_ERR KS0108_NAME ": ERROR: " - "parport didn't register new device\n"); - goto none; + struct pardev_cb ks0108_cb; + + if (port->base != ks0108_port) + return; + + memset(&ks0108_cb, 0, sizeof(ks0108_cb)); + ks0108_cb.flags = PARPORT_DEV_EXCL; + ks0108_pardevice = parport_register_dev_model(port, KS0108_NAME, + &ks0108_cb, 0); + if (!ks0108_pardevice) { + pr_err("ERROR: parport didn't register new device\n"); + return; } - - result = parport_claim(ks0108_pardevice); - if (result != 0) { - printk(KERN_ERR KS0108_NAME ": ERROR: " - "can't claim %i parport, maybe in use\n", ks0108_port); - ret = result; - goto registered; + if (parport_claim(ks0108_pardevice)) { + pr_err("could not claim access to parport %i. Aborting.\n", + ks0108_port); + goto err_unreg_device; } + ks0108_parport = port; ks0108_inited = 1; - return 0; + return; -registered: +err_unreg_device: parport_unregister_device(ks0108_pardevice); - -none: - return ret; + ks0108_pardevice = NULL; } -static void __exit ks0108_exit(void) +static void ks0108_parport_detach(struct parport *port) { + if (port->base != ks0108_port) + return; + + if (!ks0108_pardevice) { + pr_err("%s: already unregistered.\n", KS0108_NAME); + return; + } + parport_release(ks0108_pardevice); parport_unregister_device(ks0108_pardevice); + ks0108_pardevice = NULL; + ks0108_parport = NULL; +} + +/* + * Module Init & Exit + */ + +static struct parport_driver ks0108_parport_driver = { + .name = "ks0108", + .match_port = ks0108_parport_attach, + .detach = ks0108_parport_detach, + .devmodel = true, +}; + +static int __init ks0108_init(void) +{ + return parport_register_driver(&ks0108_parport_driver); +} + +static void __exit ks0108_exit(void) +{ + parport_unregister_driver(&ks0108_parport_driver); } module_init(ks0108_init); diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c index 81751a49d8bf..56486d92c4e7 100644 --- a/drivers/base/regmap/regcache-rbtree.c +++ b/drivers/base/regmap/regcache-rbtree.c @@ -296,11 +296,20 @@ static int regcache_rbtree_insert_to_block(struct regmap *map, if (!blk) return -ENOMEM; - present = krealloc(rbnode->cache_present, - BITS_TO_LONGS(blklen) * sizeof(*present), GFP_KERNEL); - if (!present) { - kfree(blk); - return -ENOMEM; + if (BITS_TO_LONGS(blklen) > BITS_TO_LONGS(rbnode->blklen)) { + present = krealloc(rbnode->cache_present, + BITS_TO_LONGS(blklen) * sizeof(*present), + GFP_KERNEL); + if (!present) { + kfree(blk); + return -ENOMEM; + } + + memset(present + BITS_TO_LONGS(rbnode->blklen), 0, + (BITS_TO_LONGS(blklen) - BITS_TO_LONGS(rbnode->blklen)) + * sizeof(*present)); + } else { + present = rbnode->cache_present; } /* insert the register value in the correct place in the rbnode block */ diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 4a2ef09e6704..f504232c1ee7 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3756,6 +3756,14 @@ static int mtip_init_cmd(void *data, struct request *rq, unsigned int hctx_idx, struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64; + /* + * For flush requests, request_idx starts at the end of the + * tag space. Since we don't support FLUSH/FUA, simply return + * 0 as there's nothing to be done. + */ + if (request_idx >= MTIP_MAX_COMMAND_SLOTS) + return 0; + cmd->command = dmam_alloc_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ, &cmd->command_dma, GFP_KERNEL); if (!cmd->command) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index d94529d5c8e9..bc67a93aa4f4 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -523,6 +523,7 @@ void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...) # define rbd_assert(expr) ((void) 0) #endif /* !RBD_DEBUG */ +static void rbd_osd_copyup_callback(struct rbd_obj_request *obj_request); static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request); static void rbd_img_parent_read(struct rbd_obj_request *obj_request); static void rbd_dev_remove_parent(struct rbd_device *rbd_dev); @@ -1818,6 +1819,16 @@ static void rbd_osd_stat_callback(struct rbd_obj_request *obj_request) obj_request_done_set(obj_request); } +static void rbd_osd_call_callback(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p\n", __func__, obj_request); + + if (obj_request_img_data_test(obj_request)) + rbd_osd_copyup_callback(obj_request); + else + obj_request_done_set(obj_request); +} + static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, struct ceph_msg *msg) { @@ -1866,6 +1877,8 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, rbd_osd_discard_callback(obj_request); break; case CEPH_OSD_OP_CALL: + rbd_osd_call_callback(obj_request); + break; case CEPH_OSD_OP_NOTIFY_ACK: case CEPH_OSD_OP_WATCH: rbd_osd_trivial_callback(obj_request); @@ -2530,13 +2543,15 @@ out_unwind: } static void -rbd_img_obj_copyup_callback(struct rbd_obj_request *obj_request) +rbd_osd_copyup_callback(struct rbd_obj_request *obj_request) { struct rbd_img_request *img_request; struct rbd_device *rbd_dev; struct page **pages; u32 page_count; + dout("%s: obj %p\n", __func__, obj_request); + rbd_assert(obj_request->type == OBJ_REQUEST_BIO || obj_request->type == OBJ_REQUEST_NODATA); rbd_assert(obj_request_img_data_test(obj_request)); @@ -2563,9 +2578,7 @@ rbd_img_obj_copyup_callback(struct rbd_obj_request *obj_request) if (!obj_request->result) obj_request->xferred = obj_request->length; - /* Finish up with the normal image object callback */ - - rbd_img_obj_callback(obj_request); + obj_request_done_set(obj_request); } static void @@ -2650,7 +2663,6 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request) /* All set, send it off. */ - orig_request->callback = rbd_img_obj_copyup_callback; osdc = &rbd_dev->rbd_client->client->osdc; img_result = rbd_obj_request_submit(osdc, orig_request); if (!img_result) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index ced96777b677..954c0029fb3b 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -369,8 +369,8 @@ static void purge_persistent_gnt(struct xen_blkif *blkif) return; } - if (work_pending(&blkif->persistent_purge_work)) { - pr_alert_ratelimited("Scheduled work from previous purge is still pending, cannot purge list\n"); + if (work_busy(&blkif->persistent_purge_work)) { + pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n"); return; } diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 6d89ed35d80c..7a8a73f1fc04 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -179,6 +179,7 @@ static DEFINE_SPINLOCK(minor_lock); ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) static int blkfront_setup_indirect(struct blkfront_info *info); +static int blkfront_gather_backend_features(struct blkfront_info *info); static int get_id_from_freelist(struct blkfront_info *info) { @@ -1128,8 +1129,10 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, * Add the used indirect page back to the list of * available pages for indirect grefs. */ - indirect_page = pfn_to_page(s->indirect_grants[i]->pfn); - list_add(&indirect_page->lru, &info->indirect_pages); + if (!info->feature_persistent) { + indirect_page = pfn_to_page(s->indirect_grants[i]->pfn); + list_add(&indirect_page->lru, &info->indirect_pages); + } s->indirect_grants[i]->gref = GRANT_INVALID_REF; list_add_tail(&s->indirect_grants[i]->node, &info->grants); } @@ -1519,7 +1522,7 @@ static int blkif_recover(struct blkfront_info *info) info->shadow_free = info->ring.req_prod_pvt; info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff; - rc = blkfront_setup_indirect(info); + rc = blkfront_gather_backend_features(info); if (rc) { kfree(copy); return rc; @@ -1720,20 +1723,13 @@ static void blkfront_setup_discard(struct blkfront_info *info) static int blkfront_setup_indirect(struct blkfront_info *info) { - unsigned int indirect_segments, segs; + unsigned int segs; int err, i; - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-max-indirect-segments", "%u", &indirect_segments, - NULL); - if (err) { - info->max_indirect_segments = 0; + if (info->max_indirect_segments == 0) segs = BLKIF_MAX_SEGMENTS_PER_REQUEST; - } else { - info->max_indirect_segments = min(indirect_segments, - xen_blkif_max_segments); + else segs = info->max_indirect_segments; - } err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE(info)); if (err) @@ -1797,6 +1793,68 @@ out_of_memory: } /* + * Gather all backend feature-* + */ +static int blkfront_gather_backend_features(struct blkfront_info *info) +{ + int err; + int barrier, flush, discard, persistent; + unsigned int indirect_segments; + + info->feature_flush = 0; + + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, + "feature-barrier", "%d", &barrier, + NULL); + + /* + * If there's no "feature-barrier" defined, then it means + * we're dealing with a very old backend which writes + * synchronously; nothing to do. + * + * If there are barriers, then we use flush. + */ + if (!err && barrier) + info->feature_flush = REQ_FLUSH | REQ_FUA; + /* + * And if there is "feature-flush-cache" use that above + * barriers. + */ + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, + "feature-flush-cache", "%d", &flush, + NULL); + + if (!err && flush) + info->feature_flush = REQ_FLUSH; + + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, + "feature-discard", "%d", &discard, + NULL); + + if (!err && discard) + blkfront_setup_discard(info); + + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, + "feature-persistent", "%u", &persistent, + NULL); + if (err) + info->feature_persistent = 0; + else + info->feature_persistent = persistent; + + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, + "feature-max-indirect-segments", "%u", &indirect_segments, + NULL); + if (err) + info->max_indirect_segments = 0; + else + info->max_indirect_segments = min(indirect_segments, + xen_blkif_max_segments); + + return blkfront_setup_indirect(info); +} + +/* * Invoked when the backend is finally 'ready' (and has told produced * the details about the physical device - #sectors, size, etc). */ @@ -1807,7 +1865,6 @@ static void blkfront_connect(struct blkfront_info *info) unsigned int physical_sector_size; unsigned int binfo; int err; - int barrier, flush, discard, persistent; switch (info->connected) { case BLKIF_STATE_CONNECTED: @@ -1864,48 +1921,7 @@ static void blkfront_connect(struct blkfront_info *info) if (err != 1) physical_sector_size = sector_size; - info->feature_flush = 0; - - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-barrier", "%d", &barrier, - NULL); - - /* - * If there's no "feature-barrier" defined, then it means - * we're dealing with a very old backend which writes - * synchronously; nothing to do. - * - * If there are barriers, then we use flush. - */ - if (!err && barrier) - info->feature_flush = REQ_FLUSH | REQ_FUA; - /* - * And if there is "feature-flush-cache" use that above - * barriers. - */ - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-flush-cache", "%d", &flush, - NULL); - - if (!err && flush) - info->feature_flush = REQ_FLUSH; - - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-discard", "%d", &discard, - NULL); - - if (!err && discard) - blkfront_setup_discard(info); - - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-persistent", "%u", &persistent, - NULL); - if (err) - info->feature_persistent = 0; - else - info->feature_persistent = persistent; - - err = blkfront_setup_indirect(info); + err = blkfront_gather_backend_features(info); if (err) { xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s", info->xbdev->otherend); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index fb655e8d1e3b..763301c7828c 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -496,10 +496,9 @@ static void zram_meta_free(struct zram_meta *meta, u64 disksize) kfree(meta); } -static struct zram_meta *zram_meta_alloc(int device_id, u64 disksize) +static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize) { size_t num_pages; - char pool_name[8]; struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL); if (!meta) @@ -512,7 +511,6 @@ static struct zram_meta *zram_meta_alloc(int device_id, u64 disksize) goto out_error; } - snprintf(pool_name, sizeof(pool_name), "zram%d", device_id); meta->mem_pool = zs_create_pool(pool_name, GFP_NOIO | __GFP_HIGHMEM); if (!meta->mem_pool) { pr_err("Error creating memory pool\n"); @@ -1031,7 +1029,7 @@ static ssize_t disksize_store(struct device *dev, return -EINVAL; disksize = PAGE_ALIGN(disksize); - meta = zram_meta_alloc(zram->disk->first_minor, disksize); + meta = zram_meta_alloc(zram->disk->disk_name, disksize); if (!meta) return -ENOMEM; diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index da8faf78536a..5643b65cee20 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -429,7 +429,7 @@ static int hwrng_fillfn(void *unused) static void start_khwrngd(void) { hwrng_fill = kthread_run(hwrng_fillfn, NULL, "hwrng"); - if (hwrng_fill == ERR_PTR(-ENOMEM)) { + if (IS_ERR(hwrng_fill)) { pr_err("hwrng_fill thread creation failed"); hwrng_fill = NULL; } diff --git a/drivers/char/misc.c b/drivers/char/misc.c index fdb0f9b3fe45..8069b361b8dd 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -243,17 +243,15 @@ int misc_register(struct miscdevice * misc) * @misc: device to unregister * * Unregister a miscellaneous device that was previously - * successfully registered with misc_register(). Success - * is indicated by a zero return, a negative errno code - * indicates an error. + * successfully registered with misc_register(). */ -int misc_deregister(struct miscdevice *misc) +void misc_deregister(struct miscdevice *misc) { int i = DYNAMIC_MINORS - misc->minor - 1; if (WARN_ON(list_empty(&misc->list))) - return -EINVAL; + return; mutex_lock(&misc_mtx); list_del(&misc->list); @@ -261,7 +259,6 @@ int misc_deregister(struct miscdevice *misc) if (i < DYNAMIC_MINORS && i >= 0) clear_bit(i, misc_minors); mutex_unlock(&misc_mtx); - return 0; } EXPORT_SYMBOL(misc_register); @@ -281,10 +278,9 @@ static char *misc_devnode(struct device *dev, umode_t *mode) static int __init misc_init(void) { int err; + struct proc_dir_entry *ret; -#ifdef CONFIG_PROC_FS - proc_create("misc", 0, NULL, &misc_proc_fops); -#endif + ret = proc_create("misc", 0, NULL, &misc_proc_fops); misc_class = class_create(THIS_MODULE, "misc"); err = PTR_ERR(misc_class); if (IS_ERR(misc_class)) @@ -300,7 +296,8 @@ fail_printk: printk("unable to get major %d for misc devices\n", MISC_MAJOR); class_destroy(misc_class); fail_remove: - remove_proc_entry("misc", NULL); + if (ret) + remove_proc_entry("misc", NULL); return err; } subsys_initcall(misc_init); diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c index 9df78e2cc45d..97c2d8d433d6 100644 --- a/drivers/char/nvram.c +++ b/drivers/char/nvram.c @@ -702,7 +702,7 @@ static void atari_proc_infos(unsigned char *nvram, struct seq_file *seq, seq_printf(seq, "%ds%s\n", nvram[10], nvram[10] < 8 ? ", no memory test" : ""); - vmode = (nvram[14] << 8) || nvram[15]; + vmode = (nvram[14] << 8) | nvram[15]; seq_printf(seq, "Video mode : %s colors, %d columns, %s %s monitor\n", colors[vmode & 7], diff --git a/drivers/char/toshiba.c b/drivers/char/toshiba.c index 014c9d90d297..f5a45d887a37 100644 --- a/drivers/char/toshiba.c +++ b/drivers/char/toshiba.c @@ -430,7 +430,7 @@ static int tosh_probe(void) int i,major,minor,day,year,month,flag; unsigned char signature[7] = { 0x54,0x4f,0x53,0x48,0x49,0x42,0x41 }; SMMRegisters regs; - void __iomem *bios = ioremap_cache(0xf0000, 0x10000); + void __iomem *bios = ioremap(0xf0000, 0x10000); if (!bios) return -ENOMEM; diff --git a/drivers/char/xillybus/xillybus_pcie.c b/drivers/char/xillybus/xillybus_pcie.c index d8266bc2ae35..9418300214e9 100644 --- a/drivers/char/xillybus/xillybus_pcie.c +++ b/drivers/char/xillybus/xillybus_pcie.c @@ -193,14 +193,16 @@ static int xilly_probe(struct pci_dev *pdev, } /* - * In theory, an attempt to set the DMA mask to 64 and dma_using_dac=1 - * is the right thing. But some unclever PCIe drivers report it's OK - * when the hardware drops those 64-bit PCIe packets. So trust - * nobody and use 32 bits DMA addressing in any case. + * Some (old and buggy?) hardware drops 64-bit addressed PCIe packets, + * even when the PCIe driver claims that a 64-bit mask is OK. On the + * other hand, on some architectures, 64-bit addressing is mandatory. + * So go for the 64-bit mask only when failing is the other option. */ if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { endpoint->dma_using_dac = 0; + } else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { + endpoint->dma_using_dac = 1; } else { dev_err(endpoint->dev, "Failed to set DMA mask. Aborting.\n"); return -ENODEV; diff --git a/drivers/clk/pxa/clk-pxa3xx.c b/drivers/clk/pxa/clk-pxa3xx.c index 4b93a1efb36d..ac03ba49e9d1 100644 --- a/drivers/clk/pxa/clk-pxa3xx.c +++ b/drivers/clk/pxa/clk-pxa3xx.c @@ -126,7 +126,7 @@ PARENTS(pxa3xx_ac97_bus) = { "ring_osc_60mhz", "ac97" }; PARENTS(pxa3xx_sbus) = { "ring_osc_60mhz", "system_bus" }; PARENTS(pxa3xx_smemcbus) = { "ring_osc_60mhz", "smemc" }; -#define CKEN_AB(bit) ((CKEN_ ## bit > 31) ? &CKENA : &CKENB) +#define CKEN_AB(bit) ((CKEN_ ## bit > 31) ? &CKENB : &CKENA) #define PXA3XX_CKEN(dev_id, con_id, parents, mult_lp, div_lp, mult_hp, \ div_hp, bit, is_lp, flags) \ PXA_CKEN(dev_id, con_id, bit, parents, mult_lp, div_lp, \ diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index b8ff3c64cc45..c96de14036a0 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -661,6 +661,9 @@ static void sh_cmt_clocksource_suspend(struct clocksource *cs) { struct sh_cmt_channel *ch = cs_to_sh_cmt(cs); + if (!ch->cs_enabled) + return; + sh_cmt_stop(ch, FLAG_CLOCKSOURCE); pm_genpd_syscore_poweroff(&ch->cmt->pdev->dev); } @@ -669,6 +672,9 @@ static void sh_cmt_clocksource_resume(struct clocksource *cs) { struct sh_cmt_channel *ch = cs_to_sh_cmt(cs); + if (!ch->cs_enabled) + return; + pm_genpd_syscore_poweron(&ch->cmt->pdev->dev); sh_cmt_start(ch, FLAG_CLOCKSOURCE); } diff --git a/drivers/clocksource/timer-imx-gpt.c b/drivers/clocksource/timer-imx-gpt.c index 2d59038dec43..86c7eb66bdfb 100644 --- a/drivers/clocksource/timer-imx-gpt.c +++ b/drivers/clocksource/timer-imx-gpt.c @@ -462,6 +462,7 @@ void __init mxc_timer_init(unsigned long pbase, int irq, enum imx_gpt_type type) BUG_ON(!imxtm->base); imxtm->type = type; + imxtm->irq = irq; _mxc_timer_init(imxtm); } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 6da25c10bdfd..9bb09ce98d04 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1002,7 +1002,7 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy) int ret = 0; /* Some related CPUs might not be present (physically hotplugged) */ - for_each_cpu_and(j, policy->related_cpus, cpu_present_mask) { + for_each_cpu(j, policy->real_cpus) { if (j == policy->kobj_cpu) continue; @@ -1019,7 +1019,7 @@ static void cpufreq_remove_dev_symlink(struct cpufreq_policy *policy) unsigned int j; /* Some related CPUs might not be present (physically hotplugged) */ - for_each_cpu_and(j, policy->related_cpus, cpu_present_mask) { + for_each_cpu(j, policy->real_cpus) { if (j == policy->kobj_cpu) continue; @@ -1163,11 +1163,14 @@ static struct cpufreq_policy *cpufreq_policy_alloc(struct device *dev) if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) goto err_free_cpumask; + if (!zalloc_cpumask_var(&policy->real_cpus, GFP_KERNEL)) + goto err_free_rcpumask; + ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &dev->kobj, "cpufreq"); if (ret) { pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret); - goto err_free_rcpumask; + goto err_free_real_cpus; } INIT_LIST_HEAD(&policy->policy_list); @@ -1184,6 +1187,8 @@ static struct cpufreq_policy *cpufreq_policy_alloc(struct device *dev) return policy; +err_free_real_cpus: + free_cpumask_var(policy->real_cpus); err_free_rcpumask: free_cpumask_var(policy->related_cpus); err_free_cpumask: @@ -1234,6 +1239,7 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy, bool notify) write_unlock_irqrestore(&cpufreq_driver_lock, flags); cpufreq_policy_put_kobj(policy, notify); + free_cpumask_var(policy->real_cpus); free_cpumask_var(policy->related_cpus); free_cpumask_var(policy->cpus); kfree(policy); @@ -1258,14 +1264,17 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) pr_debug("adding CPU %u\n", cpu); - /* - * Only possible if 'cpu' wasn't physically present earlier and we are - * here from subsys_interface add callback. A hotplug notifier will - * follow and we will handle it like logical CPU hotplug then. For now, - * just create the sysfs link. - */ - if (cpu_is_offline(cpu)) - return add_cpu_dev_symlink(per_cpu(cpufreq_cpu_data, cpu), cpu); + if (cpu_is_offline(cpu)) { + /* + * Only possible if we are here from the subsys_interface add + * callback. A hotplug notifier will follow and we will handle + * it as CPU online then. For now, just create the sysfs link, + * unless there is no policy or the link is already present. + */ + policy = per_cpu(cpufreq_cpu_data, cpu); + return policy && !cpumask_test_and_set_cpu(cpu, policy->real_cpus) + ? add_cpu_dev_symlink(policy, cpu) : 0; + } if (!down_read_trylock(&cpufreq_rwsem)) return 0; @@ -1307,6 +1316,10 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) /* related cpus should atleast have policy->cpus */ cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus); + /* Remember which CPUs have been present at the policy creation time. */ + if (!recover_policy) + cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask); + /* * affected cpus must always be the one, which are online. We aren't * managing offline cpus here. @@ -1420,8 +1433,7 @@ nomem_out: return ret; } -static int __cpufreq_remove_dev_prepare(struct device *dev, - struct subsys_interface *sif) +static int __cpufreq_remove_dev_prepare(struct device *dev) { unsigned int cpu = dev->id; int ret = 0; @@ -1437,10 +1449,8 @@ static int __cpufreq_remove_dev_prepare(struct device *dev, if (has_target()) { ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); - if (ret) { + if (ret) pr_err("%s: Failed to stop governor\n", __func__); - return ret; - } } down_write(&policy->rwsem); @@ -1473,8 +1483,7 @@ static int __cpufreq_remove_dev_prepare(struct device *dev, return ret; } -static int __cpufreq_remove_dev_finish(struct device *dev, - struct subsys_interface *sif) +static int __cpufreq_remove_dev_finish(struct device *dev) { unsigned int cpu = dev->id; int ret; @@ -1492,10 +1501,8 @@ static int __cpufreq_remove_dev_finish(struct device *dev, /* If cpu is last user of policy, free policy */ if (has_target()) { ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); - if (ret) { + if (ret) pr_err("%s: Failed to exit governor\n", __func__); - return ret; - } } /* @@ -1506,10 +1513,6 @@ static int __cpufreq_remove_dev_finish(struct device *dev, if (cpufreq_driver->exit) cpufreq_driver->exit(policy); - /* Free the policy only if the driver is getting removed. */ - if (sif) - cpufreq_policy_free(policy, true); - return 0; } @@ -1521,40 +1524,39 @@ static int __cpufreq_remove_dev_finish(struct device *dev, static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) { unsigned int cpu = dev->id; - int ret; - - /* - * Only possible if 'cpu' is getting physically removed now. A hotplug - * notifier should have already been called and we just need to remove - * link or free policy here. - */ - if (cpu_is_offline(cpu)) { - struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); - struct cpumask mask; + struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); - if (!policy) - return; + if (!policy) + return; - cpumask_copy(&mask, policy->related_cpus); - cpumask_clear_cpu(cpu, &mask); + if (cpu_online(cpu)) { + __cpufreq_remove_dev_prepare(dev); + __cpufreq_remove_dev_finish(dev); + } - /* - * Free policy only if all policy->related_cpus are removed - * physically. - */ - if (cpumask_intersects(&mask, cpu_present_mask)) { - remove_cpu_dev_symlink(policy, cpu); - return; - } + cpumask_clear_cpu(cpu, policy->real_cpus); + if (cpumask_empty(policy->real_cpus)) { cpufreq_policy_free(policy, true); return; } - ret = __cpufreq_remove_dev_prepare(dev, sif); + if (cpu != policy->kobj_cpu) { + remove_cpu_dev_symlink(policy, cpu); + } else { + /* + * The CPU owning the policy object is going away. Move it to + * another suitable CPU. + */ + unsigned int new_cpu = cpumask_first(policy->real_cpus); + struct device *new_dev = get_cpu_device(new_cpu); - if (!ret) - __cpufreq_remove_dev_finish(dev, sif); + dev_dbg(dev, "%s: Moving policy object to CPU%u\n", __func__, new_cpu); + + sysfs_remove_link(&new_dev->kobj, "cpufreq"); + policy->kobj_cpu = new_cpu; + WARN_ON(kobject_move(&policy->kobj, &new_dev->kobj)); + } } static void handle_update(struct work_struct *work) @@ -2393,11 +2395,11 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, break; case CPU_DOWN_PREPARE: - __cpufreq_remove_dev_prepare(dev, NULL); + __cpufreq_remove_dev_prepare(dev); break; case CPU_POST_DEAD: - __cpufreq_remove_dev_finish(dev, NULL); + __cpufreq_remove_dev_finish(dev); break; case CPU_DOWN_FAILED: diff --git a/drivers/cpufreq/exynos-cpufreq.c b/drivers/cpufreq/exynos-cpufreq.c index ae5b2bd3a978..fa3dd840a837 100644 --- a/drivers/cpufreq/exynos-cpufreq.c +++ b/drivers/cpufreq/exynos-cpufreq.c @@ -180,7 +180,7 @@ static int exynos_cpufreq_probe(struct platform_device *pdev) ret = exynos5250_cpufreq_init(exynos_info); } else { pr_err("%s: Unknown SoC type\n", __func__); - return -ENODEV; + ret = -ENODEV; } if (ret) @@ -188,12 +188,14 @@ static int exynos_cpufreq_probe(struct platform_device *pdev) if (exynos_info->set_freq == NULL) { dev_err(&pdev->dev, "No set_freq function (ERR)\n"); + ret = -EINVAL; goto err_vdd_arm; } arm_regulator = regulator_get(NULL, "vdd_arm"); if (IS_ERR(arm_regulator)) { dev_err(&pdev->dev, "failed to get resource vdd_arm\n"); + ret = -EINVAL; goto err_vdd_arm; } @@ -225,7 +227,7 @@ err_cpufreq_reg: regulator_put(arm_regulator); err_vdd_arm: kfree(exynos_info); - return -EINVAL; + return ret; } static struct platform_driver exynos_cpufreq_platdrv = { diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 15ada47bb720..fcb929ec5304 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -681,6 +681,7 @@ static struct cpu_defaults knl_params = { .get_max = core_get_max_pstate, .get_min = core_get_min_pstate, .get_turbo = knl_get_turbo_pstate, + .get_scaling = core_get_scaling, .set = core_set_pstate, }, }; diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c index e362860c2b50..cd593c1f66dc 100644 --- a/drivers/cpufreq/loongson2_cpufreq.c +++ b/drivers/cpufreq/loongson2_cpufreq.c @@ -20,7 +20,7 @@ #include <asm/clock.h> #include <asm/idle.h> -#include <asm/mach-loongson/loongson.h> +#include <asm/mach-loongson64/loongson.h> static uint nowait; diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index dae1e8099969..f9c78751989e 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -909,13 +909,14 @@ static int ahash_final_ctx(struct ahash_request *req) state->buflen_1; u32 *sh_desc = ctx->sh_desc_fin, *desc; dma_addr_t ptr = ctx->sh_desc_fin_dma; - int sec4_sg_bytes; + int sec4_sg_bytes, sec4_sg_src_index; int digestsize = crypto_ahash_digestsize(ahash); struct ahash_edesc *edesc; int ret = 0; int sh_len; - sec4_sg_bytes = (1 + (buflen ? 1 : 0)) * sizeof(struct sec4_sg_entry); + sec4_sg_src_index = 1 + (buflen ? 1 : 0); + sec4_sg_bytes = sec4_sg_src_index * sizeof(struct sec4_sg_entry); /* allocate space for base edesc and hw desc commands, link tables */ edesc = kmalloc(sizeof(struct ahash_edesc) + DESC_JOB_IO_LEN + @@ -942,7 +943,7 @@ static int ahash_final_ctx(struct ahash_request *req) state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1, buf, state->buf_dma, buflen, last_buflen); - (edesc->sec4_sg + sec4_sg_bytes - 1)->len |= SEC4_SG_LEN_FIN; + (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= SEC4_SG_LEN_FIN; edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, sec4_sg_bytes, DMA_TO_DEVICE); diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 7ba495f75370..402631a19a11 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -905,7 +905,6 @@ static int ablk_perform(struct ablkcipher_request *req, int encrypt) crypt->mode |= NPE_OP_NOT_IN_PLACE; /* This was never tested by Intel * for more than one dst buffer, I think. */ - BUG_ON(req->dst->length < nbytes); req_ctx->dst = NULL; if (!chainup_buffers(dev, req->dst, nbytes, &dst_hook, flags, DMA_FROM_DEVICE)) diff --git a/drivers/crypto/nx/nx-sha256.c b/drivers/crypto/nx/nx-sha256.c index 08f8d5cd6334..becb738c897b 100644 --- a/drivers/crypto/nx/nx-sha256.c +++ b/drivers/crypto/nx/nx-sha256.c @@ -71,7 +71,6 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, struct sha256_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; - struct nx_sg *in_sg; struct nx_sg *out_sg; u64 to_process = 0, leftover, total; unsigned long irq_flags; @@ -97,7 +96,6 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; - in_sg = nx_ctx->in_sg; max_sg_len = min_t(u64, nx_ctx->ap->sglen, nx_driver.of.max_sg_len/sizeof(struct nx_sg)); max_sg_len = min_t(u64, max_sg_len, @@ -114,17 +112,12 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, } do { - /* - * to_process: the SHA256_BLOCK_SIZE data chunk to process in - * this update. This value is also restricted by the sg list - * limits. - */ - to_process = total - to_process; - to_process = to_process & ~(SHA256_BLOCK_SIZE - 1); + int used_sgs = 0; + struct nx_sg *in_sg = nx_ctx->in_sg; if (buf_len) { data_len = buf_len; - in_sg = nx_build_sg_list(nx_ctx->in_sg, + in_sg = nx_build_sg_list(in_sg, (u8 *) sctx->buf, &data_len, max_sg_len); @@ -133,15 +126,27 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, rc = -EINVAL; goto out; } + used_sgs = in_sg - nx_ctx->in_sg; } + /* to_process: SHA256_BLOCK_SIZE aligned chunk to be + * processed in this iteration. This value is restricted + * by sg list limits and number of sgs we already used + * for leftover data. (see above) + * In ideal case, we could allow NX_PAGE_SIZE * max_sg_len, + * but because data may not be aligned, we need to account + * for that too. */ + to_process = min_t(u64, total, + (max_sg_len - 1 - used_sgs) * NX_PAGE_SIZE); + to_process = to_process & ~(SHA256_BLOCK_SIZE - 1); + data_len = to_process - buf_len; in_sg = nx_build_sg_list(in_sg, (u8 *) data, &data_len, max_sg_len); nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); - to_process = (data_len + buf_len); + to_process = data_len + buf_len; leftover = total - to_process; /* diff --git a/drivers/crypto/nx/nx-sha512.c b/drivers/crypto/nx/nx-sha512.c index aff0fe58eac0..b6e183d58d73 100644 --- a/drivers/crypto/nx/nx-sha512.c +++ b/drivers/crypto/nx/nx-sha512.c @@ -71,7 +71,6 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, struct sha512_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; - struct nx_sg *in_sg; struct nx_sg *out_sg; u64 to_process, leftover = 0, total; unsigned long irq_flags; @@ -97,7 +96,6 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; - in_sg = nx_ctx->in_sg; max_sg_len = min_t(u64, nx_ctx->ap->sglen, nx_driver.of.max_sg_len/sizeof(struct nx_sg)); max_sg_len = min_t(u64, max_sg_len, @@ -114,18 +112,12 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, } do { - /* - * to_process: the SHA512_BLOCK_SIZE data chunk to process in - * this update. This value is also restricted by the sg list - * limits. - */ - to_process = total - leftover; - to_process = to_process & ~(SHA512_BLOCK_SIZE - 1); - leftover = total - to_process; + int used_sgs = 0; + struct nx_sg *in_sg = nx_ctx->in_sg; if (buf_len) { data_len = buf_len; - in_sg = nx_build_sg_list(nx_ctx->in_sg, + in_sg = nx_build_sg_list(in_sg, (u8 *) sctx->buf, &data_len, max_sg_len); @@ -133,8 +125,20 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, rc = -EINVAL; goto out; } + used_sgs = in_sg - nx_ctx->in_sg; } + /* to_process: SHA512_BLOCK_SIZE aligned chunk to be + * processed in this iteration. This value is restricted + * by sg list limits and number of sgs we already used + * for leftover data. (see above) + * In ideal case, we could allow NX_PAGE_SIZE * max_sg_len, + * but because data may not be aligned, we need to account + * for that too. */ + to_process = min_t(u64, total, + (max_sg_len - 1 - used_sgs) * NX_PAGE_SIZE); + to_process = to_process & ~(SHA512_BLOCK_SIZE - 1); + data_len = to_process - buf_len; in_sg = nx_build_sg_list(in_sg, (u8 *) data, &data_len, max_sg_len); @@ -146,7 +150,7 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, goto out; } - to_process = (data_len + buf_len); + to_process = data_len + buf_len; leftover = total - to_process; /* diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 067402c7c2a9..df427c0e9e7b 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -73,7 +73,8 @@ ICP_QAT_HW_CIPHER_KEY_CONVERT, \ ICP_QAT_HW_CIPHER_DECRYPT) -static atomic_t active_dev; +static DEFINE_MUTEX(algs_lock); +static unsigned int active_devs; struct qat_alg_buf { uint32_t len; @@ -1280,7 +1281,10 @@ static struct crypto_alg qat_algs[] = { { int qat_algs_register(void) { - if (atomic_add_return(1, &active_dev) == 1) { + int ret = 0; + + mutex_lock(&algs_lock); + if (++active_devs == 1) { int i; for (i = 0; i < ARRAY_SIZE(qat_algs); i++) @@ -1289,21 +1293,25 @@ int qat_algs_register(void) CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC : CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC; - return crypto_register_algs(qat_algs, ARRAY_SIZE(qat_algs)); + ret = crypto_register_algs(qat_algs, ARRAY_SIZE(qat_algs)); } - return 0; + mutex_unlock(&algs_lock); + return ret; } int qat_algs_unregister(void) { - if (atomic_sub_return(1, &active_dev) == 0) - return crypto_unregister_algs(qat_algs, ARRAY_SIZE(qat_algs)); - return 0; + int ret = 0; + + mutex_lock(&algs_lock); + if (--active_devs == 0) + ret = crypto_unregister_algs(qat_algs, ARRAY_SIZE(qat_algs)); + mutex_unlock(&algs_lock); + return ret; } int qat_algs_init(void) { - atomic_set(&active_dev, 0); crypto_get_default_rng(); return 0; } diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 59892126d175..d3629b7482dd 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -48,6 +48,8 @@ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |\ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)) +#define ATC_MAX_DSCR_TRIALS 10 + /* * Initial number of descriptors to allocate for each channel. This could * be increased during dma usage. @@ -285,28 +287,19 @@ static struct at_desc *atc_get_desc_by_cookie(struct at_dma_chan *atchan, * * @current_len: the number of bytes left before reading CTRLA * @ctrla: the value of CTRLA - * @desc: the descriptor containing the transfer width */ -static inline int atc_calc_bytes_left(int current_len, u32 ctrla, - struct at_desc *desc) +static inline int atc_calc_bytes_left(int current_len, u32 ctrla) { - return current_len - ((ctrla & ATC_BTSIZE_MAX) << desc->tx_width); -} + u32 btsize = (ctrla & ATC_BTSIZE_MAX); + u32 src_width = ATC_REG_TO_SRC_WIDTH(ctrla); -/** - * atc_calc_bytes_left_from_reg - calculates the number of bytes left according - * to the current value of CTRLA. - * - * @current_len: the number of bytes left before reading CTRLA |